From 862382aa504c5dcd83cd5b38c40868ee8c2b4228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Tue, 11 Jul 2017 21:43:13 -0300 Subject: [PATCH] Several Bugfixes on benchmarks; ProbabilityDistribution on arima and quantreg; Including models.seasonal on setup.py --- pyFTS/benchmarks/Measures.py | 6 +- pyFTS/benchmarks/Util.py | 1 - pyFTS/benchmarks/arima.py | 9 +++ pyFTS/benchmarks/benchmarks.py | 5 +- pyFTS/benchmarks/quantreg.py | 67 +++++++++++++------ .../probabilistic/ProbabilityDistribution.py | 10 +++ setup.py | 8 +-- 7 files changed, 78 insertions(+), 28 deletions(-) diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index 0a84dd5..ae6444f 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -227,15 +227,15 @@ def crps(targets, densities): Fa = heavyside_cdf(densities.columns, targets) for k in densities.index: _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) - elif isinstance(densities, ProbabilityDistribution): + elif isinstance(densities, ProbabilityDistribution.ProbabilityDistribution): l = len(densities.bins) n = 1 - Fa = heavyside_cdf(densities.bin, targets) + Fa = heavyside_cdf(densities.bins, targets) _crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins]) elif isinstance(densities, list): l = len(densities[0].bins) n = len(densities) - Fa = heavyside_cdf(densities[0].bin, targets) + Fa = heavyside_cdf(densities[0].bins, targets) for df in densities: _crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins]) diff --git a/pyFTS/benchmarks/Util.py b/pyFTS/benchmarks/Util.py index 7407fcd..ba72ed7 100644 --- a/pyFTS/benchmarks/Util.py +++ b/pyFTS/benchmarks/Util.py @@ -8,7 +8,6 @@ import matplotlib.colors as pltcolors import matplotlib.pyplot as plt import numpy as np import pandas as pd -from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser #from mpl_toolkits.mplot3d import Axes3D diff --git a/pyFTS/benchmarks/arima.py b/pyFTS/benchmarks/arima.py index 4b4a41d..83ca1f9 100644 --- a/pyFTS/benchmarks/arima.py +++ b/pyFTS/benchmarks/arima.py @@ -40,6 +40,9 @@ class ARIMA(fts.FTS): self.order = self.p + self.q self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha) + if self.indexer is not None: + data = self.indexer.get_data(data) + data = self.doTransformations(data, updateUoD=True) old_fit = self.model_fit @@ -60,6 +63,9 @@ class ARIMA(fts.FTS): if self.model_fit is None: return np.nan + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + ndata = np.array(self.doTransformations(data)) l = len(ndata) @@ -151,6 +157,9 @@ class ARIMA(fts.FTS): def forecastDistribution(self, data, **kwargs): + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + sigma = np.sqrt(self.model_fit.sigma2) l = len(data) diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py index 18e3414..41441fa 100644 --- a/pyFTS/benchmarks/benchmarks.py +++ b/pyFTS/benchmarks/benchmarks.py @@ -825,6 +825,7 @@ def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to cb = fig.colorbar(pc, ax=ax) cb.set_label('Density') +from pyFTS.common import Transformations def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from): from matplotlib.patches import Rectangle @@ -836,7 +837,9 @@ def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_fr for y in dt.bins: s = Rectangle((time_from+ct, y), 1, dt.resolution, fill=True, lw = 0) patches.append(s) - colors.append(dt.distribution[y]*5) + colors.append(dt.density(y)) + scale = Transformations.Scale() + colors = scale.apply(colors) pc = PatchCollection(patches=patches, match_original=True) pc.set_clim([0, 1]) pc.set_cmap(cmap) diff --git a/pyFTS/benchmarks/quantreg.py b/pyFTS/benchmarks/quantreg.py index a79b53b..74523f3 100644 --- a/pyFTS/benchmarks/quantreg.py +++ b/pyFTS/benchmarks/quantreg.py @@ -7,7 +7,7 @@ from statsmodels.regression.quantile_regression import QuantReg from statsmodels.tsa.tsatools import lagmat from pyFTS import fts from pyFTS.common import SortedCollection - +from pyFTS.probabilistic import ProbabilityDistribution class QuantileRegression(fts.FTS): """Façade for statsmodels.regression.quantile_regression""" @@ -32,6 +32,9 @@ class QuantileRegression(fts.FTS): def train(self, data, sets, order=1, parameters=None): self.order = order + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + tmp = np.array(self.doTransformations(data, updateUoD=True)) lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep') @@ -75,6 +78,10 @@ class QuantileRegression(fts.FTS): return [lo, up] def forecast(self, data, **kwargs): + + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + ndata = np.array(self.doTransformations(data)) l = len(ndata) @@ -91,6 +98,9 @@ class QuantileRegression(fts.FTS): def forecastInterval(self, data, **kwargs): + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + ndata = np.array(self.doTransformations(data)) l = len(ndata) @@ -106,6 +116,10 @@ class QuantileRegression(fts.FTS): return ret def forecastAheadInterval(self, data, steps, **kwargs): + + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + ndata = np.array(self.doTransformations(data)) smoothing = kwargs.get("smoothing", 0.9) @@ -128,35 +142,50 @@ class QuantileRegression(fts.FTS): return ret[-steps:] - def forecastAheadDistribution(self, data, steps, **kwargs): + def forecastDistribution(self, data, **kwargs): + + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + ndata = np.array(self.doTransformations(data)) - percentile_size = (self.original_max - self.original_min) / 100 + ret = [] - resolution = kwargs.get('resolution', percentile_size) + l = len(data) - grid = self.get_empty_grid(self.original_min, self.original_max, resolution) + for k in np.arange(self.order, l + 1): + dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", + uod=[self.original_min, self.original_max]) + intervals = [] + for qt in self.dist_qt: + sample = ndata[k - self.order: k] + intl = self.point_to_interval(sample, qt[0], qt[1]) + intervals.append(intl) - index = SortedCollection.SortedCollection(iterable=grid.keys()) + dist.appendInterval(intervals) + + ret.append(dist) + + return ret + + def forecastAheadDistribution(self, data, steps, **kwargs): + + if self.indexer is not None and isinstance(data, pd.DataFrame): + data = self.indexer.get_data(data) + + ndata = np.array(self.doTransformations(data)) ret = [] - tmps = [] - grids = {} for k in np.arange(self.order, steps + self.order): - grids[k] = self.get_empty_grid(self.original_min, self.original_max, resolution) - - for qt in self.dist_qt: + dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", + uod=[self.original_min, self.original_max]) intervals = [[k, k] for k in ndata[-self.order:]] - for k in np.arange(self.order, steps + self.order): + for qt in self.dist_qt: intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1]) intervals.append(intl) - grids[k] = self.gridCount(grids[k], resolution, index, intl) + dist.appendInterval(intervals) - for k in np.arange(self.order, steps + self.order): - tmp = np.array([grids[k][i] for i in sorted(grids[k])]) - ret.append(tmp / sum(tmp)) + ret.append(dist) - grid = self.get_empty_grid(self.original_min, self.original_max, resolution) - df = pd.DataFrame(ret, columns=sorted(grid)) - return df \ No newline at end of file + return ret \ No newline at end of file diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py index 4bd580e..f3af582 100644 --- a/pyFTS/probabilistic/ProbabilityDistribution.py +++ b/pyFTS/probabilistic/ProbabilityDistribution.py @@ -69,6 +69,12 @@ class ProbabilityDistribution(object): def density(self, values): ret = [] + scalar = False + + if not isinstance(values, list): + values = [values] + scalar = True + for k in values: if self.type == "histogram": v = self.index.find_ge(k) @@ -79,6 +85,10 @@ class ProbabilityDistribution(object): else: v = self.index.find_ge(k) ret.append(self.distribution[v]) + + if scalar: + return ret[0] + return ret def cdf(self, value): diff --git a/setup.py b/setup.py index 208ed04..26c677f 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,14 @@ from distutils.core import setup setup( name = 'pyFTS', - packages = ['pyFTS','pyFTS.benchmarks','pyFTS.common','pyFTS.data', 'pyFTS.ensemble','pyFTS.models','pyFTS.partitioners','pyFTS.probabilistic','pyFTS.tests'], - package_data = {'benchmarks':['*'], 'common':['*'], 'data':['*'], 'ensemble':['*'], 'models':['*'], 'partitioners':['*'], 'probabilistic':['*'], 'tests':['*']}, - version = '1.1', + packages = ['pyFTS','pyFTS.benchmarks','pyFTS.common','pyFTS.data', 'pyFTS.ensemble','pyFTS.models','pyFTS.models.seasonal','pyFTS.partitioners','pyFTS.probabilistic','pyFTS.tests'], + package_data = {'benchmarks':['*'], 'common':['*'], 'data':['*'], 'ensemble':['*'], 'models':['*'], 'seasonal':['*'], 'partitioners':['*'], 'probabilistic':['*'], 'tests':['*']}, + version = '1.2', description = 'Fuzzy Time Series for Python', author = 'Petronio Candido L. e Silva', author_email = 'petronio.candido@gmail.com', url = 'https://github.com/petroniocandido/pyFTS', - download_url = 'https://github.com/petroniocandido/pyFTS/archive/pkg1.1.tar.gz', + download_url = 'https://github.com/petroniocandido/pyFTS/archive/pkg1.2.tar.gz', keywords = ['forecasting', 'fuzzy time series', 'fuzzy', 'time series forecasting'], classifiers = [], )