diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py index abdc9af..2796aca 100644 --- a/benchmarks/Measures.py +++ b/benchmarks/Measures.py @@ -272,7 +272,7 @@ def get_point_statistics(data, model, indexer=None): def get_interval_statistics(original, model): - """Condensate all measures for interval forecasters""" + """Condensate all measures for point_to_interval forecasters""" ret = list() forecasts = model.forecastInterval(original) ret.append(round(sharpness(forecasts), 2)) diff --git a/benchmarks/arima.py b/benchmarks/arima.py index 3f07502..e50baf7 100644 --- a/benchmarks/arima.py +++ b/benchmarks/arima.py @@ -2,9 +2,11 @@ # -*- coding: utf8 -*- import numpy as np +import pandas as pd from statsmodels.tsa.arima_model import ARIMA as stats_arima import scipy.stats as st from pyFTS import fts +from pyFTS.common import SortedCollection class ARIMA(fts.FTS): @@ -109,7 +111,7 @@ class ARIMA(fts.FTS): ret.append(tmp) - #ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True) + #ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], point_to_interval=True) return ret @@ -117,7 +119,7 @@ class ARIMA(fts.FTS): if self.model_fit is None: return np.nan - smoothing = kwargs.get("smoothing",0.2) + smoothing = kwargs.get("smoothing",0.5) sigma = np.sqrt(self.model_fit.sigma2) @@ -125,7 +127,7 @@ class ARIMA(fts.FTS): l = len(ndata) - means = self.forecastAhead(data,steps,kwargs) + nmeans = self.forecastAhead(ndata, steps, **kwargs) ret = [] @@ -134,11 +136,52 @@ class ARIMA(fts.FTS): hsigma = (1 + k*smoothing)*sigma - tmp.append(means[k] + st.norm.ppf(self.alpha) * hsigma) - tmp.append(means[k] + st.norm.ppf(1 - self.alpha) * hsigma) + tmp.append(nmeans[k] + st.norm.ppf(self.alpha) * hsigma) + tmp.append(nmeans[k] + st.norm.ppf(1 - self.alpha) * hsigma) ret.append(tmp) - ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]]) + ret = self.doInverseTransformations(ret, params=[[data[-1] for a in 
np.arange(0,steps)]], interval=True) - return ret \ No newline at end of file + return ret + + def forecastAheadDistribution(self, data, steps, **kwargs): + smoothing = kwargs.get("smoothing", 0.5) + + sigma = np.sqrt(self.model_fit.sigma2) + + ndata = np.array(self.doTransformations(data)) + + l = len(ndata) + + percentile_size = (self.original_max - self.original_min)/100 + + resolution = kwargs.get('resolution', percentile_size) + + grid = self.get_empty_grid(self.original_min, self.original_max, resolution) + + index = SortedCollection.SortedCollection(iterable=grid.keys()) + + ret = [] + + nmeans = self.forecastAhead(ndata, steps, **kwargs) + + for k in np.arange(0, steps): + grid = self.get_empty_grid(self.original_min, self.original_max, resolution) + for alpha in np.arange(0.05, 0.5, 0.05): + tmp = [] + + hsigma = (1 + k * smoothing) * sigma + + tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma) + tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma) + + grid = self.gridCount(grid, resolution, index, tmp) + + tmp = np.array([grid[i] for i in sorted(grid)]) + + ret.append(tmp / sum(tmp)) + + grid = self.get_empty_grid(self.original_min, self.original_max, resolution) + df = pd.DataFrame(ret, columns=sorted(grid)) + return df \ No newline at end of file diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 4aab96f..7e38e8c 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -47,12 +47,12 @@ def get_point_methods(): def get_benchmark_interval_methods(): - """Return all non FTS methods for interval forecasting""" + """Return all non FTS methods for point_to_interval forecasting""" return [quantreg.QuantileRegression] def get_interval_methods(): - """Return all FTS methods for interval forecasting""" + """Return all FTS methods for point_to_interval forecasting""" return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS] diff --git a/benchmarks/distributed_benchmarks.py b/benchmarks/distributed_benchmarks.py index 
68d6997..0fd3240 100644 --- a/benchmarks/distributed_benchmarks.py +++ b/benchmarks/distributed_benchmarks.py @@ -260,7 +260,7 @@ def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, benchmark_models=None, benchmark_models_parameters = None, save=False, file=None, sintetic=False,nodes=None, depends=None): """ - Distributed sliding window benchmarks for FTS interval forecasters + Distributed sliding window benchmarks for FTS point_to_interval forecasters :param data: :param windowsize: size of sliding window :param train: percentual of sliding window data used to train the models diff --git a/benchmarks/parallel_benchmarks.py b/benchmarks/parallel_benchmarks.py index 4365a26..7fdeec4 100644 --- a/benchmarks/parallel_benchmarks.py +++ b/benchmarks/parallel_benchmarks.py @@ -187,7 +187,7 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione partitions=[10], max_order=3, transformation=None, indexer=None, dump=False, save=False, file=None, sintetic=False): """ - Parallel sliding window benchmarks for FTS interval forecasters + Parallel sliding window benchmarks for FTS point_to_interval forecasters :param data: :param windowsize: size of sliding window :param train: percentual of sliding window data used to train the models diff --git a/benchmarks/quantreg.py b/benchmarks/quantreg.py index 418b87a..089129a 100644 --- a/benchmarks/quantreg.py +++ b/benchmarks/quantreg.py @@ -2,9 +2,11 @@ # -*- coding: utf8 -*- import numpy as np +import pandas as pd from statsmodels.regression.quantile_regression import QuantReg from statsmodels.tsa.tsatools import lagmat from pyFTS import fts +from pyFTS.common import SortedCollection class QuantileRegression(fts.FTS): @@ -20,9 +22,11 @@ class QuantileRegression(fts.FTS): self.benchmark_only = True self.minOrder = 1 self.alpha = kwargs.get("alpha", 0.05) + self.dist = kwargs.get("dist", False) self.upper_qt = None self.mean_qt = None self.lower_qt = None + self.dist_qt = None 
self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")" def train(self, data, sets, order=1, parameters=None): @@ -42,12 +46,34 @@ class QuantileRegression(fts.FTS): self.upper_qt = [k for k in uqt.params] self.lower_qt = [k for k in lqt.params] + if self.dist: + self.dist_qt = [] + for alpha in np.arange(0.05,0.5,0.05): + lqt = QuantReg(ndata, lagdata).fit(alpha) + uqt = QuantReg(ndata, lagdata).fit(1 - alpha) + lo_qt = [k for k in lqt.params] + up_qt = [k for k in uqt.params] + self.dist_qt.append([lo_qt, up_qt]) + + self.original_min = min(data) + self.original_max = max(data) + self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha) def linearmodel(self,data,params): #return params[0] + sum([ data[k] * params[k+1] for k in np.arange(0, self.order) ]) return sum([data[k] * params[k] for k in np.arange(0, self.order)]) + def point_to_interval(self, data, lo_params, up_params): + lo = self.linearmodel(data, lo_params) + up = self.linearmodel(data, up_params) + return [lo, up] + + def interval_to_interval(self, data, lo_params, up_params): + lo = self.linearmodel([k[0] for k in data], lo_params) + up = self.linearmodel([k[1] for k in data], up_params) + return [lo, up] + def forecast(self, data, **kwargs): ndata = np.array(self.doTransformations(data)) l = len(ndata) @@ -73,10 +99,57 @@ class QuantileRegression(fts.FTS): for k in np.arange(self.order , l): sample = ndata[k - self.order: k] - up = self.linearmodel(sample, self.upper_qt) - down = self.linearmodel(sample, self.lower_qt) - ret.append([down, up]) + ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt)) ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True) return ret + + def forecastAheadInterval(self, data, steps, **kwargs): + ndata = np.array(self.doTransformations(data)) + + l = len(ndata) + + ret = [[k, k] for k in ndata[-self.order:]] + + for k in np.arange(self.order, steps + self.order): + intl = 
self.interval_to_interval([ret[x] for x in np.arange(k - self.order, k)], self.lower_qt, self.upper_qt) + + ret.append(intl) + + ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True) + + return ret[-steps:] + + def forecastAheadDistribution(self, data, steps, **kwargs): + ndata = np.array(self.doTransformations(data)) + + percentile_size = (self.original_max - self.original_min) / 100 + + resolution = kwargs.get('resolution', percentile_size) + + grid = self.get_empty_grid(self.original_min, self.original_max, resolution) + + index = SortedCollection.SortedCollection(iterable=grid.keys()) + + ret = [] + tmps = [] + + grids = {} + for k in np.arange(self.order, steps + self.order): + grids[k] = self.get_empty_grid(self.original_min, self.original_max, resolution) + + for qt in self.dist_qt: + intervals = [[k, k] for k in ndata[-self.order:]] + for k in np.arange(self.order, steps + self.order): + intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1]) + intervals.append(intl) + grids[k] = self.gridCount(grids[k], resolution, index, intl) + + for k in np.arange(self.order, steps + self.order): + tmp = np.array([grids[k][i] for i in sorted(grids[k])]) + ret.append(tmp / sum(tmp)) + + grid = self.get_empty_grid(self.original_min, self.original_max, resolution) + df = pd.DataFrame(ret, columns=sorted(grid)) + return df \ No newline at end of file diff --git a/common/Transformations.py b/common/Transformations.py index 484811f..5bb9804 100644 --- a/common/Transformations.py +++ b/common/Transformations.py @@ -49,7 +49,7 @@ class Differential(Transformation): def inverse(self,data, param, **kwargs): - interval = kwargs.get("interval",False) + interval = kwargs.get("interval",False) if isinstance(data, (np.ndarray, np.generic)): data = data.tolist() @@ -62,7 +62,7 @@ class Differential(Transformation): if not interval: inc = [data[t] + param[t] for t in
np.arange(0, n)] else: - inc = [[data[t][0] + param[t], data[t][0] + param[t]] for t in np.arange(0, n)] + inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)] if n == 1: return inc[0] diff --git a/ensemble.py b/ensemble.py index 17eb578..ddb116f 100644 --- a/ensemble.py +++ b/ensemble.py @@ -134,7 +134,7 @@ class EnsembleFTS(fts.FTS): return grid def gridCount(self, grid, resolution, index, interval): - #print(interval) + #print(point_to_interval) for k in index.inside(interval[0],interval[1]): #print(k) grid[k] += 1 diff --git a/fts.py b/fts.py index 4bc9c5f..0a79d81 100644 --- a/fts.py +++ b/fts.py @@ -88,11 +88,19 @@ class FTS(object): :param kwargs: :return: """ + ndata = [k for k in self.doTransformations(data[- self.order:])] + ret = [] for k in np.arange(0,steps): - tmp = self.forecast(data[-self.order:],kwargs) + tmp = self.forecast(ndata[-self.order:], **kwargs) + + if isinstance(tmp,(list, np.ndarray)): + tmp = tmp[0] + ret.append(tmp) - data.append(tmp) + ndata.append(tmp) + + ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]]) return ret @@ -164,7 +172,7 @@ class FTS(object): params = [None for k in self.transformations] for c, t in enumerate(reversed(self.transformations), start=0): - ndata = t.inverse(ndata, params[c]) + ndata = t.inverse(ndata, params[c], **kwargs) return ndata @@ -180,66 +188,39 @@ class FTS(object): def len_total(self): return sum([len(k) for k in self.flrgs]) - def buildTreeWithoutOrder(self, node, lags, level): + def get_empty_grid(self, _min, _max, resolution): + grid = {} - if level not in lags: - return + for sbin in np.arange(_min,_max, resolution): + grid[sbin] = 0 - for s in lags[level]: - node.appendChild(tree.FLRGTreeNode(s)) + return grid - for child in node.getChildren(): - self.buildTreeWithoutOrder(child, lags, level + 1) - - def inputoutputmapping(self,bins=100): - - dim_uod = tuple([bins for k in range(0,self.order)]) - - dim_fs = tuple([ len(self.sets) for k in 
range(0, self.order)]) - - simulation_uod = np.zeros(shape=dim_uod, dtype=float) - - simulation_fs = np.zeros(shape=dim_fs, dtype=float) - - percentiles = np.linspace(self.sets[0].lower, self.sets[-1].upper, bins).tolist() - - pdf_uod = {} - - for k in percentiles: - pdf_uod[k] = 0 - - pdf_fs = {} - for k in self.sets: - pdf_fs[k.name] = 0 - - lags = {} - - for o in np.arange(0, self.order): - lags[o] = percentiles - - # Build the tree with all possible paths - - root = tree.FLRGTreeNode(None) - - self.buildTreeWithoutOrder(root, lags, 0) - - # Trace the possible paths + def getGridClean(self, resolution): + if len(self.transformations) == 0: + _min = self.sets[0].lower + _max = self.sets[-1].upper + else: + _min = self.original_min + _max = self.original_max + return self.get_empty_grid(_min, _max, resolution) - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) - index_uod = tuple([percentiles.index(k) for k in path]) + def gridCount(self, grid, resolution, index, interval): + #print(point_to_interval) + for k in index.inside(interval[0],interval[1]): + #print(k) + grid[k] += 1 + return grid + + def gridCountPoint(self, grid, resolution, index, point): + k = index.find_ge(point) + # print(k) + grid[k] += 1 + return grid + - index_fs = tuple([ FuzzySet.getMaxMembershipFuzzySetIndex(k, self.sets) for k in path]) - - forecast = self.forecast(path)[0] - - simulation_uod[index_uod] = forecast - - simulation_fs[index_fs] = forecast - - return [simulation_fs, simulation_uod ] diff --git a/pwfts.py b/pwfts.py index f88a4de..f88e997 100644 --- a/pwfts.py +++ b/pwfts.py @@ -193,6 +193,18 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret = sum(np.array([pi * s.lower for s in flrg.LHS])) return ret + def buildTreeWithoutOrder(self, node, lags, level): + + if level not in lags: + return + + for s in lags[level]: + node.appendChild(tree.FLRGTreeNode(s)) + + for child in node.getChildren(): + self.buildTreeWithoutOrder(child, lags, level + 1) + 
+ def forecast(self, data, **kwargs): ndata = np.array(self.doTransformations(data)) @@ -440,34 +452,6 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): return ret - def getGridClean(self, resolution): - grid = {} - - if len(self.transformations) == 0: - _min = self.sets[0].lower - _max = self.sets[-1].upper - else: - _min = self.original_min - _max = self.original_max - - for sbin in np.arange(_min,_max, resolution): - grid[sbin] = 0 - - return grid - - def gridCount(self, grid, resolution, index, interval): - #print(interval) - for k in index.inside(interval[0],interval[1]): - #print(k) - grid[k] += 1 - return grid - - def gridCountPoint(self, grid, resolution, index, point): - k = index.find_ge(point) - # print(k) - grid[k] += 1 - return grid - def forecastAheadDistribution(self, data, steps, **kwargs): ret = [] diff --git a/tests/general.py b/tests/general.py index 645832c..5eaf577 100644 --- a/tests/general.py +++ b/tests/general.py @@ -48,6 +48,7 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000]) #bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";") #best = np.array(bestpd["Anomaly"]) +#del(bestpd) #print(lag) #print(a) @@ -61,26 +62,25 @@ from pyFTS.benchmarks import arima, quantreg, Measures #Util.cast_dataframe_to_synthetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11) #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11) - +#""" tmp = arima.ARIMA("", alpha=0.25) #tmp.appendTransformation(diff) -tmp.train(nasdaq[:1600], None, order=(2,0,2)) -teste = tmp.forecastInterval(nasdaq[1600:1604]) +tmp.train(nasdaq[:1600], None, order=(1,0,1)) +teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=100) + + +#tmp = quantreg.QuantileRegression("", dist=True) +#tmp.appendTransformation(diff) +#tmp.train(nasdaq[:1600], None, order=1) +#teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=50) -""" -tmp = 
quantreg.QuantileRegression("", alpha=0.25) -tmp.train(taiex[:1600], None, order=1) -teste = tmp.forecastInterval(taiex[1600:1605]) -""" print(nasdaq[1600:1605]) print(teste) -kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp) - -print(kk) - -#bchmk.teste(taiex,['192.168.0.109', '192.168.0.101']) +#kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp) +#print(kk) +#""" """ @@ -97,24 +97,25 @@ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.Weighted partitions= np.arange(3,20,step=2), #transformation=diff, dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) -""" + """ -bchmk.interval_sliding_window(taiex, 2000, train=0.8, inc=0.1,#models=[yu.WeightedFTS], # # +""" + +bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.WeightedFTS], # # partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], - partitions= np.arange(10,200,step=10), #transformation=diff, - dump=True, save=True, file="experiments/taiex_interval_analytic.csv", + partitions= np.arange(10,200,step=10), + dump=True, save=True, file="experiments/best" + "_interval_analytic.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) - - -bchmk.interval_sliding_window(nasdaq, 2000, train=0.8, inc=0.1, #models=[yu.WeightedFTS], # # +bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8, #models=[yu.WeightedFTS], # # partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], partitions= np.arange(3,20,step=2), transformation=diff, - dump=True, save=True, file="experiments/nasdaq_interval_analytic_diff.csv", + dump=True, save=True, file="experiments/best_interval_analytic_diff.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) -""" +#""" """ from 
pyFTS.partitioners import Grid