diff --git a/benchmarks/quantreg.py b/benchmarks/quantreg.py
index 1cfe6ef..a79b53b 100644
--- a/benchmarks/quantreg.py
+++ b/benchmarks/quantreg.py
@@ -19,7 +19,6 @@ class QuantileRegression(fts.FTS):
         self.has_point_forecasting = True
         self.has_interval_forecasting = True
         self.has_probability_forecasting = True
-        self.has_probability_forecasting = True
         self.benchmark_only = True
         self.minOrder = 1
         self.alpha = kwargs.get("alpha", 0.05)
diff --git a/ensemble.py b/ensemble.py
index 0b8b821..670ee89 100644
--- a/ensemble.py
+++ b/ensemble.py
@@ -9,11 +9,20 @@ from pyFTS.common import FLR, FuzzySet, SortedCollection
 from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
 from pyFTS.benchmarks import arima, quantreg
 from pyFTS.common import Transformations
+import scipy.stats as st
+from pyFTS import tree
+
+
+def sampler(data, quantiles):
+    ret = []
+    for qt in quantiles:
+        ret.append(np.nanpercentile(data, q=qt * 100))
+    return ret
 
 
 class EnsembleFTS(fts.FTS):
     def __init__(self, name, **kwargs):
-        super(EnsembleFTS, self).__init__("Ensemble FTS")
+        super(EnsembleFTS, self).__init__(1, "Ensemble FTS")
         self.shortname = "Ensemble FTS " + name
         self.name = "Ensemble FTS"
         self.flrgs = {}
@@ -23,51 +32,54 @@ class EnsembleFTS(fts.FTS):
         self.is_high_order = True
         self.models = []
         self.parameters = []
+        self.alpha = kwargs.get("alpha", 0.05)
+        self.max_order = 1
 
-    def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None):
-
-        methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
-                   ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS,
-                   hofts.HighOrderFTS, arima.ARIMA, quantreg.QuantileRegression]
-
-        transformations = [None, Transformations.Differential(1)]
-
-        for count, method in enumerate(methods, start=0):
-            mfts = method("")
-            if mfts.benchmark_only:
-                if transformation is not None:
-                    mfts.appendTransformation(transformation)
-                mfts.train(data,None, order=1, parameters=None)
-                self.models.append(mfts)
-            else:
-                for partition in partitions:
-                    for partitioner in partitioners:
-                        data_train_fs = partitioner(data, partition, transformation=transformation)
-                        mfts = method("")
-
-                        mfts.partitioner = data_train_fs
-                        if not mfts.is_high_order:
-
-                            if transformation is not None:
-                                mfts.appendTransformation(transformation)
-
-                            mfts.train(data, data_train_fs.sets)
-                            self.models.append(mfts)
-                        else:
-                            for order in np.arange(1, max_order + 1):
-                                if order >= mfts.min_order:
-                                    mfts = method("")
-                                    mfts.partitioner = data_train_fs
-
-                                    if transformation is not None:
-                                        mfts.appendTransformation(transformation)
-
-                                    mfts.train(data, data_train_fs.sets, order=order)
-                                    self.models.append(mfts)
+    def appendModel(self, model):
+        self.models.append(model)
+        if model.order > self.max_order:
+            self.max_order = model.order
 
     def train(self, data, sets, order=1,parameters=None):
-        #if self.models is None:
-        pass
+        self.original_max = max(data)
+        self.original_min = min(data)
+
+    def get_models_forecasts(self,data):
+        tmp = []
+        for model in self.models:
+            sample = data[-model.order:]
+            forecast = model.forecast(sample)
+            if isinstance(forecast, (list,np.ndarray)):
+                forecast = int(forecast[-1])
+            tmp.append(forecast)
+        return tmp
+
+    def get_point(self,method, forecasts, **kwargs):
+        if method == 'mean':
+            ret = np.nanmean(forecasts)
+        elif method == 'median':
+            ret = np.nanpercentile(forecasts, 50)
+        elif method == 'quantile':
+            alpha = kwargs.get("alpha",0.05)
+            ret = np.percentile(forecasts, alpha*100)
+
+        return ret
+
+    def get_interval(self, method, forecasts):
+        ret = []
+        if method == 'extremum':
+            ret.append([min(forecasts), max(forecasts)])
+        elif method == 'quantile':
+            qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100)
+            qt_up = np.nanpercentile(forecasts, q=(1-self.alpha) * 100)
+            ret.append([qt_lo, qt_up])
+        elif method == 'normal':
+            mu = np.nanmean(forecasts)
+            sigma = np.sqrt(np.nanvar(forecasts))
+            ret.append(mu + st.norm.ppf(self.alpha) * sigma)
+            ret.append(mu + st.norm.ppf(1 - self.alpha) * sigma)
+
+        return ret
 
     def forecast(self, data, **kwargs):
@@ -76,83 +88,106 @@ class EnsembleFTS(fts.FTS):
 
         ndata = np.array(self.doTransformations(data))
 
         l = len(ndata)
-
         ret = []
 
-        for k in np.arange(0, l+1):
-            tmp = []
-            for model in self.models:
-                if k >= model.minOrder - 1:
-                    sample = ndata[k - model.order : k]
-                    tmp.append( model.forecast(sample) )
-            if method == 'mean':
-                ret.append( np.nanmean(tmp))
-            elif method == 'median':
-                ret.append(np.percentile(tmp,50))
+        for k in np.arange(self.max_order, l+1):
+            sample = ndata[k - self.max_order : k ]
+            tmp = self.get_models_forecasts(sample)
+            point = self.get_point(method, tmp)
+            ret.append(point)
 
         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
 
         return ret
+
     def forecastInterval(self, data, **kwargs):
 
         method = kwargs.get('method', 'extremum')
 
+        if 'alpha' in kwargs:
+            self.alpha = kwargs.get('alpha',0.05)
+
         ndata = np.array(self.doTransformations(data))
 
         l = len(ndata)
 
         ret = []
 
-        for k in np.arange(0, l):
-            tmp = []
-            for model in self.models:
-                if k >= model.minOrder - 1:
-                    sample = ndata[k - model.order : k]
-                    tmp.append( model.forecast(sample) )
-            if method == 'extremum':
-                ret.append( [ min(tmp), max(tmp) ] )
-            elif method == 'quantile':
-                q = kwargs.get('q', [.05, .95])
-                ret.append(np.percentile(tmp,q=q*100))
+        for k in np.arange(self.max_order, l+1):
+            sample = ndata[k - self.max_order : k ]
+            tmp = self.get_models_forecasts(sample)
+            interval = self.get_interval(method, tmp)
+            ret.append(interval)
 
         return ret
 
-    def forecastAhead(self, data, steps, **kwargs):
-        pass
-
     def forecastAheadInterval(self, data, steps, **kwargs):
-        pass
+        method = kwargs.get('method', 'extremum')
 
-    def getGridClean(self, resolution):
-        grid = {}
+        ret = []
 
-        if len(self.transformations) == 0:
-            _min = self.sets[0].lower
-            _max = self.sets[-1].upper
-        else:
-            _min = self.original_min
-            _max = self.original_max
+        samples = [[k,k] for k in data[-self.max_order:]]
 
-        for sbin in np.arange(_min,_max, resolution):
-            grid[sbin] = 0
+        for k in np.arange(self.max_order, steps):
+            forecasts = []
+            sample = samples[k - self.max_order : k]
+            lo_sample = [i[0] for i in sample]
+            up_sample = [i[1] for i in sample]
+            forecasts.extend(self.get_models_forecasts(lo_sample) )
+            forecasts.extend(self.get_models_forecasts(up_sample))
+            interval = self.get_interval(method, forecasts)
 
-        return grid
+            if len(interval) == 1:
+                interval = interval[0]
 
-    def gridCount(self, grid, resolution, index, interval):
-        #print(point_to_interval)
-        for k in index.inside(interval[0],interval[1]):
-            #print(k)
-            grid[k] += 1
-        return grid
+            ret.append(interval)
+            samples.append(interval)
 
-    def gridCountPoint(self, grid, resolution, index, point):
-        k = index.find_ge(point)
-        # print(k)
-        grid[k] += 1
-        return grid
+        return ret
+
+    def empty_grid(self, resolution):
+        return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
 
     def forecastAheadDistribution(self, data, steps, **kwargs):
-        pass
+        method = kwargs.get('method', 'extremum')
+
+        percentile_size = (self.original_max - self.original_min) / 100
+
+        resolution = kwargs.get('resolution', percentile_size)
+
+        grid = self.empty_grid(resolution)
+
+        index = SortedCollection.SortedCollection(iterable=grid.keys())
+
+        ret = []
+
+        samples = [[k] for k in data[-self.max_order:]]
+
+        for k in np.arange(self.max_order, steps + self.max_order):
+            forecasts = []
+            lags = {}
+            for i in np.arange(0, self.max_order): lags[i] = samples[k - self.max_order + i]
+
+            # Build the tree with all possible paths
+
+            root = tree.FLRGTreeNode(None)
+
+            tree.buildTreeWithoutOrder(root, lags, 0)
+
+            for p in root.paths():
+                path = list(reversed(list(filter(None.__ne__, p))))
+
+                forecasts.extend(self.get_models_forecasts(path))
+
+            samples.append(sampler(forecasts, [0.05, 0.25, 0.5, 0.75, 0.95 ]))
+
+            grid = self.gridCountPoint(grid, resolution, index, forecasts)
+
+            tmp = np.array([grid[i] for i in sorted(grid)])
+
+            ret.append(tmp / sum(tmp))
+
+        return ret
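The interval strategies added to EnsembleFTS above are plain statistics over the pooled member forecasts. A minimal standalone sketch of the 'normal' option, assuming the member forecasts are roughly Gaussian; the normal_interval helper and the sample values are illustrative only, not part of pyFTS:

import numpy as np
import scipy.stats as st

def normal_interval(forecasts, alpha=0.05):
    # Mirrors get_interval(method='normal'): mean +/- z_alpha * std of the member forecasts.
    mu = np.nanmean(forecasts)
    sigma = np.sqrt(np.nanvar(forecasts))
    return [mu + st.norm.ppf(alpha) * sigma, mu + st.norm.ppf(1 - alpha) * sigma]

# toy forecasts from an imagined ensemble of four members
print(normal_interval([112.0, 118.5, 109.3, 121.0], alpha=0.05))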
diff --git a/fts.py b/fts.py
index 0a79d81..9108afa 100644
--- a/fts.py
+++ b/fts.py
@@ -210,14 +210,16 @@ class FTS(object):
     def gridCount(self, grid, resolution, index, interval):
         #print(point_to_interval)
         for k in index.inside(interval[0],interval[1]):
-            #print(k)
             grid[k] += 1
         return grid
 
     def gridCountPoint(self, grid, resolution, index, point):
-        k = index.find_ge(point)
-        # print(k)
-        grid[k] += 1
+        if not isinstance(point, (list, np.ndarray)):
+            point = [point]
+
+        for p in point:
+            k = index.find_ge(p)
+            grid[k] += 1
         return grid
 
 
@@ -225,4 +227,3 @@
 
 
 
-
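With this change, gridCountPoint can accumulate a whole batch of forecasts into the same histogram grid in one call. A rough sketch of the idea using a plain dict and bisect in place of pyFTS's SortedCollection; grid_count_point is a hypothetical helper, and the bin values are made up:

import bisect
import numpy as np

def grid_count_point(grid, sorted_bins, point):
    # Accept a scalar or a list/array of points, as the patched FTS.gridCountPoint does.
    if not isinstance(point, (list, np.ndarray)):
        point = [point]
    for p in point:
        # first bin boundary >= p, roughly what index.find_ge(p) returns
        k = sorted_bins[bisect.bisect_left(sorted_bins, p)]
        grid[k] += 1
    return grid

bins = list(np.arange(0, 10, 2.5))      # 0.0, 2.5, 5.0, 7.5
grid = {b: 0 for b in bins}
print(grid_count_point(grid, bins, [1.2, 3.3, 3.4]))   # counts land in bins 2.5 and 5.0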
diff --git a/tests/ensemble.py b/tests/ensemble.py
new file mode 100644
index 0000000..c6e6294
--- /dev/null
+++ b/tests/ensemble.py
@@ -0,0 +1,112 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import os
+import numpy as np
+import pandas as pd
+import matplotlib as plt
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+import pandas as pd
+from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
+from pyFTS.common import FLR,FuzzySet,Membership,Transformations
+from pyFTS import fts,hofts,ifts,pwfts,tree, chen, ensemble, song, yu, cheng, ismailefendi, sadaei, hwang
+from pyFTS.benchmarks import naive, arima
+from numpy import random
+from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import arima, quantreg, Measures
+
+os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
+
+diff = Transformations.Differential(1)
+
+passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
+passengers = np.array(passengers["Passengers"])
+
+
+e = ensemble.EnsembleFTS("")
+
+fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS,
+              ismailefendi.ImprovedWeightedFTS]
+
+ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
+
+fs = Grid.GridPartitioner(passengers, 10, transformation=diff)
+
+for method in fo_methods:
+    model = method("")
+    model.appendTransformation(diff)
+    model.train(passengers, fs.sets)
+    e.appendModel(model)
+
+
+for method in ho_methods:
+    for order in [1,2,3]:
+        model = method("")
+        model.appendTransformation(diff)
+        model.train(passengers, fs.sets, order=order)
+        e.appendModel(model)
+
+
+arima100 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima100.train(passengers, None, order=(1,0,0))
+
+arima101 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima101.train(passengers, None, order=(1,0,1))
+
+arima200 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima200.train(passengers, None, order=(2,0,0))
+
+arima201 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima201.train(passengers, None, order=(2,0,1))
+
+
+e.appendModel(arima100)
+e.appendModel(arima101)
+e.appendModel(arima200)
+e.appendModel(arima201)
+
+e.train(passengers, None)
+
+"""
+_mean = e.forecast(passengers, method="mean")
+print(_mean)
+
+_median = e.forecast(passengers, method="median")
+print(_median)
+"""
+"""
+_extremum = e.forecastInterval(passengers, method="extremum")
+print(_extremum)
+
+_quantile = e.forecastInterval(passengers, method="quantile", alpha=0.25)
+print(_quantile)
+
+
+_normal = e.forecastInterval(passengers, method="normal", alpha=0.25)
+print(_normal)
+"""
+
+"""
+_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
+print(_extremum)
+
+_quantile = e.forecastAheadInterval(passengers, 10, method="quantile", alpha=0.25)
+print(_quantile)
+
+
+_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
+print(_normal)
+"""
+
+dist = e.forecastAheadDistribution(passengers, 20)
+
+print(dist)
+
+
diff --git a/tests/general.py b/tests/general.py
index ee3a164..70af200 100644
--- a/tests/general.py
+++ b/tests/general.py
@@ -69,10 +69,21 @@ from pyFTS.benchmarks import arima, quantreg, Measures
 #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
 
 #"""
-tmp = arima.ARIMA("", alpha=0.25)
+arima100 = arima.ARIMA("", alpha=0.25)
 #tmp.appendTransformation(diff)
-tmp.train(enrollments, None, order=(1,0,0))
-teste = tmp.forecastInterval(enrollments)
+arima100.train(passengers, None, order=(1,0,0))
+
+arima101 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima101.train(passengers, None, order=(1,0,1))
+
+arima200 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima200.train(passengers, None, order=(2,0,0))
+
+arima201 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima201.train(passengers, None, order=(2,0,1))
 
 
 #tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True)
diff --git a/tree.py b/tree.py
index 210b2f9..9a15e66 100644
--- a/tree.py
+++ b/tree.py
@@ -51,3 +51,15 @@ def flat(dados):
             yield k
         else:
             yield inst
+
+
+def buildTreeWithoutOrder(node, lags, level):
+
+    if level not in lags:
+        return
+
+    for s in lags[level]:
+        node.appendChild(FLRGTreeNode(s))
+
+    for child in node.getChildren():
+        buildTreeWithoutOrder(child, lags, level + 1)
\ No newline at end of file
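buildTreeWithoutOrder expands every combination of the candidate values stored per lag; each root-to-leaf path is one lag scenario that forecastAheadDistribution feeds to get_models_forecasts. The same enumeration can be sketched with itertools.product; this is an illustration of the idea, not the pyFTS tree implementation, and the lag values are made up:

from itertools import product

# lags[i] holds the candidate values sampled for lag i, as built in forecastAheadDistribution
lags = {0: [100, 110], 1: [105], 2: [103, 108, 112]}

# every root-to-leaf path of the tree corresponds to one tuple of the Cartesian product
paths = list(product(*(lags[i] for i in sorted(lags))))
print(len(paths))   # 2 * 1 * 3 = 6 scenarios
print(paths[0])     # (100, 105, 103)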