From 07bfceae7898271fe23b7978a74cf27f8b484755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Mon, 2 Jul 2018 18:55:28 -0300 Subject: [PATCH] Introducing FTS.lags, the lag indexing for HOFTS, IFTS and PWFTS --- pyFTS/benchmarks/Measures.py | 36 +++++++++++----------- pyFTS/common/fts.py | 4 ++- pyFTS/models/hofts.py | 35 +++++++++++++++------ pyFTS/models/ifts.py | 4 +-- pyFTS/models/pwfts.py | 60 +++++++++++++++++++----------------- pyFTS/tests/pwfts.py | 43 +++++++++++++++++++++++--- 6 files changed, 118 insertions(+), 64 deletions(-) diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index 77e3de6..7cdd07a 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -64,7 +64,7 @@ def mape(targets, forecasts): targets = np.array(targets) if isinstance(forecasts, list): forecasts = np.array(forecasts) - return np.mean(np.abs(targets - forecasts) / targets) * 100 + return np.mean(np.abs((targets - forecasts) / targets)) * 100 def smape(targets, forecasts, type=2): @@ -334,9 +334,9 @@ def get_point_statistics(data, model, **kwargs): nforecasts = np.array(forecasts[:-1]) - ret.append(np.round(rmse(ndata[model.order:], nforecasts), 2)) - ret.append(np.round(mape(ndata[model.order:], nforecasts), 2)) - ret.append(np.round(UStatistic(ndata[model.order:], nforecasts), 2)) + ret.append(np.round(rmse(ndata[model.max_lag:], nforecasts), 2)) + ret.append(np.round(mape(ndata[model.max_lag:], nforecasts), 2)) + ret.append(np.round(UStatistic(ndata[model.max_lag:], nforecasts), 2)) else: steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1) nforecasts = [] @@ -345,7 +345,7 @@ def get_point_statistics(data, model, **kwargs): tmp = model.predict(sample, **kwargs) nforecasts.append(tmp[-1]) - start = model.order + steps_ahead -1 + start = model.max_lag + steps_ahead -1 ret.append(np.round(rmse(ndata[start:-1:steps_ahead_sampler], nforecasts), 2)) ret.append(np.round(mape(ndata[start:-1:steps_ahead_sampler], nforecasts), 2)) ret.append(np.round(UStatistic(ndata[start:-1:steps_ahead_sampler], nforecasts), 2)) @@ -373,12 +373,12 @@ def get_interval_statistics(data, model, **kwargs): ret.append(round(sharpness(forecasts), 2)) ret.append(round(resolution(forecasts), 2)) ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.05, data[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2)) - ret.append(round(winkler_mean(0.05, data[model.order:], forecasts[:-1]), 2)) - ret.append(round(winkler_mean(0.25, data[model.order:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.75, data[model.max_lag:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.95, data[model.max_lag:], forecasts[:-1]), 2)) + ret.append(round(winkler_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2)) + ret.append(round(winkler_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2)) else: forecasts = [] for k in np.arange(model.order, len(data) - steps_ahead): @@ -386,10 +386,10 @@ def get_interval_statistics(data, model, **kwargs): tmp = model.predict(sample, **kwargs) forecasts.append(tmp[-1]) - start = model.order + steps_ahead -1 + start = model.max_lag + steps_ahead -1 ret.append(round(sharpness(forecasts), 2)) ret.append(round(resolution(forecasts), 2)) - ret.append(round(coverage(data[model.order:], forecasts), 2)) + ret.append(round(coverage(data[model.max_lag:], forecasts), 2)) ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2)) ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2)) ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2)) @@ -416,20 +416,20 @@ def get_distribution_statistics(data, model, **kwargs): _s1 = time.time() forecasts = model.predict(data, **kwargs) _e1 = time.time() - ret.append(round(crps(data[model.order:], forecasts[:-1]), 3)) + ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3)) ret.append(round(_e1 - _s1, 3)) - ret.append(round(brier_score(data[model.order:], forecasts[:-1]), 3)) + ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3)) else: skip = kwargs.get('steps_ahead_sampler', 1) forecasts = [] _s1 = time.time() - for k in np.arange(model.order, len(data) - steps_ahead, skip): - sample = data[k - model.order: k] + for k in np.arange(model.max_lag, len(data) - steps_ahead, skip): + sample = data[k - model.max_lag: k] tmp = model.predict(sample, **kwargs) forecasts.append(tmp[-1]) _e1 = time.time() - start = model.order + steps_ahead + start = model.max_lag + steps_ahead ret.append(round(crps(data[start:-1:skip], forecasts), 3)) ret.append(round(_e1 - _s1, 3)) ret.append(round(brier_score(data[start:-1:skip], forecasts), 3)) diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py index 4e41165..42d1a8a 100644 --- a/pyFTS/common/fts.py +++ b/pyFTS/common/fts.py @@ -22,6 +22,7 @@ class FTS(object): has_point_forecasting: Boolean, if the model support point forecasting, default: True has_interval_forecasting: Boolean, if the model support interval forecasting, default: False has_probability_forecasting: Boolean, if the model support probabilistic forecasting, default: False + max_lag: Integer, maximum lag index used by the model, default: 1 min_order: Integer, minimal order supported for the model, default: 1 name: Model name order: model order (number of past lags are used on forecasting) @@ -37,7 +38,7 @@ class FTS(object): self.sets = {} self.flrgs = {} - self.order = kwargs.get('order',"") + self.order = kwargs.get('order',1) self.shortname = kwargs.get('name',"") self.name = kwargs.get('name',"") self.detail = kwargs.get('name',"") @@ -61,6 +62,7 @@ class FTS(object): self.indexer = kwargs.get("indexer", None) self.uod_clip = kwargs.get("uod_clip", True) self.alpha_cut = kwargs.get("alpha_cut", 0.0) + self.max_lag = self.order def fuzzy(self, data): """ diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py index f1d4474..ba0afcc 100644 --- a/pyFTS/models/hofts.py +++ b/pyFTS/models/hofts.py @@ -45,16 +45,32 @@ class HighOrderFTS(fts.FTS): self.detail = "Chen" self.is_high_order = True self.min_order = 2 + self.order= kwargs.get("order", 2) + self.lags = kwargs.get("lags", None) + self.configure_lags(**kwargs) + + def configure_lags(self, **kwargs): + if "order" in kwargs: + self.order = kwargs.get("order", 2) + + if "lags" in kwargs: + self.lags = kwargs.get("lags", None) + + if self.lags is not None: + self.max_lag = max(self.lags) + else: + self.max_lag = self.order + self.lags = np.arange(1, self.order+1) def generate_lhs_flrg(self, sample): lags = {} flrgs = [] - for o in np.arange(0, self.order): + for ct, o in enumerate(self.lags): lhs = [key for key in self.partitioner.ordered_sets - if self.sets[key].membership(sample[o]) > self.alpha_cut] - lags[o] = lhs + if self.sets[key].membership(sample[o-1]) > self.alpha_cut] + lags[ct] = lhs root = tree.FLRGTreeNode(None) @@ -74,10 +90,10 @@ class HighOrderFTS(fts.FTS): def generate_flrg(self, data): l = len(data) - for k in np.arange(self.order, l): + for k in np.arange(self.max_lag, l): if self.dump: print("FLR: " + str(k)) - sample = data[k - self.order: k] + sample = data[k - self.max_lag: k] rhs = [key for key in self.partitioner.ordered_sets if self.sets[key].membership(data[k]) > self.alpha_cut] @@ -91,9 +107,8 @@ class HighOrderFTS(fts.FTS): for st in rhs: self.flrgs[flrg.get_key()].append_rhs(st) - def train(self, data, **kwargs): - + self.configure_lags(**kwargs) self.generate_flrg(data) def forecast(self, ndata, **kwargs): @@ -102,11 +117,11 @@ class HighOrderFTS(fts.FTS): l = len(ndata) - if l <= self.order: + if l <= self.max_lag: return ndata - for k in np.arange(self.order, l+1): - flrgs = self.generate_lhs_flrg(ndata[k - self.order: k]) + for k in np.arange(self.max_lag, l+1): + flrgs = self.generate_lhs_flrg(ndata[k - self.max_lag: k]) tmp = [] for flrg in flrgs: diff --git a/pyFTS/models/ifts.py b/pyFTS/models/ifts.py index 002ed3e..99b2c8b 100644 --- a/pyFTS/models/ifts.py +++ b/pyFTS/models/ifts.py @@ -62,9 +62,9 @@ class IntervalFTS(hofts.HighOrderFTS): if l <= self.order: return ndata - for k in np.arange(self.order, l+1): + for k in np.arange(self.max_lag, l+1): - sample = ndata[k - self.order: k] + sample = ndata[k - self.max_lag: k] flrgs = self.generate_lhs_flrg(sample) diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 5d9d7f7..9128608 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -21,7 +21,8 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG): def get_membership(self, data, sets): if isinstance(data, (np.ndarray, list)): - return np.nanprod([sets[key].membership(data[count]) for count, key in enumerate(self.LHS)]) + return np.nanprod([sets[key].membership(data[count]) + for count, key in enumerate(self.LHS, start=0)]) else: return sets[self.LHS[0]].membership(data) @@ -107,9 +108,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): self.is_high_order = True self.min_order = 1 self.auto_update = kwargs.get('update',False) + self.configure_lags(**kwargs) def train(self, data, **kwargs): + self.configure_lags(**kwargs) parameters = kwargs.get('parameters','fuzzy') if parameters == 'monotonic': @@ -124,10 +127,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): flrgs = [] - for o in np.arange(0, self.order): + for ct, o in enumerate(self.lags): lhs = [key for key in self.partitioner.ordered_sets - if self.sets[key].membership(sample[o]) > self.alpha_cut] - lags[o] = lhs + if self.sets[key].membership(sample[o-1]) > self.alpha_cut] + lags[ct] = lhs root = tree.FLRGTreeNode(None) @@ -147,10 +150,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): def generate_flrg(self, data): l = len(data) - for k in np.arange(self.order, l): + for k in np.arange(self.max_lag, l): if self.dump: print("FLR: " + str(k)) - sample = data[k - self.order: k] + sample = data[k - self.max_lag: k] flrgs = self.generate_lhs_flrg(sample) @@ -253,8 +256,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret = [] - for k in np.arange(self.order - 1, l): - sample = data[k - (self.order - 1): k + 1] + for k in np.arange(self.max_lag - 1, l): + sample = data[k - (self.max_lag - 1): k + 1] if method == 'heuristic': ret.append(self.point_heuristic(sample, **kwargs)) @@ -300,9 +303,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret = [] - for k in np.arange(self.order - 1, l): + for k in np.arange(self.max_lag - 1, l): - sample = ndata[k - (self.order - 1): k + 1] + sample = ndata[k - (self.max_lag - 1): k + 1] if method == 'heuristic': ret.append(self.interval_heuristic(sample)) @@ -358,8 +361,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret = [] - for k in np.arange(self.order - 1, l): - sample = ndata[k - (self.order - 1): k + 1] + for k in np.arange(self.max_lag - 1, l): + sample = ndata[k - (self.max_lag - 1): k + 1] flrgs = self.generate_lhs_flrg(sample) @@ -398,19 +401,19 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): l = len(data) - start = kwargs.get('start', self.order) + start = kwargs.get('start', self.max_lag) - ret = data[start - self.order: start].tolist() + ret = data[start - self.max_lag: start].tolist() - for k in np.arange(self.order, steps+self.order): + for k in np.arange(self.max_lag, steps+self.max_lag): if self.__check_point_bounds(ret[-1]) : ret.append(ret[-1]) else: - mp = self.forecast(ret[k - self.order: k], **kwargs) + mp = self.forecast(ret[k - self.max_lag: k], **kwargs) ret.append(mp[0]) - return ret[self.order:] + return ret[self.max_lag:] def __check_interval_bounds(self, interval): if len(self.transformations) > 0: @@ -424,21 +427,21 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): l = len(data) - start = kwargs.get('start', self.order) + start = kwargs.get('start', self.max_lag) - sample = data[start - self.order: start] + sample = data[start - self.max_lag: start] ret = [[k, k] for k in sample] ret.append(self.forecast_interval(sample)[0]) - for k in np.arange(self.order+1, steps+self.order): + for k in np.arange(self.max_lag+1, steps+self.max_lag): if len(ret) > 0 and self.__check_interval_bounds(ret[-1]): ret.append(ret[-1]) else: - lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.order, k)], **kwargs) - upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.order, k)], **kwargs) + lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.max_lag, k)], **kwargs) + upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.max_lag, k)], **kwargs) ret.append([np.min(lower), np.max(upper)]) @@ -459,9 +462,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): nbins = kwargs.get("num_bins", 100) _bins = np.linspace(uod[0], uod[1], nbins) - start = kwargs.get('start', self.order) + start = kwargs.get('start', self.max_lag) - sample = ndata[start - self.order: start] + sample = ndata[start - self.max_lag: start] for dat in sample: if 'type' in kwargs: @@ -474,14 +477,15 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret.append(dist) - for k in np.arange(self.order+1, steps+self.order+1): + for k in np.arange(self.max_lag+1, steps+self.max_lag+1): dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) lags = {} # Find all bins of past distributions with probability greater than zero - for ct, dd in enumerate(ret[k - self.order: k]): + for ct, d in enumerate(self.lags): + dd = ret[k - d] vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0] lags[ct] = sorted(vals) @@ -496,8 +500,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): # get the combined probabilities for this path - pk = np.prod([ret[k - self.order + o].density(path[o]) - for o in np.arange(0, self.order)]) + pk = np.prod([ret[k - self.max_lag + o].density(path[ct]) + for ct, o in enumerate(self.lags)]) d = self.forecast_distribution(path)[0] diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py index 697aa02..7665215 100644 --- a/pyFTS/tests/pwfts.py +++ b/pyFTS/tests/pwfts.py @@ -20,8 +20,45 @@ test = taiex[3000:3200] from pyFTS.common import Transformations tdiff = Transformations.Differential(1) -''' +from pyFTS.benchmarks import benchmarks as bchmk, Measures +from pyFTS.models import pwfts,hofts,ifts from pyFTS.partitioners import Grid, Util as pUtil + +fs = Grid.GridPartitioner(data=train, npart=30) #, transformation=tdiff) + +model1 = hofts.HighOrderFTS(partitioner=fs, lags=[1,2])#lags=[0,1]) +model1.shortname = "1" +model2 = pwfts.ProbabilisticWeightedFTS(partitioner=fs, lags=[1,2]) +#model2.append_transformation(tdiff) +model2.shortname = "2" +#model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)# lags=[1,2]) + +model1.fit(train) +model2.fit(train) + +#print(model1) + +#print(model2) + +for model in [model1, model2]: + #forecasts = model.predict(test) + print(model.shortname) + print(Measures.get_point_statistics(test, model)) + +#handles, labels = ax.get_legend_handles_labels() +#ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1)) + +#print(Measures.get_point_statistics(test,model)) + + +''' +bchmk.sliding_window_benchmarks(train,1000,0.8, + methods=[pwfts.ProbabilisticWeightedFTS], #,ifts.IntervalFTS], + orders=[1,2,3], + partitions=[10]) +''' +''' + from pyFTS.common import FLR,FuzzySet,Membership,SortedCollection taiex_fs1 = Grid.GridPartitioner(data=train, npart=30) taiex_fs2 = Grid.GridPartitioner(data=train, npart=10, transformation=tdiff) @@ -39,7 +76,3 @@ print(pfts1_taiex) ''' -model = Util.load_obj('pwfts') - -model.predict(test, type='distribution') -#''' \ No newline at end of file