From 4552bb6bab9fc989aeb5ee6d5d48daad5a29cb05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Tue, 26 Nov 2019 23:54:49 -0300
Subject: [PATCH] High Order NSFTS refactoring

---
 pyFTS/models/nonstationary/common.py       |   1 -
 pyFTS/models/nonstationary/honsfts.py      | 294 ++++++++++-----------
 pyFTS/models/nonstationary/partitioners.py |   1 +
 pyFTS/tests/nonstationary.py               | 153 +----------
 4 files changed, 160 insertions(+), 289 deletions(-)

diff --git a/pyFTS/models/nonstationary/common.py b/pyFTS/models/nonstationary/common.py
index 408a6a4..b824227 100644
--- a/pyFTS/models/nonstationary/common.py
+++ b/pyFTS/models/nonstationary/common.py
@@ -55,7 +55,6 @@ class FuzzySet(FS.FuzzySet):
                 inc = t
             else:
                 l = len(self.location)
-
                 inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)])
 
             if self.mf == Membership.gaussmf:
diff --git a/pyFTS/models/nonstationary/honsfts.py b/pyFTS/models/nonstationary/honsfts.py
index 8a9148a..d740295 100644
--- a/pyFTS/models/nonstationary/honsfts.py
+++ b/pyFTS/models/nonstationary/honsfts.py
@@ -1,7 +1,8 @@
 import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, tree
+from pyFTS.common import FuzzySet, FLR, fts
 from pyFTS.models import hofts
-from pyFTS.models.nonstationary import common, flrg
+from pyFTS.models.nonstationary import common, flrg, nsfts
+from itertools import product
 
 
 class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
@@ -11,67 +12,121 @@ class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
 
         self.LHS = []
         self.RHS = {}
+        self.count = 0.0
+        self.strlhs = ""
+        self.w = None
 
-    def append_rhs(self, c, **kwargs):
-        if c.name not in self.RHS:
-            self.RHS[c.name] = c
+    def append_rhs(self, fset, **kwargs):
+        count = kwargs.get('count',1.0)
+        if fset not in self.RHS:
+            self.RHS[fset] = count
+        else:
+            self.RHS[fset] += count
+        self.count += count
 
     def append_lhs(self, c):
         self.LHS.append(c)
 
+    def weights(self):
+        if self.w is None:
+            self.w = np.array([self.RHS[c] / self.count for c in self.RHS.keys()])
+        return self.w
+
+    def get_midpoint(self, sets, perturb):
+        mp = np.array([sets[c].get_midpoint(perturb) for c in self.RHS.keys()])
+        midpoint = mp.dot(self.weights())
+        return midpoint
+
+    def get_lower(self, sets, perturb):
+        lw = np.array([sets[s].get_lower(perturb) for s in self.RHS.keys()])
+        lower = lw.dot(self.weights())
+        return lower
+
+    def get_upper(self, sets, perturb):
+        up = np.array([sets[s].get_upper(perturb) for s in self.RHS.keys()])
+        upper = up.dot(self.weights())
+        return upper
+
     def __str__(self):
-        tmp = ""
-        for c in sorted(self.RHS):
-            if len(tmp) > 0:
-                tmp = tmp + ","
-            tmp = tmp + c
-        return self.get_key() + " -> " + tmp
+        _str = ""
+        for k in self.RHS.keys():
+            _str += ", " if len(_str) > 0 else ""
+            _str += k + " (" + str(round(self.RHS[k] / self.count, 3)) + ")"
+
+        return self.get_key() + " -> " + _str
+
+    def __len__(self):
+        return len(self.RHS)
 
 
-class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
+class HighOrderNonStationaryFTS(nsfts.NonStationaryFTS):
     """NonStationaryFTS Fuzzy Time Series"""
-    def __init__(self, name, **kwargs):
-        super(HighOrderNonStationaryFTS, self).__init__("HONSFTS " + name, **kwargs)
+    def __init__(self, **kwargs):
+        super(HighOrderNonStationaryFTS, self).__init__(**kwargs)
         self.name = "High Order Non Stationary FTS"
+        self.shortname = "HONSFTS"
         self.detail = ""
         self.flrgs = {}
+        self.is_high_order = True
+        self.order = kwargs.get("order",2)
+        self.configure_lags(**kwargs)
+
+    def configure_lags(self, **kwargs):
+        if "order" in kwargs:
+            self.order = kwargs.get("order", self.min_order)
+
+        if "lags" in kwargs:
+            self.lags = kwargs.get("lags", None)
+
+        if self.lags is not None:
+            self.max_lag = max(self.lags)
+        else:
+            self.max_lag = self.order
+            self.lags = np.arange(1, self.order + 1)
+
+    def train(self, data, **kwargs):
+
+        self.generate_flrg(data)
+
+        if self.method == 'conditional':
+            self.forecasts = self.forecast(data, no_update=True)
+            self.residuals = np.array(data[self.order:]) - np.array(self.forecasts)
+
+            self.variance_residual = np.var(self.residuals)  # np.max(self.residuals
+            self.mean_residual = np.mean(self.residuals)
+
+            self.residuals = self.residuals[-self.memory_window:].tolist()
+            self.forecasts = self.forecasts[-self.memory_window:]
+            self.inputs = np.array(data[-self.memory_window:]).tolist()
 
     def generate_flrg(self, data, **kwargs):
         l = len(data)
-        window_size = kwargs.get("window_size", 1)
         for k in np.arange(self.order, l):
             if self.dump: print("FLR: " + str(k))
 
             sample = data[k - self.order: k]
 
-            disp = common.window_index(k, window_size)
-
-            rhs = [self.sets[key] for key in self.partitioner.ordered_sets
-                   if self.sets[key].membership(data[k], disp) > 0.0]
+            rhs = [key for key in self.partitioner.ordered_sets
+                   if self.partitioner.sets[key].membership(data[k], [0,1]) > 0.0]
 
             if len(rhs) == 0:
-                rhs = [common.check_bounds(data[k], self.partitioner, disp)]
+                rhs = [common.check_bounds(data[k], self.partitioner, [0,1]).name]
 
-            lags = {}
+            lags = []
 
             for o in np.arange(0, self.order):
-                tdisp = common.window_index(k - (self.order - o), window_size)
-                lhs = [self.sets[key] for key in self.partitioner.ordered_sets
-                       if self.sets[key].membership(sample[o], tdisp) > 0.0]
+                tdisp = [0,1]
+                lhs = [key for key in self.partitioner.ordered_sets
+                       if self.partitioner.sets[key].membership(sample[o], tdisp) > 0.0]
 
                 if len(lhs) == 0:
-                    lhs = [common.check_bounds(sample[o], self.partitioner, tdisp)]
+                    lhs = [common.check_bounds(sample[o], self.partitioner, tdisp).name]
 
-                lags[o] = lhs
-
-            root = tree.FLRGTreeNode(None)
-
-            tree.build_tree_without_order(root, lags, 0)
+                lags.append(lhs)
 
             # Trace the possible paths
-            for p in root.paths():
+            for path in product(*lags):
                 flrg = HighOrderNonStationaryFLRG(self.order)
-                path = list(reversed(list(filter(None.__ne__, p))))
 
                 for c, e in enumerate(path, start=0):
                     flrg.append_lhs(e)
@@ -82,67 +137,45 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
                 for st in rhs:
                     self.flrgs[flrg.get_key()].append_rhs(st)
 
-        # flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1))
-
-    def train(self, data, **kwargs):
-
-        if kwargs.get('order', None) is not None:
-            self.order = kwargs.get('order', 1)
-
-        if kwargs.get('sets', None) is not None:
-            self.sets = kwargs.get('sets', None)
-
-        window_size = kwargs.get('parameters', 1)
-        self.generate_flrg(data, window_size=window_size)
-
-    def _affected_flrgs(self, sample, k, time_displacement, window_size):
-        # print("input: " + str(ndata[k]))
+    def _affected_flrgs(self, sample, perturb):
 
         affected_flrgs = []
        affected_flrgs_memberships = []
 
-        lags = {}
+        lags = []
 
        for ct, dat in enumerate(sample):
-            tdisp = common.window_index((k + time_displacement) - (self.order - ct), window_size)
+            affected_sets = [[ct, self.partitioner.sets[key].membership(dat, perturb[ct])]
+                             for ct, key in enumerate(self.partitioner.ordered_sets)
+                             if self.partitioner.sets[key].membership(dat, perturb[ct]) > 0.0]
 
-            sel = [ct for ct, key in enumerate(self.partitioner.ordered_sets)
-                   if self.sets[key].membership(dat, tdisp) > 0.0]
+            if len(affected_sets) == 0:
 
-            if len(sel) == 0:
-                sel.append(common.check_bounds_index(dat, self.partitioner, tdisp))
+                if dat < self.partitioner.lower_set().get_lower(perturb[0]):
+                    affected_sets.append([0, 1])
+                elif dat > self.partitioner.upper_set().get_upper(perturb[-1]):
+                    affected_sets.append([self.partitioner.partitions - 1, 1])
 
-            lags[ct] = sel
+            lags.append([a[0] for a in affected_sets])
 
        # Build the tree with all possible paths
-        root = tree.FLRGTreeNode(None)
+        # Trace the possible paths
+        for path in product(*lags):
 
-        tree.build_tree_without_order(root, lags, 0)
-
-        # Trace the possible paths and build the PFLRG's
-
-        for p in root.paths():
-            path = list(reversed(list(filter(None.__ne__, p))))
            flrg = HighOrderNonStationaryFLRG(self.order)
 
            for kk in path:
-                flrg.append_lhs(self.sets[self.partitioner.ordered_sets[kk]])
+                flrg.append_lhs(self.partitioner.ordered_sets[kk])
 
            affected_flrgs.append(flrg)
-            # affected_flrgs_memberships.append_rhs(flrg.get_membership(sample, disp))
-
-            # print(flrg.get_key())
-
-            # the FLRG is here because of the bounds verification
            mv = []
            for ct, dat in enumerate(sample):
-                td = common.window_index((k + time_displacement) - (self.order - ct), window_size)
-                tmp = flrg.LHS[ct].membership(dat, td)
-
+                fset = self.partitioner.sets[flrg.LHS[ct]]
+                ix = self.partitioner.ordered_sets.index(flrg.LHS[ct])
+                tmp = fset.membership(dat, perturb[ix])
                mv.append(tmp)
-                # print(mv)
 
            affected_flrgs_memberships.append(np.prod(mv))
@@ -150,100 +183,67 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
 
     def forecast(self, ndata, **kwargs):
 
-        time_displacement = kwargs.get("time_displacement",0)
+        explain = kwargs.get('explain', False)
 
-        window_size = kwargs.get("window_size", 1)
-
-        l = len(ndata)
-
-        ret = []
-
-        for k in np.arange(self.order, l+1):
-
-            sample = ndata[k - self.order: k]
-
-            affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
-                                                                              time_displacement, window_size)
-
-            #print([str(k) for k in affected_flrgs])
-            #print(affected_flrgs_memberships)
-
-            tmp = []
-            tdisp = common.window_index(k + time_displacement, window_size)
-            if len(affected_flrgs) == 0:
-                tmp.append(common.check_bounds(sample[-1], self.sets, tdisp))
-            elif len(affected_flrgs) == 1:
-                flrg = affected_flrgs[0]
-                if flrg.get_key() in self.flrgs:
-                    tmp.append(self.flrgs[flrg.get_key()].get_midpoint(tdisp))
-                else:
-                    tmp.append(flrg.LHS[-1].get_midpoint(tdisp))
-            else:
-                for ct, aset in enumerate(affected_flrgs):
-                    if aset.get_key() in self.flrgs:
-                        tmp.append(self.flrgs[aset.get_key()].get_midpoint(tdisp) *
-                                   affected_flrgs_memberships[ct])
-                    else:
-                        tmp.append(aset.LHS[-1].get_midpoint(tdisp)*
-                                   affected_flrgs_memberships[ct])
-            pto = sum(tmp)
-
-            #print(pto)
-
-            ret.append(pto)
-
-        return ret
-
-    def forecast_interval(self, ndata, **kwargs):
+        fuzzyfied = kwargs.get('fuzzyfied', False)
 
         time_displacement = kwargs.get("time_displacement", 0)
 
         window_size = kwargs.get("window_size", 1)
 
-        l = len(ndata)
+        no_update = kwargs.get("no_update", False)
+
+        mode = kwargs.get('mode', 'mean')
 
         ret = []
 
-        for k in np.arange(self.order, l + 1):
+        l = len(ndata) if not explain else self.max_lag + 1
 
-            sample = ndata[k - self.order: k]
+        if l < self.max_lag:
+            return ndata
+        elif l == self.max_lag:
+            l += 1
 
-            affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
-                                                                              time_displacement, window_size)
+        for k in np.arange(self.max_lag, l):
 
-            # print([str(k) for k in affected_flrgs])
-            # print(affected_flrgs_memberships)
+            sample = ndata[k - self.max_lag: k]
 
-            upper = []
-            lower = []
-
-            tdisp = common.window_index(k + time_displacement, window_size)
-            if len(affected_flrgs) == 0:
-                aset = common.check_bounds(sample[-1], self.sets, tdisp)
-                lower.append(aset.get_lower(tdisp))
-                upper.append(aset.get_upper(tdisp))
-            elif len(affected_flrgs) == 1:
-                _flrg = affected_flrgs[0]
-                if _flrg.get_key() in self.flrgs:
-                    lower.append(self.flrgs[_flrg.get_key()].get_lower(tdisp))
-                    upper.append(self.flrgs[_flrg.get_key()].get_upper(tdisp))
+            if self.method == 'unconditional':
+                perturb = common.window_index(k + time_displacement, window_size)
+            elif self.method == 'conditional':
+                if no_update:
+                    perturb = [[0, 1] for k in np.arange(self.partitioner.partitions)]
                 else:
-                    lower.append(_flrg.LHS[-1].get_lower(tdisp))
-                    upper.append(_flrg.LHS[-1].get_upper(tdisp))
+                    perturb = self.conditional_perturbation_factors(sample[0])
+
+
+            affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, perturb)
+
+            tmp = []
+
+            perturb2 = perturb[0]
+            if len(affected_flrgs) == 0:
+                tmp.append(common.check_bounds(sample[-1], self.partitioner.sets, perturb2))
+            elif len(affected_flrgs) == 1:
+                flrg = affected_flrgs[0]
+                if flrg.get_key() in self.flrgs:
+                    tmp.append(self.flrgs[flrg.get_key()].get_midpoint(self.partitioner.sets, perturb2))
+                else:
+                    fset = self.partitioner.sets[flrg.LHS[-1]]
+                    ix = self.partitioner.ordered_sets.index(flrg.LHS[-1])
+                    tmp.append(fset.get_midpoint(perturb[ix]))
             else:
                 for ct, aset in enumerate(affected_flrgs):
                     if aset.get_key() in self.flrgs:
-                        lower.append(self.flrgs[aset.get_key()].get_lower(tdisp) *
-                                     affected_flrgs_memberships[ct])
-                        upper.append(self.flrgs[aset.get_key()].get_upper(tdisp) *
-                                     affected_flrgs_memberships[ct])
+
+                        tmp.append(self.flrgs[aset.get_key()].get_midpoint(self.partitioner.sets, perturb2) *
+                                   affected_flrgs_memberships[ct])
                     else:
-                        lower.append(aset.LHS[-1].get_lower(tdisp) *
-                                     affected_flrgs_memberships[ct])
-                        upper.append(aset.LHS[-1].get_upper(tdisp) *
-                                     affected_flrgs_memberships[ct])
-
-            ret.append([sum(lower), sum(upper)])
+                        fset = self.partitioner.sets[aset.LHS[-1]]
+                        ix = self.partitioner.ordered_sets.index(aset.LHS[-1])
+                        tmp.append(fset.get_midpoint(perturb[ix])*affected_flrgs_memberships[ct])
+            pto = sum(tmp)
+            ret.append(pto)
 
         return ret
diff --git a/pyFTS/models/nonstationary/partitioners.py b/pyFTS/models/nonstationary/partitioners.py
index 236b935..14dcd46 100644
--- a/pyFTS/models/nonstationary/partitioners.py
+++ b/pyFTS/models/nonstationary/partitioners.py
@@ -120,6 +120,7 @@ class SimpleNonStationaryPartitioner(partitioner.Partitioner):
                                         func=part.membership_function, names=part.setnames,
                                         prefix=part.prefix, transformation=part.transformation,
                                         indexer=part.indexer)#, preprocess=False)
+        self.partitions = part.partitions
 
         for key in part.sets.keys():
             set = part.sets[key]
diff --git a/pyFTS/tests/nonstationary.py b/pyFTS/tests/nonstationary.py
index ea48c37..2974cf5 100644
--- a/pyFTS/tests/nonstationary.py
+++ b/pyFTS/tests/nonstationary.py
@@ -12,155 +12,26 @@ from pyFTS.data import TAIEX, NASDAQ, SP500, artificial, mackey_glass
 
 #mackey_glass.get_data()
 
-datasets = {
-    "TAIEX": TAIEX.get_data()[:4000],
-    "SP500": SP500.get_data()[10000:14000],
-    "NASDAQ": NASDAQ.get_data()[:4000],
-    # Incremental Mean and Incremental Variance
-    "IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
-    # Incremental Mean and Incremental Variance, lower bound equals to 0
-    "IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
-    # Constant Mean and Incremental Variance
-    "CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
-    # Incremental Mean and Constant Variance
-    "IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
-}
+dataset = TAIEX.get_data()
 
-train_split = 2000
+train_split = 1000
 test_length = 200
 
 from pyFTS.common import Transformations
-tdiff = Transformations.Differential(1)
-
-boxcox = Transformations.BoxCox(0)
-
-transformations = {
-    'None': None,
-    'Differential(1)': tdiff,
-    'BoxCox(0)': boxcox
-}
 
 from pyFTS.partitioners import Grid, Util as pUtil
 from pyFTS.benchmarks import benchmarks as bchmk
 from pyFTS.models import chen, hofts, pwfts, hwang
 
-partitions = {'CMIV': {'BoxCox(0)': 36, 'Differential(1)': 11, 'None': 8},
-              'IMCV': {'BoxCox(0)': 36, 'Differential(1)': 20, 'None': 16},
-              'IMIV': {'BoxCox(0)': 39, 'Differential(1)': 12, 'None': 6},
-              'IMIV0': {'BoxCox(0)': 39, 'Differential(1)': 12, 'None': 3},
-              'NASDAQ': {'BoxCox(0)': 39, 'Differential(1)': 13, 'None': 36},
-              'SP500': {'BoxCox(0)': 33, 'Differential(1)': 7, 'None': 33},
-              'TAIEX': {'BoxCox(0)': 39, 'Differential(1)': 31, 'None': 33}}
+train = dataset[:1000]
+test = dataset[1000:]
 
-from pyFTS.models.nonstationary import partitioners as nspart, cvfts, util as nsUtil
-'''
-#fs = nspart.simplenonstationary_gridpartitioner_builder(data=datasets['SP500'][:300],
-#                                                        npart=partitions['SP500']['None'],
-#                                                        transformation=None)
-fs = Grid.GridPartitioner(data=datasets['SP500'][:300],
-                          npart=15,
-                          transformation=None)
-fig, axes = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
-
-fs.plot(axes)
-
-from pyFTS.common import Util
-
-Util.show_and_save_image(fig, "fig2.png", True)
-
-#nsUtil.plot_sets(fs)
-
-
-'''
-def model_details(ds, tf, train_split, test_split):
-    data = datasets[ds]
-    train = data[:train_split]
-    test = data[train_split:test_split]
-    transformation = transformations[tf]
-    fs = nspart.simplenonstationary_gridpartitioner_builder(data=train, npart=15, #partitions[ds][tf],
-                                                            transformation=transformation)
-    model = nsfts.NonStationaryFTS(partitioner=fs)
-    model.fit(train)
-    print(model)
-    forecasts = model.predict(test)
-    residuals = np.array(test[1:]) - np.array(forecasts[:-1])
-
-    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=[15, 10])
-
-    axes[0].plot(test[1:], label="Original")
-    axes[0].plot(forecasts[:-1], label="Forecasts")
-    axes[0].set_ylabel("Univ. of Discourse")
-
-    #axes[1].set_title("Residuals")
-    axes[1].plot(residuals)
-    axes[1].set_ylabel("Error")
-    handles0, labels0 = axes[0].get_legend_handles_labels()
-    lgd = axes[0].legend(handles0, labels0, loc=2)
-
-    nsUtil.plot_sets_conditional(model, test, step=10, size=[10, 7],
-                                 save=True,file="fig.png", axes=axes[2], fig=fig)
-
-model_details('SP500','None',200,400)
-#'''
-print("ts")
-'''
-tag = 'benchmarks'
-
-
-for ds in datasets.keys():
-    dataset = datasets[ds]
-
-    for tf in transformations.keys():
-        transformation = transformations[tf]
-
-        partitioning = partitions[ds][tf]
-
-        bchmk.sliding_window_benchmarks(dataset, 3000, train=0.1, inc=0.1,
-                                        #methods=[
-                                        #    hwang.HighOrderFTS,
-                                        #    hofts.HighOrderFTS,
-                                        #    pwfts.ProbabilisticWeightedFTS],
-                                        #orders = [3],
-                                        benchmark_models=False,
-                                        transformations=[transformation],
-                                        partitions=[partitioning],
-                                        progress=False, type='point',
-                                        file="nsfts_benchmarks.db", dataset=ds, tag=tag)
-
-train_split = 200
-test_split = 2000
-for ds in datasets.keys():
-    dataset = datasets[ds]
-
-    print(ds)
-
-    for tf in ['None']: #transformations.keys():
-        transformation = transformations[tf]
-        train = dataset[:train_split]
-        test = dataset[train_split:test_split]
-
-        fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,
-                                                                npart=partitions[ds][tf],
-                                                                transformation=transformation)
-        print(fs)
-        #cvfts1 = cvfts.ConditionalVarianceFTS(partitioner=fs)
-        model = nsfts.NonStationaryFTS(partitioner=fs)
-        model.fit(train)
-        print(model)
-
-        forecasts = model.predict(test)
-
-        #print(forecasts)
-
-        partitioning = partitions[ds][tf]
-
-        bchmk.sliding_window_benchmarks(dataset, 2000, train=0.2, inc=0.2,
-                                        benchmark_models=False,
-                                        methods=[cvfts.ConditionalVarianceFTS],
-                                        partitioners_methods=[nspart.simplenonstationary_gridpartitioner_builder],
-                                        transformations=[transformation],
-                                        partitions=[partitioning],
-                                        progress=False, type='point',
-                                        file="nsfts_benchmarks.db", dataset=ds, tag=tag)
-'''
+from pyFTS.models.nonstationary import partitioners as nspart, nsfts, honsfts
+
+fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,npart=35,transformation=None)
+print(fs)
+model = honsfts.HighOrderNonStationaryFTS(partitioner=fs, order=2)
+#model = nsfts.NonStationaryFTS(partitioner=fs)
+model.fit(train)
+forecasts = model.predict(test)
+print(forecasts)
\ No newline at end of file