High Order NSFTS refactoring

This commit is contained in:
Petrônio Cândido 2019-11-26 23:54:49 -03:00
parent bc6a632a6d
commit 4552bb6bab
4 changed files with 160 additions and 289 deletions

View File

@ -55,7 +55,6 @@ class FuzzySet(FS.FuzzySet):
inc = t inc = t
else: else:
l = len(self.location) l = len(self.location)
inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)]) inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)])
if self.mf == Membership.gaussmf: if self.mf == Membership.gaussmf:

View File

@ -1,7 +1,8 @@
import numpy as np import numpy as np
from pyFTS.common import FuzzySet, FLR, fts, tree from pyFTS.common import FuzzySet, FLR, fts
from pyFTS.models import hofts from pyFTS.models import hofts
from pyFTS.models.nonstationary import common, flrg from pyFTS.models.nonstationary import common, flrg, nsfts
from itertools import product
class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG): class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
@ -11,67 +12,121 @@ class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
self.LHS = [] self.LHS = []
self.RHS = {} self.RHS = {}
self.count = 0.0
self.strlhs = ""
self.w = None
def append_rhs(self, c, **kwargs): def append_rhs(self, fset, **kwargs):
if c.name not in self.RHS: count = kwargs.get('count',1.0)
self.RHS[c.name] = c if fset not in self.RHS:
self.RHS[fset] = count
else:
self.RHS[fset] += count
self.count += count
def append_lhs(self, c): def append_lhs(self, c):
self.LHS.append(c) self.LHS.append(c)
def weights(self):
if self.w is None:
self.w = np.array([self.RHS[c] / self.count for c in self.RHS.keys()])
return self.w
def get_midpoint(self, sets, perturb):
mp = np.array([sets[c].get_midpoint(perturb) for c in self.RHS.keys()])
midpoint = mp.dot(self.weights())
return midpoint
def get_lower(self, sets, perturb):
lw = np.array([sets[s].get_lower(perturb) for s in self.RHS.keys()])
lower = lw.dot(self.weights())
return lower
def get_upper(self, sets, perturb):
up = np.array([sets[s].get_upper(perturb) for s in self.RHS.keys()])
upper = up.dot(self.weights())
return upper
def __str__(self): def __str__(self):
tmp = "" _str = ""
for c in sorted(self.RHS): for k in self.RHS.keys():
if len(tmp) > 0: _str += ", " if len(_str) > 0 else ""
tmp = tmp + "," _str += k + " (" + str(round(self.RHS[k] / self.count, 3)) + ")"
tmp = tmp + c
return self.get_key() + " -> " + tmp return self.get_key() + " -> " + _str
def __len__(self):
return len(self.RHS)
class HighOrderNonStationaryFTS(hofts.HighOrderFTS): class HighOrderNonStationaryFTS(nsfts.NonStationaryFTS):
"""NonStationaryFTS Fuzzy Time Series""" """NonStationaryFTS Fuzzy Time Series"""
def __init__(self, name, **kwargs): def __init__(self, **kwargs):
super(HighOrderNonStationaryFTS, self).__init__("HONSFTS " + name, **kwargs) super(HighOrderNonStationaryFTS, self).__init__(**kwargs)
self.name = "High Order Non Stationary FTS" self.name = "High Order Non Stationary FTS"
self.shortname = "HONSFTS"
self.detail = "" self.detail = ""
self.flrgs = {} self.flrgs = {}
self.is_high_order = True
self.order = kwargs.get("order",2)
self.configure_lags(**kwargs)
def configure_lags(self, **kwargs):
if "order" in kwargs:
self.order = kwargs.get("order", self.min_order)
if "lags" in kwargs:
self.lags = kwargs.get("lags", None)
if self.lags is not None:
self.max_lag = max(self.lags)
else:
self.max_lag = self.order
self.lags = np.arange(1, self.order + 1)
def train(self, data, **kwargs):
self.generate_flrg(data)
if self.method == 'conditional':
self.forecasts = self.forecast(data, no_update=True)
self.residuals = np.array(data[self.order:]) - np.array(self.forecasts)
self.variance_residual = np.var(self.residuals) # np.max(self.residuals
self.mean_residual = np.mean(self.residuals)
self.residuals = self.residuals[-self.memory_window:].tolist()
self.forecasts = self.forecasts[-self.memory_window:]
self.inputs = np.array(data[-self.memory_window:]).tolist()
def generate_flrg(self, data, **kwargs): def generate_flrg(self, data, **kwargs):
l = len(data) l = len(data)
window_size = kwargs.get("window_size", 1)
for k in np.arange(self.order, l): for k in np.arange(self.order, l):
if self.dump: print("FLR: " + str(k)) if self.dump: print("FLR: " + str(k))
sample = data[k - self.order: k] sample = data[k - self.order: k]
disp = common.window_index(k, window_size) rhs = [key for key in self.partitioner.ordered_sets
if self.partitioner.sets[key].membership(data[k], [0,1]) > 0.0]
rhs = [self.sets[key] for key in self.partitioner.ordered_sets
if self.sets[key].membership(data[k], disp) > 0.0]
if len(rhs) == 0: if len(rhs) == 0:
rhs = [common.check_bounds(data[k], self.partitioner, disp)] rhs = [common.check_bounds(data[k], self.partitioner, [0,1]).name]
lags = {} lags = []
for o in np.arange(0, self.order): for o in np.arange(0, self.order):
tdisp = common.window_index(k - (self.order - o), window_size) tdisp = [0,1]
lhs = [self.sets[key] for key in self.partitioner.ordered_sets lhs = [key for key in self.partitioner.ordered_sets
if self.sets[key].membership(sample[o], tdisp) > 0.0] if self.partitioner.sets[key].membership(sample[o], tdisp) > 0.0]
if len(lhs) == 0: if len(lhs) == 0:
lhs = [common.check_bounds(sample[o], self.partitioner, tdisp)] lhs = [common.check_bounds(sample[o], self.partitioner, tdisp).name]
lags[o] = lhs lags.append(lhs)
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
# Trace the possible paths # Trace the possible paths
for p in root.paths(): for path in product(*lags):
flrg = HighOrderNonStationaryFLRG(self.order) flrg = HighOrderNonStationaryFLRG(self.order)
path = list(reversed(list(filter(None.__ne__, p))))
for c, e in enumerate(path, start=0): for c, e in enumerate(path, start=0):
flrg.append_lhs(e) flrg.append_lhs(e)
@ -82,67 +137,45 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
for st in rhs: for st in rhs:
self.flrgs[flrg.get_key()].append_rhs(st) self.flrgs[flrg.get_key()].append_rhs(st)
# flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1)) def _affected_flrgs(self, sample, perturb):
def train(self, data, **kwargs):
if kwargs.get('order', None) is not None:
self.order = kwargs.get('order', 1)
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
window_size = kwargs.get('parameters', 1)
self.generate_flrg(data, window_size=window_size)
def _affected_flrgs(self, sample, k, time_displacement, window_size):
# print("input: " + str(ndata[k]))
affected_flrgs = [] affected_flrgs = []
affected_flrgs_memberships = [] affected_flrgs_memberships = []
lags = {} lags = []
for ct, dat in enumerate(sample): for ct, dat in enumerate(sample):
tdisp = common.window_index((k + time_displacement) - (self.order - ct), window_size) affected_sets = [[ct, self.partitioner.sets[key].membership(dat, perturb[ct])]
for ct, key in enumerate(self.partitioner.ordered_sets)
if self.partitioner.sets[key].membership(dat, perturb[ct]) > 0.0]
sel = [ct for ct, key in enumerate(self.partitioner.ordered_sets) if len(affected_sets) == 0:
if self.sets[key].membership(dat, tdisp) > 0.0]
if len(sel) == 0: if dat < self.partitioner.lower_set().get_lower(perturb[0]):
sel.append(common.check_bounds_index(dat, self.partitioner, tdisp)) affected_sets.append([0, 1])
elif dat > self.partitioner.upper_set().get_upper(perturb[-1]):
affected_sets.append([self.partitioner.partitions - 1, 1])
lags[ct] = sel lags.append([a[0] for a in affected_sets])
# Build the tree with all possible paths # Build the tree with all possible paths
root = tree.FLRGTreeNode(None) # Trace the possible paths
for path in product(*lags):
tree.build_tree_without_order(root, lags, 0)
# Trace the possible paths and build the PFLRG's
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
flrg = HighOrderNonStationaryFLRG(self.order) flrg = HighOrderNonStationaryFLRG(self.order)
for kk in path: for kk in path:
flrg.append_lhs(self.sets[self.partitioner.ordered_sets[kk]]) flrg.append_lhs(self.partitioner.ordered_sets[kk])
affected_flrgs.append(flrg) affected_flrgs.append(flrg)
# affected_flrgs_memberships.append_rhs(flrg.get_membership(sample, disp))
# print(flrg.get_key())
# the FLRG is here because of the bounds verification
mv = [] mv = []
for ct, dat in enumerate(sample): for ct, dat in enumerate(sample):
td = common.window_index((k + time_displacement) - (self.order - ct), window_size) fset = self.partitioner.sets[flrg.LHS[ct]]
tmp = flrg.LHS[ct].membership(dat, td) ix = self.partitioner.ordered_sets.index(flrg.LHS[ct])
tmp = fset.membership(dat, perturb[ix])
mv.append(tmp) mv.append(tmp)
# print(mv)
affected_flrgs_memberships.append(np.prod(mv)) affected_flrgs_memberships.append(np.prod(mv))
@ -150,100 +183,67 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
def forecast(self, ndata, **kwargs): def forecast(self, ndata, **kwargs):
time_displacement = kwargs.get("time_displacement",0) explain = kwargs.get('explain', False)
window_size = kwargs.get("window_size", 1) fuzzyfied = kwargs.get('fuzzyfied', False)
l = len(ndata)
ret = []
for k in np.arange(self.order, l+1):
sample = ndata[k - self.order: k]
affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
time_displacement, window_size)
#print([str(k) for k in affected_flrgs])
#print(affected_flrgs_memberships)
tmp = []
tdisp = common.window_index(k + time_displacement, window_size)
if len(affected_flrgs) == 0:
tmp.append(common.check_bounds(sample[-1], self.sets, tdisp))
elif len(affected_flrgs) == 1:
flrg = affected_flrgs[0]
if flrg.get_key() in self.flrgs:
tmp.append(self.flrgs[flrg.get_key()].get_midpoint(tdisp))
else:
tmp.append(flrg.LHS[-1].get_midpoint(tdisp))
else:
for ct, aset in enumerate(affected_flrgs):
if aset.get_key() in self.flrgs:
tmp.append(self.flrgs[aset.get_key()].get_midpoint(tdisp) *
affected_flrgs_memberships[ct])
else:
tmp.append(aset.LHS[-1].get_midpoint(tdisp)*
affected_flrgs_memberships[ct])
pto = sum(tmp)
#print(pto)
ret.append(pto)
return ret
def forecast_interval(self, ndata, **kwargs):
time_displacement = kwargs.get("time_displacement", 0) time_displacement = kwargs.get("time_displacement", 0)
window_size = kwargs.get("window_size", 1) window_size = kwargs.get("window_size", 1)
l = len(ndata) no_update = kwargs.get("no_update", False)
mode = kwargs.get('mode', 'mean')
ret = [] ret = []
for k in np.arange(self.order, l + 1): l = len(ndata) if not explain else self.max_lag + 1
sample = ndata[k - self.order: k] if l < self.max_lag:
return ndata
elif l == self.max_lag:
l += 1
affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k, for k in np.arange(self.max_lag, l):
time_displacement, window_size)
# print([str(k) for k in affected_flrgs]) sample = ndata[k - self.max_lag: k]
# print(affected_flrgs_memberships)
upper = [] if self.method == 'unconditional':
lower = [] perturb = common.window_index(k + time_displacement, window_size)
elif self.method == 'conditional':
tdisp = common.window_index(k + time_displacement, window_size) if no_update:
if len(affected_flrgs) == 0: perturb = [[0, 1] for k in np.arange(self.partitioner.partitions)]
aset = common.check_bounds(sample[-1], self.sets, tdisp)
lower.append(aset.get_lower(tdisp))
upper.append(aset.get_upper(tdisp))
elif len(affected_flrgs) == 1:
_flrg = affected_flrgs[0]
if _flrg.get_key() in self.flrgs:
lower.append(self.flrgs[_flrg.get_key()].get_lower(tdisp))
upper.append(self.flrgs[_flrg.get_key()].get_upper(tdisp))
else: else:
lower.append(_flrg.LHS[-1].get_lower(tdisp)) perturb = self.conditional_perturbation_factors(sample[0])
upper.append(_flrg.LHS[-1].get_upper(tdisp))
affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, perturb)
tmp = []
perturb2 = perturb[0]
if len(affected_flrgs) == 0:
tmp.append(common.check_bounds(sample[-1], self.partitioner.sets, perturb2))
elif len(affected_flrgs) == 1:
flrg = affected_flrgs[0]
if flrg.get_key() in self.flrgs:
tmp.append(self.flrgs[flrg.get_key()].get_midpoint(self.partitioner.sets, perturb2))
else:
fset = self.partitioner.sets[flrg.LHS[-1]]
ix = self.partitioner.ordered_sets.index(flrg.LHS[-1])
tmp.append(fset.get_midpoint(perturb[ix]))
else: else:
for ct, aset in enumerate(affected_flrgs): for ct, aset in enumerate(affected_flrgs):
if aset.get_key() in self.flrgs: if aset.get_key() in self.flrgs:
lower.append(self.flrgs[aset.get_key()].get_lower(tdisp) *
affected_flrgs_memberships[ct]) tmp.append(self.flrgs[aset.get_key()].get_midpoint(self.partitioner.sets, perturb2) *
upper.append(self.flrgs[aset.get_key()].get_upper(tdisp) * affected_flrgs_memberships[ct])
affected_flrgs_memberships[ct])
else: else:
lower.append(aset.LHS[-1].get_lower(tdisp) * fset = self.partitioner.sets[aset.LHS[-1]]
affected_flrgs_memberships[ct]) ix = self.partitioner.ordered_sets.index(aset.LHS[-1])
upper.append(aset.LHS[-1].get_upper(tdisp) * tmp.append(fset.get_midpoint(perturb[ix])*affected_flrgs_memberships[ct])
affected_flrgs_memberships[ct]) pto = sum(tmp)
ret.append([sum(lower), sum(upper)])
ret.append(pto)
return ret return ret

View File

@ -120,6 +120,7 @@ class SimpleNonStationaryPartitioner(partitioner.Partitioner):
func=part.membership_function, names=part.setnames, func=part.membership_function, names=part.setnames,
prefix=part.prefix, transformation=part.transformation, prefix=part.prefix, transformation=part.transformation,
indexer=part.indexer)#, preprocess=False) indexer=part.indexer)#, preprocess=False)
self.partitions = part.partitions
for key in part.sets.keys(): for key in part.sets.keys():
set = part.sets[key] set = part.sets[key]

View File

@ -12,155 +12,26 @@ from pyFTS.data import TAIEX, NASDAQ, SP500, artificial, mackey_glass
#mackey_glass.get_data() #mackey_glass.get_data()
datasets = { dataset = TAIEX.get_data()
"TAIEX": TAIEX.get_data()[:4000],
"SP500": SP500.get_data()[10000:14000],
"NASDAQ": NASDAQ.get_data()[:4000],
# Incremental Mean and Incremental Variance
"IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
# Incremental Mean and Incremental Variance, lower bound equals to 0
"IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
# Constant Mean and Incremental Variance
"CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
# Incremental Mean and Constant Variance
"IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
}
train_split = 2000 train_split = 1000
test_length = 200 test_length = 200
from pyFTS.common import Transformations from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
boxcox = Transformations.BoxCox(0)
transformations = {
'None': None,
'Differential(1)': tdiff,
'BoxCox(0)': boxcox
}
from pyFTS.partitioners import Grid, Util as pUtil from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import chen, hofts, pwfts, hwang from pyFTS.models import chen, hofts, pwfts, hwang
partitions = {'CMIV': {'BoxCox(0)': 36, 'Differential(1)': 11, 'None': 8}, train = dataset[:1000]
'IMCV': {'BoxCox(0)': 36, 'Differential(1)': 20, 'None': 16}, test = dataset[1000:]
'IMIV': {'BoxCox(0)': 39, 'Differential(1)': 12, 'None': 6},
'IMIV0': {'BoxCox(0)': 39, 'Differential(1)': 12, 'None': 3},
'NASDAQ': {'BoxCox(0)': 39, 'Differential(1)': 13, 'None': 36},
'SP500': {'BoxCox(0)': 33, 'Differential(1)': 7, 'None': 33},
'TAIEX': {'BoxCox(0)': 39, 'Differential(1)': 31, 'None': 33}}
from pyFTS.models.nonstationary import partitioners as nspart, cvfts, util as nsUtil from pyFTS.models.nonstationary import partitioners as nspart, nsfts, honsfts
''' fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,npart=35,transformation=None)
#fs = nspart.simplenonstationary_gridpartitioner_builder(data=datasets['SP500'][:300], print(fs)
# npart=partitions['SP500']['None'], model = honsfts.HighOrderNonStationaryFTS(partitioner=fs, order=2)
# transformation=None) #model = nsfts.NonStationaryFTS(partitioner=fs)
fs = Grid.GridPartitioner(data=datasets['SP500'][:300], model.fit(train)
npart=15, forecasts = model.predict(test)
transformation=None) print(forecasts)
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
fs.plot(axes)
from pyFTS.common import Util
Util.show_and_save_image(fig, "fig2.png", True)
#nsUtil.plot_sets(fs)
'''
def model_details(ds, tf, train_split, test_split):
data = datasets[ds]
train = data[:train_split]
test = data[train_split:test_split]
transformation = transformations[tf]
fs = nspart.simplenonstationary_gridpartitioner_builder(data=train, npart=15, #partitions[ds][tf],
transformation=transformation)
model = nsfts.NonStationaryFTS(partitioner=fs)
model.fit(train)
print(model)
forecasts = model.predict(test)
residuals = np.array(test[1:]) - np.array(forecasts[:-1])
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=[15, 10])
axes[0].plot(test[1:], label="Original")
axes[0].plot(forecasts[:-1], label="Forecasts")
axes[0].set_ylabel("Univ. of Discourse")
#axes[1].set_title("Residuals")
axes[1].plot(residuals)
axes[1].set_ylabel("Error")
handles0, labels0 = axes[0].get_legend_handles_labels()
lgd = axes[0].legend(handles0, labels0, loc=2)
nsUtil.plot_sets_conditional(model, test, step=10, size=[10, 7],
save=True,file="fig.png", axes=axes[2], fig=fig)
model_details('SP500','None',200,400)
#'''
print("ts")
'''
tag = 'benchmarks'
for ds in datasets.keys():
dataset = datasets[ds]
for tf in transformations.keys():
transformation = transformations[tf]
partitioning = partitions[ds][tf]
bchmk.sliding_window_benchmarks(dataset, 3000, train=0.1, inc=0.1,
#methods=[
# hwang.HighOrderFTS,
# hofts.HighOrderFTS,
# pwfts.ProbabilisticWeightedFTS],
#orders = [3],
benchmark_models=False,
transformations=[transformation],
partitions=[partitioning],
progress=False, type='point',
file="nsfts_benchmarks.db", dataset=ds, tag=tag)
train_split = 200
test_split = 2000
for ds in datasets.keys():
dataset = datasets[ds]
print(ds)
for tf in ['None']: #transformations.keys():
transformation = transformations[tf]
train = dataset[:train_split]
test = dataset[train_split:test_split]
fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,
npart=partitions[ds][tf],
transformation=transformation)
print(fs)
#cvfts1 = cvfts.ConditionalVarianceFTS(partitioner=fs)
model = nsfts.NonStationaryFTS(partitioner=fs)
model.fit(train)
print(model)
forecasts = model.predict(test)
#print(forecasts)
partitioning = partitions[ds][tf]
bchmk.sliding_window_benchmarks(dataset, 2000, train=0.2, inc=0.2,
benchmark_models=False,
methods=[cvfts.ConditionalVarianceFTS],
partitioners_methods=[nspart.simplenonstationary_gridpartitioner_builder],
transformations=[transformation],
partitions=[partitioning],
progress=False, type='point',
file="nsfts_benchmarks.db", dataset=ds, tag=tag)
'''