High Order NSFTS refactoring

Petrônio Cândido 2019-11-26 23:54:49 -03:00
parent bc6a632a6d
commit 4552bb6bab
4 changed files with 160 additions and 289 deletions

pyFTS/models/nonstationary/common.py    View File

@@ -55,7 +55,6 @@ class FuzzySet(FS.FuzzySet):
             inc = t
         else:
             l = len(self.location)
-
             inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)])

         if self.mf == Membership.gaussmf:
pyFTS/models/nonstationary/honsfts.py    View File

@@ -1,7 +1,8 @@
 import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, tree
+from pyFTS.common import FuzzySet, FLR, fts
 from pyFTS.models import hofts
-from pyFTS.models.nonstationary import common, flrg
+from pyFTS.models.nonstationary import common, flrg, nsfts
+from itertools import product


 class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
@@ -11,67 +12,121 @@ class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
         self.LHS = []
         self.RHS = {}
         self.count = 0.0
-        self.strlhs = ""
+        self.w = None

-    def append_rhs(self, c, **kwargs):
-        if c.name not in self.RHS:
-            self.RHS[c.name] = c
+    def append_rhs(self, fset, **kwargs):
+        count = kwargs.get('count',1.0)
+        if fset not in self.RHS:
+            self.RHS[fset] = count
+        else:
+            self.RHS[fset] += count
+        self.count += count

     def append_lhs(self, c):
         self.LHS.append(c)

+    def weights(self):
+        if self.w is None:
+            self.w = np.array([self.RHS[c] / self.count for c in self.RHS.keys()])
+        return self.w
+
+    def get_midpoint(self, sets, perturb):
+        mp = np.array([sets[c].get_midpoint(perturb) for c in self.RHS.keys()])
+        midpoint = mp.dot(self.weights())
+        return midpoint
+
+    def get_lower(self, sets, perturb):
+        lw = np.array([sets[s].get_lower(perturb) for s in self.RHS.keys()])
+        lower = lw.dot(self.weights())
+        return lower
+
+    def get_upper(self, sets, perturb):
+        up = np.array([sets[s].get_upper(perturb) for s in self.RHS.keys()])
+        upper = up.dot(self.weights())
+        return upper
+
     def __str__(self):
-        tmp = ""
-        for c in sorted(self.RHS):
-            if len(tmp) > 0:
-                tmp = tmp + ","
-            tmp = tmp + c
-        return self.get_key() + " -> " + tmp
+        _str = ""
+        for k in self.RHS.keys():
+            _str += ", " if len(_str) > 0 else ""
+            _str += k + " (" + str(round(self.RHS[k] / self.count, 3)) + ")"
+        return self.get_key() + " -> " + _str
+
+    def __len__(self):
+        return len(self.RHS)


-class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
+class HighOrderNonStationaryFTS(nsfts.NonStationaryFTS):
     """NonStationaryFTS Fuzzy Time Series"""
-    def __init__(self, name, **kwargs):
-        super(HighOrderNonStationaryFTS, self).__init__("HONSFTS " + name, **kwargs)
+    def __init__(self, **kwargs):
+        super(HighOrderNonStationaryFTS, self).__init__(**kwargs)
+        self.name = "High Order Non Stationary FTS"
+        self.shortname = "HONSFTS"
         self.detail = ""
         self.flrgs = {}
+        self.is_high_order = True
+        self.order = kwargs.get("order",2)
+        self.configure_lags(**kwargs)
+
+    def configure_lags(self, **kwargs):
+        if "order" in kwargs:
+            self.order = kwargs.get("order", self.min_order)
+
+        if "lags" in kwargs:
+            self.lags = kwargs.get("lags", None)
+
+        if self.lags is not None:
+            self.max_lag = max(self.lags)
+        else:
+            self.max_lag = self.order
+            self.lags = np.arange(1, self.order + 1)
+
+    def train(self, data, **kwargs):
+        self.generate_flrg(data)
+
+        if self.method == 'conditional':
+            self.forecasts = self.forecast(data, no_update=True)
+            self.residuals = np.array(data[self.order:]) - np.array(self.forecasts)
+
+            self.variance_residual = np.var(self.residuals)  # np.max(self.residuals
+            self.mean_residual = np.mean(self.residuals)
+
+            self.residuals = self.residuals[-self.memory_window:].tolist()
+            self.forecasts = self.forecasts[-self.memory_window:]
+            self.inputs = np.array(data[-self.memory_window:]).tolist()

     def generate_flrg(self, data, **kwargs):
         l = len(data)
-        window_size = kwargs.get("window_size", 1)
         for k in np.arange(self.order, l):
             if self.dump: print("FLR: " + str(k))

             sample = data[k - self.order: k]

-            disp = common.window_index(k, window_size)
-            rhs = [self.sets[key] for key in self.partitioner.ordered_sets
-                   if self.sets[key].membership(data[k], disp) > 0.0]
+            rhs = [key for key in self.partitioner.ordered_sets
+                   if self.partitioner.sets[key].membership(data[k], [0,1]) > 0.0]

             if len(rhs) == 0:
-                rhs = [common.check_bounds(data[k], self.partitioner, disp)]
+                rhs = [common.check_bounds(data[k], self.partitioner, [0,1]).name]

-            lags = {}
+            lags = []

             for o in np.arange(0, self.order):
-                tdisp = common.window_index(k - (self.order - o), window_size)
-                lhs = [self.sets[key] for key in self.partitioner.ordered_sets
-                       if self.sets[key].membership(sample[o], tdisp) > 0.0]
+                tdisp = [0,1]
+                lhs = [key for key in self.partitioner.ordered_sets
+                       if self.partitioner.sets[key].membership(sample[o], tdisp) > 0.0]

                 if len(lhs) == 0:
-                    lhs = [common.check_bounds(sample[o], self.partitioner, tdisp)]
+                    lhs = [common.check_bounds(sample[o], self.partitioner, tdisp).name]

-                lags[o] = lhs
-
-            root = tree.FLRGTreeNode(None)
-            tree.build_tree_without_order(root, lags, 0)
+                lags.append(lhs)

             # Trace the possible paths
-            for p in root.paths():
+            for path in product(*lags):
                 flrg = HighOrderNonStationaryFLRG(self.order)
-                path = list(reversed(list(filter(None.__ne__, p))))

                 for c, e in enumerate(path, start=0):
                     flrg.append_lhs(e)
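Note on the FLRG refactor above: the RHS changes from a name-to-fuzzy-set map to a name-to-count map, so `get_midpoint`, `get_lower`, and `get_upper` become count-weighted averages instead of unweighted ones. A standalone sketch of the weighting, with illustrative set names and midpoints:

import numpy as np

# RHS after training: each consequent set name with its occurrence count
RHS = {'A2': 3.0, 'A3': 1.0}
count = sum(RHS.values())  # 4.0, maintained incrementally by append_rhs

# weights(): normalized counts, cached in self.w in the real class
w = np.array([RHS[k] / count for k in RHS.keys()])  # [0.75, 0.25]

# get_midpoint(): dot product of the (perturbed) set midpoints and the weights
midpoints = np.array([10.0, 20.0])  # hypothetical midpoints of A2 and A3
print(midpoints.dot(w))             # 0.75*10 + 0.25*20 = 12.5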
@@ -82,67 +137,45 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
                 for st in rhs:
                     self.flrgs[flrg.get_key()].append_rhs(st)

         # flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1))

-    def train(self, data, **kwargs):
-        if kwargs.get('order', None) is not None:
-            self.order = kwargs.get('order', 1)
-
-        if kwargs.get('sets', None) is not None:
-            self.sets = kwargs.get('sets', None)
-
-        window_size = kwargs.get('parameters', 1)
-        self.generate_flrg(data, window_size=window_size)
-
-    def _affected_flrgs(self, sample, k, time_displacement, window_size):
-        # print("input: " + str(ndata[k]))
+    def _affected_flrgs(self, sample, perturb):
         affected_flrgs = []
         affected_flrgs_memberships = []

-        lags = {}
+        lags = []
         for ct, dat in enumerate(sample):
-            tdisp = common.window_index((k + time_displacement) - (self.order - ct), window_size)
-            sel = [ct for ct, key in enumerate(self.partitioner.ordered_sets)
-                   if self.sets[key].membership(dat, tdisp) > 0.0]
+            affected_sets = [[ct, self.partitioner.sets[key].membership(dat, perturb[ct])]
+                             for ct, key in enumerate(self.partitioner.ordered_sets)
+                             if self.partitioner.sets[key].membership(dat, perturb[ct]) > 0.0]

-            if len(sel) == 0:
-                sel.append(common.check_bounds_index(dat, self.partitioner, tdisp))
+            if len(affected_sets) == 0:
+                if dat < self.partitioner.lower_set().get_lower(perturb[0]):
+                    affected_sets.append([0, 1])
+                elif dat > self.partitioner.upper_set().get_upper(perturb[-1]):
+                    affected_sets.append([self.partitioner.partitions - 1, 1])

-            lags[ct] = sel
+            lags.append([a[0] for a in affected_sets])

-        # Build the tree with all possible paths
-        root = tree.FLRGTreeNode(None)
-        tree.build_tree_without_order(root, lags, 0)
-
-        # Trace the possible paths and build the PFLRG's
-        for p in root.paths():
-            path = list(reversed(list(filter(None.__ne__, p))))
+        # Trace the possible paths
+        for path in product(*lags):
             flrg = HighOrderNonStationaryFLRG(self.order)

             for kk in path:
-                flrg.append_lhs(self.sets[self.partitioner.ordered_sets[kk]])
+                flrg.append_lhs(self.partitioner.ordered_sets[kk])

             affected_flrgs.append(flrg)
             # affected_flrgs_memberships.append_rhs(flrg.get_membership(sample, disp))
             # print(flrg.get_key())

             # the FLRG is here because of the bounds verification
             mv = []
             for ct, dat in enumerate(sample):
-                td = common.window_index((k + time_displacement) - (self.order - ct), window_size)
-                tmp = flrg.LHS[ct].membership(dat, td)
+                fset = self.partitioner.sets[flrg.LHS[ct]]
+                ix = self.partitioner.ordered_sets.index(flrg.LHS[ct])
+                tmp = fset.membership(dat, perturb[ix])
                 mv.append(tmp)
             # print(mv)

             affected_flrgs_memberships.append(np.prod(mv))
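Note on the hunk above: `itertools.product` replaces the removed `tree.FLRGTree` walk. It enumerates every combination of one candidate set per lag, which is exactly the set of rule antecedents the old tree traversal produced, minus the reverse/filter bookkeeping on each path. A minimal sketch with illustrative set names:

from itertools import product

# one list of candidate fuzzy-set names per lag (order 3, as in generate_flrg)
lags = [['A1', 'A2'], ['A2'], ['A2', 'A3']]

for path in product(*lags):
    # each path becomes the LHS of one HighOrderNonStationaryFLRG
    print(path)
# ('A1', 'A2', 'A2')
# ('A1', 'A2', 'A3')
# ('A2', 'A2', 'A2')
# ('A2', 'A2', 'A3')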
@@ -150,100 +183,67 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
     def forecast(self, ndata, **kwargs):
+        explain = kwargs.get('explain', False)
+        fuzzyfied = kwargs.get('fuzzyfied', False)
         time_displacement = kwargs.get("time_displacement", 0)
         window_size = kwargs.get("window_size", 1)
-        l = len(ndata)
+        no_update = kwargs.get("no_update", False)
+        mode = kwargs.get('mode', 'mean')

         ret = []

-        for k in np.arange(self.order, l+1):
-            sample = ndata[k - self.order: k]
+        l = len(ndata) if not explain else self.max_lag + 1

-            affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
-                                                                              time_displacement, window_size)
+        if l < self.max_lag:
+            return ndata
+        elif l == self.max_lag:
+            l += 1

-            #print([str(k) for k in affected_flrgs])
-            #print(affected_flrgs_memberships)
+        for k in np.arange(self.max_lag, l):
+
+            sample = ndata[k - self.max_lag: k]
+
+            if self.method == 'unconditional':
+                perturb = common.window_index(k + time_displacement, window_size)
+            elif self.method == 'conditional':
+                if no_update:
+                    perturb = [[0, 1] for k in np.arange(self.partitioner.partitions)]
+                else:
+                    perturb = self.conditional_perturbation_factors(sample[0])
+
+            affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, perturb)

             tmp = []

-            tdisp = common.window_index(k + time_displacement, window_size)
+            perturb2 = perturb[0]

             if len(affected_flrgs) == 0:
-                tmp.append(common.check_bounds(sample[-1], self.sets, tdisp))
+                tmp.append(common.check_bounds(sample[-1], self.partitioner.sets, perturb2))
             elif len(affected_flrgs) == 1:
                 flrg = affected_flrgs[0]
                 if flrg.get_key() in self.flrgs:
-                    tmp.append(self.flrgs[flrg.get_key()].get_midpoint(tdisp))
+                    tmp.append(self.flrgs[flrg.get_key()].get_midpoint(self.partitioner.sets, perturb2))
                 else:
-                    tmp.append(flrg.LHS[-1].get_midpoint(tdisp))
+                    fset = self.partitioner.sets[flrg.LHS[-1]]
+                    ix = self.partitioner.ordered_sets.index(flrg.LHS[-1])
+                    tmp.append(fset.get_midpoint(perturb[ix]))
             else:
                 for ct, aset in enumerate(affected_flrgs):
                     if aset.get_key() in self.flrgs:
-                        tmp.append(self.flrgs[aset.get_key()].get_midpoint(tdisp) *
+                        tmp.append(self.flrgs[aset.get_key()].get_midpoint(self.partitioner.sets, perturb2) *
                                    affected_flrgs_memberships[ct])
                     else:
-                        tmp.append(aset.LHS[-1].get_midpoint(tdisp)*
-                                   affected_flrgs_memberships[ct])
+                        fset = self.partitioner.sets[aset.LHS[-1]]
+                        ix = self.partitioner.ordered_sets.index(aset.LHS[-1])
+                        tmp.append(fset.get_midpoint(perturb[ix])*affected_flrgs_memberships[ct])

             pto = sum(tmp)

             #print(pto)

             ret.append(pto)

         return ret

-    def forecast_interval(self, ndata, **kwargs):
-        time_displacement = kwargs.get("time_displacement", 0)
-        window_size = kwargs.get("window_size", 1)
-
-        l = len(ndata)
-
-        ret = []
-
-        for k in np.arange(self.order, l + 1):
-            sample = ndata[k - self.order: k]
-
-            affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
-                                                                              time_displacement, window_size)
-
-            # print([str(k) for k in affected_flrgs])
-            # print(affected_flrgs_memberships)
-
-            upper = []
-            lower = []
-
-            tdisp = common.window_index(k + time_displacement, window_size)
-            if len(affected_flrgs) == 0:
-                aset = common.check_bounds(sample[-1], self.sets, tdisp)
-                lower.append(aset.get_lower(tdisp))
-                upper.append(aset.get_upper(tdisp))
-            elif len(affected_flrgs) == 1:
-                _flrg = affected_flrgs[0]
-                if _flrg.get_key() in self.flrgs:
-                    lower.append(self.flrgs[_flrg.get_key()].get_lower(tdisp))
-                    upper.append(self.flrgs[_flrg.get_key()].get_upper(tdisp))
-                else:
-                    lower.append(_flrg.LHS[-1].get_lower(tdisp))
-                    upper.append(_flrg.LHS[-1].get_upper(tdisp))
-            else:
-                for ct, aset in enumerate(affected_flrgs):
-                    if aset.get_key() in self.flrgs:
-                        lower.append(self.flrgs[aset.get_key()].get_lower(tdisp) *
-                                     affected_flrgs_memberships[ct])
-                        upper.append(self.flrgs[aset.get_key()].get_upper(tdisp) *
-                                     affected_flrgs_memberships[ct])
-                    else:
-                        lower.append(aset.LHS[-1].get_lower(tdisp) *
-                                     affected_flrgs_memberships[ct])
-                        upper.append(aset.LHS[-1].get_upper(tdisp) *
-                                     affected_flrgs_memberships[ct])
-
-            ret.append([sum(lower), sum(upper)])
-
-        return ret
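Note: after this refactor the high-order model is constructed from a partitioner and an order, mirroring `nsfts.NonStationaryFTS`, instead of taking a name argument. A usage sketch consistent with the updated test script below (the data source and split sizes are illustrative):

from pyFTS.data import TAIEX
from pyFTS.models.nonstationary import partitioners as nspart, honsfts

data = TAIEX.get_data()
train, test = data[:1000], data[1000:1200]

fs = nspart.simplenonstationary_gridpartitioner_builder(data=train, npart=35,
                                                        transformation=None)

model = honsfts.HighOrderNonStationaryFTS(partitioner=fs, order=2)
model.fit(train)
forecasts = model.predict(test)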

pyFTS/models/nonstationary/partitioners.py    View File

@@ -120,6 +120,7 @@ class SimpleNonStationaryPartitioner(partitioner.Partitioner):
                          func=part.membership_function, names=part.setnames,
                          prefix=part.prefix, transformation=part.transformation,
                          indexer=part.indexer)#, preprocess=False)
+        self.partitions = part.partitions

         for key in part.sets.keys():
             set = part.sets[key]
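Note: exposing `partitions` on the wrapped partitioner appears to support the hunks above; the conditional branch of the new `forecast` builds its no-op perturbation vector as `[[0, 1] for k in np.arange(self.partitioner.partitions)]`, which needs the non-stationary partitioner to carry the partition count.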

pyFTS/tests/nonstationary.py    View File

@@ -12,155 +12,26 @@ from pyFTS.data import TAIEX, NASDAQ, SP500, artificial, mackey_glass
 #mackey_glass.get_data()

-datasets = {
-    "TAIEX": TAIEX.get_data()[:4000],
-    "SP500": SP500.get_data()[10000:14000],
-    "NASDAQ": NASDAQ.get_data()[:4000],
-    # Incremental Mean and Incremental Variance
-    "IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
-    # Incremental Mean and Incremental Variance, lower bound equals to 0
-    "IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
-    # Constant Mean and Incremental Variance
-    "CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
-    # Incremental Mean and Constant Variance
-    "IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
-}

 dataset = TAIEX.get_data()

-train_split = 2000
+train_split = 1000
 test_length = 200

-from pyFTS.common import Transformations
-
-tdiff = Transformations.Differential(1)
-boxcox = Transformations.BoxCox(0)
-
-transformations = {
-    'None': None,
-    'Differential(1)': tdiff,
-    'BoxCox(0)': boxcox
-}
-
-from pyFTS.partitioners import Grid, Util as pUtil
-from pyFTS.benchmarks import benchmarks as bchmk
-from pyFTS.models import chen, hofts, pwfts, hwang
-
-partitions = {'CMIV': {'BoxCox(0)': 36, 'Differential(1)': 11, 'None': 8},
-              'IMCV': {'BoxCox(0)': 36, 'Differential(1)': 20, 'None': 16},
-              'IMIV': {'BoxCox(0)': 39, 'Differential(1)': 12, 'None': 6},
-              'IMIV0': {'BoxCox(0)': 39, 'Differential(1)': 12, 'None': 3},
-              'NASDAQ': {'BoxCox(0)': 39, 'Differential(1)': 13, 'None': 36},
-              'SP500': {'BoxCox(0)': 33, 'Differential(1)': 7, 'None': 33},
-              'TAIEX': {'BoxCox(0)': 39, 'Differential(1)': 31, 'None': 33}}

 train = dataset[:1000]
 test = dataset[1000:]

-from pyFTS.models.nonstationary import partitioners as nspart, cvfts, util as nsUtil
-
-'''
-#fs = nspart.simplenonstationary_gridpartitioner_builder(data=datasets['SP500'][:300],
-#                                                        npart=partitions['SP500']['None'],
-#                                                        transformation=None)
-fs = Grid.GridPartitioner(data=datasets['SP500'][:300],
-                          npart=15,
-                          transformation=None)
-
-fig, axes = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
-fs.plot(axes)
-
-from pyFTS.common import Util
-
-Util.show_and_save_image(fig, "fig2.png", True)
-
-#nsUtil.plot_sets(fs)
-'''
-
-def model_details(ds, tf, train_split, test_split):
-    data = datasets[ds]
-    train = data[:train_split]
-    test = data[train_split:test_split]
-    transformation = transformations[tf]
-
-    fs = nspart.simplenonstationary_gridpartitioner_builder(data=train, npart=15, #partitions[ds][tf],
-                                                            transformation=transformation)
-
-    model = nsfts.NonStationaryFTS(partitioner=fs)
-    model.fit(train)
-    print(model)
-
-    forecasts = model.predict(test)
-    residuals = np.array(test[1:]) - np.array(forecasts[:-1])
-
-    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=[15, 10])
-
-    axes[0].plot(test[1:], label="Original")
-    axes[0].plot(forecasts[:-1], label="Forecasts")
-    axes[0].set_ylabel("Univ. of Discourse")
-
-    #axes[1].set_title("Residuals")
-    axes[1].plot(residuals)
-    axes[1].set_ylabel("Error")
-
-    handles0, labels0 = axes[0].get_legend_handles_labels()
-    lgd = axes[0].legend(handles0, labels0, loc=2)
-
-    nsUtil.plot_sets_conditional(model, test, step=10, size=[10, 7],
-                                 save=True,file="fig.png", axes=axes[2], fig=fig)
-
-model_details('SP500','None',200,400)
-#'''
-
-print("ts")
-
-'''
-tag = 'benchmarks'
-
-for ds in datasets.keys():
-    dataset = datasets[ds]
-    for tf in transformations.keys():
-        transformation = transformations[tf]
-        partitioning = partitions[ds][tf]
-        bchmk.sliding_window_benchmarks(dataset, 3000, train=0.1, inc=0.1,
-                                        #methods=[
-                                        #    hwang.HighOrderFTS,
-                                        #    hofts.HighOrderFTS,
-                                        #    pwfts.ProbabilisticWeightedFTS],
-                                        #orders = [3],
-                                        benchmark_models=False,
-                                        transformations=[transformation],
-                                        partitions=[partitioning],
-                                        progress=False, type='point',
-                                        file="nsfts_benchmarks.db", dataset=ds, tag=tag)
-
-train_split = 200
-test_split = 2000
-
-for ds in datasets.keys():
-    dataset = datasets[ds]
-    print(ds)
-    for tf in ['None']: #transformations.keys():
-        transformation = transformations[tf]
-        train = dataset[:train_split]
-        test = dataset[train_split:test_split]
-
-        fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,
-                                                                npart=partitions[ds][tf],
-                                                                transformation=transformation)
-
-        #cvfts1 = cvfts.ConditionalVarianceFTS(partitioner=fs)
-        model = nsfts.NonStationaryFTS(partitioner=fs)
-
-        model.fit(train)
-        print(model)
-
-        forecasts = model.predict(test)
-        #print(forecasts)
-
-        partitioning = partitions[ds][tf]
-        bchmk.sliding_window_benchmarks(dataset, 2000, train=0.2, inc=0.2,
-                                        benchmark_models=False,
-                                        methods=[cvfts.ConditionalVarianceFTS],
-                                        partitioners_methods=[nspart.simplenonstationary_gridpartitioner_builder],
-                                        transformations=[transformation],
-                                        partitions=[partitioning],
-                                        progress=False, type='point',
-                                        file="nsfts_benchmarks.db", dataset=ds, tag=tag)
-'''
+from pyFTS.models.nonstationary import partitioners as nspart, nsfts, honsfts
+
+fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,npart=35,transformation=None)
+print(fs)
+
+model = honsfts.HighOrderNonStationaryFTS(partitioner=fs, order=2)
+#model = nsfts.NonStationaryFTS(partitioner=fs)
+
+model.fit(train)
+print(model)
+
+forecasts = model.predict(test)
+print(forecasts)