CVFTS and NSFTS unification

This commit is contained in:
Petrônio Cândido 2018-08-06 15:45:09 -03:00
parent f3b399eb97
commit 9be5c611a7
6 changed files with 254 additions and 83 deletions

View File

@ -413,7 +413,7 @@ class FTS(object):
def __str__(self):
tmp = self.name + ":\n"
for r in sorted(self.flrgs):
for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.sets)):
tmp = tmp + str(self.flrgs[r]) + "\n"
return tmp

View File

@ -3,6 +3,7 @@ from pyFTS.models import hofts
from pyFTS.models.nonstationary import common,nsfts
from pyFTS.common import FLR, flrg, tree
class HighOrderNonstationaryFLRG(hofts.HighOrderFTS):
"""Conventional High Order Fuzzy Logical Relationship Group"""
def __init__(self, order, **kwargs):
@ -57,7 +58,9 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
def train(self, ndata, **kwargs):
tmpdata = common.fuzzySeries(ndata, self.sets, self.partitioner.ordered_sets, method='fuzzy', const_t=0)
tmpdata = common.fuzzySeries(ndata, self.sets,
self.partitioner.ordered_sets,
method='fuzzy', const_t=0)
flrs = FLR.generate_non_recurrent_flrs(tmpdata)
self.generate_flrg(flrs)
@ -75,17 +78,17 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
def generate_flrg(self, flrs, **kwargs):
for flr in flrs:
if flr.LHS.name in self.flrgs:
self.flrgs[flr.LHS.name].append_rhs(flr.RHS)
self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
else:
self.flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS)
self.flrgs[flr.LHS.name].append_rhs(flr.RHS)
self.flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS.name)
self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
def _smooth(self, a):
return .1 * a[0] + .3 * a[1] + .6 * a[2]
def perturbation_factors(self, data, **kwargs):
npart = len(self.partitioner.sets)
_max = 0
_min = 0
if data < self.original_min:
@ -101,7 +104,7 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
_range = (_max - _min)/2
translate = np.linspace(_min, _max, len(self.partitioner.sets))
translate = np.linspace(_min, _max, npart)
var = np.std(self.residuals)
@ -109,13 +112,18 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
loc = (self.mean_residual + np.mean(self.residuals))
location = [_range + w + loc + k for k in np.linspace(-var,var) for w in translate]
location = [_range + w + loc + k for k in np.linspace(-var,var, npart) for w in translate]
perturb = [[location[k], var] for k in np.arange(len(self.partitioner.sets))]
scale = [abs(location[0] - location[2])]
scale.extend([abs(location[k - 1] - location[k + 1]) for k in np.arange(1, npart)])
scale.append(abs(location[-1] - location[-3]))
perturb = [[location[k], scale[k]] for k in np.arange(npart)]
return perturb
def perturbation_factors__old(self, data):
npart = len(self.partitioner.sets)
_max = 0
_min = 0
if data < self.original_min:
@ -129,12 +137,12 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
self.max_stack.insert(0, _max)
_max = max(self.max_stack)
location = np.linspace(_min, _max, self.partitioner.partitions)
location = np.linspace(_min, _max, npart)
scale = [abs(location[0] - location[2])]
scale.extend([abs(location[k-1] - location[k+1]) for k in np.arange(1,self.partitioner.partitions-1)])
scale.extend([abs(location[k-1] - location[k+1]) for k in np.arange(1, npart)])
scale.append(abs(location[-1] - location[-3]))
perturb = [[location[k], scale[k]] for k in np.arange(0, self.partitioner.partitions)]
perturb = [[location[k], scale[k]] for k in np.arange(0, npart)]
return perturb
@ -144,16 +152,16 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
def _affected_sets(self, sample, perturb):
affected_sets = [[ct, self.sets[self._fsset_key(ct)].membership(sample, perturb[ct])]
for ct in np.arange(self.partitioner.partitions)
for ct in np.arange(len(self.partitioner.sets))
if self.sets[self._fsset_key(ct)].membership(sample, perturb[ct]) > 0.0]
if len(affected_sets) == 0:
if sample < self.partitioner.lower_set().get_lower(perturb[0]):
affected_sets.append([0, 1])
elif sample > self.partitioner.upper_set().get_upper(perturb[-1]):
affected_sets.append([len(self.sets) - 1, 1])
return affected_sets
def forecast(self, ndata, **kwargs):
@ -181,7 +189,7 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
ix = affected_sets[0][0]
aset = self.partitioner.ordered_sets[ix]
if aset in self.flrgs:
numerator.append(self.flrgs[aset].get_midpoint(perturb[ix]))
numerator.append(self.flrgs[aset].get_midpoint(self.sets, perturb[ix]))
else:
fuzzy_set = self.sets[aset]
numerator.append(fuzzy_set.get_midpoint(perturb[ix]))
@ -192,7 +200,7 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
fs = self.partitioner.ordered_sets[ix]
tdisp = perturb[ix]
if fs in self.flrgs:
numerator.append(self.flrgs[fs].get_midpoint(tdisp) * aset[1])
numerator.append(self.flrgs[fs].get_midpoint(self.sets, tdisp) * aset[1])
else:
fuzzy_set = self.sets[fs]
numerator.append(fuzzy_set.get_midpoint(tdisp) * aset[1])

View File

@ -1,9 +1,23 @@
from pyFTS.common import flrg
from pyFTS.models.nonstationary import common
import numpy as np
class NonStationaryFLRG(flrg.FLRG):
def unpack_args(self, *args):
    """
    Decode the optional positional arguments shared by the FLRG accessors
    (get_membership, get_midpoint, get_lower, get_upper).

    :param args: up to three positional values, in this order:
        0. a dict, used by the accessors as the fuzzy set lookup;
        1. the displacement ``t``: an integer, or a list/tuple;
        2. the window size: an integer.
    :return: the tuple ``(sets, t, w)``. A slot whose argument is missing
        or has an unexpected type keeps its default: ``None``, ``0`` and
        ``1`` respectively.
    """
    import numbers  # function-scope import keeps this block self-contained

    sets, t, w = None, 0, 1
    count = len(args)

    if count > 0 and isinstance(args[0], dict):
        sets = args[0]

    # numbers.Integral matches Python ints *and* NumPy integer scalars.
    # Indices coming from np.arange are NumPy scalars, which are not `int`
    # instances, so a plain `int` check silently fell back to the defaults.
    if count > 1 and isinstance(args[1], (numbers.Integral, list, tuple)):
        t = args[1]
    if count > 2 and isinstance(args[2], numbers.Integral):
        w = args[2]

    return (sets, t, w)
def __init__(self, LHS, **kwargs):
super(NonStationaryFLRG, self).__init__(1, **kwargs)
@ -12,45 +26,49 @@ class NonStationaryFLRG(flrg.FLRG):
def get_key(self):
if isinstance(self.LHS, list):
return str([k.name for k in self.LHS])
return str([k for k in self.LHS])
elif isinstance(self.LHS, dict):
return str(self.LHS.keys())
else:
return self.LHS.name
return self.LHS
def get_membership(self, data, t, window_size=1):
def get_membership(self, data, *args):
sets, t, window_size = self.unpack_args(*args)
ret = 0.0
if isinstance(self.LHS, (list, set)):
#assert len(self.LHS) == len(data)
ret = min([self.LHS[ct].membership(dat, common.window_index(t - (self.order - ct), window_size))
ret = min([sets[self.LHS[ct]].membership(dat, common.window_index(t - (self.order - ct), window_size))
for ct, dat in enumerate(data)])
else:
ret = self.LHS.membership(data, common.window_index(t, window_size))
return ret
def get_midpoint(self, t, window_size=1):
def get_midpoint(self, *args):
sets, t, window_size = self.unpack_args(*args)
if len(self.RHS) > 0:
if isinstance(self.RHS, (list, set)):
tmp = [r.get_midpoint(common.window_index(t, window_size)) for r in self.RHS]
tmp = [sets[r].get_midpoint(common.window_index(t, window_size)) for r in self.RHS]
elif isinstance(self.RHS, dict):
tmp = [self.RHS[r].get_midpoint(common.window_index(t, window_size)) for r in self.RHS.keys()]
tmp = [sets[r].get_midpoint(common.window_index(t, window_size)) for r in self.RHS.keys()]
return sum(tmp) / len(tmp)
else:
return self.LHS[-1].get_midpoint(common.window_index(t, window_size))
return sets[self.LHS[-1]].get_midpoint(common.window_index(t, window_size))
def get_lower(self, t, window_size=1):
def get_lower(self, *args):
sets, t, window_size = self.unpack_args(*args)
if len(self.RHS) > 0:
if isinstance(self.RHS, (list, set)):
return min([r.get_lower(common.window_index(t, window_size)) for r in self.RHS])
return min([sets[r].get_lower(common.window_index(t, window_size)) for r in self.RHS])
elif isinstance(self.RHS, dict):
return min([self.RHS[r].get_lower(common.window_index(t, window_size)) for r in self.RHS.keys()])
return min([sets[r].get_lower(common.window_index(t, window_size)) for r in self.RHS.keys()])
else:
return self.LHS[-1].get_lower(common.window_index(t, window_size))
return sets[self.LHS[-1]].get_lower(common.window_index(t, window_size))
def get_upper(self, t, window_size=1):
def get_upper(self, *args):
sets, t, window_size = self.unpack_args(*args)
if len(self.RHS) > 0:
if isinstance(self.RHS, (list, set)):
return max([r.get_upper(common.window_index(t, window_size)) for r in self.RHS])
return max([sets[r].get_upper(common.window_index(t, window_size)) for r in self.RHS])
elif isinstance(self.RHS, dict):
return max([self.RHS[r].get_upper(common.window_index(t, window_size)) for r in self.RHS.keys()])
return max([sets[r].get_upper(common.window_index(t, window_size)) for r in self.RHS.keys()])
else:
return self.LHS[-1].get_upper(common.window_index(t, window_size))
return sets[self.LHS[-1]].get_upper(common.window_index(t, window_size))

View File

@ -12,87 +12,217 @@ class ConventionalNonStationaryFLRG(flrg.NonStationaryFLRG):
self.RHS = set()
def get_key(self):
return self.LHS.name
return self.LHS
def append_rhs(self, c, **kwargs):
self.RHS.add(c)
def __str__(self):
tmp = self.LHS.name + " -> "
tmp = self.LHS + " -> "
tmp2 = ""
for c in sorted(self.RHS, key=lambda s: s.name):
for c in sorted(self.RHS):
if len(tmp2) > 0:
tmp2 = tmp2 + ","
tmp2 = tmp2 + c.name
tmp2 = tmp2 + c
return tmp + tmp2
class NonStationaryFTS(fts.FTS):
"""NonStationaryFTS Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(NonStationaryFTS, self).__init__(1, "NSFTS " + name, **kwargs)
def __init__(self, **kwargs):
super(NonStationaryFTS, self).__init__(**kwargs)
self.name = "Non Stationary FTS"
self.shortname = "NSFTS"
self.detail = ""
self.flrgs = {}
self.method = kwargs.get('method','conditional')
self.is_high_order = False
if self.partitioner is not None:
self.append_transformation(self.partitioner.transformation)
if self.method == 'conditional':
self.min_stack = [0, 0, 0]
self.max_stack = [0, 0, 0]
self.uod_clip = False
self.order = 1
self.min_order = 1
self.max_lag = 1
self.inputs = []
self.forecasts = []
self.residuals = []
self.variance_residual = 0.
self.mean_residual = 0.
self.memory_window = kwargs.get("memory_window", 5)
def generate_flrg(self, flrs, **kwargs):
for flr in flrs:
if flr.LHS.name in self.flrgs:
self.flrgs[flr.LHS.name].append_rhs(flr.RHS)
self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
else:
self.flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS)
self.flrgs[flr.LHS.name].append_rhs(flr.RHS)
self.flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS.name)
self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
def _smooth(self, a):
return .1 * a[0] + .3 * a[1] + .6 * a[2]
def train(self, data, **kwargs):
if self.method == 'unconditional':
window_size = kwargs.get('parameters', 1)
tmpdata = common.fuzzySeries(data, self.sets, self.partitioner.ordered_sets,
tmpdata = common.fuzzySeries(data, self.sets,
self.partitioner.ordered_sets,
window_size, method='fuzzy')
flrs = FLR.generate_recurrent_flrs(tmpdata)
else:
tmpdata = common.fuzzySeries(data, self.sets,
self.partitioner.ordered_sets,
method='fuzzy', const_t=0)
flrs = FLR.generate_non_recurrent_flrs(tmpdata)
self.generate_flrg(flrs)
if self.method == 'conditional':
self.forecasts = self.forecast(data, no_update=True)
self.residuals = np.array(data[1:]) - np.array(self.forecasts[:-1])
self.variance_residual = np.var(self.residuals) # np.max(self.residuals
self.mean_residual = np.mean(self.residuals)
self.residuals = self.residuals[-self.memory_window:].tolist()
self.forecasts = self.forecasts[-self.memory_window:]
self.inputs = np.array(data[-self.memory_window:]).tolist()
def conditional_perturbation_factors(self, data, **kwargs):
    """
    Build one ``[location, scale]`` perturbation pair per fuzzy set of the
    partitioner, from the current sample and the recent residuals.

    Side effects: ``self.min_stack`` and ``self.max_stack`` are updated in
    place. Their last entry is dropped and the newest shift is inserted at
    the front.

    :param data: the current sample, compared against ``self.original_min``
        and ``self.original_max``
    :param kwargs: accepted for interface compatibility, not used here
    :return: a list with ``len(self.partitioner.sets)`` entries, each one a
        two-element list ``[location, scale]``
    :raises IndexError: when the partitioner has fewer than two sets (the
        scale of the first set reads the location at index 2)
    """
    from itertools import islice

    npart = len(self.partitioner.sets)

    # How far the sample falls outside the original bounds (0 if inside).
    _max = 0
    _min = 0
    if data < self.original_min:
        _min = data - self.original_min if data < 0 else self.original_min - data
    elif data > self.original_max:
        _max = data - self.original_max if data > 0 else self.original_max - data

    # Replace the entry at index 2 with the newest shift at the front, then
    # take the extreme over the whole stack.
    self.min_stack.pop(2)
    self.min_stack.insert(0, _min)
    _min = min(self.min_stack)

    self.max_stack.pop(2)
    self.max_stack.insert(0, _max)
    _max = max(self.max_stack)

    _range = (_max - _min) / 2
    translate = np.linspace(_min, _max, npart)

    # Residual dispersion below 1 is treated as no dispersion at all.
    var = np.std(self.residuals)
    var = 0 if var < 1 else var

    loc = (self.mean_residual + np.mean(self.residuals))

    # NOTE(review): the double loop enumerates every (offset, translation)
    # combination rather than pairing them one to one - confirm this is
    # intended. Only indices 0..npart of that enumeration are ever read, so
    # it is consumed lazily and cut there instead of building all
    # npart * npart values.
    candidates = (_range + w + loc + k
                  for k in np.linspace(-var, var, npart)
                  for w in translate)
    location = list(islice(candidates, npart + 1))

    # Scale of set k is the distance between its two neighbouring
    # locations. The first set uses the locations at indices 0 and 2.
    scale = [abs(location[0] - location[2])]
    scale.extend(abs(location[k - 1] - location[k + 1]) for k in range(1, npart))

    return [[location[k], scale[k]] for k in range(npart)]
def _fsset_key(self, ix):
return self.partitioner.ordered_sets[ix]
def _affected_sets(self, sample, perturb):
    """
    List the fuzzy sets activated by a sample under the given perturbation.

    When ``self.method == 'conditional'`` the perturbation is indexed per
    set (``perturb[ct]``). For any other method the same ``perturb`` value
    is passed to every set.

    :param sample: the value to fuzzify
    :param perturb: the perturbation, indexable per set in conditional mode
    :return: a list of ``[index, membership]`` pairs for every set whose
        membership is greater than zero. If no set is activated, a single
        ``[0, 1]`` or ``[len(self.sets) - 1, 1]`` pair is returned when the
        sample is below the lower bound of the lower set or above the upper
        bound of the upper set; otherwise the list is empty.
    """
    per_set = self.method == 'conditional'

    affected_sets = []
    for ct in np.arange(len(self.partitioner.sets)):
        displacement = perturb[ct] if per_set else perturb
        # Evaluate the membership once and reuse it for both the filter
        # and the stored value.
        degree = self.sets[self._fsset_key(ct)].membership(sample, displacement)
        if degree > 0.0:
            affected_sets.append([ct, degree])

    if len(affected_sets) == 0:
        # Out-of-range sample: clamp to the first or last set with full
        # membership.
        if sample < self.partitioner.lower_set().get_lower(perturb[0] if per_set else perturb):
            affected_sets.append([0, 1])
        elif sample > self.partitioner.upper_set().get_upper(perturb[-1] if per_set else perturb):
            affected_sets.append([len(self.sets) - 1, 1])

    return affected_sets
def forecast(self, ndata, **kwargs):
time_displacement = kwargs.get("time_displacement",0)
window_size = kwargs.get("window_size", 1)
no_update = kwargs.get("no_update", False)
l = len(ndata)
ret = []
for k in np.arange(0, l):
tdisp = common.window_index(k + time_displacement, window_size)
sample = ndata[k]
affected_sets = [ [self.sets[key], self.sets[key].membership(ndata[k], tdisp)]
for key in self.partitioner.ordered_sets
if self.sets[key].membership(ndata[k], tdisp) > 0.0]
if self.method == 'unconditional':
perturb = common.window_index(k + time_displacement, window_size)
elif self.method == 'conditional':
if not no_update:
perturb = self.conditional_perturbation_factors(sample)
else:
perturb = [[0, 1] for k in np.arange(len(self.partitioner.sets))]
if len(affected_sets) == 0:
affected_sets.append([common.check_bounds(ndata[k], self.partitioner, tdisp), 1.0])
affected_sets = self._affected_sets(sample, perturb)
tmp = []
numerator = []
denominator = []
if len(affected_sets) == 1:
aset = affected_sets[0][0]
if aset.name in self.flrgs:
tmp.append(self.flrgs[aset.name].get_midpoint(tdisp))
ix = affected_sets[0][0]
aset = self.partitioner.ordered_sets[ix]
if aset in self.flrgs:
numerator.append(self.flrgs[aset].get_midpoint(self.sets, perturb[ix]))
else:
tmp.append(aset.get_midpoint(tdisp))
fuzzy_set = self.sets[aset]
numerator.append(fuzzy_set.get_midpoint(perturb[ix]))
denominator.append(1)
else:
for aset in affected_sets:
if aset[0].name in self.flrgs:
tmp.append(self.flrgs[aset[0].name].get_midpoint(tdisp) * aset[1])
ix = aset[0]
fs = self.partitioner.ordered_sets[ix]
tdisp = perturb[ix]
if fs in self.flrgs:
numerator.append(self.flrgs[fs].get_midpoint(self.sets, tdisp) * aset[1])
else:
tmp.append(aset[0].get_midpoint(tdisp) * aset[1])
fuzzy_set = self.sets[fs]
numerator.append(fuzzy_set.get_midpoint(tdisp) * aset[1])
denominator.append(aset[1])
pto = sum(tmp)
#print(pto)
if sum(denominator) > 0:
pto = sum(numerator) / sum(denominator)
else:
pto = sum(numerator)
ret.append(pto)
if self.method == 'conditional' and not no_update:
self.forecasts.append(pto)
self.residuals.append(self.inputs[-1] - self.forecasts[-1])
self.inputs.append(sample)
self.inputs.pop(0)
self.forecasts.pop(0)
self.residuals.pop(0)
return ret
def forecast_interval(self, ndata, **kwargs):

View File

@ -56,7 +56,7 @@ class PolynomialNonStationaryPartitioner(partitioner.Partitioner):
def get_polynomial_perturbations(self, data, **kwargs):
w = kwargs.get("window_size", int(len(data) / 5))
deg = kwargs.get("degree", 2)
degree = kwargs.get("degree", 2)
xmax = [data[0]]
tmax = [0]
xmin = [data[0]]
@ -73,17 +73,17 @@ class PolynomialNonStationaryPartitioner(partitioner.Partitioner):
xmin.append(tn)
tmin.append(np.ravel(np.argwhere(data == tn)).tolist()[0])
cmax = np.polyfit(tmax, xmax, deg=deg)
cmin = np.polyfit(tmin, xmin, deg=deg)
cmax = np.polyfit(tmax, xmax, deg=degree)
cmin = np.polyfit(tmin, xmin, deg=degree)
cmed = []
for d in np.arange(0, deg + 1):
for d in np.arange(0, degree + 1):
cmed.append(np.linspace(cmin[d], cmax[d], self.partitions)[1:self.partitions - 1])
loc_params = [cmin.tolist()]
for i in np.arange(0, self.partitions - 2):
tmp = [cmed[k][i] for k in np.arange(0, deg + 1)]
tmp = [cmed[k][i] for k in np.arange(0, degree + 1)]
loc_params.append(tmp)
loc_params.append(cmax.tolist())
@ -92,13 +92,13 @@ class PolynomialNonStationaryPartitioner(partitioner.Partitioner):
clen = []
for i in np.arange(1, self.partitions-1):
tmp = self.poly_width(loc_params[i - 1], loc_params[i + 1], rng, deg)
tmp = self.poly_width(loc_params[i - 1], loc_params[i + 1], rng, degree)
clen.append(tmp)
tmp = self.poly_width(loc_params[0], loc_params[1], rng, deg)
tmp = self.poly_width(loc_params[0], loc_params[1], rng, degree)
clen.insert(0, tmp)
tmp = self.poly_width(loc_params[self.partitions-2], loc_params[self.partitions-1], rng, deg)
tmp = self.poly_width(loc_params[self.partitions-2], loc_params[self.partitions-1], rng, degree)
clen.append(tmp)
tmp = (loc_params, clen)

View File

@ -11,17 +11,17 @@ import pandas as pd
from pyFTS.data import TAIEX, NASDAQ, SP500, artificial
datasets = {
#"TAIEX": TAIEX.get_data()[:4000],
"TAIEX": TAIEX.get_data()[:4000],
"SP500": SP500.get_data()[10000:14000],
#"NASDAQ": NASDAQ.get_data()[:4000],
"NASDAQ": NASDAQ.get_data()[:4000],
# Incremental Mean and Incremental Variance
#"IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
"IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
# Incremental Mean and Incremental Variance, lower bound equals to 0
#"IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
"IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
# Constant Mean and Incremental Variance
#"CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
"CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
# Incremental Mean and Constant Variance
#"IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
"IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
}
train_split = 2000
@ -34,9 +34,9 @@ tdiff = Transformations.Differential(1)
boxcox = Transformations.BoxCox(0)
transformations = {
#'None': None,
'None': None,
'Differential(1)': tdiff,
#'BoxCox(0)': boxcox
'BoxCox(0)': boxcox
}
from pyFTS.partitioners import Grid, Util as pUtil
@ -74,14 +74,28 @@ for ds in datasets.keys():
progress=False, type='point',
file="nsfts_benchmarks.db", dataset=ds, tag=tag)
'''
train_split = 200
test_split = 2000
for ds in datasets.keys():
dataset = datasets[ds]
print(ds)
for tf in transformations.keys():
for tf in ['None']: #transformations.keys():
transformation = transformations[tf]
train = dataset[:train_split]
test = dataset[train_split:test_split]
fs = nspart.simplenonstationary_gridpartitioner_builder(data=train, npart=partitions[ds][tf], transformation=transformation)
print(fs)
#cvfts1 = cvfts.ConditionalVarianceFTS(partitioner=fs)
model = nsfts.NonStationaryFTS(partitioner=fs)
model.fit(train)
print(model)
forecasts = model.predict(test)
'''
#print(forecasts)
partitioning = partitions[ds][tf]
@ -93,3 +107,4 @@ for ds in datasets.keys():
partitions=[partitioning],
progress=False, type='point',
file="nsfts_benchmarks.db", dataset=ds, tag=tag)
'''