common.Util.plot_rules; CVFTS bugfixes and other nonstationary sets improvements

This commit is contained in:
Petrônio Cândido 2018-05-22 11:39:17 -03:00
parent aa45d7e38e
commit b058ed5daa
10 changed files with 169 additions and 87 deletions

View File

@@ -22,7 +22,7 @@ class FLR(object):
self.RHS = RHS self.RHS = RHS
def __str__(self): def __str__(self):
return self.LHS + " -> " + self.RHS return str(self.LHS) + " -> " + str(self.RHS)
class IndexedFLR(FLR): class IndexedFLR(FLR):

View File

@@ -59,8 +59,9 @@ class FuzzySet(object):
def set_ordered(fuzzySets): def set_ordered(fuzzySets):
tmp1 = [fuzzySets[k] for k in fuzzySets.keys()] if len(fuzzySets) > 0:
return [k.name for k in sorted(tmp1, key=lambda x: x.centroid)] tmp1 = [fuzzySets[k] for k in fuzzySets.keys()]
return [k.name for k in sorted(tmp1, key=lambda x: x.centroid)]
def fuzzyfy_instance(inst, fuzzySets, ordered_sets=None): def fuzzyfy_instance(inst, fuzzySets, ordered_sets=None):

View File

@@ -8,6 +8,43 @@ import dill
import numpy as np import numpy as np
def plot_rules(model, size=(5, 5), axis=None):
    """
    Plot the fuzzy sets and (for first-order models) the FLRG rules of a fitted FTS model.

    Each fuzzy set is drawn as a triangle over its parameters with a horizontal
    guide line at its centroid; for non-high-order models, an arrow is drawn from
    each rule's LHS centroid to every RHS centroid.

    :param model: a fitted FTS model exposing ``partitioner``, ``sets``,
                  ``is_high_order`` and ``flrgs``
    :param size: figure size (width, height) used only when ``axis`` is None
    :param axis: optional matplotlib axis to draw on; when None a new figure
                 is created and shown
    """
    # NOTE: default changed from a mutable list to a tuple to avoid the
    # shared-mutable-default pitfall; callers passing a list are unaffected.
    if axis is None:
        fig, axis = plt.subplots(nrows=1, ncols=1, figsize=size)

    # Draw every fuzzy set as a triangle plus a centroid guide line.
    for key in model.partitioner.ordered_sets:
        fs = model.sets[key]
        axis.plot([0, 1, 0], fs.parameters, label=fs.name)
        axis.axhline(fs.centroid, c="lightgray", alpha=0.5)

    num_sets = len(model.partitioner.ordered_sets)
    axis.set_xlim([0, num_sets])
    axis.set_xticks(range(0, num_sets))
    # Leading empty label so tick 0 stays blank and labels align with sets 1..n.
    tmp = ['']
    tmp.extend(model.partitioner.ordered_sets)
    axis.set_xticklabels(tmp)
    axis.set_ylim([model.partitioner.min, model.partitioner.max])
    axis.set_yticks([model.sets[k].centroid for k in model.partitioner.ordered_sets])
    axis.set_yticklabels([str(round(model.sets[k].centroid, 1)) + " - " + k
                          for k in model.partitioner.ordered_sets])

    if not model.is_high_order:
        # First-order models: draw one arrow per LHS -> RHS transition.
        for ct, key in enumerate(model.partitioner.ordered_sets):
            if key in model.flrgs:
                flrg = model.flrgs[key]
                orig = model.sets[key].centroid
                axis.plot([ct + 1], [orig], 'o')
                for rhs in flrg.RHS:
                    dest = model.sets[rhs].centroid
                    axis.arrow(ct + 1.1, orig, 0.8, dest - orig,  # length_includes_head=True,
                               head_width=0.1, head_length=0.1, shape='full', overhang=0,
                               fc='k', ec='k')

    plt.tight_layout()
    plt.show()
current_milli_time = lambda: int(round(time.time() * 1000)) current_milli_time = lambda: int(round(time.time() * 1000))

View File

@@ -38,6 +38,7 @@ class FTS(object):
self.auto_update = False self.auto_update = False
self.benchmark_only = False self.benchmark_only = False
self.indexer = None self.indexer = None
self.uod_clip = kwargs.get("uod_clip", True)
def fuzzy(self, data): def fuzzy(self, data):
""" """
@@ -75,7 +76,8 @@
else: else:
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
ndata = np.clip(ndata, self.original_min, self.original_max) if self.uod_clip:
ndata = np.clip(ndata, self.original_min, self.original_max)
if 'distributed' in kwargs: if 'distributed' in kwargs:
distributed = kwargs.pop('distributed') distributed = kwargs.pop('distributed')

View File

@@ -56,11 +56,12 @@ class FuzzySet(FS.FuzzySet):
def perform_location(self, t, param): def perform_location(self, t, param):
if self.location is None: if self.location is None:
return param inc = t
else:
l = len(self.location) l = len(self.location)
inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)]) inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)])
if self.mf == Membership.gaussmf: if self.mf == Membership.gaussmf:
# changes only the mean parameter # changes only the mean parameter
@@ -76,11 +77,12 @@ class FuzzySet(FS.FuzzySet):
def perform_width(self, t, param): def perform_width(self, t, param):
if self.width is None: if self.width is None:
return param inc = t
else:
l = len(self.width) l = len(self.width)
inc = sum([self.width[k](t + self.width_roots[k], self.width_params[k]) for k in np.arange(0, l)]) inc = sum([self.width[k](t + self.width_roots[k], self.width_params[k]) for k in np.arange(0, l)])
if self.mf == Membership.gaussmf: if self.mf == Membership.gaussmf:
# changes only the variance parameter # changes only the variance parameter

View File

@@ -5,26 +5,20 @@ from pyFTS.common import FLR
class ConditionalVarianceFTS(chen.ConventionalFTS): class ConditionalVarianceFTS(chen.ConventionalFTS):
def __init__(self, name, **kwargs): def __init__(self, **kwargs):
super(ConditionalVarianceFTS, self).__init__("CVFTS " + name, **kwargs) super(ConditionalVarianceFTS, self).__init__(**kwargs)
self.name = "Conditional Variance FTS" self.name = "Conditional Variance FTS"
self.shortname = "CVFTS "
self.detail = "" self.detail = ""
self.flrgs = {} self.flrgs = {}
#self.append_transformation(Transformations.Differential(1)) if self.partitioner is not None:
if self.partitioner is None:
self.min_tx = None
self.max_tx = None
else:
self.min_tx = self.partitioner.min
self.max_tx = self.partitioner.max
self.append_transformation(self.partitioner.transformation) self.append_transformation(self.partitioner.transformation)
self.min_stack = [0,0,0] self.min_stack = [0,0,0]
self.max_stack = [0,0,0] self.max_stack = [0,0,0]
self.uod_clip = False
def train(self, ndata, **kwargs): def train(self, ndata, **kwargs):
self.min_tx = min(ndata)
self.max_tx = max(ndata)
tmpdata = common.fuzzySeries(ndata, self.sets, self.partitioner.ordered_sets, method='fuzzy', const_t=0) tmpdata = common.fuzzySeries(ndata, self.sets, self.partitioner.ordered_sets, method='fuzzy', const_t=0)
flrs = FLR.generate_non_recurrent_flrs(tmpdata) flrs = FLR.generate_non_recurrent_flrs(tmpdata)
@@ -44,10 +38,10 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
def perturbation_factors(self, data): def perturbation_factors(self, data):
_max = 0 _max = 0
_min = 0 _min = 0
if data < self.min_tx: if data < self.original_min:
_min = data - self.min_tx if data < 0 else self.min_tx - data _min = data - self.original_min if data < 0 else self.original_min - data
elif data > self.max_tx: elif data > self.original_max:
_max = data - self.max_tx if data > 0 else self.max_tx - data _max = data - self.original_max if data > 0 else self.original_max - data
self.min_stack.pop(2) self.min_stack.pop(2)
self.min_stack.insert(0,_min) self.min_stack.insert(0,_min)
_min = min(self.min_stack) _min = min(self.min_stack)
@@ -96,21 +90,22 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
if len(affected_sets) == 1: if len(affected_sets) == 1:
ix = affected_sets[0][0] ix = affected_sets[0][0]
aset = self.sets[ix] aset = self.partitioner.ordered_sets[ix]
if aset.name in self.flrgs: if aset in self.flrgs:
tmp.append(self.flrgs[aset.name].get_midpoint(perturb[ix])) tmp.append(self.flrgs[aset].get_midpoint(perturb[ix]))
else: else:
print('naive') fuzzy_set = self.sets[aset]
tmp.append(aset.get_midpoint(perturb[ix])) tmp.append(fuzzy_set.get_midpoint(perturb[ix]))
else: else:
for aset in affected_sets: for aset in affected_sets:
ix = aset[0] ix = aset[0]
fs = self.sets[ix] fs = self.partitioner.ordered_sets[ix]
tdisp = perturb[ix] tdisp = perturb[ix]
if fs.name in self.flrgs: if fs in self.flrgs:
tmp.append(self.flrgs[fs.name].get_midpoint(tdisp) * aset[1]) tmp.append(self.flrgs[fs].get_midpoint(tdisp) * aset[1])
else: else:
tmp.append(fs.get_midpoint(tdisp) * aset[1]) fuzzy_set = self.sets[fs]
tmp.append(fuzzy_set.get_midpoint(tdisp) * aset[1])
pto = sum(tmp) pto = sum(tmp)
@@ -137,24 +132,26 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
if len(affected_sets) == 1: if len(affected_sets) == 1:
ix = affected_sets[0][0] ix = affected_sets[0][0]
aset = self.sets[ix] aset = self.partitioner.ordered_sets[ix]
if aset.name in self.flrgs: if aset in self.flrgs:
lower.append(self.flrgs[aset.name].get_lower(perturb[ix])) lower.append(self.flrgs[aset].get_lower(perturb[ix]))
upper.append(self.flrgs[aset.name].get_upper(perturb[ix])) upper.append(self.flrgs[aset].get_upper(perturb[ix]))
else: else:
lower.append(aset.get_lower(perturb[ix])) fuzzy_set = self.sets[aset]
upper.append(aset.get_upper(perturb[ix])) lower.append(fuzzy_set.get_lower(perturb[ix]))
upper.append(fuzzy_set.get_upper(perturb[ix]))
else: else:
for aset in affected_sets: for aset in affected_sets:
ix = aset[0] ix = aset[0]
fs = self.sets[ix] fs = self.partitioner.ordered_sets[ix]
tdisp = perturb[ix] tdisp = perturb[ix]
if fs.name in self.flrgs: if fs in self.flrgs:
lower.append(self.flrgs[fs.name].get_lower(tdisp) * aset[1]) lower.append(self.flrgs[fs].get_lower(tdisp) * aset[1])
upper.append(self.flrgs[fs.name].get_upper(tdisp) * aset[1]) upper.append(self.flrgs[fs].get_upper(tdisp) * aset[1])
else: else:
lower.append(fs.get_lower(tdisp) * aset[1]) fuzzy_set = self.sets[fs]
upper.append(fs.get_upper(tdisp) * aset[1]) lower.append(fuzzy_set.get_lower(tdisp) * aset[1])
upper.append(fuzzy_set.get_upper(tdisp) * aset[1])
itvl = [sum(lower), sum(upper)] itvl = [sum(lower), sum(upper)]

View File

@@ -109,22 +109,26 @@ class PolynomialNonStationaryPartitioner(partitioner.Partitioner):
pass pass
class ConstantNonStationaryPartitioner(partitioner.Partitioner): class SimpleNonStationaryPartitioner(partitioner.Partitioner):
""" """
Non Stationary Universe of Discourse Partitioner Non Stationary Universe of Discourse Partitioner
""" """
def __init__(self, data, part, **kwargs): def __init__(self, data, part, **kwargs):
"""""" """"""
super(ConstantNonStationaryPartitioner, self).__init__(name=part.name, data=data, npart=part.partitions, super(SimpleNonStationaryPartitioner, self).__init__(name=part.name, data=data, npart=part.partitions,
func=part.membership_function, names=part.setnames, func=part.membership_function, names=part.setnames,
prefix=part.prefix, transformation=part.transformation, prefix=part.prefix, transformation=part.transformation,
indexer=part.indexer) indexer=part.indexer)#, preprocess=False)
self.sets = {}
for key in part.sets.keys(): for key in part.sets.keys():
set = part.sets[key] set = part.sets[key]
tmp = common.FuzzySet(set.name, set.mf, set.parameters, **kwargs) tmp = common.FuzzySet(set.name, set.mf, set.parameters, **kwargs)
tmp.centroid = set.centroid
self.sets[key] =tmp self.sets[key] =tmp
self.ordered_sets = stationary_fs.set_ordered(self.sets)
def build(self, data):
return {}

View File

@@ -54,23 +54,23 @@ def plot_sets(partitioner, start=0, end=10, step=1, tam=[5, 5], colors=None,
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def plot_sets_conditional(model, data, start=0, end=10, step=1, tam=[5, 5], colors=None, def plot_sets_conditional(model, data, step=1, size=[5, 5], colors=None,
save=False, file=None, axes=None): save=False, file=None, axes=None):
range = np.arange(0, len(data), step)
range = np.arange(start,end,step)
ticks = [] ticks = []
if axes is None: if axes is None:
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam) fig, axes = plt.subplots(nrows=1, ncols=1, figsize=size)
for ct, key in enumerate(model.partitioner.ordered_sets): for t in range:
set = model.partitioner.sets[key] perturb = model.perturbation_factors(data[t])
for t in range:
tdisp = model.perturbation_factors(data[t]) for ct, key in enumerate(model.partitioner.ordered_sets):
set.perturbate_parameters(tdisp[ct]) set = model.partitioner.sets[key]
param = set.perturbated_parameters[str(tdisp[ct])] set.perturbate_parameters(perturb[ct])
param = set.perturbated_parameters[str(perturb[ct])]
if set.mf == Membership.trimf: if set.mf == Membership.trimf:
if t == start: if t == 0:
line = axes.plot([t, t+1, t], param, label=set.name) line = axes.plot([t, t+1, t], param, label=set.name)
set.metadata['color'] = line[0].get_color() set.metadata['color'] = line[0].get_color()
else: else:
@@ -86,7 +86,7 @@ def plot_sets_conditional(model, data, start=0, end=10, step=1, tam=[5, 5], colo
lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1)) lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
if data is not None: if data is not None:
axes.plot(np.arange(start, start + len(data), 1), data,c="black") axes.plot(np.arange(0, len(data), 1), data,c="black")
plt.tight_layout() plt.tight_layout()

View File

@@ -13,20 +13,27 @@ tdiff = Transformations.Differential(1)
from pyFTS.data import TAIEX, SP500, NASDAQ from pyFTS.data import TAIEX, SP500, NASDAQ
#dataset = TAIEX.get_data() dataset = TAIEX.get_data()
dataset = SP500.get_data()[11500:16000] #dataset = SP500.get_data()[11500:16000]
#dataset = NASDAQ.get_data() #dataset = NASDAQ.get_data()
#print(len(dataset)) #print(len(dataset))
'''
from pyFTS.partitioners import Grid, Util as pUtil from pyFTS.partitioners import Grid, Util as pUtil
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformation=tdiff) partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
'''
from pyFTS.common import Util as cUtil
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive
from pyFTS.models import pwfts, song, chen, ifts, hofts
from pyFTS.models import pwfts, song, ifts, hofts
from pyFTS.models.ensemble import ensemble from pyFTS.models.ensemble import ensemble
model = chen.ConventionalFTS(partitioner=partitioner)
model.append_transformation(tdiff)
model.fit(dataset[:800])
cUtil.plot_rules(model)
''' '''
model = knn.KNearestNeighbors(order=3) model = knn.KNearestNeighbors(order=3)
#model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner) #model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)
@@ -78,17 +85,16 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
#for tmp2 in tmp: #for tmp2 in tmp:
# print(tmp2) # print(tmp2)
''' '''
#''' '''
types = ['point','interval','distribution'] types = ['point','interval','distribution']
benchmark_methods=[[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)]] benchmark_methods=[[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)]]
'''
benchmark_methods=[ benchmark_methods=[
[arima.ARIMA for k in range(4)] + [naive.Naive], [arima.ARIMA for k in range(4)] + [naive.Naive],
[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)], [arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)],
[arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)] [arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)]
+ [knn.KNearestNeighbors for k in range(3)] + [knn.KNearestNeighbors for k in range(3)]
]''' ]
benchmark_methods_parameters= [ benchmark_methods_parameters= [
[ [
{'order': (1, 0, 0), 'alpha': .05}, {'order': (1, 0, 0), 'alpha': .05},
@@ -105,7 +111,7 @@ benchmark_methods_parameters= [
{'order': 2, 'alpha': .25} {'order': 2, 'alpha': .25}
] ]
] ]
'''benchmark_methods_parameters= [ benchmark_methods_parameters= [
[ [
{'order': (1, 0, 0)}, {'order': (1, 0, 0)},
{'order': (1, 0, 1)}, {'order': (1, 0, 1)},
@@ -134,7 +140,7 @@ benchmark_methods_parameters= [
{'order': 2, 'dist': True}, {'order': 2, 'dist': True},
{'order': 1}, {'order': 2}, {'order': 3}, {'order': 1}, {'order': 2}, {'order': 3},
] ]
]''' ]
dataset_name = "SP500" dataset_name = "SP500"
tag = "ahead2" tag = "ahead2"
@@ -169,7 +175,7 @@ for ct, type in enumerate(types):
file="benchmarks.db", dataset=dataset_name, tag=tag) file="benchmarks.db", dataset=dataset_name, tag=tag)
#''' '''
''' '''
dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';') dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
print(bUtil.analytic_tabular_dataframe(dat)) print(bUtil.analytic_tabular_dataframe(dat))

View File

@@ -1,13 +1,14 @@
import os import os
import numpy as np import numpy as np
from pyFTS.common import Membership, Transformations from pyFTS.common import Membership, Transformations
from pyFTS.models.nonstationary import common, perturbation, partitioners, util, honsfts, cvfts from pyFTS.models.nonstationary import common, perturbation, partitioners, util
from pyFTS.models.nonstationary import nsfts from pyFTS.models.nonstationary import nsfts, cvfts
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pyFTS.common import Util as cUtil from pyFTS.common import Util as cUtil
import pandas as pd import pandas as pd
'''
from pyFTS.data import artificial from pyFTS.data import artificial
lmv1 = artificial.generate_gaussian_linear(1,0.2,0.2,0.05) lmv1 = artificial.generate_gaussian_linear(1,0.2,0.2,0.05)
@@ -32,3 +33,35 @@ print(nsfts1.predict(test1))
print(nsfts1) print(nsfts1)
util.plot_sets(fs1, tam=[10, 5], start=0, end=100, step=2, data=lmv1[:100], window_size=35) util.plot_sets(fs1, tam=[10, 5], start=0, end=100, step=2, data=lmv1[:100], window_size=35)
'''
from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
from pyFTS.common import Util
from pyFTS.data import TAIEX
taiex = TAIEX.get_data()
taiex_diff = tdiff.apply(taiex)
train = taiex_diff[:600]
test = taiex_diff[600:1500]
fs_tmp = Grid.GridPartitioner(data=train, npart=20) #, transformation=tdiff)
fs = partitioners.SimpleNonStationaryPartitioner(train, fs_tmp)
print(fs)
model = cvfts.ConditionalVarianceFTS(partitioner=fs)
model.fit(train)
print(model)
#tmpp4 = model.predict(test, type='point')
tmp = model.predict(test, type='interval')
#util.plot_sets_conditional(model, test, step=1, tam=[10, 5])