Optimizations and refactorings on hofts and pwfts; fuzzyfy function on Partitioner; improvements on cmvfts to enable all models to be used with it

This commit is contained in:
Petrônio Cândido 2018-12-05 21:17:34 -02:00
parent cf24e88b8a
commit 1773499059
10 changed files with 112 additions and 70 deletions

View File

@ -106,7 +106,6 @@ def __binary_search(x, fuzzy_sets, ordered_sets):
first = midpoint + 1
def fuzzyfy(data, partitioner, **kwargs):
"""
A general method for fuzzyfication.
@ -117,7 +116,8 @@ def fuzzyfy(data, partitioner, **kwargs):
:keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
:keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership values for all fuzzy sets)
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
:returns a list with the fuzzyfied values, depending on the mode
"""
alpha_cut = kwargs.get('alpha_cut', 0.)
@ -126,11 +126,26 @@ def fuzzyfy(data, partitioner, **kwargs):
if isinstance(data, (list, np.ndarray)):
if mode == 'vector':
return fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
elif mode == 'both':
mvs = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
fs = []
for mv in mvs:
fsets = [(partitioner.ordered_sets[ix], mv[ix])
for ix in np.arange(len(mv))
if mv[ix] >= alpha_cut]
fs.append(fsets)
return fs
else:
return fuzzyfy_series(data, partitioner.sets, method, alpha_cut, partitioner.ordered_sets)
else:
if mode == 'vector':
return fuzzyfy_instance(data, partitioner.sets, partitioner.ordered_sets)
elif mode == 'both':
mv = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
fsets = [(partitioner.ordered_sets[ix], mv[ix])
for ix in np.arange(len(mv))
if mv[ix] >= alpha_cut]
return fsets
else:
return get_fuzzysets(data, partitioner.sets, partitioner.ordered_sets, alpha_cut)

View File

@ -464,8 +464,12 @@ class FTS(object):
"""String representation of the model"""
tmp = self.name + ":\n"
for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.sets)):
tmp = tmp + str(self.flrgs[r]) + "\n"
if self.partitioner.type == 'common':
for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)):
tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
else:
for r in self.model.flrgs:
tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
return tmp
def __len__(self):

View File

@ -6,7 +6,8 @@ using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DO
"""
import numpy as np
from pyFTS.common import FuzzySet, FLR, fts, flrg, tree
from pyFTS.common import FuzzySet, FLR, fts, flrg
from itertools import product
class HighOrderFLRG(flrg.FLRG):
"""Conventional High Order Fuzzy Logical Relationship Group"""
@ -106,30 +107,25 @@ class HighOrderFTS(fts.FTS):
def generate_lhs_flrg(self, sample, explain=False):
nsample = [FuzzySet.fuzzyfy(k, partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut)
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
for k in sample]
return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
lags = {}
lags = []
flrgs = []
for ct, o in enumerate(self.lags):
lags[ct] = sample[o-1]
lhs = sample[o - 1]
lags.append(lhs)
if explain:
print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
# Trace the possible paths
for p in root.paths():
for path in product(*lags):
flrg = HighOrderFLRG(self.order)
path = list(reversed(list(filter(None.__ne__, p))))
for lhs in path:
flrg.append_lhs(lhs)
@ -141,13 +137,12 @@ class HighOrderFTS(fts.FTS):
def generate_flrg(self, data):
l = len(data)
for k in np.arange(self.max_lag, l):
lags = {}
if self.dump: print("FLR: " + str(k))
sample = data[k - self.max_lag: k]
rhs = FuzzySet.fuzzyfy(data[k], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut)
rhs = self.partitioner.fuzzyfy(data[k], mode="sets", alpha_cut=self.alpha_cut)
flrgs = self.generate_lhs_flrg(sample)
@ -158,6 +153,7 @@ class HighOrderFTS(fts.FTS):
for st in rhs:
self.flrgs[flrg.get_key()].append_rhs(st)
def generate_flrg_fuzzyfied(self, data):
l = len(data)
for k in np.arange(self.max_lag, l):
@ -165,7 +161,6 @@ class HighOrderFTS(fts.FTS):
sample = data[k - self.max_lag: k]
rhs = data[k]
flrgs = self.generate_lhs_flrg_fuzzyfied(sample)
@ -245,24 +240,18 @@ class WeightedHighOrderFTS(HighOrderFTS):
self.shortname = "WHOFTS"
def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
lags = {}
lags = []
flrgs = []
for ct, o in enumerate(self.lags):
lags[ct] = sample[o-1]
lags.append(sample[o-1])
if explain:
print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
print("\t (Lag {}) {} \n".format(o, sample[o-1]))
# Trace the possible paths
for p in root.paths():
for path in product(*lags):
flrg = WeightedHighOrderFLRG(self.order)
path = list(reversed(list(filter(None.__ne__, p))))
for lhs in path:
flrg.append_lhs(lhs)

View File

@ -36,6 +36,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
self.shortname = "ClusteredMVFTS"
self.name = "Clustered Multivariate FTS"
self.pre_fuzzyfy = kwargs.get('pre_fuzzyfy', True)
def fuzzyfy(self,data):
ndata = []
for index, row in data.iterrows():
@ -51,28 +53,29 @@ class ClusteredMVFTS(mvfts.MVFTS):
self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
if self.model.is_high_order:
self.model.order = self.model = self.fts_method(partitioner=self.cluster,
order=self.order, **self.fts_params)
self.model.order = self.order
if self.pre_fuzzyfy:
ndata = self.fuzzyfy(data)
else:
ndata = [self.format_data(k) for k in data.to_dict('records')]
self.model.train(ndata, fuzzyfied=True)
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
self.cluster.prune()
def forecast(self, ndata, **kwargs):
if self.pre_fuzzyfy:
ndata = self.fuzzyfy(ndata)
else:
ndata = [self.format(k) for k in ndata.to_dict('records')]
return self.model.forecast(ndata, fuzzyfied=True, **kwargs)
return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
def __str__(self):
"""String representation of the model"""
tmp = self.model.shortname + ":\n"
for r in self.model.flrgs:
tmp = tmp + str(self.model.flrgs[r]) + "\n"
return tmp
return str(self.model)
def __len__(self):
"""

View File

@ -42,11 +42,16 @@ def fuzzyfy_instance(data_point, var):
return [(var.name, fs) for fs in fsets]
def fuzzyfy_instance_clustered(data_point, cluster, alpha_cut=0.0):
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
alpha_cut = kwargs.get('alpha_cut', 0.0)
mode = kwargs.get('mode', 'sets')
fsets = []
for fset in cluster.knn(data_point):
if cluster.sets[fset].membership(data_point) > alpha_cut:
if mode == 'sets':
fsets.append(fset)
elif mode =='both':
fsets.append( (fset, cluster.sets[fset].membership(data_point)) )
return fsets

View File

@ -1,5 +1,5 @@
from pyFTS.partitioners import partitioner
from pyFTS.models.multivariate.common import MultivariateFuzzySet
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
from itertools import product
from scipy.spatial import KDTree
import numpy as np
@ -104,3 +104,5 @@ class GridCluster(partitioner.Partitioner):
else:
return [self.index[k] for k in ix]
def fuzzyfy(self, data, **kwargs):
return fuzzyfy_instance_clustered(data, self, **kwargs)

View File

@ -5,9 +5,10 @@ import numpy as np
import pandas as pd
import math
from operator import itemgetter
from pyFTS.common import FLR, FuzzySet, tree
from pyFTS.common import FLR, FuzzySet
from pyFTS.models import hofts, ifts
from pyFTS.probabilistic import ProbabilityDistribution
from itertools import product
class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
@ -116,33 +117,33 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
parameters = kwargs.get('parameters','fuzzy')
if parameters == 'monotonic':
tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
tmpdata = self.partitioner.fuzzyfy(data, mode='sets', method='maximum')
flrs = FLR.generate_recurrent_flrs(tmpdata)
self.generateFLRG(flrs)
self.generate_flrg(flrs)
else:
self.generate_flrg(data)
def generate_lhs_flrg(self, sample, explain=False):
lags = {}
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
for k in sample]
return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
lags = []
flrgs = []
for ct, o in enumerate(self.lags):
lhs = FuzzySet.fuzzyfy(sample[o - 1], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut)
lags[ct] = lhs
lhs = sample[o - 1]
lags.append( lhs )
if explain:
print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
# Trace the possible paths
for p in root.paths():
for path in product(*lags):
flrg = ProbabilisticWeightedFLRG(self.order)
path = list(reversed(list(filter(None.__ne__, p))))
for lhs in path:
flrg.append_lhs(lhs)
@ -162,14 +163,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
for flrg in flrgs:
lhs_mv = flrg.get_membership(sample, self.sets)
lhs_mv = flrg.get_membership(sample, self.partitioner.sets)
if flrg.get_key() not in self.flrgs:
self.flrgs[flrg.get_key()] = flrg;
fuzzyfied = [(s, self.sets[s].membership(data[k]))
for s in self.sets.keys()
if self.sets[s].membership(data[k]) > self.alpha_cut]
fuzzyfied = self.partitioner.fuzzyfy(data[k], mode='both', method='fuzzy',
alpha_cut=self.alpha_cut)
mvs = []
for set, mv in fuzzyfied:
@ -501,23 +501,19 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
lags = {}
lags = []
# Find all bins of past distributions with probability greater than zero
for ct, d in enumerate(self.lags):
dd = ret[k - d]
vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0]
lags[ct] = sorted(vals)
lags.append( sorted(vals) )
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
# Trace all possible combinations between the bins of past distributions
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
for path in product(*lags):
# get the combined probabilities for this path

View File

@ -104,6 +104,9 @@ class Partitioner(object):
"""
return self.sets[self.ordered_sets[-1]]
def fuzzyfy(self, data, **kwargs):
return FuzzySet.fuzzyfy(data, self, **kwargs)
def plot(self, ax, rounding=0):
"""
Plot the partitioning using the Matplotlib axis ax

View File

@ -25,7 +25,7 @@ p = Grid.GridPartitioner(data=dataset, npart=20)
print(p)
model = hofts.WeightedHighOrderFTS(partitioner=p, order=2)
model = hofts.HighOrderFTS(partitioner=p, order=2)
model.fit(dataset) #[22, 22, 23, 23, 24])

View File

@ -1,11 +1,12 @@
import pandas as pd
import matplotlib.pylab as plt
from pyFTS.data import TAIEX as tx
from pyFTS.data import TAIEX, Malaysia
from pyFTS.common import Transformations
from pyFTS.benchmarks import Measures
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.common import Transformations, Util
from pyFTS.models import pwfts
from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
@ -17,7 +18,7 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
'''
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
data = [[12, 100], [13, 200]]
@ -36,10 +37,34 @@ f = lambda x: x + pd.to_timedelta(1, unit='h')
for ix, row in df.iterrows():
print(row['data'])
print(f(row['data']))
'''
# Multivariate time series
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:]
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp)
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
data=train_mv)
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
model.append_variable(vhour)
model.append_variable(vavg)
model.target_variable = vavg
model.fit(train_mv)
print(model)
'''
train_mv = {}
test_mv = {}