Optimizations and refactorings in hofts and pwfts; fuzzyfy method added to Partitioner; improvements to cmvfts so that all models can be used with it
This commit is contained in:
parent
cf24e88b8a
commit
1773499059
@ -106,7 +106,6 @@ def __binary_search(x, fuzzy_sets, ordered_sets):
|
||||
first = midpoint + 1
|
||||
|
||||
|
||||
|
||||
def fuzzyfy(data, partitioner, **kwargs):
|
||||
"""
|
||||
A general method for fuzzyfication.
|
||||
@ -117,7 +116,8 @@ def fuzzyfy(data, partitioner, **kwargs):
|
||||
|
||||
:keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
|
||||
:keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
|
||||
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership values for all fuzzy sets)
|
||||
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
|
||||
values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
|
||||
:returns: a list with the fuzzyfied values, depending on the mode
|
||||
"""
|
||||
alpha_cut = kwargs.get('alpha_cut', 0.)
|
||||
@ -126,11 +126,26 @@ def fuzzyfy(data, partitioner, **kwargs):
|
||||
if isinstance(data, (list, np.ndarray)):
|
||||
if mode == 'vector':
|
||||
return fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
|
||||
elif mode == 'both':
|
||||
mvs = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
|
||||
fs = []
|
||||
for mv in mvs:
|
||||
fsets = [(partitioner.ordered_sets[ix], mv[ix])
|
||||
for ix in np.arange(len(mv))
|
||||
if mv[ix] >= alpha_cut]
|
||||
fs.append(fsets)
|
||||
return fs
|
||||
else:
|
||||
return fuzzyfy_series(data, partitioner.sets, method, alpha_cut, partitioner.ordered_sets)
|
||||
else:
|
||||
if mode == 'vector':
|
||||
return fuzzyfy_instance(data, partitioner.sets, partitioner.ordered_sets)
|
||||
elif mode == 'both':
|
||||
mv = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
|
||||
fsets = [(partitioner.ordered_sets[ix], mv[ix])
|
||||
for ix in np.arange(len(mv))
|
||||
if mv[ix] >= alpha_cut]
|
||||
return fsets
|
||||
else:
|
||||
return get_fuzzysets(data, partitioner.sets, partitioner.ordered_sets, alpha_cut)
|
||||
|
||||
|
@ -464,8 +464,12 @@ class FTS(object):
|
||||
"""String representation of the model"""
|
||||
|
||||
tmp = self.name + ":\n"
|
||||
for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.sets)):
|
||||
tmp = tmp + str(self.flrgs[r]) + "\n"
|
||||
if self.partitioner.type == 'common':
|
||||
for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)):
|
||||
tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
|
||||
else:
|
||||
for r in self.model.flrgs:
|
||||
tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
|
||||
return tmp
|
||||
|
||||
def __len__(self):
|
||||
|
@ -6,7 +6,8 @@ using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DO
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from pyFTS.common import FuzzySet, FLR, fts, flrg, tree
|
||||
from pyFTS.common import FuzzySet, FLR, fts, flrg
|
||||
from itertools import product
|
||||
|
||||
class HighOrderFLRG(flrg.FLRG):
|
||||
"""Conventional High Order Fuzzy Logical Relationship Group"""
|
||||
@ -106,30 +107,25 @@ class HighOrderFTS(fts.FTS):
|
||||
|
||||
def generate_lhs_flrg(self, sample, explain=False):
|
||||
|
||||
nsample = [FuzzySet.fuzzyfy(k, partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut)
|
||||
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
|
||||
for k in sample]
|
||||
|
||||
return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
|
||||
|
||||
def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
|
||||
lags = {}
|
||||
|
||||
lags = []
|
||||
flrgs = []
|
||||
|
||||
for ct, o in enumerate(self.lags):
|
||||
lags[ct] = sample[o-1]
|
||||
lhs = sample[o - 1]
|
||||
lags.append(lhs)
|
||||
|
||||
if explain:
|
||||
print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
|
||||
|
||||
root = tree.FLRGTreeNode(None)
|
||||
|
||||
tree.build_tree_without_order(root, lags, 0)
|
||||
|
||||
# Trace the possible paths
|
||||
for p in root.paths():
|
||||
for path in product(*lags):
|
||||
flrg = HighOrderFLRG(self.order)
|
||||
path = list(reversed(list(filter(None.__ne__, p))))
|
||||
|
||||
for lhs in path:
|
||||
flrg.append_lhs(lhs)
|
||||
@ -141,13 +137,12 @@ class HighOrderFTS(fts.FTS):
|
||||
def generate_flrg(self, data):
|
||||
l = len(data)
|
||||
for k in np.arange(self.max_lag, l):
|
||||
lags = {}
|
||||
|
||||
if self.dump: print("FLR: " + str(k))
|
||||
|
||||
sample = data[k - self.max_lag: k]
|
||||
|
||||
rhs = FuzzySet.fuzzyfy(data[k], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut)
|
||||
rhs = self.partitioner.fuzzyfy(data[k], mode="sets", alpha_cut=self.alpha_cut)
|
||||
|
||||
flrgs = self.generate_lhs_flrg(sample)
|
||||
|
||||
@ -158,6 +153,7 @@ class HighOrderFTS(fts.FTS):
|
||||
for st in rhs:
|
||||
self.flrgs[flrg.get_key()].append_rhs(st)
|
||||
|
||||
|
||||
def generate_flrg_fuzzyfied(self, data):
|
||||
l = len(data)
|
||||
for k in np.arange(self.max_lag, l):
|
||||
@ -165,7 +161,6 @@ class HighOrderFTS(fts.FTS):
|
||||
|
||||
sample = data[k - self.max_lag: k]
|
||||
|
||||
|
||||
rhs = data[k]
|
||||
|
||||
flrgs = self.generate_lhs_flrg_fuzzyfied(sample)
|
||||
@ -245,24 +240,18 @@ class WeightedHighOrderFTS(HighOrderFTS):
|
||||
self.shortname = "WHOFTS"
|
||||
|
||||
def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
|
||||
lags = {}
|
||||
|
||||
lags = []
|
||||
flrgs = []
|
||||
|
||||
for ct, o in enumerate(self.lags):
|
||||
lags[ct] = sample[o-1]
|
||||
lags.append(sample[o-1])
|
||||
|
||||
if explain:
|
||||
print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
|
||||
|
||||
root = tree.FLRGTreeNode(None)
|
||||
|
||||
tree.build_tree_without_order(root, lags, 0)
|
||||
print("\t (Lag {}) {} \n".format(o, sample[o-1]))
|
||||
|
||||
# Trace the possible paths
|
||||
for p in root.paths():
|
||||
for path in product(*lags):
|
||||
flrg = WeightedHighOrderFLRG(self.order)
|
||||
path = list(reversed(list(filter(None.__ne__, p))))
|
||||
|
||||
for lhs in path:
|
||||
flrg.append_lhs(lhs)
|
||||
|
@ -36,6 +36,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
self.shortname = "ClusteredMVFTS"
|
||||
self.name = "Clustered Multivariate FTS"
|
||||
|
||||
self.pre_fuzzyfy = kwargs.get('pre_fuzzyfy', True)
|
||||
|
||||
def fuzzyfy(self,data):
|
||||
ndata = []
|
||||
for index, row in data.iterrows():
|
||||
@ -51,28 +53,29 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
|
||||
if self.model.is_high_order:
|
||||
self.model.order = self.model = self.fts_method(partitioner=self.cluster,
|
||||
order=self.order, **self.fts_params)
|
||||
self.model.order = self.order
|
||||
|
||||
ndata = self.fuzzyfy(data)
|
||||
if self.pre_fuzzyfy:
|
||||
ndata = self.fuzzyfy(data)
|
||||
else:
|
||||
ndata = [self.format_data(k) for k in data.to_dict('records')]
|
||||
|
||||
self.model.train(ndata, fuzzyfied=True)
|
||||
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
||||
|
||||
self.cluster.prune()
|
||||
|
||||
def forecast(self, ndata, **kwargs):
|
||||
|
||||
ndata = self.fuzzyfy(ndata)
|
||||
if self.pre_fuzzyfy:
|
||||
ndata = self.fuzzyfy(ndata)
|
||||
else:
|
||||
ndata = [self.format(k) for k in ndata.to_dict('records')]
|
||||
|
||||
return self.model.forecast(ndata, fuzzyfied=True, **kwargs)
|
||||
return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
|
||||
|
||||
def __str__(self):
|
||||
"""String representation of the model"""
|
||||
|
||||
tmp = self.model.shortname + ":\n"
|
||||
for r in self.model.flrgs:
|
||||
tmp = tmp + str(self.model.flrgs[r]) + "\n"
|
||||
return tmp
|
||||
return str(self.model)
|
||||
|
||||
def __len__(self):
|
||||
"""
|
||||
|
@ -42,11 +42,16 @@ def fuzzyfy_instance(data_point, var):
|
||||
return [(var.name, fs) for fs in fsets]
|
||||
|
||||
|
||||
def fuzzyfy_instance_clustered(data_point, cluster, alpha_cut=0.0):
|
||||
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
|
||||
alpha_cut = kwargs.get('alpha_cut', 0.0)
|
||||
mode = kwargs.get('mode', 'sets')
|
||||
fsets = []
|
||||
for fset in cluster.knn(data_point):
|
||||
if cluster.sets[fset].membership(data_point) > alpha_cut:
|
||||
fsets.append(fset)
|
||||
if mode == 'sets':
|
||||
fsets.append(fset)
|
||||
elif mode =='both':
|
||||
fsets.append( (fset, cluster.sets[fset].membership(data_point)) )
|
||||
return fsets
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
from pyFTS.partitioners import partitioner
|
||||
from pyFTS.models.multivariate.common import MultivariateFuzzySet
|
||||
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
|
||||
from itertools import product
|
||||
from scipy.spatial import KDTree
|
||||
import numpy as np
|
||||
@ -104,3 +104,5 @@ class GridCluster(partitioner.Partitioner):
|
||||
else:
|
||||
return [self.index[k] for k in ix]
|
||||
|
||||
def fuzzyfy(self, data, **kwargs):
|
||||
return fuzzyfy_instance_clustered(data, self, **kwargs)
|
||||
|
@ -5,9 +5,10 @@ import numpy as np
|
||||
import pandas as pd
|
||||
import math
|
||||
from operator import itemgetter
|
||||
from pyFTS.common import FLR, FuzzySet, tree
|
||||
from pyFTS.common import FLR, FuzzySet
|
||||
from pyFTS.models import hofts, ifts
|
||||
from pyFTS.probabilistic import ProbabilityDistribution
|
||||
from itertools import product
|
||||
|
||||
|
||||
class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
|
||||
@ -116,33 +117,33 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
parameters = kwargs.get('parameters','fuzzy')
|
||||
|
||||
if parameters == 'monotonic':
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
|
||||
tmpdata = self.partitioner.fuzzyfy(data, mode='sets', method='maximum')
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
self.generateFLRG(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
else:
|
||||
self.generate_flrg(data)
|
||||
|
||||
def generate_lhs_flrg(self, sample, explain=False):
|
||||
lags = {}
|
||||
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
|
||||
for k in sample]
|
||||
|
||||
return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
|
||||
|
||||
def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
|
||||
lags = []
|
||||
|
||||
flrgs = []
|
||||
|
||||
for ct, o in enumerate(self.lags):
|
||||
lhs = FuzzySet.fuzzyfy(sample[o - 1], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut)
|
||||
|
||||
lags[ct] = lhs
|
||||
lhs = sample[o - 1]
|
||||
lags.append( lhs )
|
||||
|
||||
if explain:
|
||||
print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
|
||||
|
||||
root = tree.FLRGTreeNode(None)
|
||||
|
||||
tree.build_tree_without_order(root, lags, 0)
|
||||
|
||||
# Trace the possible paths
|
||||
for p in root.paths():
|
||||
for path in product(*lags):
|
||||
flrg = ProbabilisticWeightedFLRG(self.order)
|
||||
path = list(reversed(list(filter(None.__ne__, p))))
|
||||
|
||||
for lhs in path:
|
||||
flrg.append_lhs(lhs)
|
||||
@ -162,14 +163,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
|
||||
for flrg in flrgs:
|
||||
|
||||
lhs_mv = flrg.get_membership(sample, self.sets)
|
||||
lhs_mv = flrg.get_membership(sample, self.partitioner.sets)
|
||||
|
||||
if flrg.get_key() not in self.flrgs:
|
||||
self.flrgs[flrg.get_key()] = flrg;
|
||||
|
||||
fuzzyfied = [(s, self.sets[s].membership(data[k]))
|
||||
for s in self.sets.keys()
|
||||
if self.sets[s].membership(data[k]) > self.alpha_cut]
|
||||
fuzzyfied = self.partitioner.fuzzyfy(data[k], mode='both', method='fuzzy',
|
||||
alpha_cut=self.alpha_cut)
|
||||
|
||||
mvs = []
|
||||
for set, mv in fuzzyfied:
|
||||
@ -501,23 +501,19 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
|
||||
|
||||
lags = {}
|
||||
lags = []
|
||||
|
||||
# Find all bins of past distributions with probability greater than zero
|
||||
|
||||
for ct, d in enumerate(self.lags):
|
||||
dd = ret[k - d]
|
||||
vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0]
|
||||
lags[ct] = sorted(vals)
|
||||
lags.append( sorted(vals) )
|
||||
|
||||
root = tree.FLRGTreeNode(None)
|
||||
|
||||
tree.build_tree_without_order(root, lags, 0)
|
||||
|
||||
# Trace all possible combinations between the bins of past distributions
|
||||
|
||||
for p in root.paths():
|
||||
path = list(reversed(list(filter(None.__ne__, p))))
|
||||
for path in product(*lags):
|
||||
|
||||
# get the combined probabilities for this path
|
||||
|
||||
|
@ -104,6 +104,9 @@ class Partitioner(object):
|
||||
"""
|
||||
return self.sets[self.ordered_sets[-1]]
|
||||
|
||||
def fuzzyfy(self, data, **kwargs):
|
||||
return FuzzySet.fuzzyfy(data, self, **kwargs)
|
||||
|
||||
def plot(self, ax, rounding=0):
|
||||
"""
|
||||
Plot the partitioning using the Matplotlib axis ax
|
||||
|
@ -25,7 +25,7 @@ p = Grid.GridPartitioner(data=dataset, npart=20)
|
||||
|
||||
print(p)
|
||||
|
||||
model = hofts.WeightedHighOrderFTS(partitioner=p, order=2)
|
||||
model = hofts.HighOrderFTS(partitioner=p, order=2)
|
||||
|
||||
model.fit(dataset) #[22, 22, 23, 23, 24])
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pylab as plt
|
||||
from pyFTS.data import TAIEX as tx
|
||||
from pyFTS.data import TAIEX, Malaysia
|
||||
from pyFTS.common import Transformations
|
||||
|
||||
from pyFTS.benchmarks import Measures
|
||||
from pyFTS.partitioners import Grid, Util as pUtil
|
||||
from pyFTS.common import Transformations, Util
|
||||
from pyFTS.models import pwfts
|
||||
from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
@ -17,7 +18,7 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
|
||||
|
||||
'''
|
||||
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
|
||||
|
||||
data = [[12, 100], [13, 200]]
|
||||
@ -36,10 +37,34 @@ f = lambda x: x + pd.to_timedelta(1, unit='h')
|
||||
for ix, row in df.iterrows():
|
||||
print(row['data'])
|
||||
print(f(row['data']))
|
||||
|
||||
'''
|
||||
|
||||
# Multivariate time series
|
||||
|
||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
||||
|
||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
train_mv = dataset.iloc[:24505]
|
||||
test_mv = dataset.iloc[24505:]
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
||||
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
|
||||
data=train_mv)
|
||||
|
||||
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
|
||||
model.append_variable(vhour)
|
||||
model.append_variable(vavg)
|
||||
model.target_variable = vavg
|
||||
model.fit(train_mv)
|
||||
|
||||
print(model)
|
||||
|
||||
'''
|
||||
train_mv = {}
|
||||
test_mv = {}
|
||||
|
Loading…
Reference in New Issue
Block a user