From 1773499059cdcbffd4d4eb9c85584c1e37dd088c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Wed, 5 Dec 2018 21:17:34 -0200
Subject: [PATCH] Optimizations and refactorings on hofts and pwfts; fuzzyfy
 function on Partitioner; Improvements on cmvfts to enable all models to be
 used with it

---
 pyFTS/common/FuzzySet.py            | 19 +++++++++++--
 pyFTS/common/fts.py                 |  8 ++++--
 pyFTS/models/hofts.py               | 37 +++++++++---------------
 pyFTS/models/multivariate/cmvfts.py | 25 ++++++++--------
 pyFTS/models/multivariate/common.py |  9 ++++--
 pyFTS/models/multivariate/grid.py   |  4 ++-
 pyFTS/models/pwfts.py               | 44 +++++++++++++----------------
 pyFTS/partitioners/partitioner.py   |  3 ++
 pyFTS/tests/general.py              |  2 +-
 pyFTS/tests/multivariate.py         | 31 ++++++++++++++++++--
 10 files changed, 112 insertions(+), 70 deletions(-)

diff --git a/pyFTS/common/FuzzySet.py b/pyFTS/common/FuzzySet.py
index b91aeea..617b519 100644
--- a/pyFTS/common/FuzzySet.py
+++ b/pyFTS/common/FuzzySet.py
@@ -106,7 +106,6 @@ def __binary_search(x, fuzzy_sets, ordered_sets):
             first = midpoint + 1
 
 
-
 def fuzzyfy(data, partitioner, **kwargs):
     """
     A general method for fuzzyfication.
@@ -117,7 +116,8 @@ def fuzzyfy(data, partitioner, **kwargs):
 
     :keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
     :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
-    :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership values for all fuzzy sets)
+    :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
+    values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
     :returns a list with the fuzzyfied values, depending on the mode
     """
     alpha_cut = kwargs.get('alpha_cut', 0.)
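Usage sketch (not part of the patch): the hunk above documents the fuzzyfication modes accepted by FuzzySet.fuzzyfy, and a later hunk in this patch adds a Partitioner.fuzzyfy shortcut that forwards to it. The snippet below exercises the three modes under an assumed setup: a plain GridPartitioner over a synthetic series, with npart=10 and alpha_cut=0.1 chosen only for illustration.

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.common import FuzzySet

# synthetic data and partitioning chosen only for this example
data = np.random.normal(10, 2, 1000)
part = Grid.GridPartitioner(data=data, npart=10)

# mode='sets': names of the fuzzy sets activated by each point (method and alpha_cut apply here)
print(FuzzySet.fuzzyfy(data[:3], part, mode='sets', method='fuzzy', alpha_cut=0.1))

# mode='vector': one membership value per fuzzy set, for each point
print(FuzzySet.fuzzyfy(data[:3], part, mode='vector'))

# mode='both' (added by this patch): (fuzzy set, membership) tuples above the alpha cut
print(FuzzySet.fuzzyfy(data[:3], part, mode='both', alpha_cut=0.1))

# equivalent call through the Partitioner.fuzzyfy delegate added later in this patch
print(part.fuzzyfy(data[:3], mode='both', alpha_cut=0.1))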
@@ -126,11 +126,26 @@ def fuzzyfy(data, partitioner, **kwargs): if isinstance(data, (list, np.ndarray)): if mode == 'vector': return fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets) + elif mode == 'both': + mvs = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets) + fs = [] + for mv in mvs: + fsets = [(partitioner.ordered_sets[ix], mv[ix]) + for ix in np.arange(len(mv)) + if mv[ix] >= alpha_cut] + fs.append(fsets) + return fs else: return fuzzyfy_series(data, partitioner.sets, method, alpha_cut, partitioner.ordered_sets) else: if mode == 'vector': return fuzzyfy_instance(data, partitioner.sets, partitioner.ordered_sets) + elif mode == 'both': + mv = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets) + fsets = [(partitioner.ordered_sets[ix], mv[ix]) + for ix in np.arange(len(mv)) + if mv[ix] >= alpha_cut] + return fsets else: return get_fuzzysets(data, partitioner.sets, partitioner.ordered_sets, alpha_cut) diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py index 15325f7..82b602b 100644 --- a/pyFTS/common/fts.py +++ b/pyFTS/common/fts.py @@ -464,8 +464,12 @@ class FTS(object): """String representation of the model""" tmp = self.name + ":\n" - for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.sets)): - tmp = tmp + str(self.flrgs[r]) + "\n" + if self.partitioner.type == 'common': + for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)): + tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r])) + else: + for r in self.model.flrgs: + tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r])) return tmp def __len__(self): diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py index 43629c2..4b2e698 100644 --- a/pyFTS/models/hofts.py +++ b/pyFTS/models/hofts.py @@ -6,7 +6,8 @@ using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. 
DO """ import numpy as np -from pyFTS.common import FuzzySet, FLR, fts, flrg, tree +from pyFTS.common import FuzzySet, FLR, fts, flrg +from itertools import product class HighOrderFLRG(flrg.FLRG): """Conventional High Order Fuzzy Logical Relationship Group""" @@ -106,30 +107,25 @@ class HighOrderFTS(fts.FTS): def generate_lhs_flrg(self, sample, explain=False): - nsample = [FuzzySet.fuzzyfy(k, partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut) + nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut) for k in sample] return self.generate_lhs_flrg_fuzzyfied(nsample, explain) def generate_lhs_flrg_fuzzyfied(self, sample, explain=False): - lags = {} - + lags = [] flrgs = [] for ct, o in enumerate(self.lags): - lags[ct] = sample[o-1] + lhs = sample[o - 1] + lags.append(lhs) if explain: print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs)) - root = tree.FLRGTreeNode(None) - - tree.build_tree_without_order(root, lags, 0) - # Trace the possible paths - for p in root.paths(): + for path in product(*lags): flrg = HighOrderFLRG(self.order) - path = list(reversed(list(filter(None.__ne__, p)))) for lhs in path: flrg.append_lhs(lhs) @@ -141,13 +137,12 @@ class HighOrderFTS(fts.FTS): def generate_flrg(self, data): l = len(data) for k in np.arange(self.max_lag, l): - lags = {} if self.dump: print("FLR: " + str(k)) sample = data[k - self.max_lag: k] - rhs = FuzzySet.fuzzyfy(data[k], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut) + rhs = self.partitioner.fuzzyfy(data[k], mode="sets", alpha_cut=self.alpha_cut) flrgs = self.generate_lhs_flrg(sample) @@ -158,6 +153,7 @@ class HighOrderFTS(fts.FTS): for st in rhs: self.flrgs[flrg.get_key()].append_rhs(st) + def generate_flrg_fuzzyfied(self, data): l = len(data) for k in np.arange(self.max_lag, l): @@ -165,7 +161,6 @@ class HighOrderFTS(fts.FTS): sample = data[k - self.max_lag: k] - rhs = data[k] flrgs = self.generate_lhs_flrg_fuzzyfied(sample) @@ -245,24 +240,18 @@ class WeightedHighOrderFTS(HighOrderFTS): self.shortname = "WHOFTS" def generate_lhs_flrg_fuzzyfied(self, sample, explain=False): - lags = {} - + lags = [] flrgs = [] for ct, o in enumerate(self.lags): - lags[ct] = sample[o-1] + lags.append(sample[o-1]) if explain: - print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs)) - - root = tree.FLRGTreeNode(None) - - tree.build_tree_without_order(root, lags, 0) + print("\t (Lag {}) {} \n".format(o, sample[o-1])) # Trace the possible paths - for p in root.paths(): + for path in product(*lags): flrg = WeightedHighOrderFLRG(self.order) - path = list(reversed(list(filter(None.__ne__, p)))) for lhs in path: flrg.append_lhs(lhs) diff --git a/pyFTS/models/multivariate/cmvfts.py b/pyFTS/models/multivariate/cmvfts.py index b5b621f..bea237a 100644 --- a/pyFTS/models/multivariate/cmvfts.py +++ b/pyFTS/models/multivariate/cmvfts.py @@ -36,6 +36,8 @@ class ClusteredMVFTS(mvfts.MVFTS): self.shortname = "ClusteredMVFTS" self.name = "Clustered Multivariate FTS" + self.pre_fuzzyfy = kwargs.get('pre_fuzzyfy', True) + def fuzzyfy(self,data): ndata = [] for index, row in data.iterrows(): @@ -51,28 +53,29 @@ class ClusteredMVFTS(mvfts.MVFTS): self.model = self.fts_method(partitioner=self.cluster, **self.fts_params) if self.model.is_high_order: - self.model.order = self.model = self.fts_method(partitioner=self.cluster, - order=self.order, **self.fts_params) + self.model.order = self.order - ndata = self.fuzzyfy(data) + if self.pre_fuzzyfy: + ndata = self.fuzzyfy(data) + else: + ndata = [self.format_data(k) 
for k in data.to_dict('records')] - self.model.train(ndata, fuzzyfied=True) + self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy) self.cluster.prune() def forecast(self, ndata, **kwargs): - ndata = self.fuzzyfy(ndata) + if self.pre_fuzzyfy: + ndata = self.fuzzyfy(ndata) + else: + ndata = [self.format(k) for k in ndata.to_dict('records')] - return self.model.forecast(ndata, fuzzyfied=True, **kwargs) + return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs) def __str__(self): """String representation of the model""" - - tmp = self.model.shortname + ":\n" - for r in self.model.flrgs: - tmp = tmp + str(self.model.flrgs[r]) + "\n" - return tmp + return str(self.model) def __len__(self): """ diff --git a/pyFTS/models/multivariate/common.py b/pyFTS/models/multivariate/common.py index e86ca67..43d3bbc 100644 --- a/pyFTS/models/multivariate/common.py +++ b/pyFTS/models/multivariate/common.py @@ -42,11 +42,16 @@ def fuzzyfy_instance(data_point, var): return [(var.name, fs) for fs in fsets] -def fuzzyfy_instance_clustered(data_point, cluster, alpha_cut=0.0): +def fuzzyfy_instance_clustered(data_point, cluster, **kwargs): + alpha_cut = kwargs.get('alpha_cut', 0.0) + mode = kwargs.get('mode', 'sets') fsets = [] for fset in cluster.knn(data_point): if cluster.sets[fset].membership(data_point) > alpha_cut: - fsets.append(fset) + if mode == 'sets': + fsets.append(fset) + elif mode =='both': + fsets.append( (fset, cluster.sets[fset].membership(data_point)) ) return fsets diff --git a/pyFTS/models/multivariate/grid.py b/pyFTS/models/multivariate/grid.py index 2fcc6e8..0441581 100644 --- a/pyFTS/models/multivariate/grid.py +++ b/pyFTS/models/multivariate/grid.py @@ -1,5 +1,5 @@ from pyFTS.partitioners import partitioner -from pyFTS.models.multivariate.common import MultivariateFuzzySet +from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered from itertools import product from scipy.spatial import KDTree import numpy as np @@ -104,3 +104,5 @@ class GridCluster(partitioner.Partitioner): else: return [self.index[k] for k in ix] + def fuzzyfy(self, data, **kwargs): + return fuzzyfy_instance_clustered(data, self, **kwargs) diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 32e0fbc..6e31a41 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -5,9 +5,10 @@ import numpy as np import pandas as pd import math from operator import itemgetter -from pyFTS.common import FLR, FuzzySet, tree +from pyFTS.common import FLR, FuzzySet from pyFTS.models import hofts, ifts from pyFTS.probabilistic import ProbabilityDistribution +from itertools import product class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG): @@ -116,33 +117,33 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): parameters = kwargs.get('parameters','fuzzy') if parameters == 'monotonic': - tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets) + tmpdata = self.partitioner.fuzzyfy(data, mode='sets', method='maximum') flrs = FLR.generate_recurrent_flrs(tmpdata) - self.generateFLRG(flrs) + self.generate_flrg(flrs) else: self.generate_flrg(data) def generate_lhs_flrg(self, sample, explain=False): - lags = {} + nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut) + for k in sample] + + return self.generate_lhs_flrg_fuzzyfied(nsample, explain) + + def generate_lhs_flrg_fuzzyfied(self, sample, explain=False): + lags = [] flrgs = [] for ct, o in enumerate(self.lags): - lhs = FuzzySet.fuzzyfy(sample[o - 1], partitioner=self.partitioner, mode="sets", 
alpha_cut=self.alpha_cut) - - lags[ct] = lhs + lhs = sample[o - 1] + lags.append( lhs ) if explain: print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs)) - root = tree.FLRGTreeNode(None) - - tree.build_tree_without_order(root, lags, 0) - # Trace the possible paths - for p in root.paths(): + for path in product(*lags): flrg = ProbabilisticWeightedFLRG(self.order) - path = list(reversed(list(filter(None.__ne__, p)))) for lhs in path: flrg.append_lhs(lhs) @@ -162,14 +163,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): for flrg in flrgs: - lhs_mv = flrg.get_membership(sample, self.sets) + lhs_mv = flrg.get_membership(sample, self.partitioner.sets) if flrg.get_key() not in self.flrgs: self.flrgs[flrg.get_key()] = flrg; - fuzzyfied = [(s, self.sets[s].membership(data[k])) - for s in self.sets.keys() - if self.sets[s].membership(data[k]) > self.alpha_cut] + fuzzyfied = self.partitioner.fuzzyfy(data[k], mode='both', method='fuzzy', + alpha_cut=self.alpha_cut) mvs = [] for set, mv in fuzzyfied: @@ -501,23 +501,19 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): for k in np.arange(self.max_lag+1, steps+self.max_lag+1): dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) - lags = {} + lags = [] # Find all bins of past distributions with probability greater than zero for ct, d in enumerate(self.lags): dd = ret[k - d] vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0] - lags[ct] = sorted(vals) + lags.append( sorted(vals) ) - root = tree.FLRGTreeNode(None) - - tree.build_tree_without_order(root, lags, 0) # Trace all possible combinations between the bins of past distributions - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) + for path in product(*lags): # get the combined probabilities for this path diff --git a/pyFTS/partitioners/partitioner.py b/pyFTS/partitioners/partitioner.py index ee530a3..9379398 100644 --- a/pyFTS/partitioners/partitioner.py +++ b/pyFTS/partitioners/partitioner.py @@ -104,6 +104,9 @@ class Partitioner(object): """ return self.sets[self.ordered_sets[-1]] + def fuzzyfy(self, data, **kwargs): + return FuzzySet.fuzzyfy(data, self, **kwargs) + def plot(self, ax, rounding=0): """ Plot the partitioning using the Matplotlib axis ax diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py index 6c1debb..78ea4df 100644 --- a/pyFTS/tests/general.py +++ b/pyFTS/tests/general.py @@ -25,7 +25,7 @@ p = Grid.GridPartitioner(data=dataset, npart=20) print(p) -model = hofts.WeightedHighOrderFTS(partitioner=p, order=2) +model = hofts.HighOrderFTS(partitioner=p, order=2) model.fit(dataset) #[22, 22, 23, 23, 24]) diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index c47e07c..aff0951 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -1,11 +1,12 @@ import pandas as pd import matplotlib.pylab as plt -from pyFTS.data import TAIEX as tx +from pyFTS.data import TAIEX, Malaysia from pyFTS.common import Transformations from pyFTS.benchmarks import Measures from pyFTS.partitioners import Grid, Util as pUtil from pyFTS.common import Transformations, Util +from pyFTS.models import pwfts from pyFTS.models.multivariate import common, variable, mvfts, wmvfts from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime @@ -17,7 +18,7 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime 
- +''' model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3') data = [[12, 100], [13, 200]] @@ -36,10 +37,34 @@ f = lambda x: x + pd.to_timedelta(1, unit='h') for ix, row in df.iterrows(): print(row['data']) print(f(row['data'])) - +''' # Multivariate time series +dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';') + +dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S') + +train_mv = dataset.iloc[:24505] +test_mv = dataset.iloc[24505:] + +sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} + +vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24, + data=train_mv, partitioner_specific=sp) + +vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad', + partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3, + data=train_mv) + +model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS) +model.append_variable(vhour) +model.append_variable(vavg) +model.target_variable = vavg +model.fit(train_mv) + +print(model) + ''' train_mv = {} test_mv = {}
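Usage sketch (not part of the patch): the pre_fuzzyfy switch added to ClusteredMVFTS, following the setup from pyFTS/tests/multivariate.py above. Passing order=2 to the wrapper and pairing it with hofts.WeightedHighOrderFTS are assumptions drawn from the commit message ("enable all models to be used with it"), not lines of this diff.

from pyFTS.models import hofts, pwfts
from pyFTS.models.multivariate import cmvfts

# pre_fuzzyfy=True (the default): the wrapper fuzzyfies each multivariate point with
# its cluster partitioner and trains the inner model on already fuzzyfied data.
# order=2 and the WeightedHighOrderFTS pairing are assumptions for this sketch.
model_a = cmvfts.ClusteredMVFTS(pre_fuzzyfy=True, knn=3, order=2,
                                fts_method=hofts.WeightedHighOrderFTS)

# pre_fuzzyfy=False: formatted raw points are passed through and the inner model
# (here PWFTS, as in the test above) performs its own fuzzyfication.
model_b = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3,
                                fts_method=pwfts.ProbabilisticWeightedFTS)

# Both wrappers are then used exactly like in the test script:
#   model.append_variable(vhour); model.append_variable(vavg)
#   model.target_variable = vavg
#   model.fit(train_mv)
#   print(model)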
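A second sketch, also not part of the patch: hofts.py and pwfts.py now expand the fuzzyfied lags with itertools.product instead of the old tree.FLRGTreeNode / build_tree_without_order walk, dropping the reversed-filter step. The lag contents below are invented; they only show which left-hand-side paths product(*lags) enumerates.

from itertools import product

# what generate_lhs_flrg_fuzzyfied receives: the fuzzy sets activated by each lag
lags = [['A1', 'A2'],   # lag 1 activated two sets above the alpha cut
        ['A2'],         # lag 2 activated a single set
        ['A3', 'A4']]   # lag 3 activated two sets

# every combination becomes the left-hand side of one candidate FLRG
for path in product(*lags):
    print(path)
# ('A1', 'A2', 'A3'), ('A1', 'A2', 'A4'), ('A2', 'A2', 'A3'), ('A2', 'A2', 'A4')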