From 0e4f3c536b309b654cc6ee705cb0da1af016460a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Wed, 10 Apr 2019 22:27:37 -0300
Subject: [PATCH] Multivariate grid.IncrementalGridCluster and improvements on CMVFTS

---
 pyFTS/models/multivariate/cmvfts.py  |  5 ++-
 pyFTS/models/multivariate/grid.py    | 62 ++++++++++++++++++++++++++++
 pyFTS/models/seasonal/common.py      |  6 +--
 pyFTS/models/seasonal/partitioner.py | 60 ++++++++++++++++++++++++++-
 pyFTS/tests/multivariate.py          | 11 +++--
 5 files changed, 134 insertions(+), 10 deletions(-)

diff --git a/pyFTS/models/multivariate/cmvfts.py b/pyFTS/models/multivariate/cmvfts.py
index b5ba0de..c91155a 100644
--- a/pyFTS/models/multivariate/cmvfts.py
+++ b/pyFTS/models/multivariate/cmvfts.py
@@ -38,8 +38,9 @@ class ClusteredMVFTS(mvfts.MVFTS):
         ndata = []
         for index, row in data.iterrows():
             data_point = self.format_data(row)
-            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
-                                                           alpha_cut=self.alpha_cut))
+            #ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
+            #                                               alpha_cut=self.alpha_cut))
+            ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets'))
 
         return ndata
 
diff --git a/pyFTS/models/multivariate/grid.py b/pyFTS/models/multivariate/grid.py
index 46aa13c..75d110c 100644
--- a/pyFTS/models/multivariate/grid.py
+++ b/pyFTS/models/multivariate/grid.py
@@ -31,3 +31,65 @@ class GridCluster(partitioner.MultivariatePartitioner):
 
         self.build_index()
 
+
+class IncrementalGridCluster(partitioner.MultivariatePartitioner):
+    """
+    Multivariate partitioner whose multivariate fuzzy sets are created on demand,
+    the first time fuzzyfy finds a new combination of univariate fuzzy sets.
+    """
+    def __init__(self, **kwargs):
+        super(IncrementalGridCluster, self).__init__(**kwargs)
+        self.name="IncrementalGridCluster"
+        self.build(None)
+
+    def fuzzyfy(self, data, **kwargs):
+        """
+        Fuzzyfy a multivariate instance, registering in self.sets every combination
+        of univariate fuzzy sets that was not seen before.
+
+        :param data: an instance indexable by variable name, or a pandas DataFrame of instances
+        :keyword mode: 'sets' (default) for set names, 'both' for (name, membership) tuples;
+                       'vector' raises NotImplementedError
+        :return: a list with one entry per combination (a list of such lists for a DataFrame)
+        """
+        if isinstance(data, pd.DataFrame):
+            ret = []
+            # iterrows() yields (index, row) pairs; only the row is an instance
+            for _, inst in data.iterrows():
+                mv = self.fuzzyfy(inst, **kwargs)
+                ret.append(mv)
+            return ret
+
+        alpha_cut = kwargs.get('alpha_cut', 0.)
+        mode = kwargs.get('mode', 'sets')
+
+        fsets = {}
+        ret = []
+        for var in self.explanatory_variables:
+            fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets')
+
+        fset = [val for key, val in fsets.items()]
+
+        for p in product(*fset):
+            key = ''.join(p)
+            if key not in self.sets:
+                mvfset = MultivariateFuzzySet(target_variable=self.target_variable)
+                for ct, fs in enumerate(p):
+                    mvfset.append_set(self.explanatory_variables[ct].name,
+                                      self.explanatory_variables[ct].partitioner[fs])
+                mvfset.name = key
+                self.sets[key] = mvfset
+
+            if mode=='sets':
+                ret.append(key)
+            elif mode=='vector':
+                raise NotImplementedError()
+            elif mode == 'both':
+                mvfset = self.sets[key]
+                ret.append((key, mvfset.membership(data)))
+
+        return ret
+
+    def prune(self):
+        pass
+
diff --git a/pyFTS/models/seasonal/common.py b/pyFTS/models/seasonal/common.py
index 2038ad5..ec64296 100644
--- a/pyFTS/models/seasonal/common.py
+++ b/pyFTS/models/seasonal/common.py
@@ -19,9 +19,9 @@ class DateTime(Enum):
     day_of_month = 30
     day_of_year = 364
     day_of_week = 7
-    hour = 6
-    minute = 7
-    second = 8
+    hour = 24
+    minute = 60
+    second = 60
     hour_of_day = 24
     hour_of_week = 168
     hour_of_month = 744
diff --git a/pyFTS/models/seasonal/partitioner.py b/pyFTS/models/seasonal/partitioner.py
index 11f98a4..63c02f1 100644
--- a/pyFTS/models/seasonal/partitioner.py
+++ b/pyFTS/models/seasonal/partitioner.py
@@ -4,6 +4,7 @@ from pyFTS.partitioners import partitioner, Grid
 from pyFTS.models.seasonal.common import DateTime, FuzzySet, strip_datepart
 import numpy as np
 import matplotlib.pylab as plt
+from scipy.spatial import KDTree
 
 
 class TimeGridPartitioner(partitioner.Partitioner):
@@ -56,6 +57,8 @@ class TimeGridPartitioner(partitioner.Partitioner):
             partlen = dlen / self.partitions
         elif self.season == DateTime.day_of_week:
             self.min, self.max, partlen, pl2 = 0, 7, 1, 1
+        elif self.season == DateTime.minute:
+            self.min, self.max, partlen, pl2 = 0, 60, 1, 1
         elif self.season == DateTime.hour:
             self.min, self.max, partlen, pl2 = 0, 24, 1, 1
         elif self.season == DateTime.month:
@@ -77,7 +80,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
                                              self.season.value + 0.0000001], self.season.value, alpha=.5,
                                             **kwargs))
                     tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
-                                            [c - partlen, c, c + partlen], c,
+                                            [c - 0.0000001, c, c + partlen], c,
                                             **kwargs))
                     tmp.centroid = c
                     sets[set_name] = tmp
@@ -88,7 +91,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
                                              pl2], 0.0, alpha=.5,
                                             **kwargs))
                     tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
-                                            [c - partlen, c, c + partlen], c,
+                                            [c - partlen, c, c + 0.0000001], c,
                                             **kwargs))
                     tmp.centroid = c
                     sets[set_name] = tmp
@@ -122,6 +125,59 @@ class TimeGridPartitioner(partitioner.Partitioner):
 
         return sets
 
+    def build_index(self):
+        """
+        Build the KD-tree used by search, adding one point per fuzzy set in the order of
+        self.ordered_sets so that tree indexes match positions in that list.
+        """
+        points = []
+
+        fset = self.sets[self.ordered_sets[0]]
+        points.append([fset.centroid, fset.centroid, fset.centroid])
+
+        # every set between the first and the last gets its own point; skipping one
+        # would shift the tree indexes away from the positions in ordered_sets
+        for key in self.ordered_sets[1:-1]:
+            fset = self.sets[key]
+            points.append([fset.lower, fset.centroid, fset.upper])
+
+        fset = self.sets[self.ordered_sets[-1]]
+        points.append([fset.centroid, fset.centroid, fset.centroid])
+
+        import sys
+        previous_limit = sys.getrecursionlimit()
+        sys.setrecursionlimit(max(previous_limit, 100000))
+        try:
+            self.kdtree = KDTree(points)
+        finally:
+            # restore the caller's limit instead of forcing a hard-coded value
+            sys.setrecursionlimit(previous_limit)
+
+    def search(self, data, type='index', results=3):
+        '''
+        Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
+        overlapping fuzzy sets.
+
+        :param data: the value to search for the nearest fuzzy sets
+        :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
+        :param results: the number of nearest fuzzy sets to return
+        :return: a list with the nearest fuzzy sets
+        '''
+        if self.kdtree is None:
+            self.build_index()
+
+        _, ix = self.kdtree.query([data, data, data], results)
+
+        if 0 in ix:
+            ix[-1] = self.partitions-1
+        elif self.partitions-1 in ix:
+            ix[-1] = 0
+
+        if type == 'name':
+            return [self.ordered_sets[k] for k in sorted(ix)]
+        else:
+            return sorted(ix)
+
     def plot(self, ax):
         """
 
diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py
index ca243df..8f602f1 100644
--- a/pyFTS/tests/multivariate.py
+++ b/pyFTS/tests/multivariate.py
@@ -182,9 +182,14 @@ vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
                          partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
                          data=train)
 
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
+from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
+
+fs = grid.IncrementalGridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
+model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg,
+                              partitioner=fs, knn=3)
 
-model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
 model.fit(train)
 
-forecasts = model.predict(test, type='interval')
\ No newline at end of file
+print(fs)
+
+print(model)