From 4b07599c435209fe101c6aeed969b4a11ac7f1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Fri, 12 Apr 2019 11:25:13 -0300 Subject: [PATCH] Bugfixes and improvements on multivariate methods --- pyFTS/models/multivariate/cmvfts.py | 2 -- pyFTS/models/multivariate/common.py | 2 +- pyFTS/models/multivariate/grid.py | 37 +++++++++++++++++------- pyFTS/models/multivariate/mvfts.py | 1 - pyFTS/models/multivariate/partitioner.py | 36 +++++++++++++++++++---- pyFTS/models/seasonal/partitioner.py | 28 +++++++++++------- pyFTS/partitioners/partitioner.py | 5 +++- pyFTS/tests/multivariate.py | 16 ++++++---- 8 files changed, 89 insertions(+), 38 deletions(-) diff --git a/pyFTS/models/multivariate/cmvfts.py b/pyFTS/models/multivariate/cmvfts.py index c91155a..1f5db04 100644 --- a/pyFTS/models/multivariate/cmvfts.py +++ b/pyFTS/models/multivariate/cmvfts.py @@ -38,8 +38,6 @@ class ClusteredMVFTS(mvfts.MVFTS): ndata = [] for index, row in data.iterrows(): data_point = self.format_data(row) - #ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, - # alpha_cut=self.alpha_cut)) ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets')) return ndata diff --git a/pyFTS/models/multivariate/common.py b/pyFTS/models/multivariate/common.py index e2bc388..e9c4f3b 100644 --- a/pyFTS/models/multivariate/common.py +++ b/pyFTS/models/multivariate/common.py @@ -54,7 +54,7 @@ def fuzzyfy_instance_clustered(data_point, cluster, **kwargs): alpha_cut = kwargs.get('alpha_cut', 0.0) mode = kwargs.get('mode', 'sets') fsets = [] - for fset in cluster.knn(data_point): + for fset in cluster.search(data_point): if cluster.sets[fset].membership(data_point) > alpha_cut: if mode == 'sets': fsets.append(fset) diff --git a/pyFTS/models/multivariate/grid.py b/pyFTS/models/multivariate/grid.py index 75d110c..6467e09 100644 --- a/pyFTS/models/multivariate/grid.py +++ b/pyFTS/models/multivariate/grid.py @@ -42,18 +42,41 @@ class 
IncrementalGridCluster(partitioner.MultivariatePartitioner): if isinstance(data, pd.DataFrame): ret = [] - for inst in data.iterrows(): + for index, inst in data.iterrows(): mv = self.fuzzyfy(inst, **kwargs) ret.append(mv) return ret + if self.kdtree is not None: + fsets = self.search(data, **kwargs) + else: + fsets = self.incremental_search(data, **kwargs) + + if len(fsets) == 0: + fsets = self.incremental_search(data, **kwargs) + raise Exception("{}".format(data)) + + mode = kwargs.get('mode', 'sets') + if mode == 'sets': + return fsets + elif mode == 'vector': + raise NotImplementedError() + elif mode == 'both': + ret = [] + for key in fsets: + mvfset = self.sets[key] + ret.append((key, mvfset.membership(data))) + return ret + + def incremental_search(self, data, **kwargs): alpha_cut = kwargs.get('alpha_cut', 0.) mode = kwargs.get('mode', 'sets') fsets = {} ret = [] for var in self.explanatory_variables: - fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets') + ac = alpha_cut if alpha_cut > 0. 
else var.alpha_cut + fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets', alpha_cut=ac) fset = [val for key, val in fsets.items()] @@ -66,17 +89,11 @@ class IncrementalGridCluster(partitioner.MultivariatePartitioner): self.explanatory_variables[ct].partitioner[fs]) mvfset.name = key self.sets[key] = mvfset + ret.append(key) - if mode=='sets': - ret.append(key) - elif mode=='vector': - raise NotImplementedError() - elif mode == 'both': - mvfset = self.sets[key] - ret.append((key, mvfset.membership(data))) return ret def prune(self): - pass + self.build_index() diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py index 2fc85fb..bbbb8a9 100644 --- a/pyFTS/models/multivariate/mvfts.py +++ b/pyFTS/models/multivariate/mvfts.py @@ -45,7 +45,6 @@ class MVFTS(fts.FTS): def format_data(self, data): ndata = {} for var in self.explanatory_variables: - #ndata[var.name] = data[var.data_label] ndata[var.name] = var.partitioner.extractor(data[var.data_label]) return ndata diff --git a/pyFTS/models/multivariate/partitioner.py b/pyFTS/models/multivariate/partitioner.py index 72f92dc..d244929 100644 --- a/pyFTS/models/multivariate/partitioner.py +++ b/pyFTS/models/multivariate/partitioner.py @@ -27,6 +27,13 @@ class MultivariatePartitioner(partitioner.Partitioner): data = kwargs.get('data', None) self.build(data) + def format_data(self, data): + ndata = {} + for var in self.explanatory_variables: + ndata[var.name] = var.partitioner.extractor(data[var.data_label]) + + return ndata + def build(self, data): pass @@ -45,10 +52,22 @@ class MultivariatePartitioner(partitioner.Partitioner): self.build_index() - def knn(self, data): - tmp = [data[k.name] - for k in self.explanatory_variables] - tmp, ix = self.kdtree.query(tmp, self.neighbors) + def search(self, data, **kwargs): + ''' + Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several + overlapped fuzzy sets.
+ + :param data: the value to search for the nearest fuzzy sets + :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names. + :return: a list with the nearest fuzzy sets + ''' + if self.kdtree is None: + self.build_index() + + type = kwargs.get('type', 'index') + + ndata = [data[k.name] for k in self.explanatory_variables] + _, ix = self.kdtree.query(ndata, self.neighbors) if not isinstance(ix, (list, np.ndarray)): ix = [ix] @@ -58,9 +77,14 @@ class MultivariatePartitioner(partitioner.Partitioner): for k in ix: tmp.append(self.index[k]) self.count[self.index[k]] = 1 - return tmp - else: + + if type == 'name': return [self.index[k] for k in ix] + elif type == 'index': + return sorted(ix) + + + def fuzzyfy(self, data, **kwargs): return fuzzyfy_instance_clustered(data, self, **kwargs) diff --git a/pyFTS/models/seasonal/partitioner.py b/pyFTS/models/seasonal/partitioner.py index 63c02f1..4d3e6a5 100644 --- a/pyFTS/models/seasonal/partitioner.py +++ b/pyFTS/models/seasonal/partitioner.py @@ -77,21 +77,21 @@ class TimeGridPartitioner(partitioner.Partitioner): tmp = Composite(set_name, superset=True, **kwargs) tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, [self.season.value - pl2, self.season.value, - self.season.value + 0.0000001], self.season.value, alpha=.5, + self.season.value + pl2], self.season.value, alpha=1, **kwargs)) tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, - [c - 0.0000001, c, c + partlen], c, + [c - partlen, c, c + partlen], c, **kwargs)) tmp.centroid = c sets[set_name] = tmp elif c == self.max - partlen: tmp = Composite(set_name, superset=True, **kwargs) tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, - [0.0000001, 0.0, - pl2], 0.0, alpha=.5, + [-pl2, 0.0, + pl2], 0.0, alpha=1, **kwargs)) tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, - [c - partlen, c, c + 0.0000001], c, + [c - partlen, c, c + partlen], c, **kwargs)) tmp.centroid = c 
sets[set_name] = tmp @@ -129,14 +129,14 @@ class TimeGridPartitioner(partitioner.Partitioner): points = [] fset = self.sets[self.ordered_sets[0]] - points.append([fset.centroid, fset.centroid, fset.centroid]) + points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper]) - for ct, key in enumerate(self.ordered_sets[1:-2]): + for ct, key in enumerate(self.ordered_sets[1:-1]): fset = self.sets[key] points.append([fset.lower, fset.centroid, fset.upper]) fset = self.sets[self.ordered_sets[-1]] - points.append([fset.centroid, fset.centroid, fset.centroid]) + points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper]) import sys sys.setrecursionlimit(100000) @@ -145,7 +145,7 @@ class TimeGridPartitioner(partitioner.Partitioner): sys.setrecursionlimit(1000) - def search(self, data, type='index', results=3): + def search(self, data, **kwargs): ''' Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several overlapped fuzzy sets. 
@@ -155,15 +155,21 @@ class TimeGridPartitioner(partitioner.Partitioner): :param results: the number of nearest fuzzy sets to return :return: a list with the nearest fuzzy sets ''' + + type = kwargs.get('type','index') + results = kwargs.get('results',3) + if self.kdtree is None: self.build_index() _, ix = self.kdtree.query([data, data, data], results) + ix = ix.tolist() + if 0 in ix: - ix[-1] = self.partitions-1 + ix.insert(0, self.partitions-1) elif self.partitions-1 in ix: - ix[-1] = 0 + ix.insert(0, 0) if type == 'name': return [self.ordered_sets[k] for k in sorted(ix)] diff --git a/pyFTS/partitioners/partitioner.py b/pyFTS/partitioners/partitioner.py index 7678790..7672907 100644 --- a/pyFTS/partitioners/partitioner.py +++ b/pyFTS/partitioners/partitioner.py @@ -191,7 +191,7 @@ class Partitioner(object): elif data > self.max: return self.partitions-1 - def search(self, data, type='index', results=3): + def search(self, data, **kwargs): ''' Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several overlapped fuzzy sets. 
@@ -204,6 +204,9 @@ class Partitioner(object): if self.kdtree is None: self.build_index() + type = kwargs.get('type','index') + results = kwargs.get('results', 3) + _, ix = self.kdtree.query([data, data, data], results) if type == 'name': diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index 8f602f1..fd562b1 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -171,25 +171,29 @@ from pyFTS.partitioners import Grid sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']} vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12, - data=train, partitioner_specific=sp) + data=train, partitioner_specific=sp, alpha_cut=.5) sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24, - data=train, partitioner_specific=sp) + data=train, partitioner_specific=sp, alpha_cut=.5) + +#print(vhour.partitioner) + +#print(vmonth.partitioner.fuzzyfy(180)) vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad', - partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3, + partitioner=Grid.GridPartitioner, npart=25, alpha_cut=.3, data=train) from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid fs = grid.IncrementalGridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg) + + model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs, knn=3) model.fit(train) -print(fs) - -print(model) +print(len(model))