From 5b2b5ece55f96fdec15af92d399538d4bf593717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Tue, 13 Nov 2018 11:54:20 -0200 Subject: [PATCH] forecast_ahead for multivariate methods; other minor improvements --- pyFTS/models/multivariate/cmvfts.py | 12 ++++----- pyFTS/models/multivariate/grid.py | 3 ++- pyFTS/models/multivariate/mvfts.py | 40 ++++++++++++++++++++++++++--- pyFTS/tests/multivariate.py | 27 +++++++++++++------ 4 files changed, 64 insertions(+), 18 deletions(-) diff --git a/pyFTS/models/multivariate/cmvfts.py b/pyFTS/models/multivariate/cmvfts.py index 6e9e0a6..8d94d51 100644 --- a/pyFTS/models/multivariate/cmvfts.py +++ b/pyFTS/models/multivariate/cmvfts.py @@ -25,6 +25,7 @@ class ClusteredMVFTS(mvfts.MVFTS): """The FTS method specific parameters""" self.model = None """The most recent trained model""" + self.knn = kwargs.get('knn', 2) self.is_high_order = True @@ -32,9 +33,12 @@ class ClusteredMVFTS(mvfts.MVFTS): self.lags = kwargs.get("lags", None) self.alpha_cut = kwargs.get('alpha_cut', 0.25) + self.shortname = "ClusteredMVFTS" + self.name = "Clustered Multivariate FTS" + def fuzzyfy(self,data): ndata = [] - for ct in range(1, len(data.index)): + for ct in range(1, len(data.index)+1): ix = data.index[ct - 1] data_point = self.format_data(data.loc[ix]) ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, self.alpha_cut)) @@ -44,7 +48,7 @@ class ClusteredMVFTS(mvfts.MVFTS): def train(self, data, **kwargs): - self.cluster = self.cluster_method(data=data, mvfts=self) + self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn) self.model = self.fts_method(partitioner=self.cluster, **self.fts_params) if self.model.is_high_order: @@ -54,8 +58,6 @@ class ClusteredMVFTS(mvfts.MVFTS): ndata = self.fuzzyfy(data) self.model.train(ndata, fuzzyfied=True) - self.shortname = self.model.shortname - def forecast(self, ndata, **kwargs): @@ -63,8 +65,6 @@ class ClusteredMVFTS(mvfts.MVFTS): return self.model.forecast(ndata, fuzzyfied=True, **kwargs) - - def __str__(self): """String representation of the model""" diff --git a/pyFTS/models/multivariate/grid.py b/pyFTS/models/multivariate/grid.py index 2f22249..83ee522 100644 --- a/pyFTS/models/multivariate/grid.py +++ b/pyFTS/models/multivariate/grid.py @@ -18,6 +18,7 @@ class GridCluster(partitioner.Partitioner): self.kdtree = None self.index = {} self.build(None) + self.neighbors = kwargs.get('neighbors', 2) def build(self, data): @@ -47,7 +48,7 @@ class GridCluster(partitioner.Partitioner): def knn(self, data): tmp = [data[k.name] for k in self.mvfts.explanatory_variables] - tmp, ix = self.kdtree.query(tmp,2) + tmp, ix = self.kdtree.query(tmp, self.neighbors ) if not isinstance(ix, (list, np.ndarray)): ix = [ix] diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py index 2ffca85..39b6a07 100644 --- a/pyFTS/models/multivariate/mvfts.py +++ b/pyFTS/models/multivariate/mvfts.py @@ -113,15 +113,12 @@ class MVFTS(fts.FTS): for flr in flrs: flrg = mvflrg.FLRG(lhs=flr.LHS) if flrg.get_key() not in self.flrgs: - #print('hit') mvs.append(0.) mps.append(0.) else: mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) - #print('mv', mvs) - #print('mp', mps) mv = np.array(mvs) mp = np.array(mps) @@ -131,6 +128,43 @@ class MVFTS(fts.FTS): params=data[self.target_variable.data_label].values) return ret + def forecast_ahead(self, data, steps, **kwargs): + generators = kwargs.get('generators',None) + + if generators is None: + raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' + + ' are the variables names (except the target_variable) and the values are ' + + 'lambda functions that accept one value (the actual value of the variable) ' + ' and return the next value.') + + ndata = self.apply_transformations(data) + + ret = [] + for k in np.arange(0, steps): + ix = ndata.index[-self.max_lag:] + sample = ndata.loc[ix] + tmp = self.forecast(sample, **kwargs) + + if isinstance(tmp, (list, np.ndarray)): + tmp = tmp[-1] + + ret.append(tmp) + + last_data_point = sample.loc[sample.index[-1]] + + new_data_point = {} + + for var in self.explanatory_variables: + if var.name != self.target_variable.name: + new_data_point[var.data_label] = generators[var.name](last_data_point[var.data_label]) + + new_data_point[self.target_variable.data_label] = tmp + + ndata = ndata.append(new_data_point, ignore_index=True) + + return ret + + def clone_parameters(self, model): super(MVFTS, self).clone_parameters(model) diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index 224e59f..1f81523 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -93,14 +93,25 @@ vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGr vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=10, data=train_mv) -model1 = cmvfts.ClusteredMVFTS(order=2) -model1.shortname += "1" -model1.append_variable(vhour) -model1.append_variable(vprice) -model1.target_variable = vprice -model1.fit(train_mv) -print(model1) +params = [ + {}, + {}, + {'order': 2, 'knn': 1}, + {'order': 2, 'knn': 2}, + {'order': 2, 'knn': 3} +] + +for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): + model = method(**params[ct]) + model.append_variable(vhour) + model.append_variable(vprice) + model.target_variable = vprice + model.fit(train_mv) + print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model)) + +#print(model1) + +#print(model1.predict(test_mv, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 })) -print(Measures.get_point_statistics(test_mv, model1)) #""" \ No newline at end of file