Improvements on multivariate and multivariate ahead forecasting methods for CMVFTS

This commit is contained in:
Petrônio Cândido 2019-04-16 13:30:37 -03:00
parent d2725a94aa
commit c3457a3569
3 changed files with 78 additions and 22 deletions

View File

@ -148,7 +148,7 @@ class FTS(object):
elif type == 'distribution': elif type == 'distribution':
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs) ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
elif type == 'multivariate': elif type == 'multivariate':
ret = self.forecast_ahead_multivariate(ndata, **kwargs) ret = self.forecast_ahead_multivariate(ndata, steps_ahead, **kwargs)
if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type): if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type):
raise ValueError('The argument \'type\' has an unknown value.') raise ValueError('The argument \'type\' has an unknown value.')

View File

@ -4,6 +4,7 @@ import pandas as pd
from pyFTS.common import FuzzySet, FLR, fts, flrg from pyFTS.common import FuzzySet, FLR, fts, flrg
from pyFTS.models import hofts from pyFTS.models import hofts
from pyFTS.models.multivariate import mvfts, grid, common from pyFTS.models.multivariate import mvfts, grid, common
from types import LambdaType
class ClusteredMVFTS(mvfts.MVFTS): class ClusteredMVFTS(mvfts.MVFTS):
@ -72,18 +73,53 @@ class ClusteredMVFTS(mvfts.MVFTS):
ndata = self.check_data(data) ndata = self.check_data(data)
generators = kwargs.get('generators', {})
already_processed_cols = []
ret = {} ret = {}
ret[self.target_variable.data_label] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
for var in self.explanatory_variables: for var in self.explanatory_variables:
if self.target_variable.name != var.name: if var.data_label not in already_processed_cols:
if var.data_label in generators:
if isinstance(generators[var.data_label], LambdaType):
fx = generators[var.data_label]
if len(data[var.data_label].values) > self.order:
ret[var.data_label] = [fx(k) for k in data[var.data_label].values[self.order:]]
else:
ret[var.data_label] = [fx(data[var.data_label].values[-1])]
elif isinstance(generators[var.data_label], fts.FTS):
model = generators[var.data_label]
if not model.is_multivariate:
ret[var.data_label] = model.forecast(data[var.data_label].values)
else:
ret[var.data_label] = model.forecast(data)
elif self.target_variable.name != var.name:
self.target_variable = var self.target_variable = var
self.partitioner.change_target_variable(var) self.partitioner.change_target_variable(var)
self.model.partitioner = self.partitioner self.model.partitioner = self.partitioner
self.model.reset_calculated_values() self.model.reset_calculated_values()
ret[var.data_label] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs) already_processed_cols.append(var.data_label)
return pd.DataFrame(ret, columns=ret.keys()) return pd.DataFrame(ret, columns=ret.keys())
def forecast_ahead_multivariate(self, data, steps, **kwargs):
    """Iteratively forecast all variables, feeding each forecast back as input.

    The first ``self.order`` rows of the transformed data seed the recursion.
    At each step a window of ``self.order`` rows is passed to
    ``forecast_multivariate`` and the returned rows are appended to the
    running result, so later windows slide over previously forecast rows.

    :param data: multivariate input; it is passed through
        ``apply_transformations`` and then sliced with ``.iloc``
        (presumably a pandas DataFrame -- confirm against callers)
    :param steps: number of forecasting iterations to run
    :param kwargs: forwarded unchanged to ``forecast_multivariate``
    :return: the ``self.order`` seed rows followed by the rows returned by
        ``forecast_multivariate`` at every step
    """
    ndata = self.apply_transformations(data)

    ret = ndata.iloc[:self.order]

    for k in np.arange(0, steps):
        # Window of `order` rows starting at offset k of the running result.
        sample = ret.iloc[k:self.order + k]
        tmp = self.forecast_multivariate(sample, **kwargs)
        # DataFrame.append was removed in pandas 2.0; pd.concat with
        # ignore_index=True is the equivalent row-wise append.
        ret = pd.concat([ret, tmp], ignore_index=True)

    return ret
def __str__(self): def __str__(self):
"""String representation of the model""" """String representation of the model"""
return str(self.model) return str(self.model)

View File

@ -14,12 +14,13 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
from pyFTS.common import Transformations, Membership from pyFTS.common import Transformations, Membership
'''
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';') dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S') dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv = dataset.iloc[:24505] train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:] test_mv = dataset.iloc[24505:24605]
from itertools import product from itertools import product
@ -65,22 +66,41 @@ parameters = [
{'order': 2, 'knn': 3}, {'order': 2, 'knn': 3},
] ]
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): from pyFTS.benchmarks import Measures
if method != cmvfts.ClusteredMVFTS: time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
model = method(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, **parameters[ct])
else: model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, order=2, knn=2)
fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
model = method(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
**parameters[ct])
model.shortname += str(ct)
model.fit(train_mv) model.fit(train_mv)
forecasts = model.predict(test_mv.iloc[:100]) forecasts = model.predict(test_mv, type='multivariate', generators={'data': time_generator}, steps_ahead=24 )
print(model.shortname, forecasts) print(forecasts)
'''
from pyFTS.data import lorentz
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
from pyFTS.benchmarks import Measures

# Load the lorentz dataset and split it: rows before position 4000 train the
# model, the remaining rows are used as forecasting input.
df = lorentz.get_dataframe(iterations=5000)
train, test = df.iloc[:4000], df.iloc[4000:]

# One variable per column, each built with a 45-partition GridPartitioner
# over the training data.
vx, vy, vz = (
    variable.Variable(axis, data_label=axis, alias=axis,
                      partitioner=Grid.GridPartitioner, npart=45, data=train)
    for axis in ("x", "y", "z")
)

# Order-5 granular weighted model with `x` as the target variable.
model = granular.GranularWMVFTS(
    explanatory_variables=[vx, vy, vz],
    target_variable=vx,
    order=5,
    knn=2,
)
model.fit(train)

# Multivariate forecast, 20 steps ahead.
forecasts = model.predict(test, type='multivariate', steps_ahead=20)
print(forecasts)