Improvements on multivariate and multivariate ahead forecasting methods for CMVFTS

This commit is contained in:
Petrônio Cândido 2019-04-16 13:30:37 -03:00
parent d2725a94aa
commit c3457a3569
3 changed files with 78 additions and 22 deletions

View File

@ -148,7 +148,7 @@ class FTS(object):
elif type == 'distribution':
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
elif type == 'multivariate':
ret = self.forecast_ahead_multivariate(ndata, **kwargs)
ret = self.forecast_ahead_multivariate(ndata, steps_ahead, **kwargs)
if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type):
raise ValueError('The argument \'type\' has an unknown value.')

View File

@ -4,6 +4,7 @@ import pandas as pd
from pyFTS.common import FuzzySet, FLR, fts, flrg
from pyFTS.models import hofts
from pyFTS.models.multivariate import mvfts, grid, common
from types import LambdaType
class ClusteredMVFTS(mvfts.MVFTS):
@ -72,18 +73,53 @@ class ClusteredMVFTS(mvfts.MVFTS):
ndata = self.check_data(data)
ret = {}
for var in self.explanatory_variables:
if self.target_variable.name != var.name:
self.target_variable = var
self.partitioner.change_target_variable(var)
self.model.partitioner = self.partitioner
self.model.reset_calculated_values()
generators = kwargs.get('generators', {})
ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
already_processed_cols = []
ret = {}
ret[self.target_variable.data_label] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
for var in self.explanatory_variables:
if var.data_label not in already_processed_cols:
if var.data_label in generators:
if isinstance(generators[var.data_label], LambdaType):
fx = generators[var.data_label]
if len(data[var.data_label].values) > self.order:
ret[var.data_label] = [fx(k) for k in data[var.data_label].values[self.order:]]
else:
ret[var.data_label] = [fx(data[var.data_label].values[-1])]
elif isinstance(generators[var.data_label], fts.FTS):
model = generators[var.data_label]
if not model.is_multivariate:
ret[var.data_label] = model.forecast(data[var.data_label].values)
else:
ret[var.data_label] = model.forecast(data)
elif self.target_variable.name != var.name:
self.target_variable = var
self.partitioner.change_target_variable(var)
self.model.partitioner = self.partitioner
self.model.reset_calculated_values()
ret[var.data_label] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
already_processed_cols.append(var.data_label)
return pd.DataFrame(ret, columns=ret.keys())
def forecast_ahead_multivariate(self, data, steps, **kwargs):
    """Iteratively forecast ``steps`` iterations ahead for all variables.

    The first ``self.order`` rows of the transformed input seed the result.
    At iteration ``k`` the rows ``[k, k + self.order)`` of the result built
    so far are passed to ``forecast_multivariate`` and whatever it returns
    is appended, so later iterations consume earlier forecasts.

    :param data: input data; it is passed through ``self.apply_transformations``
    :param steps: number of forecasting iterations to run
    :param kwargs: forwarded unchanged to ``forecast_multivariate``
    :return: the seed rows followed by the output of every iteration; the
             index is renumbered from 0 whenever at least one iteration ran
    """
    ndata = self.apply_transformations(data)

    ret = ndata.iloc[:self.order]

    for k in range(steps):
        sample = ret.iloc[k:self.order + k]
        tmp = self.forecast_multivariate(sample, **kwargs)
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat with ignore_index=True is the equivalent operation.
        ret = pd.concat([ret, tmp], ignore_index=True)

    return ret
def __str__(self):
    """Return the string form of the wrapped ``self.model``."""
    inner_model = self.model
    return str(inner_model)

View File

@ -14,12 +14,13 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
from pyFTS.common import Transformations, Membership
'''
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:]
test_mv = dataset.iloc[24505:24605]
from itertools import product
@ -65,22 +66,41 @@ parameters = [
{'order': 2, 'knn': 3},
]
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
from pyFTS.benchmarks import Measures
if method != cmvfts.ClusteredMVFTS:
model = method(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, **parameters[ct])
else:
fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
model = method(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
**parameters[ct])
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
model.shortname += str(ct)
model.fit(train_mv)
model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, order=2, knn=2)
forecasts = model.predict(test_mv.iloc[:100])
model.fit(train_mv)
print(model.shortname, forecasts)
forecasts = model.predict(test_mv, type='multivariate', generators={'data': time_generator}, steps_ahead=24 )
print(forecasts)
'''
# Demo script: fit a GranularWMVFTS model on the pyFTS `lorentz` dataset and
# print a 20-step-ahead multivariate forecast.
from pyFTS.data import lorentz
# Build the dataset with 5000 iterations; the columns "x", "y" and "z" are
# the ones referenced by the variables declared below.
df = lorentz.get_dataframe(iterations=5000)
# Rows before position 4000 are used for fitting, the remainder for prediction.
train = df.iloc[:4000]
test = df.iloc[4000:]
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.partitioners import Grid
# One Variable per column, each using Grid.GridPartitioner with npart=45 and
# built from the training split.
vx = variable.Variable("x", data_label="x", alias='x', partitioner=Grid.GridPartitioner, npart=45, data=train)
vy = variable.Variable("y", data_label="y", alias='y', partitioner=Grid.GridPartitioner, npart=45, data=train)
vz = variable.Variable("z", data_label="z", alias='z', partitioner=Grid.GridPartitioner, npart=45, data=train)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
from pyFTS.benchmarks import Measures
# All three variables are explanatory; vx is also the target. order=5, knn=2.
model = granular.GranularWMVFTS(explanatory_variables=[vx, vy, vz], target_variable=vx, order=5, knn=2)
model.fit(train)
# Request the multivariate forecast type with steps_ahead=20.
forecasts = model.predict(test, type='multivariate', steps_ahead=20)
print(forecasts)