Multivariate forecasting on cmvfts

This commit is contained in:
Petrônio Cândido 2018-12-11 18:26:05 -02:00
parent 2fcb55b2fe
commit 25d69a72ad
4 changed files with 65 additions and 185 deletions

View File

@ -88,7 +88,7 @@ class FTS(object):
:param data: time series with minimal length to the order of the model
:keyword type: the forecasting type, one of these values: point(default), interval or distribution.
:keyword type: the forecasting type, one of these values: point(default), interval, distribution or multivariate.
:keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
:keyword start: in the multi step forecasting, the index of the data where to start forecasting
:keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
@ -130,6 +130,8 @@ class FTS(object):
ret = self.forecast_interval(ndata, **kwargs)
elif type == 'distribution':
ret = self.forecast_distribution(ndata, **kwargs)
elif type == 'multivariate':
ret = self.forecast_multivariate(ndata, **kwargs)
elif steps_ahead > 1:
if type == 'point':
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
@ -137,8 +139,10 @@ class FTS(object):
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
elif type == 'distribution':
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
elif type == 'multivariate':
ret = self.forecast_ahead_multivariate(ndata, **kwargs)
if not ['point', 'interval', 'distribution'].__contains__(type):
if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type):
raise ValueError('The argument \'type\' has an unknown value.')
else:
@ -184,6 +188,16 @@ class FTS(object):
"""
raise NotImplementedError('This model do not perform one step ahead distribution forecasts!')
def forecast_multivariate(self, data, **kwargs):
    """
    One step ahead forecast for all the variables of a multivariate model.

    This base implementation is only a placeholder and always raises.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this base implementation
    """
    message = 'This model do not perform one step ahead multivariate forecasts!'
    raise NotImplementedError(message)
def forecast_ahead(self, data, steps, **kwargs):
"""
Point forecast n steps ahead
@ -233,6 +247,17 @@ class FTS(object):
"""
raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!')
def forecast_ahead_multivariate(self, data, steps, **kwargs):
    """
    Multivariate forecast n steps ahead.

    This base implementation is only a placeholder and always raises.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param steps: the number of steps ahead to forecast
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this base implementation
    """
    # The message names the multi step variant, matching the other
    # forecast_ahead_* placeholders rather than the one step ahead ones.
    raise NotImplementedError('This model do not perform multi step ahead multivariate forecasts!')
def train(self, data, **kwargs):
"""
Method specific parameter fitting

View File

@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
from pyFTS.common import FuzzySet, FLR, fts, flrg
from pyFTS.models import hofts
from pyFTS.models.multivariate import mvfts, grid, common
@ -55,24 +56,38 @@ class ClusteredMVFTS(mvfts.MVFTS):
if self.model.is_high_order:
self.model.order = self.order
if self.pre_fuzzyfy:
ndata = self.fuzzyfy(data)
else:
ndata = [self.format_data(k) for k in data.to_dict('records')]
ndata = self.check_data(data)
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
self.cluster.prune()
def check_data(self, data):
    """
    Convert the raw input into the representation handed to the inner model.

    :param data: input data; when ``pre_fuzzyfy`` is false it must offer ``to_dict('records')``
    :return: ``self.fuzzyfy(data)`` when ``pre_fuzzyfy`` is set, otherwise a list
             with ``self.format_data`` applied to each record of ``data``
    """
    if not self.pre_fuzzyfy:
        records = data.to_dict('records')
        return [self.format_data(row) for row in records]
    return self.fuzzyfy(data)
def forecast(self, ndata, **kwargs):
    """
    Forecast with the inner model after preparing the input.

    The input is prepared exactly once, through ``check_data``. Preparing it
    inline and then again through ``check_data`` would transform the data
    twice.

    :param ndata: raw input data, in the form accepted by ``check_data``
    :param kwargs: extra parameters forwarded to the inner model's ``forecast``
    :return: whatever the inner model's ``forecast`` returns
    """
    prepared = self.check_data(ndata)
    return self.model.forecast(prepared, fuzzyfied=self.pre_fuzzyfy, **kwargs)
def forecast_multivariate(self, data, **kwargs):
    """
    Forecast every explanatory variable, one column per variable.

    Each variable is made the cluster's target in turn and the inner model is
    asked for a forecast. The cluster is pointed back at the model's own
    target variable afterwards, even if a forecast raises, so later calls to
    ``forecast`` do not predict the last explanatory variable by accident.

    :param data: raw input data, in the form accepted by ``check_data``
    :param kwargs: extra parameters forwarded to the inner model's ``forecast``
    :return: a Pandas DataFrame whose columns are the variable names
    """
    ndata = self.check_data(data)

    # NOTE(review): assumes the cluster was built for self.target_variable;
    # nothing is restored when the model has no target variable set.
    original_target = getattr(self, 'target_variable', None)

    forecasts = {}
    try:
        for var in self.explanatory_variables:
            self.cluster.change_target_variable(var)
            forecasts[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
    finally:
        if original_target is not None:
            self.cluster.change_target_variable(original_target)

    return pd.DataFrame(forecasts, columns=list(forecasts))
def __str__(self):
    """Return the text form of the wrapped inner model."""
    inner = self.model
    return str(inner)

View File

@ -86,7 +86,6 @@ class GridCluster(partitioner.Partitioner):
sys.setrecursionlimit(1000)
def knn(self, data):
tmp = [data[k.name]
for k in self.mvfts.explanatory_variables]
@ -106,3 +105,7 @@ class GridCluster(partitioner.Partitioner):
def fuzzyfy(self, data, **kwargs):
    """
    Fuzzyfy ``data`` against this cluster.

    Delegates to ``fuzzyfy_instance_clustered``, passing this object as the
    cluster and forwarding the keyword arguments unchanged.
    """
    fuzzyfied = fuzzyfy_instance_clustered(data, self, **kwargs)
    return fuzzyfied
def change_target_variable(self, variable):
    """
    Point every fuzzy set held in ``self.sets`` at a new target variable.

    :param variable: the object stored in each set's ``target_variable`` attribute
    """
    fuzzy_sets = self.sets
    for key in fuzzy_sets:
        setattr(fuzzy_sets[key], 'target_variable', variable)

View File

@ -18,184 +18,21 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
# NOTE(review): these statements load two unrelated inputs (a remote CSV and
# the dataframe from henon.get_dataframe) -- confirm both are still needed.
# Read the ';'-separated CSV from the remote URL into a dataframe.
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
from pyFTS.data import henon
# Dataframe produced by the henon module, requested with 1000 iterations.
df = henon.get_dataframe(iterations=1000)
# Parse the 'data' column as timestamps in 'YYYY-mm-dd HH:MM:SS' format.
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
from pyFTS.models.multivariate import variable, cmvfts
# Raw values of the 'glo_avg' column.
data = dataset['glo_avg'].values
# Positional split: rows before index 24505 for training, the rest for testing.
train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:]
'''
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
data = [[12, 100], [13, 200]]
for k in data:
k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
df = pd.DataFrame(data, columns=['data', 'glo_avg'])
#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
#print(forecasts)
f = lambda x: x + pd.to_timedelta(1, unit='h')
for ix, row in df.iterrows():
print(row['data'])
print(f(row['data']))
'''
# Multivariate time series
'''
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:]
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp)
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
data=train_mv)
vx = variable.Variable("x", data_label="x", partitioner=Grid.GridPartitioner, npart=15, data=df)
vy = variable.Variable("y", data_label="y", partitioner=Grid.GridPartitioner, npart=15, data=df)
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
model.append_variable(vhour)
model.append_variable(vavg)
model.target_variable = vavg
model.fit(train_mv)
model.append_variable(vx)
model.append_variable(vy)
model.target_variable = vx
Util.persist_obj(model, model.shortname)
'''
# NOTE(review): no live assignment to `model` is visible before this point
# (the one nearby sits inside a triple-quoted string) -- confirm where it is defined.
# Fit on the first 800 rows of df.
model.fit(df.iloc[:800])
#model = Util.load_obj("ClusteredMVFTS")
# Forecast all variables for the remaining rows; the result replaces df.
df = model.predict(df.iloc[800:], type='multivariate')
# Replace the model with a previously persisted one loaded by name.
model = Util.load_obj("ClusteredMVFTS2loadorder2knn2")
print(model)
# Default forecast of the loaded model over the test split.
print(model.predict(test_mv))
'''
train_mv = {}
test_mv = {}
models = {}
for key in ['price', 'solar', 'load']:
models[key] = []
dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
dataset['hour'] = dataset.index.values % 24
data = dataset['price'].values.flatten()
train_split = 24 * 800
train_mv['price'] = dataset.iloc[:train_split]
test_mv['price'] = dataset.iloc[train_split:]
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv['solar'] = dataset.iloc[:24505]
test_mv['solar'] = dataset.iloc[24505:]
from pyFTS.data import Malaysia
dataset = Malaysia.get_dataframe()
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
train_mv['load'] = dataset.iloc[:train_split]
test_mv['load'] = dataset.iloc[train_split:]
exogenous = {}
endogenous = {}
for key in models.keys():
exogenous[key] = {}
vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['price'],
partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'})
exogenous['price']['Hour'] = vhour
vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55,
data=train_mv['price'])
endogenous['price'] = vprice
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['solar'], partitioner_specific=sp)
exogenous['solar']['Hour'] = vhour
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
data=train_mv['solar'])
endogenous['solar'] = vavg
sp = {'seasonality': DateTime.hour_of_day}
vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['load'], partitioner_specific=sp)
exogenous['load']['Hour'] = vhourp
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv['load'])
endogenous['load'] = vload
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15])
parameters = [
{},{},
{'order':2, 'knn': 1},
{'order':2, 'knn': 2},
{'order':2, 'knn': 3},
]
for ct, key in enumerate(models.keys()):
for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
print(key, method, parameters[ct2])
model = method(**parameters[ct2])
_key2 = ""
for k in parameters[ct2].keys():
_key2 += k + str(parameters[ct2][k])
model.shortname += str(ct) + key + _key2
for var in exogenous[key].values():
model.append_variable(var)
model.append_variable(endogenous[key])
model.target_variable = endogenous[key]
model.fit(train_mv[key])
models[key].append(model.shortname)
Util.persist_obj(model, model.shortname)
del(model)
'''
# Show the current contents of df.
print(df)