Multivariate forecasting on cmvfts

This commit is contained in:
Petrônio Cândido 2018-12-11 18:26:05 -02:00
parent 2fcb55b2fe
commit 25d69a72ad
4 changed files with 65 additions and 185 deletions

View File

@ -88,7 +88,7 @@ class FTS(object):
:param data: time series with minimal length to the order of the model :param data: time series with minimal length to the order of the model
:keyword type: the forecasting type, one of these values: point(default), interval or distribution. :keyword type: the forecasting type, one of these values: point(default), interval, distribution or multivariate.
:keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast :keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
:keyword start: in the multi step forecasting, the index of the data where to start forecasting :keyword start: in the multi step forecasting, the index of the data where to start forecasting
:keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster :keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
@ -130,6 +130,8 @@ class FTS(object):
ret = self.forecast_interval(ndata, **kwargs) ret = self.forecast_interval(ndata, **kwargs)
elif type == 'distribution': elif type == 'distribution':
ret = self.forecast_distribution(ndata, **kwargs) ret = self.forecast_distribution(ndata, **kwargs)
elif type == 'multivariate':
ret = self.forecast_multivariate(ndata, **kwargs)
elif steps_ahead > 1: elif steps_ahead > 1:
if type == 'point': if type == 'point':
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs) ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
@ -137,8 +139,10 @@ class FTS(object):
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs) ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
elif type == 'distribution': elif type == 'distribution':
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs) ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
elif type == 'multivariate':
ret = self.forecast_ahead_multivariate(ndata, **kwargs)
if not ['point', 'interval', 'distribution'].__contains__(type): if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type):
raise ValueError('The argument \'type\' has an unknown value.') raise ValueError('The argument \'type\' has an unknown value.')
else: else:
@ -184,6 +188,16 @@ class FTS(object):
""" """
raise NotImplementedError('This model do not perform one step ahead distribution forecasts!') raise NotImplementedError('This model do not perform one step ahead distribution forecasts!')
def forecast_multivariate(self, data, **kwargs):
    """
    One step ahead forecast for all the variables of a multivariate model.

    This implementation is a placeholder: it performs no forecasting and only
    raises, so models that support this forecasting type must override it.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this implementation
    """
    message = 'This model do not perform one step ahead multivariate forecasts!'
    raise NotImplementedError(message)
def forecast_ahead(self, data, steps, **kwargs): def forecast_ahead(self, data, steps, **kwargs):
""" """
Point forecast n steps ahead Point forecast n steps ahead
@ -233,6 +247,17 @@ class FTS(object):
""" """
raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!') raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!')
def forecast_ahead_multivariate(self, data, steps, **kwargs):
    """
    Multivariate forecast n steps ahead.

    This implementation is a placeholder: it performs no forecasting and only
    raises, so models that support this forecasting type must override it.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param steps: the number of steps ahead to forecast
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this implementation
    """
    # The message names the multi step case (it used to repeat the one step
    # wording), matching the other multi step ahead placeholders.
    raise NotImplementedError('This model do not perform multi step ahead multivariate forecasts!')
def train(self, data, **kwargs): def train(self, data, **kwargs):
""" """
Method specific parameter fitting Method specific parameter fitting

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
import pandas as pd
from pyFTS.common import FuzzySet, FLR, fts, flrg from pyFTS.common import FuzzySet, FLR, fts, flrg
from pyFTS.models import hofts from pyFTS.models import hofts
from pyFTS.models.multivariate import mvfts, grid, common from pyFTS.models.multivariate import mvfts, grid, common
@ -55,24 +56,38 @@ class ClusteredMVFTS(mvfts.MVFTS):
if self.model.is_high_order: if self.model.is_high_order:
self.model.order = self.order self.model.order = self.order
if self.pre_fuzzyfy: ndata = self.check_data(data)
ndata = self.fuzzyfy(data)
else:
ndata = [self.format_data(k) for k in data.to_dict('records')]
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy) self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
self.cluster.prune() self.cluster.prune()
def check_data(self, data):
    """
    Convert raw input into the representation handed to the internal model.

    :param data: the input data; when pre_fuzzyfy is false it must provide to_dict('records')
    :return: the result of self.fuzzyfy(data) when pre_fuzzyfy is set, otherwise a list with self.format_data applied to each record of data
    """
    if not self.pre_fuzzyfy:
        # One formatted entry per row, in row order.
        records = data.to_dict('records')
        return [self.format_data(record) for record in records]
    return self.fuzzyfy(data)
def forecast(self, ndata, **kwargs): def forecast(self, ndata, **kwargs):
if self.pre_fuzzyfy: ndata = self.check_data(ndata)
ndata = self.fuzzyfy(ndata)
else:
ndata = [self.format_data(k) for k in ndata.to_dict('records')]
return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs) return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
def forecast_multivariate(self, data, **kwargs):
    """
    One step ahead forecast for each explanatory variable of the model.

    The cluster is retargeted to every explanatory variable in turn and the
    internal model is asked for a forecast each time.

    :param data: the input data, converted with check_data before forecasting
    :param kwargs: model specific parameters, forwarded to the internal model's forecast
    :return: a Pandas DataFrame with one column per explanatory variable, labelled with the variable name
    """
    ndata = self.check_data(data)

    # Remember the model's own target so the retargeting below does not leak
    # into later calls (e.g. a plain forecast() after this method).
    # NOTE(review): assumes the cluster sets point at self.target_variable
    # before this call - confirm against the cluster construction.
    original_target = getattr(self, 'target_variable', None)

    forecasts = {}
    try:
        for var in self.explanatory_variables:
            self.cluster.change_target_variable(var)
            forecasts[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
    finally:
        if original_target is not None:
            self.cluster.change_target_variable(original_target)

    return pd.DataFrame(forecasts, columns=list(forecasts))
def __str__(self): def __str__(self):
"""String representation of the model""" """String representation of the model"""
return str(self.model) return str(self.model)

View File

@ -86,7 +86,6 @@ class GridCluster(partitioner.Partitioner):
sys.setrecursionlimit(1000) sys.setrecursionlimit(1000)
def knn(self, data): def knn(self, data):
tmp = [data[k.name] tmp = [data[k.name]
for k in self.mvfts.explanatory_variables] for k in self.mvfts.explanatory_variables]
@ -106,3 +105,7 @@ class GridCluster(partitioner.Partitioner):
def fuzzyfy(self, data, **kwargs): def fuzzyfy(self, data, **kwargs):
return fuzzyfy_instance_clustered(data, self, **kwargs) return fuzzyfy_instance_clustered(data, self, **kwargs)
def change_target_variable(self, variable):
    """
    Point every set held in self.sets at the given variable.

    :param variable: the object assigned to the target_variable attribute of each set
    """
    for fuzzy_set in self.sets.values():
        fuzzy_set.target_variable = variable

View File

@ -18,184 +18,21 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime from pyFTS.models.seasonal.common import DateTime
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';') from pyFTS.data import henon
df = henon.get_dataframe(iterations=1000)
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S') from pyFTS.models.multivariate import variable, cmvfts
data = dataset['glo_avg'].values vx = variable.Variable("x", data_label="x", partitioner=Grid.GridPartitioner, npart=15, data=df)
vy = variable.Variable("y", data_label="y", partitioner=Grid.GridPartitioner, npart=15, data=df)
train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:]
'''
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
data = [[12, 100], [13, 200]]
for k in data:
k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
df = pd.DataFrame(data, columns=['data', 'glo_avg'])
#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
#print(forecasts)
f = lambda x: x + pd.to_timedelta(1, unit='h')
for ix, row in df.iterrows():
print(row['data'])
print(f(row['data']))
'''
# Multivariate time series
'''
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv = dataset.iloc[:24505]
test_mv = dataset.iloc[24505:]
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp)
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
data=train_mv)
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS) model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
model.append_variable(vhour) model.append_variable(vx)
model.append_variable(vavg) model.append_variable(vy)
model.target_variable = vavg model.target_variable = vx
model.fit(train_mv)
Util.persist_obj(model, model.shortname) model.fit(df.iloc[:800])
'''
#model = Util.load_obj("ClusteredMVFTS") df = model.predict(df.iloc[800:], type='multivariate')
model = Util.load_obj("ClusteredMVFTS2loadorder2knn2") print(df)
print(model)
print(model.predict(test_mv))
'''
train_mv = {}
test_mv = {}
models = {}
for key in ['price', 'solar', 'load']:
models[key] = []
dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
dataset['hour'] = dataset.index.values % 24
data = dataset['price'].values.flatten()
train_split = 24 * 800
train_mv['price'] = dataset.iloc[:train_split]
test_mv['price'] = dataset.iloc[train_split:]
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv['solar'] = dataset.iloc[:24505]
test_mv['solar'] = dataset.iloc[24505:]
from pyFTS.data import Malaysia
dataset = Malaysia.get_dataframe()
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
train_mv['load'] = dataset.iloc[:train_split]
test_mv['load'] = dataset.iloc[train_split:]
exogenous = {}
endogenous = {}
for key in models.keys():
exogenous[key] = {}
vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['price'],
partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'})
exogenous['price']['Hour'] = vhour
vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55,
data=train_mv['price'])
endogenous['price'] = vprice
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['solar'], partitioner_specific=sp)
exogenous['solar']['Hour'] = vhour
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
data=train_mv['solar'])
endogenous['solar'] = vavg
sp = {'seasonality': DateTime.hour_of_day}
vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['load'], partitioner_specific=sp)
exogenous['load']['Hour'] = vhourp
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv['load'])
endogenous['load'] = vload
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15])
parameters = [
{},{},
{'order':2, 'knn': 1},
{'order':2, 'knn': 2},
{'order':2, 'knn': 3},
]
for ct, key in enumerate(models.keys()):
for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
print(key, method, parameters[ct2])
model = method(**parameters[ct2])
_key2 = ""
for k in parameters[ct2].keys():
_key2 += k + str(parameters[ct2][k])
model.shortname += str(ct) + key + _key2
for var in exogenous[key].values():
model.append_variable(var)
model.append_variable(endogenous[key])
model.target_variable = endogenous[key]
model.fit(train_mv[key])
models[key].append(model.shortname)
Util.persist_obj(model, model.shortname)
del(model)
'''