Multivariate forecasting on cmvfts
This commit is contained in:
parent
2fcb55b2fe
commit
25d69a72ad
@ -88,7 +88,7 @@ class FTS(object):
|
||||
|
||||
:param data: time series with minimal length equal to the order of the model
|
||||
|
||||
:keyword type: the forecasting type, one of these values: point(default), interval or distribution.
|
||||
:keyword type: the forecasting type, one of these values: point(default), interval, distribution or multivariate.
|
||||
:keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
|
||||
:keyword start: in the multi step forecasting, the index of the data where to start forecasting
|
||||
:keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
|
||||
@ -130,6 +130,8 @@ class FTS(object):
|
||||
ret = self.forecast_interval(ndata, **kwargs)
|
||||
elif type == 'distribution':
|
||||
ret = self.forecast_distribution(ndata, **kwargs)
|
||||
elif type == 'multivariate':
|
||||
ret = self.forecast_multivariate(ndata, **kwargs)
|
||||
elif steps_ahead > 1:
|
||||
if type == 'point':
|
||||
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
|
||||
@ -137,8 +139,10 @@ class FTS(object):
|
||||
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
|
||||
elif type == 'distribution':
|
||||
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
|
||||
elif type == 'multivariate':
|
||||
ret = self.forecast_ahead_multivariate(ndata, **kwargs)
|
||||
|
||||
if not ['point', 'interval', 'distribution'].__contains__(type):
|
||||
if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type):
|
||||
raise ValueError('The argument \'type\' has an unknown value.')
|
||||
|
||||
else:
|
||||
@ -184,6 +188,16 @@ class FTS(object):
|
||||
"""
|
||||
raise NotImplementedError('This model do not perform one step ahead distribution forecasts!')
|
||||
|
||||
def forecast_multivariate(self, data, **kwargs):
    """
    One step ahead forecast for all variables of a multivariate model.

    This implementation performs no forecasting: it unconditionally raises.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this implementation
    """
    message = 'This model do not perform one step ahead multivariate forecasts!'
    raise NotImplementedError(message)
|
||||
|
||||
def forecast_ahead(self, data, steps, **kwargs):
|
||||
"""
|
||||
Point forecast n steps ahead
|
||||
@ -233,6 +247,17 @@ class FTS(object):
|
||||
"""
|
||||
raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!')
|
||||
|
||||
def forecast_ahead_multivariate(self, data, steps, **kwargs):
    """
    Multivariate forecast n steps ahead

    This implementation performs no forecasting: it unconditionally raises.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param steps: the number of steps ahead to forecast
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this implementation
    """
    # The message names the multi step variant so it is not confused with
    # the error raised by the one step ahead forecast_multivariate.
    raise NotImplementedError('This model do not perform multi step ahead multivariate forecasts!')
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
"""
|
||||
Method specific parameter fitting
|
||||
|
@ -1,5 +1,6 @@
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pyFTS.common import FuzzySet, FLR, fts, flrg
|
||||
from pyFTS.models import hofts
|
||||
from pyFTS.models.multivariate import mvfts, grid, common
|
||||
@ -55,24 +56,38 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
if self.model.is_high_order:
|
||||
self.model.order = self.order
|
||||
|
||||
if self.pre_fuzzyfy:
|
||||
ndata = self.fuzzyfy(data)
|
||||
else:
|
||||
ndata = [self.format_data(k) for k in data.to_dict('records')]
|
||||
ndata = self.check_data(data)
|
||||
|
||||
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
||||
|
||||
self.cluster.prune()
|
||||
|
||||
def check_data(self, data):
    """
    Convert input data into the representation handed to the internal model.

    :param data: input data; when pre_fuzzyfy is false it must offer ``to_dict('records')``
    :return: ``self.fuzzyfy(data)`` when pre_fuzzyfy is set, otherwise a list with one ``self.format_data`` result per record
    """
    if not self.pre_fuzzyfy:
        records = data.to_dict('records')
        return [self.format_data(record) for record in records]

    return self.fuzzyfy(data)
|
||||
|
||||
def forecast(self, ndata, **kwargs):
    """
    Forecast with the internal model after preparing the input data.

    :param ndata: raw input data, in the format accepted by check_data
    :param kwargs: model specific parameters, forwarded to the internal model's forecast
    :return: the value returned by the internal model's forecast
    """
    # Convert the input exactly once. Doing the conversion inline and then
    # calling check_data as well would feed already converted data back
    # into the conversion step.
    prepared = self.check_data(ndata)

    return self.model.forecast(prepared, fuzzyfied=self.pre_fuzzyfy, **kwargs)
|
||||
|
||||
def forecast_multivariate(self, data, **kwargs):
    """
    Forecast every explanatory variable with the internal model.

    For each explanatory variable the cluster is retargeted to that variable and the
    internal model is asked for a forecast; the results are gathered column-wise.

    :param data: raw input data, in the format accepted by check_data
    :param kwargs: model specific parameters, forwarded to the internal model's forecast
    :return: a Pandas Dataframe with one column per explanatory variable, named after the variable
    """
    ndata = self.check_data(data)

    # NOTE(review): assumes the cluster was targeting self.target_variable
    # before this call - confirm against the cluster construction code.
    original_target = getattr(self, 'target_variable', None)

    ret = {}
    try:
        for var in self.explanatory_variables:
            self.cluster.change_target_variable(var)
            ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
    finally:
        # Without this the cluster stays retargeted to the last explanatory
        # variable, and later forecasts would use the wrong target.
        if original_target is not None:
            self.cluster.change_target_variable(original_target)

    return pd.DataFrame(ret, columns=list(ret))
|
||||
|
||||
def __str__(self):
    """Describe this model using the string form of its internal model"""
    description = str(self.model)
    return description
|
||||
|
@ -86,7 +86,6 @@ class GridCluster(partitioner.Partitioner):
|
||||
|
||||
sys.setrecursionlimit(1000)
|
||||
|
||||
|
||||
def knn(self, data):
|
||||
tmp = [data[k.name]
|
||||
for k in self.mvfts.explanatory_variables]
|
||||
@ -106,3 +105,7 @@ class GridCluster(partitioner.Partitioner):
|
||||
|
||||
def fuzzyfy(self, data, **kwargs):
    """
    Fuzzyfy data by delegating to fuzzyfy_instance_clustered with this object.

    :param data: the data to fuzzyfy, passed through unchanged
    :param kwargs: extra options, forwarded unchanged
    :return: the value returned by fuzzyfy_instance_clustered
    """
    fuzzyfied = fuzzyfy_instance_clustered(data, self, **kwargs)
    return fuzzyfied
|
||||
|
||||
def change_target_variable(self, variable):
    """
    Assign the given variable as target_variable of every entry in self.sets.

    :param variable: the value stored in each entry's target_variable attribute
    """
    for key in self.sets:
        setattr(self.sets[key], 'target_variable', variable)
|
||||
|
@ -18,184 +18,21 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
|
||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
||||
from pyFTS.data import henon
|
||||
df = henon.get_dataframe(iterations=1000)
|
||||
|
||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
||||
from pyFTS.models.multivariate import variable, cmvfts
|
||||
|
||||
data = dataset['glo_avg'].values
|
||||
|
||||
train_mv = dataset.iloc[:24505]
|
||||
test_mv = dataset.iloc[24505:]
|
||||
|
||||
|
||||
|
||||
'''
|
||||
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
|
||||
|
||||
data = [[12, 100], [13, 200]]
|
||||
|
||||
for k in data:
|
||||
k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
df = pd.DataFrame(data, columns=['data', 'glo_avg'])
|
||||
|
||||
#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
|
||||
|
||||
#print(forecasts)
|
||||
|
||||
f = lambda x: x + pd.to_timedelta(1, unit='h')
|
||||
|
||||
for ix, row in df.iterrows():
|
||||
print(row['data'])
|
||||
print(f(row['data']))
|
||||
'''
|
||||
|
||||
# Multivariate time series
|
||||
'''
|
||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
||||
|
||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
train_mv = dataset.iloc[:24505]
|
||||
test_mv = dataset.iloc[24505:]
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
||||
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
|
||||
data=train_mv)
|
||||
vx = variable.Variable("x", data_label="x", partitioner=Grid.GridPartitioner, npart=15, data=df)
|
||||
vy = variable.Variable("y", data_label="y", partitioner=Grid.GridPartitioner, npart=15, data=df)
|
||||
|
||||
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
|
||||
model.append_variable(vhour)
|
||||
model.append_variable(vavg)
|
||||
model.target_variable = vavg
|
||||
model.fit(train_mv)
|
||||
model.append_variable(vx)
|
||||
model.append_variable(vy)
|
||||
model.target_variable = vx
|
||||
|
||||
Util.persist_obj(model, model.shortname)
|
||||
'''
|
||||
model.fit(df.iloc[:800])
|
||||
|
||||
#model = Util.load_obj("ClusteredMVFTS")
|
||||
df = model.predict(df.iloc[800:], type='multivariate')
|
||||
|
||||
model = Util.load_obj("ClusteredMVFTS2loadorder2knn2")
|
||||
|
||||
print(model)
|
||||
|
||||
print(model.predict(test_mv))
|
||||
|
||||
'''
|
||||
train_mv = {}
|
||||
test_mv = {}
|
||||
|
||||
models = {}
|
||||
|
||||
for key in ['price', 'solar', 'load']:
|
||||
models[key] = []
|
||||
|
||||
dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
|
||||
dataset['hour'] = dataset.index.values % 24
|
||||
|
||||
data = dataset['price'].values.flatten()
|
||||
|
||||
train_split = 24 * 800
|
||||
|
||||
|
||||
train_mv['price'] = dataset.iloc[:train_split]
|
||||
test_mv['price'] = dataset.iloc[train_split:]
|
||||
|
||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
||||
|
||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
train_mv['solar'] = dataset.iloc[:24505]
|
||||
test_mv['solar'] = dataset.iloc[24505:]
|
||||
|
||||
from pyFTS.data import Malaysia
|
||||
|
||||
dataset = Malaysia.get_dataframe()
|
||||
|
||||
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
|
||||
|
||||
train_mv['load'] = dataset.iloc[:train_split]
|
||||
test_mv['load'] = dataset.iloc[train_split:]
|
||||
|
||||
|
||||
exogenous = {}
|
||||
endogenous = {}
|
||||
|
||||
for key in models.keys():
|
||||
exogenous[key] = {}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv['price'],
|
||||
partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'})
|
||||
exogenous['price']['Hour'] = vhour
|
||||
|
||||
vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55,
|
||||
data=train_mv['price'])
|
||||
endogenous['price'] = vprice
|
||||
|
||||
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv['solar'], partitioner_specific=sp)
|
||||
|
||||
exogenous['solar']['Hour'] = vhour
|
||||
|
||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
||||
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
|
||||
data=train_mv['solar'])
|
||||
|
||||
endogenous['solar'] = vavg
|
||||
|
||||
|
||||
sp = {'seasonality': DateTime.hour_of_day}
|
||||
|
||||
vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv['load'], partitioner_specific=sp)
|
||||
|
||||
exogenous['load']['Hour'] = vhourp
|
||||
|
||||
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
|
||||
data=train_mv['load'])
|
||||
|
||||
endogenous['load'] = vload
|
||||
|
||||
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
|
||||
|
||||
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15])
|
||||
|
||||
parameters = [
|
||||
{},{},
|
||||
{'order':2, 'knn': 1},
|
||||
{'order':2, 'knn': 2},
|
||||
{'order':2, 'knn': 3},
|
||||
]
|
||||
|
||||
for ct, key in enumerate(models.keys()):
|
||||
|
||||
for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
|
||||
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
|
||||
print(key, method, parameters[ct2])
|
||||
model = method(**parameters[ct2])
|
||||
_key2 = ""
|
||||
for k in parameters[ct2].keys():
|
||||
_key2 += k + str(parameters[ct2][k])
|
||||
model.shortname += str(ct) + key + _key2
|
||||
for var in exogenous[key].values():
|
||||
model.append_variable(var)
|
||||
model.append_variable(endogenous[key])
|
||||
model.target_variable = endogenous[key]
|
||||
model.fit(train_mv[key])
|
||||
|
||||
models[key].append(model.shortname)
|
||||
|
||||
Util.persist_obj(model, model.shortname)
|
||||
|
||||
del(model)
|
||||
'''
|
||||
print(df)
|
Loading…
Reference in New Issue
Block a user