Multivariate forecasting on cmvfts
This commit is contained in:
parent
2fcb55b2fe
commit
25d69a72ad
@ -88,7 +88,7 @@ class FTS(object):
|
|||||||
|
|
||||||
:param data: time series with minimal length to the order of the model
|
:param data: time series with minimal length to the order of the model
|
||||||
|
|
||||||
:keyword type: the forecasting type, one of these values: point(default), interval or distribution.
|
:keyword type: the forecasting type, one of these values: point(default), interval, distribution or multivariate.
|
||||||
:keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
|
:keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
|
||||||
:keyword start: in the multi step forecasting, the index of the data where to start forecasting
|
:keyword start: in the multi step forecasting, the index of the data where to start forecasting
|
||||||
:keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
|
:keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
|
||||||
@ -130,6 +130,8 @@ class FTS(object):
|
|||||||
ret = self.forecast_interval(ndata, **kwargs)
|
ret = self.forecast_interval(ndata, **kwargs)
|
||||||
elif type == 'distribution':
|
elif type == 'distribution':
|
||||||
ret = self.forecast_distribution(ndata, **kwargs)
|
ret = self.forecast_distribution(ndata, **kwargs)
|
||||||
|
elif type == 'multivariate':
|
||||||
|
ret = self.forecast_multivariate(ndata, **kwargs)
|
||||||
elif steps_ahead > 1:
|
elif steps_ahead > 1:
|
||||||
if type == 'point':
|
if type == 'point':
|
||||||
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
|
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
|
||||||
@ -137,8 +139,10 @@ class FTS(object):
|
|||||||
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
|
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
|
||||||
elif type == 'distribution':
|
elif type == 'distribution':
|
||||||
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
|
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
|
||||||
|
elif type == 'multivariate':
|
||||||
|
ret = self.forecast_ahead_multivariate(ndata, **kwargs)
|
||||||
|
|
||||||
if not ['point', 'interval', 'distribution'].__contains__(type):
|
if not ['point', 'interval', 'distribution', 'multivariate'].__contains__(type):
|
||||||
raise ValueError('The argument \'type\' has an unknown value.')
|
raise ValueError('The argument \'type\' has an unknown value.')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -184,6 +188,16 @@ class FTS(object):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError('This model do not perform one step ahead distribution forecasts!')
|
raise NotImplementedError('This model do not perform one step ahead distribution forecasts!')
|
||||||
|
|
||||||
|
def forecast_multivariate(self, data, **kwargs):
    """
    One step ahead forecast covering every variable of a multivariate model.

    This implementation is a placeholder: it performs no forecasting and always raises.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this implementation
    """
    message = 'This model do not perform one step ahead multivariate forecasts!'
    raise NotImplementedError(message)
|
||||||
|
|
||||||
def forecast_ahead(self, data, steps, **kwargs):
|
def forecast_ahead(self, data, steps, **kwargs):
|
||||||
"""
|
"""
|
||||||
Point forecast n steps ahead
|
Point forecast n steps ahead
|
||||||
@ -233,6 +247,17 @@ class FTS(object):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!')
|
raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!')
|
||||||
|
|
||||||
|
def forecast_ahead_multivariate(self, data, steps, **kwargs):
    """
    Multivariate forecast n steps ahead.

    This implementation is a placeholder: it performs no forecasting and always raises.

    :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
    :param steps: the number of steps ahead to forecast
    :param kwargs: model specific parameters
    :return: a Pandas Dataframe object representing the forecasted values for each variable
    :raises NotImplementedError: always, in this implementation
    """
    # The message names the multi step case so it cannot be confused with the
    # error raised by the one step ahead multivariate method.
    raise NotImplementedError('This model do not perform multi step ahead multivariate forecasts!')
|
||||||
|
|
||||||
def train(self, data, **kwargs):
|
def train(self, data, **kwargs):
|
||||||
"""
|
"""
|
||||||
Method specific parameter fitting
|
Method specific parameter fitting
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
from pyFTS.common import FuzzySet, FLR, fts, flrg
|
from pyFTS.common import FuzzySet, FLR, fts, flrg
|
||||||
from pyFTS.models import hofts
|
from pyFTS.models import hofts
|
||||||
from pyFTS.models.multivariate import mvfts, grid, common
|
from pyFTS.models.multivariate import mvfts, grid, common
|
||||||
@ -55,24 +56,38 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
if self.model.is_high_order:
|
if self.model.is_high_order:
|
||||||
self.model.order = self.order
|
self.model.order = self.order
|
||||||
|
|
||||||
if self.pre_fuzzyfy:
|
ndata = self.check_data(data)
|
||||||
ndata = self.fuzzyfy(data)
|
|
||||||
else:
|
|
||||||
ndata = [self.format_data(k) for k in data.to_dict('records')]
|
|
||||||
|
|
||||||
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
||||||
|
|
||||||
self.cluster.prune()
|
self.cluster.prune()
|
||||||
|
|
||||||
|
def check_data(self, data):
    """
    Convert raw input into the representation handed to the internal model.

    :param data: input data; on the non pre-fuzzyfied path it must provide ``to_dict('records')``
    :return: ``self.fuzzyfy(data)`` when ``self.pre_fuzzyfy`` is truthy, otherwise a list holding
             one ``self.format_data`` result per record of ``data``
    """
    if not self.pre_fuzzyfy:
        records = data.to_dict('records')
        return [self.format_data(record) for record in records]

    return self.fuzzyfy(data)
|
||||||
|
|
||||||
def forecast(self, ndata, **kwargs):
    """
    Forecast with the internal model after preparing the input.

    :param ndata: raw input data; it is first passed through ``self.check_data``
    :param kwargs: forwarded unchanged to the internal model's ``forecast``
    :return: whatever the internal model's ``forecast`` returns
    """
    prepared = self.check_data(ndata)
    return self.model.forecast(prepared, fuzzyfied=self.pre_fuzzyfy, **kwargs)
|
||||||
|
|
||||||
|
def forecast_multivariate(self, data, **kwargs):
    """
    One step ahead forecast for every explanatory variable of the model.

    The cluster is retargeted to each explanatory variable in turn and the internal model is
    asked to forecast it. When the loop ends (or fails) the cluster is pointed back at
    ``self.target_variable``, so a later ``forecast`` call still forecasts the model's own target.

    :param data: raw input data; it is first passed through ``self.check_data``
    :param kwargs: forwarded unchanged to the internal model's ``forecast``
    :return: a Pandas DataFrame with one column per explanatory variable, named after ``var.name``
    """
    ndata = self.check_data(data)

    forecasts = {}
    try:
        for var in self.explanatory_variables:
            self.cluster.change_target_variable(var)
            forecasts[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
    finally:
        # Undo the retargeting: without this the cluster keeps the last explanatory variable
        # as its target and later single-variable forecasts would use the wrong one.
        # NOTE(review): assumes self.target_variable holds the model's configured target - confirm.
        self.cluster.change_target_variable(self.target_variable)

    return pd.DataFrame(forecasts, columns=list(forecasts))
|
||||||
|
|
||||||
def __str__(self):
    """Describe this model using the string form of the wrapped internal model"""
    inner = self.model
    return str(inner)
|
||||||
|
@ -86,7 +86,6 @@ class GridCluster(partitioner.Partitioner):
|
|||||||
|
|
||||||
sys.setrecursionlimit(1000)
|
sys.setrecursionlimit(1000)
|
||||||
|
|
||||||
|
|
||||||
def knn(self, data):
|
def knn(self, data):
|
||||||
tmp = [data[k.name]
|
tmp = [data[k.name]
|
||||||
for k in self.mvfts.explanatory_variables]
|
for k in self.mvfts.explanatory_variables]
|
||||||
@ -106,3 +105,7 @@ class GridCluster(partitioner.Partitioner):
|
|||||||
|
|
||||||
def fuzzyfy(self, data, **kwargs):
    """
    Fuzzyfy ``data`` against this cluster partitioner.

    :param data: the value(s) handed to ``fuzzyfy_instance_clustered``
    :param kwargs: forwarded unchanged to ``fuzzyfy_instance_clustered``
    :return: the result of ``fuzzyfy_instance_clustered(data, self, **kwargs)``
    """
    fuzzyfied = fuzzyfy_instance_clustered(data, self, **kwargs)
    return fuzzyfied
|
||||||
|
|
||||||
|
def change_target_variable(self, variable):
    """
    Point every fuzzy set of this partitioner at a new target variable.

    :param variable: the object assigned to ``target_variable`` on each entry of ``self.sets``
    """
    sets = self.sets
    for key in sets:
        sets[key].target_variable = variable
|
||||||
|
@ -18,184 +18,21 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
|
|||||||
from pyFTS.models.seasonal import partitioner as seasonal
|
from pyFTS.models.seasonal import partitioner as seasonal
|
||||||
from pyFTS.models.seasonal.common import DateTime
|
from pyFTS.models.seasonal.common import DateTime
|
||||||
|
|
||||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
from pyFTS.data import henon
|
||||||
|
df = henon.get_dataframe(iterations=1000)
|
||||||
|
|
||||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
from pyFTS.models.multivariate import variable, cmvfts
|
||||||
|
|
||||||
data = dataset['glo_avg'].values
|
vx = variable.Variable("x", data_label="x", partitioner=Grid.GridPartitioner, npart=15, data=df)
|
||||||
|
vy = variable.Variable("y", data_label="y", partitioner=Grid.GridPartitioner, npart=15, data=df)
|
||||||
train_mv = dataset.iloc[:24505]
|
|
||||||
test_mv = dataset.iloc[24505:]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
|
||||||
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
|
|
||||||
|
|
||||||
data = [[12, 100], [13, 200]]
|
|
||||||
|
|
||||||
for k in data:
|
|
||||||
k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
df = pd.DataFrame(data, columns=['data', 'glo_avg'])
|
|
||||||
|
|
||||||
#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
|
|
||||||
|
|
||||||
#print(forecasts)
|
|
||||||
|
|
||||||
f = lambda x: x + pd.to_timedelta(1, unit='h')
|
|
||||||
|
|
||||||
for ix, row in df.iterrows():
|
|
||||||
print(row['data'])
|
|
||||||
print(f(row['data']))
|
|
||||||
'''
|
|
||||||
|
|
||||||
# Multivariate time series
|
|
||||||
'''
|
|
||||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
|
||||||
|
|
||||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
train_mv = dataset.iloc[:24505]
|
|
||||||
test_mv = dataset.iloc[24505:]
|
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
|
||||||
|
|
||||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
|
||||||
data=train_mv, partitioner_specific=sp)
|
|
||||||
|
|
||||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
|
||||||
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
|
|
||||||
data=train_mv)
|
|
||||||
|
|
||||||
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
|
model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
|
||||||
model.append_variable(vhour)
|
model.append_variable(vx)
|
||||||
model.append_variable(vavg)
|
model.append_variable(vy)
|
||||||
model.target_variable = vavg
|
model.target_variable = vx
|
||||||
model.fit(train_mv)
|
|
||||||
|
|
||||||
Util.persist_obj(model, model.shortname)
|
model.fit(df.iloc[:800])
|
||||||
'''
|
|
||||||
|
|
||||||
#model = Util.load_obj("ClusteredMVFTS")
|
df = model.predict(df.iloc[800:], type='multivariate')
|
||||||
|
|
||||||
model = Util.load_obj("ClusteredMVFTS2loadorder2knn2")
|
print(df)
|
||||||
|
|
||||||
print(model)
|
|
||||||
|
|
||||||
print(model.predict(test_mv))
|
|
||||||
|
|
||||||
'''
|
|
||||||
train_mv = {}
|
|
||||||
test_mv = {}
|
|
||||||
|
|
||||||
models = {}
|
|
||||||
|
|
||||||
for key in ['price', 'solar', 'load']:
|
|
||||||
models[key] = []
|
|
||||||
|
|
||||||
dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
|
|
||||||
dataset['hour'] = dataset.index.values % 24
|
|
||||||
|
|
||||||
data = dataset['price'].values.flatten()
|
|
||||||
|
|
||||||
train_split = 24 * 800
|
|
||||||
|
|
||||||
|
|
||||||
train_mv['price'] = dataset.iloc[:train_split]
|
|
||||||
test_mv['price'] = dataset.iloc[train_split:]
|
|
||||||
|
|
||||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
|
||||||
|
|
||||||
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
train_mv['solar'] = dataset.iloc[:24505]
|
|
||||||
test_mv['solar'] = dataset.iloc[24505:]
|
|
||||||
|
|
||||||
from pyFTS.data import Malaysia
|
|
||||||
|
|
||||||
dataset = Malaysia.get_dataframe()
|
|
||||||
|
|
||||||
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
|
|
||||||
|
|
||||||
train_mv['load'] = dataset.iloc[:train_split]
|
|
||||||
test_mv['load'] = dataset.iloc[train_split:]
|
|
||||||
|
|
||||||
|
|
||||||
exogenous = {}
|
|
||||||
endogenous = {}
|
|
||||||
|
|
||||||
for key in models.keys():
|
|
||||||
exogenous[key] = {}
|
|
||||||
|
|
||||||
vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
|
||||||
data=train_mv['price'],
|
|
||||||
partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'})
|
|
||||||
exogenous['price']['Hour'] = vhour
|
|
||||||
|
|
||||||
vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55,
|
|
||||||
data=train_mv['price'])
|
|
||||||
endogenous['price'] = vprice
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
|
||||||
|
|
||||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
|
||||||
data=train_mv['solar'], partitioner_specific=sp)
|
|
||||||
|
|
||||||
exogenous['solar']['Hour'] = vhour
|
|
||||||
|
|
||||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
|
||||||
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
|
|
||||||
data=train_mv['solar'])
|
|
||||||
|
|
||||||
endogenous['solar'] = vavg
|
|
||||||
|
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.hour_of_day}
|
|
||||||
|
|
||||||
vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
|
||||||
data=train_mv['load'], partitioner_specific=sp)
|
|
||||||
|
|
||||||
exogenous['load']['Hour'] = vhourp
|
|
||||||
|
|
||||||
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
|
|
||||||
data=train_mv['load'])
|
|
||||||
|
|
||||||
endogenous['load'] = vload
|
|
||||||
|
|
||||||
|
|
||||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15])
|
|
||||||
|
|
||||||
parameters = [
|
|
||||||
{},{},
|
|
||||||
{'order':2, 'knn': 1},
|
|
||||||
{'order':2, 'knn': 2},
|
|
||||||
{'order':2, 'knn': 3},
|
|
||||||
]
|
|
||||||
|
|
||||||
for ct, key in enumerate(models.keys()):
|
|
||||||
|
|
||||||
for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
|
|
||||||
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
|
|
||||||
print(key, method, parameters[ct2])
|
|
||||||
model = method(**parameters[ct2])
|
|
||||||
_key2 = ""
|
|
||||||
for k in parameters[ct2].keys():
|
|
||||||
_key2 += k + str(parameters[ct2][k])
|
|
||||||
model.shortname += str(ct) + key + _key2
|
|
||||||
for var in exogenous[key].values():
|
|
||||||
model.append_variable(var)
|
|
||||||
model.append_variable(endogenous[key])
|
|
||||||
model.target_variable = endogenous[key]
|
|
||||||
model.fit(train_mv[key])
|
|
||||||
|
|
||||||
models[key].append(model.shortname)
|
|
||||||
|
|
||||||
Util.persist_obj(model, model.shortname)
|
|
||||||
|
|
||||||
del(model)
|
|
||||||
'''
|
|
Loading…
Reference in New Issue
Block a user