Add a 'multivariate' forecasting type to FTS.predict and implement the
one-step-ahead version for ClusteredMVFTS.

Review notes (text before the first "diff --git" line is ignored by patch tools):
  * FTS.predict now passes steps_ahead to forecast_ahead_multivariate, matching
    the method's (data, steps, **kwargs) signature and the sibling branches.
  * FTS.forecast_ahead_multivariate reports "multi step ahead" in its error
    message, like the other multi-step stubs.
  * ClusteredMVFTS.forecast_multivariate puts the cluster's target variable
    back to self.target_variable when it finishes, so a later forecast() call
    is not left pointing at the last explanatory variable.
  * This patch was recovered from a whitespace-collapsed copy: indentation and
    blank context lines are reconstructed from the hunk counts and must be
    checked against the tree before applying. The index hashes are the ones
    recorded in the original commit.

diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py
index 53d3b5b..188053c 100644
--- a/pyFTS/common/fts.py
+++ b/pyFTS/common/fts.py
@@ -88,7 +88,7 @@ class FTS(object):
 
         :param data: time series with minimal length to the order of the model
 
-        :keyword type: the forecasting type, one of these values: point(default), interval or distribution.
+        :keyword type: the forecasting type, one of these values: point(default), interval, distribution or multivariate.
         :keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
         :keyword start: in the multi step forecasting, the index of the data where to start forecasting
         :keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
@@ -130,6 +130,8 @@ class FTS(object):
                     ret = self.forecast_interval(ndata, **kwargs)
                 elif type == 'distribution':
                     ret = self.forecast_distribution(ndata, **kwargs)
+                elif type == 'multivariate':
+                    ret = self.forecast_multivariate(ndata, **kwargs)
             elif steps_ahead > 1:
                 if type == 'point':
                     ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
@@ -137,8 +139,10 @@ class FTS(object):
                     ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
                 elif type == 'distribution':
                     ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
+                elif type == 'multivariate':
+                    ret = self.forecast_ahead_multivariate(ndata, steps_ahead, **kwargs)
 
-            if not ['point', 'interval', 'distribution'].__contains__(type):
+            if type not in ('point', 'interval', 'distribution', 'multivariate'):
                 raise ValueError('The argument \'type\' has an unknown value.')
 
         else:
@@ -184,6 +188,16 @@ class FTS(object):
         """
         raise NotImplementedError('This model do not perform one step ahead distribution forecasts!')
 
+    def forecast_multivariate(self, data, **kwargs):
+        """
+        Multivariate forecast one step ahead
+
+        :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
+        :param kwargs: model specific parameters
+        :return: a Pandas Dataframe object representing the forecasted values for each variable
+        """
+        raise NotImplementedError('This model do not perform one step ahead multivariate forecasts!')
+
     def forecast_ahead(self, data, steps, **kwargs):
         """
         Point forecast n steps ahead
@@ -233,6 +247,17 @@ class FTS(object):
         """
         raise NotImplementedError('This model do not perform multi step ahead distribution forecasts!')
 
+    def forecast_ahead_multivariate(self, data, steps, **kwargs):
+        """
+        Multivariate forecast n steps ahead
+
+        :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
+        :param steps: the number of steps ahead to forecast
+        :param kwargs: model specific parameters
+        :return: a Pandas Dataframe object representing the forecasted values for each variable
+        """
+        raise NotImplementedError('This model do not perform multi step ahead multivariate forecasts!')
+
     def train(self, data, **kwargs):
         """
         Method specific parameter fitting
diff --git a/pyFTS/models/multivariate/cmvfts.py b/pyFTS/models/multivariate/cmvfts.py
index de03bc3..e9db220 100644
--- a/pyFTS/models/multivariate/cmvfts.py
+++ b/pyFTS/models/multivariate/cmvfts.py
@@ -1,5 +1,6 @@
 
 import numpy as np
+import pandas as pd
 from pyFTS.common import FuzzySet, FLR, fts, flrg
 from pyFTS.models import hofts
 from pyFTS.models.multivariate import mvfts, grid, common
@@ -55,24 +56,42 @@ class ClusteredMVFTS(mvfts.MVFTS):
         if self.model.is_high_order:
             self.model.order = self.order
 
-        if self.pre_fuzzyfy:
-            ndata = self.fuzzyfy(data)
-        else:
-            ndata = [self.format_data(k) for k in data.to_dict('records')]
+        ndata = self.check_data(data)
 
         self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
 
         self.cluster.prune()
 
+    def check_data(self, data):
+        if self.pre_fuzzyfy:
+            ndata = self.fuzzyfy(data)
+        else:
+            ndata = [self.format_data(k) for k in data.to_dict('records')]
+
+        return ndata
+
     def forecast(self, ndata, **kwargs):
 
-        if self.pre_fuzzyfy:
-            ndata = self.fuzzyfy(ndata)
-        else:
-            ndata = [self.format_data(k) for k in ndata.to_dict('records')]
+        ndata = self.check_data(ndata)
 
         return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
 
+    def forecast_multivariate(self, data, **kwargs):
+
+        ndata = self.check_data(data)
+
+        ret = {}
+        try:
+            for var in self.explanatory_variables:
+                self.cluster.change_target_variable(var)
+                ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
+        finally:
+            # Put the configured target back so later forecast() calls are unaffected.
+            self.cluster.change_target_variable(self.target_variable)
+
+        columns = ret.keys()
+        return pd.DataFrame(ret, columns=columns)
+
     def __str__(self):
         """String representation of the model"""
         return str(self.model)
diff --git a/pyFTS/models/multivariate/grid.py b/pyFTS/models/multivariate/grid.py
index 0441581..caafbc3 100644
--- a/pyFTS/models/multivariate/grid.py
+++ b/pyFTS/models/multivariate/grid.py
@@ -86,7 +86,6 @@ class GridCluster(partitioner.Partitioner):
 
         sys.setrecursionlimit(1000)
 
-
     def knn(self, data):
         tmp = [data[k.name] for k in self.mvfts.explanatory_variables]
 
@@ -106,3 +105,7 @@ class GridCluster(partitioner.Partitioner):
 
     def fuzzyfy(self, data, **kwargs):
         return fuzzyfy_instance_clustered(data, self, **kwargs)
+
+    def change_target_variable(self, variable):
+        for fset in self.sets:
+            self.sets[fset].target_variable = variable
diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py
index 4dba416..6960ba5 100644
--- a/pyFTS/tests/multivariate.py
+++ b/pyFTS/tests/multivariate.py
@@ -18,184 +18,21 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 
-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
+from pyFTS.data import henon
+df = henon.get_dataframe(iterations=1000)
 
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
+from pyFTS.models.multivariate import variable, cmvfts
 
-data = dataset['glo_avg'].values
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-
-
-'''
-model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
-
-data = [[12, 100], [13, 200]]
-
-for k in data:
-    k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
-
-df = pd.DataFrame(data, columns=['data', 'glo_avg'])
-
-#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
-
-#print(forecasts)
-
-f = lambda x: x + pd.to_timedelta(1, unit='h')
-
-for ix, row in df.iterrows():
-    print(row['data'])
-    print(f(row['data']))
-'''
-
-# Multivariate time series
-'''
-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
-
-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
-                         data=train_mv)
+vx = variable.Variable("x", data_label="x", partitioner=Grid.GridPartitioner, npart=15, data=df)
+vy = variable.Variable("y", data_label="y", partitioner=Grid.GridPartitioner, npart=15, data=df)
 
 model = cmvfts.ClusteredMVFTS(pre_fuzzyfy=False, knn=3, fts_method=pwfts.ProbabilisticWeightedFTS)
-model.append_variable(vhour)
-model.append_variable(vavg)
-model.target_variable = vavg
-model.fit(train_mv)
+model.append_variable(vx)
+model.append_variable(vy)
+model.target_variable = vx
 
-Util.persist_obj(model, model.shortname)
-'''
+model.fit(df.iloc[:800])
 
-#model = Util.load_obj("ClusteredMVFTS")
+df = model.predict(df.iloc[800:], type='multivariate')
 
-model = Util.load_obj("ClusteredMVFTS2loadorder2knn2")
-
-print(model)
-
-print(model.predict(test_mv))
-
-'''
-train_mv = {}
-test_mv = {}
-
-models = {}
-
-for key in ['price', 'solar', 'load']:
-    models[key] = []
-
-dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
-dataset['hour'] = dataset.index.values % 24
-
-data = dataset['price'].values.flatten()
-
-train_split = 24 * 800
-
-
-train_mv['price'] = dataset.iloc[:train_split]
-test_mv['price'] = dataset.iloc[train_split:]
-
-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_mv['solar'] = dataset.iloc[:24505]
-test_mv['solar'] = dataset.iloc[24505:]
-
-from pyFTS.data import Malaysia
-
-dataset = Malaysia.get_dataframe()
-
-dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
-
-train_mv['load'] = dataset.iloc[:train_split]
-test_mv['load'] = dataset.iloc[train_split:]
-
-
-exogenous = {}
-endogenous = {}
-
-for key in models.keys():
-    exogenous[key] = {}
-
-vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv['price'],
-                          partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'})
-exogenous['price']['Hour'] = vhour
-
-vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55,
-                           data=train_mv['price'])
-endogenous['price'] = vprice
-
-
-
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv['solar'], partitioner_specific=sp)
-
-exogenous['solar']['Hour'] = vhour
-
-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
-                         data=train_mv['solar'])
-
-endogenous['solar'] = vavg
-
-
-sp = {'seasonality': DateTime.hour_of_day}
-
-vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                           data=train_mv['load'], partitioner_specific=sp)
-
-exogenous['load']['Hour'] = vhourp
-
-vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
-                          data=train_mv['load'])
-
-endogenous['load'] = vload
-
-
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
-
-fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15])
-
-parameters = [
-    {},{},
-    {'order':2, 'knn': 1},
-    {'order':2, 'knn': 2},
-    {'order':2, 'knn': 3},
-]
-
-for ct, key in enumerate(models.keys()):
-
-    for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
-                                  cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
-        print(key, method, parameters[ct2])
-        model = method(**parameters[ct2])
-        _key2 = ""
-        for k in parameters[ct2].keys():
-            _key2 += k + str(parameters[ct2][k])
-        model.shortname += str(ct) + key + _key2
-        for var in exogenous[key].values():
-            model.append_variable(var)
-        model.append_variable(endogenous[key])
-        model.target_variable = endogenous[key]
-        model.fit(train_mv[key])
-
-        models[key].append(model.shortname)
-
-        Util.persist_obj(model, model.shortname)
-
-        del(model)
-'''
\ No newline at end of file
+print(df)
\ No newline at end of file