pyFTS/pyFTS/tests/multivariate.py
2019-06-17 10:57:09 -03:00

202 lines
7.6 KiB
Python

import numpy as np
import pandas as pd
import time
from pyFTS.data import Enrollments, TAIEX, SONDA
from pyFTS.partitioners import Grid, Simple, Entropy
from pyFTS.common import Util
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
from pyFTS.benchmarks import Measures
from pyFTS.common import Util as cUtil
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.partitioners import Grid
from pyFTS.common import Membership
import os
# Download the semicolon-separated dataset and parse its 'data' column into
# pandas datetimes using the explicit timestamp format below.
dataset = pd.read_csv(
    'https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj',
    sep=';',
)
dataset['data'] = pd.to_datetime(dataset['data'], format='%Y-%m-%d %H:%M:%S')

# Positional split point shared by the univariate and multivariate splits.
split_at = 24505

# Multivariate split keeps whole rows; univariate split keeps only the raw
# 'glo_avg' values.
train_mv, test_mv = dataset.iloc[:split_at], dataset.iloc[split_at:]
glo_avg_values = dataset['glo_avg'].values
train_uv, test_uv = glo_avg_values[:split_at], glo_avg_values[split_at:]
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from itertools import product
# Build the 35 labels 'VL0', 'VL1', ..., 'VH6' by pairing every linguistic
# level with every sublevel index 0..6 (level varies slowest). The list is
# later handed to the "Radiation" variable's partitioner as its 'names'.
levels = ['VL', 'L', 'M', 'H', 'VH']
sublevels = [str(k) for k in range(7)]
names = [level + sublevel for level, sublevel in product(levels, sublevels)]
# "Month" variable: built from the 'data' timestamp column with a
# TimeGridPartitioner, seasonality key DateTime.day_of_year and 12 partitions
# labelled with the month abbreviations.
sp = {
    'seasonality': DateTime.day_of_year,
    'names': 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split(),
}
vmonth = variable.Variable(
    "Month",
    data_label="data",
    partitioner=seasonal.TimeGridPartitioner,
    npart=12,
    data=train_mv,
    partitioner_specific=sp,
)

# "Hour" variable: same timestamp column, seasonality key
# DateTime.minute_of_day and 24 partitions labelled '0hs' .. '23hs'.
sp = {
    'seasonality': DateTime.minute_of_day,
    'names': ['{}hs'.format(hour) for hour in range(24)],
}
vhour = variable.Variable(
    "Hour",
    data_label="data",
    partitioner=seasonal.TimeGridPartitioner,
    npart=24,
    data=train_mv,
    partitioner_specific=sp,
)

# "Radiation" variable: the 'glo_avg' column on a 35-partition grid whose
# sets take their labels from `names`.
vavg = variable.Variable(
    "Radiation",
    data_label="glo_avg",
    alias='rad',
    partitioner=Grid.GridPartitioner,
    npart=35,
    partitioner_specific={'names': names},
    data=train_mv,
)
# Grid cluster over the three variables; it is handed to the clustered model
# as its partitioner.
fs = grid.GridCluster(
    explanatory_variables=[vmonth, vhour, vavg],
    target_variable=vavg,
)

# Alternative model kept for reference:
# wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
model = cmvfts.ClusteredMVFTS(
    explanatory_variables=[vmonth, vhour, vavg],
    target_variable=vavg,
    partitioner=fs,
    order=2,
    knn=1,
)
model.fit(train_mv)


def generator(x):
    """Return *x* advanced by one hour."""
    return x + pd.to_timedelta(1, unit='h')


# Forecast 48 steps ahead from the first three test rows; `generator` is
# supplied as the generator for the 'data' column.
seed_rows = test_mv.iloc[:3]
forecasts = model.predict(
    seed_rows,
    steps_ahead=48,
    generators={'data': generator},
)
print(forecasts)
# Disabled experiments, kept as a bare triple-quoted string: the text below is
# parsed as a string literal and never executed.
#
# It holds two GranularWMVFTS experiments, one on the dataframe from
# pyFTS.data.lorentz and one on a load/temperature CSV. Each iterates over
# sliding windows, sweeps `order` and `knn` over 1..3, collects MAPE values
# for horizons 1/25/50/75/100 and writes them to 'gmvfts_lorentz1.csv' and
# 'gmvfts_gefcom12.csv' respectively.
#
# NOTE(review): the statements inside the string carry no indentation, so the
# loop nesting must be restored before this text can be re-enabled.
# NOTE(review): `test_mv = df.iloc[:31000:]` selects the same rows as
# `train_mv = df.iloc[:31000]`; `df.iloc[31000:]` was presumably intended —
# confirm before re-enabling.
'''
from pyFTS.data import lorentz
df = lorentz.get_dataframe(iterations=5000)
train = df.iloc[:4000]
#test = df.iloc[4000:]
npart=120
import sys
vx = variable.Variable("x", data_label="x", alias='x', partitioner=Grid.GridPartitioner,
partitioner_specific={'mf': Membership.gaussmf}, npart=npart, data=train)
vy = variable.Variable("y", data_label="y", alias='y', partitioner=Grid.GridPartitioner,
partitioner_specific={'mf': Membership.gaussmf}, npart=int(npart*1.5), data=train)
vz = variable.Variable("z", data_label="z", alias='z', partitioner=Grid.GridPartitioner,
partitioner_specific={'mf': Membership.gaussmf}, npart=int(npart*1.2), data=train)
rows = []
for ct, train, test in cUtil.sliding_window(df, windowsize=4100, train=.97, inc=.05):
print('Window {}'.format(ct))
for order in [1, 2, 3]:
for knn in [1, 2, 3]:
model = granular.GranularWMVFTS(explanatory_variables=[vx, vy, vz], target_variable=vx, order=order,
knn=knn)
model.fit(train)
forecasts1 = model.predict(test, type='multivariate')
forecasts2 = model.predict(test, type='multivariate', steps_ahead=100)
for var in ['x', 'y', 'z']:
row = [order, knn, var, len(model)]
for horizon in [1, 25, 50, 75, 100]:
if horizon == 1:
row.append( Measures.mape(test[var].values[model.order:model.order+10],
forecasts1[var].values[:10]))
else:
row.append( Measures.mape(test[var].values[:horizon],
forecasts2[var].values[:horizon]))
print(row)
rows.append(row)
columns = ['Order', 'knn', 'var', 'Rules']
for horizon in [1, 25, 50, 75, 100]:
columns.append('h{}'.format(horizon))
final = pd.DataFrame(rows, columns=columns)
final.to_csv('gmvfts_lorentz1.csv',sep=';',index=False)
import pandas as pd
df = pd.read_csv('https://query.data.world/s/ftb7bzgobr6bsg6bsuxuqowja6ew4r')
#df.dropna()
mload = np.nanmean(df["load"].values)
df['load'] = np.where(pd.isna(df["load"]), mload, df["load"])
mtemp = np.nanmean(df["temperature"].values)
df['temperature'] = np.where(pd.isna(df["temperature"]), mtemp, df["temperature"])
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
df['hour'] = np.float64(df['date'].apply(lambda x: x.strftime('%H')))
df['weekday'] = np.float64(df['date'].apply(lambda x: x.strftime('%w')))
df['month'] = np.float64(df['date'].apply(lambda x: x.strftime('%m')))
train_mv = df.iloc[:31000]
test_mv = df.iloc[:31000:]
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp, alpha_cut=.3)
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
data=train_mv, alpha_cut=.3)
vload = variable.Variable("Load", data_label="load", alias='load',
partitioner=Grid.GridPartitioner, npart=20, func=Membership.gaussmf,
data=train_mv, alpha_cut=.3)
rows = []
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
for ct, train, test in cUtil.sliding_window(df, windowsize=32000, train=.98, inc=.05):
print('Window {}'.format(ct))
for order in [1, 2, 3]:
for knn in [1, 2, 3]:
model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
order=order, knn=knn)
model.fit(train)
forecasts1 = model.predict(test, type='multivariate')
forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
steps_ahead=100)
for var in ['temperature','load']:
row = [order, knn, var, len(model)]
for horizon in [1, 25, 50, 75, 100]:
if horizon == 1:
row.append(Measures.mape(test[var].values[model.order:model.order + 10],
forecasts1[var].values[:10]))
else:
row.append(Measures.mape(test[var].values[:horizon],
forecasts2[var].values[:horizon]))
print(row)
rows.append(row)
columns = ['Order', 'knn', 'var', 'Rules']
for horizon in [1, 25, 50, 75, 100]:
columns.append('h{}'.format(horizon))
final = pd.DataFrame(rows, columns=columns)
final.to_csv('gmvfts_gefcom12.csv', sep=';', index=False)
'''