Bugfix on FTS.uod_clip for MVFTS methods

This commit is contained in:
Petrônio Cândido 2019-06-17 09:25:47 -03:00
parent 67b2104445
commit c7dd392575
4 changed files with 72 additions and 6 deletions

View File

@ -17,7 +17,7 @@ from pyFTS.distributed import dispy as dUtil
__measures = ['f1', 'f2', 'rmse', 'size'] __measures = ['f1', 'f2', 'rmse', 'size']
#
def genotype(mf, npart, partitioner, order, alpha, lags, f1, f2): def genotype(mf, npart, partitioner, order, alpha, lags, f1, f2):
''' '''
Create the individual genotype Create the individual genotype

View File

@ -32,6 +32,7 @@ class MVFTS(fts.FTS):
self.is_multivariate = True self.is_multivariate = True
self.shortname = "MVFTS" self.shortname = "MVFTS"
self.name = "Multivariate FTS" self.name = "Multivariate FTS"
self.uod_clip = False
def append_variable(self, var): def append_variable(self, var):
""" """

View File

@ -52,6 +52,21 @@ datasets['TAIEX'] = TAIEX.get_data()[:5000]
datasets['NASDAQ'] = NASDAQ.get_data()[:5000] datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
datasets['SP500'] = SP500.get_data()[10000:15000] datasets['SP500'] = SP500.get_data()[10000:15000]
#'''
# Grid search over model order and partition count for PWFTS, run once per
# entry in `datasets`. The options are rebuilt on every iteration so each call
# receives fresh list/array objects, exactly as a literal call would.
for dataset_name, dataset in datasets.items():
    gridsearch_options = dict(
        train=0.8,
        inc=0.2,
        methods=[pwfts.ProbabilisticWeightedFTS],
        benchmark_models=False,
        transformations=[None],
        orders=[1, 2, 3],
        # Candidate partition counts: 10, 15, ..., 95.
        partitions=np.arange(10, 100, 5),
        progress=False,
        type='point',
        distributed=True,
        nodes=['192.168.254.113'],
        file="experiments.db",
        # Tag each run with its dataset key.
        dataset=dataset_name,
        tag="gridsearch",
    )
    # NOTE(review): 1000 is presumably the sliding-window length -- confirm
    # against bchmk.sliding_window_benchmarks2.
    bchmk.sliding_window_benchmarks2(dataset, 1000, **gridsearch_options)
'''
competitor_methods = [] competitor_methods = []
competitor_methods.extend([arima.ARIMA]*3) competitor_methods.extend([arima.ARIMA]*3)
competitor_methods.extend([quantreg.QuantileRegression]*2) competitor_methods.extend([quantreg.QuantileRegression]*2)
@ -89,9 +104,9 @@ for dataset_name, dataset in datasets.items():
partitions=[35], partitions=[35],
steps_ahead=[10], steps_ahead=[10],
progress=False, type='point', progress=False, type='point',
distributed=False, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'], distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
file="tmp.db", dataset=dataset_name, file="experiments.db", dataset=dataset_name,
tag="experiments") tag="experiments")
#''' '''

View File

@ -21,6 +21,55 @@ from pyFTS.common import Membership
import os import os
from itertools import product

from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime

# --- Data ------------------------------------------------------------------
# Load the CSV (semicolon separated) and parse the 'data' column as datetimes.
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')

# Rows before this index form the training set; the remainder is the test set.
split_at = 24505

# Univariate view: only the 'glo_avg' column, as a plain array.
radiation = dataset['glo_avg'].values
train_uv, test_uv = radiation[:split_at], radiation[split_at:]

# Multivariate view: the full data frame.
train_mv, test_mv = dataset.iloc[:split_at], dataset.iloc[split_at:]

# --- Fuzzy set labels ------------------------------------------------------
# 5 levels x 7 sublevels = 35 labels ('VL0' ... 'VH6'), one per partition of
# the radiation variable below (npart=35).
levels = ['VL', 'L', 'M', 'H', 'VH']
sublevels = [str(k) for k in range(7)]
names = [level + sublevel for level, sublevel in product(levels, sublevels)]

# --- Variables -------------------------------------------------------------
# "Month": 12 named partitions over the 'data' column.
sp = {
    'seasonality': DateTime.day_of_year,
    'names': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
}
vmonth = variable.Variable(
    "Month",
    data_label="data",
    partitioner=seasonal.TimeGridPartitioner,
    npart=12,
    data=train_mv,
    partitioner_specific=sp,
)

# "Hour": 24 named partitions ('0hs' ... '23hs') over the same column.
sp = {
    'seasonality': DateTime.minute_of_day,
    'names': [f'{k}hs' for k in range(24)],
}
vhour = variable.Variable(
    "Hour",
    data_label="data",
    partitioner=seasonal.TimeGridPartitioner,
    npart=24,
    data=train_mv,
    partitioner_specific=sp,
)

# "Radiation": grid partitioning of 'glo_avg' using the 35 labels built above.
vavg = variable.Variable(
    "Radiation",
    data_label="glo_avg",
    alias='rad',
    partitioner=Grid.GridPartitioner,
    npart=35,
    partitioner_specific={'names': names},
    data=train_mv,
)

# --- Model -----------------------------------------------------------------
# The radiation variable is both an explanatory variable and the target.
fs = grid.GridCluster(
    explanatory_variables=[vmonth, vhour, vavg],
    target_variable=vavg,
)
model = cmvfts.ClusteredMVFTS(
    explanatory_variables=[vmonth, vhour, vavg],
    target_variable=vavg,
    partitioner=fs,
    order=2,
    knn=1,
)

model.fit(train_mv)

# Forecast over the first 100 test rows and show the result.
forecasts = model.predict(test_mv.iloc[:100])
print(forecasts)
''' '''
from pyFTS.data import lorentz from pyFTS.data import lorentz
df = lorentz.get_dataframe(iterations=5000) df = lorentz.get_dataframe(iterations=5000)
@ -76,7 +125,7 @@ for horizon in [1, 25, 50, 75, 100]:
final = pd.DataFrame(rows, columns=columns) final = pd.DataFrame(rows, columns=columns)
final.to_csv('gmvfts_lorentz1.csv',sep=';',index=False) final.to_csv('gmvfts_lorentz1.csv',sep=';',index=False)
'''
import pandas as pd import pandas as pd
df = pd.read_csv('https://query.data.world/s/ftb7bzgobr6bsg6bsuxuqowja6ew4r') df = pd.read_csv('https://query.data.world/s/ftb7bzgobr6bsg6bsuxuqowja6ew4r')
@ -147,3 +196,4 @@ for horizon in [1, 25, 50, 75, 100]:
final = pd.DataFrame(rows, columns=columns) final = pd.DataFrame(rows, columns=columns)
final.to_csv('gmvfts_gefcom12.csv', sep=';', index=False) final.to_csv('gmvfts_gefcom12.csv', sep=';', index=False)
'''