Bugfix in GridPartitioner: an error occurred when a BoxCox-transformed series contained -np.inf values

This commit is contained in:
Petrônio Cândido 2018-08-01 11:03:23 -03:00
parent d317fc80cf
commit a8ed5a3db7
3 changed files with 38 additions and 68 deletions

View File

@@ -257,6 +257,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
for partition in partitions:
for partitioner in partitioners_methods:
print(transformation, partition)
data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)

View File

@@ -48,7 +48,11 @@ class Partitioner(object):
if self.indexer is not None:
ndata = self.indexer.get_data(ndata)
_min = min(ndata)
_min = np.nanmin(ndata)
if _min == -np.inf:
ndata[ndata == -np.inf] = 0
_min = np.nanmin(ndata)
if _min < 0:
self.min = _min * 1.1
else:

View File

@@ -8,81 +8,46 @@ import matplotlib.pyplot as plt
from pyFTS.common import Util as cUtil
import pandas as pd
'''
from pyFTS.data import artificial
from pyFTS.data import TAIEX, NASDAQ, SP500, artificial
lmv1 = artificial.generate_gaussian_linear(1,0.2,0.2,0.05)
ts=200
ws=35
train1 = lmv1[:ts]
test1 = lmv1[ts:]
tmp_fs1 = Grid.GridPartitioner(data=train1[:50], npart=10)
fs1 = partitioners.PolynomialNonStationaryPartitioner(train1, tmp_fs1, window_size=ws, degree=1)
nsfts1 = honsfts.HighOrderNonStationaryFTS("", partitioner=fs1)
nsfts1.fit(train1, order=2, parameters=ws)
print(fs1)
print(nsfts1.predict(test1))
print(nsfts1)
util.plot_sets(fs1, tam=[10, 5], start=0, end=100, step=2, data=lmv1[:100], window_size=35)
'''
datasets = {
"TAIEX": TAIEX.get_data()[:4000],
"SP500": SP500.get_data()[10000:14000],
"NASDAQ": NASDAQ.get_data()[:4000],
# Incremental Mean and Incremental Variance
"IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
# Incremental Mean and Incremental Variance, lower bound equals to 0
"IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
# Constant Mean and Incremental Variance
"CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
# Incremental Mean and Constant Variance
"IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
}
train_split = 2000
test_length = 200
from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
boxcox = Transformations.BoxCox(0)
from pyFTS.common import Util
transformations = {'None': None, 'Differential(1)': tdiff, 'BoxCox(0)': boxcox }
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import chen
taiex = TAIEX.get_data()
#taiex_diff = tdiff.apply(taiex)
tag = 'chen_partitioning'
train = taiex[:600]
test = taiex[600:800]
for ds in ['IMIV0']: #datasets.keys():
dataset = datasets[ds]
#fs_tmp = Grid.GridPartitioner(data=train, npart=7, transformation=tdiff)
#fs_tmp = Entropy.EntropyPartitioner(data=train, npart=7, transformation=tdiff)
fs_tmp = Grid.GridPartitioner(data=train, npart=20)
fs = partitioners.SimpleNonStationaryPartitioner(train, fs_tmp)
print(fs)
model = cvfts.ConditionalVarianceFTS(partitioner=fs,memory_window=3)
model.fit(train)
print(model)
#tmpp4 = model.predict(test, type='point')
#tmp = model.predict(test, type='interval')
#util.plot_sets_conditional(model, tdiff.apply(test), step=5, size=[10, 5])
#util.plot_sets_conditional(model, test, step=5, size=[10, 5])
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=[10, 5])
axes[0].plot(test[1:], label="Test Data")
forecasts = model.predict(test, type='point')
axes[0].plot(forecasts[:-1], label="CVFTS Forecasts")
handles0, labels0 = axes[0].get_legend_handles_labels()
lgd = axes[0].legend(handles0, labels0, loc=2)
residuals = np.array(test[1:]) - np.array(forecasts[:-1])
axes[1].plot(residuals)
axes[1].set_title("Residuals")
print("fim")
bchmk.sliding_window_benchmarks(dataset, 4000, train=0.2, inc=0.2,
methods=[chen.ConventionalFTS],
benchmark_models=False,
transformations=[boxcox], #transformations[t] for t in transformations.keys()],
partitions=np.arange(3, 40, 1),
progress=False, type='point',
file="nsfts_benchmarks.db", dataset=ds, tag=tag)