Bugfix in GridPartitioner: an error occurred when a BoxCox-transformed series has -np.inf values

Petrônio Cândido 2018-08-01 11:03:23 -03:00
parent d317fc80cf
commit a8ed5a3db7
3 changed files with 38 additions and 68 deletions
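
The failure mode behind this fix: BoxCox with lambda = 0 acts as a log transform, so any zero in the series becomes -np.inf, and the partitioner's old `min(ndata)` call then propagated -inf into the bounds of the universe of discourse. A minimal sketch of that condition and of the replacement logic introduced below (the `apply` call follows the usual pyFTS transformation interface; treat its exact signature as an assumption):

    import numpy as np
    from pyFTS.common import Transformations

    boxcox = Transformations.BoxCox(0)        # lambda = 0 behaves as a log transform
    data = np.array([0.0, 1.0, 2.0, 3.0])     # a series whose lower bound is exactly 0
    ndata = np.array(boxcox.apply(data))      # log(0) -> -inf

    print(min(ndata))                         # -inf: the value that broke GridPartitioner
    ndata[ndata == -np.inf] = 0               # the workaround adopted in Partitioner below
    print(np.nanmin(ndata))                   # finite lower bound, as the fix computes it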

View File

@@ -257,6 +257,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     for partition in partitions:
         for partitioner in partitioners_methods:
+            print(transformation, partition)
             data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)

View File

@@ -48,7 +48,11 @@ class Partitioner(object):
         if self.indexer is not None:
             ndata = self.indexer.get_data(ndata)
-        _min = min(ndata)
+        _min = np.nanmin(ndata)
+
+        if _min == -np.inf:
+            ndata[ndata == -np.inf] = 0
+            _min = np.nanmin(ndata)
 
         if _min < 0:
             self.min = _min * 1.1
         else:
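
With the change above, building a grid over a BoxCox-transformed series that contains zeros should yield finite bounds instead of -inf leaking into the partitions. A hedged usage sketch (it assumes `GridPartitioner` accepts the `transformation` keyword, as the benchmark code in this commit does, and exposes the `min`/`max` attributes set here):

    import numpy as np
    from pyFTS.common import Transformations
    from pyFTS.partitioners import Grid

    data = np.abs(np.random.normal(1.0, 1.0, 500))
    data[data < 0.05] = 0.0                   # force exact zeros -> -inf under BoxCox(0)

    fs = Grid.GridPartitioner(data=data, npart=10,
                              transformation=Transformations.BoxCox(0))
    print(fs.min, fs.max)                     # both finite after this fix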

View File

@@ -8,81 +8,46 @@ import matplotlib.pyplot as plt
 from pyFTS.common import Util as cUtil
 import pandas as pd
 
-'''
-from pyFTS.data import artificial
-
-lmv1 = artificial.generate_gaussian_linear(1,0.2,0.2,0.05)
-
-ts=200
-ws=35
-train1 = lmv1[:ts]
-test1 = lmv1[ts:]
-
-tmp_fs1 = Grid.GridPartitioner(data=train1[:50], npart=10)
-
-fs1 = partitioners.PolynomialNonStationaryPartitioner(train1, tmp_fs1, window_size=ws, degree=1)
-
-nsfts1 = honsfts.HighOrderNonStationaryFTS("", partitioner=fs1)
-
-nsfts1.fit(train1, order=2, parameters=ws)
-
-print(fs1)
-
-print(nsfts1.predict(test1))
-
-print(nsfts1)
-
-util.plot_sets(fs1, tam=[10, 5], start=0, end=100, step=2, data=lmv1[:100], window_size=35)
-'''
-train_split = 2000
-test_length = 200
+from pyFTS.data import TAIEX, NASDAQ, SP500, artificial
+
+datasets = {
+    "TAIEX": TAIEX.get_data()[:4000],
+    "SP500": SP500.get_data()[10000:14000],
+    "NASDAQ": NASDAQ.get_data()[:4000],
+    # Incremental Mean and Incremental Variance
+    "IMIV": artificial.generate_gaussian_linear(1,0.2,0.2,0.05,it=100, num=40),
+    # Incremental Mean and Incremental Variance, lower bound equals to 0
+    "IMIV0": artificial.generate_gaussian_linear(1,0.2,0.,0.05, vmin=0,it=100, num=40),
+    # Constant Mean and Incremental Variance
+    "CMIV": artificial.generate_gaussian_linear(5,0.1,0,0.02,it=100, num=40),
+    # Incremental Mean and Constant Variance
+    "IMCV": artificial.generate_gaussian_linear(1,0.6,0.1,0,it=100, num=40)
+}
 
 from pyFTS.common import Transformations
 tdiff = Transformations.Differential(1)
+boxcox = Transformations.BoxCox(0)
 
-from pyFTS.common import Util
-from pyFTS.data import TAIEX
-
-taiex = TAIEX.get_data()
-#taiex_diff = tdiff.apply(taiex)
-train = taiex[:600]
-test = taiex[600:800]
-
-#fs_tmp = Grid.GridPartitioner(data=train, npart=7, transformation=tdiff)
-#fs_tmp = Entropy.EntropyPartitioner(data=train, npart=7, transformation=tdiff)
-fs_tmp = Grid.GridPartitioner(data=train, npart=20)
-
-fs = partitioners.SimpleNonStationaryPartitioner(train, fs_tmp)
-
-print(fs)
-
-model = cvfts.ConditionalVarianceFTS(partitioner=fs,memory_window=3)
-model.fit(train)
-print(model)
-
-#tmpp4 = model.predict(test, type='point')
-#tmp = model.predict(test, type='interval')
-
-#util.plot_sets_conditional(model, tdiff.apply(test), step=5, size=[10, 5])
-#util.plot_sets_conditional(model, test, step=5, size=[10, 5])
-
-fig, axes = plt.subplots(nrows=2, ncols=1, figsize=[10, 5])
-axes[0].plot(test[1:], label="Test Data")
-forecasts = model.predict(test, type='point')
-axes[0].plot(forecasts[:-1], label="CVFTS Forecasts")
-handles0, labels0 = axes[0].get_legend_handles_labels()
-lgd = axes[0].legend(handles0, labels0, loc=2)
-residuals = np.array(test[1:]) - np.array(forecasts[:-1])
-axes[1].plot(residuals)
-axes[1].set_title("Residuals")
-print("fim")
+transformations = {'None': None, 'Differential(1)': tdiff, 'BoxCox(0)': boxcox }
+
+from pyFTS.partitioners import Grid, Util as pUtil
+from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.models import chen
+
+tag = 'chen_partitioning'
+
+for ds in ['IMIV0']: #datasets.keys():
+    dataset = datasets[ds]
+
+    bchmk.sliding_window_benchmarks(dataset, 4000, train=0.2, inc=0.2,
+                                    methods=[chen.ConventionalFTS],
+                                    benchmark_models=False,
+                                    transformations=[boxcox], #transformations[t] for t in transformations.keys()],
+                                    partitions=np.arange(3, 40, 1),
+                                    progress=False, type='point',
+                                    file="nsfts_benchmarks.db", dataset=ds, tag=tag)