Small bugfixes in benchmarks and benchmark models

This commit is contained in:
Petrônio Cândido 2018-04-26 11:53:53 -03:00
parent 95c6e90bfb
commit 191ddf90d8
7 changed files with 63 additions and 54 deletions

View File

@@ -219,10 +219,14 @@ def brier_score(targets, densities):
     '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3. '''
     ret = []
     for ct, d in enumerate(densities):
-        v = d.bin_index.find_ge(targets[ct])
-        score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
-        score += (d.distribution[v] - 1) ** 2
-        ret.append(score)
+        try:
+            v = d.bin_index.find_ge(targets[ct])
+            score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
+            score += (d.distribution[v] - 1) ** 2
+            ret.append(score)
+        except ValueError as ex:
+            ret.append(sum([d.distribution[k] ** 2 for k in d.bins]))
     return sum(ret)/len(ret)
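The new try/except covers targets that fall outside the distribution's support: bin_index.find_ge raises ValueError when no bin is greater than or equal to the target, and the fallback scores the density as if the observed value received zero probability mass. A minimal standalone sketch of the same scoring rule, using a toy list-based density and hypothetical names:

import bisect

def toy_brier(target, bins, probs):
    # bins: sorted bin lower bounds; probs: probability mass per bin
    i = bisect.bisect_left(bins, target)   # first bin >= target, like find_ge
    if i == len(bins):                     # target beyond every bin:
        return sum(p ** 2 for p in probs)  # all mass counts as a miss
    return sum(p ** 2 for j, p in enumerate(probs) if j != i) + (probs[i] - 1) ** 2

print(toy_brier(10, [0, 10, 20], [0.0, 1.0, 0.0]))  # 0.0  -- perfect forecast
print(toy_brier(99, [0, 10, 20], [0.2, 0.5, 0.3]))  # 0.38 -- out-of-support fallback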

View File

@@ -168,14 +168,15 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
             mfts.order = 1
             pool.append(mfts)
 
-    benchmark_models = __pop("benchmark_models", None, kwargs)
+    benchmark_models = __pop("benchmark_models", False, kwargs)
 
     if benchmark_models != False:
         benchmark_methods = __pop("benchmark_methods", None, kwargs)
         benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)
 
-        benchmark_pool = [] if benchmark_models is None else benchmark_models
+        benchmark_pool = [] if (benchmark_models is None or not isinstance(benchmark_models, list)) \
+            else benchmark_models
 
         if benchmark_models is None and benchmark_methods is None:
             if type == 'point' or type == 'partition':
@@ -228,8 +229,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         if progress:
             progressbar.update(windowsize * inc)
 
-        partitioners_pool = []
-
         if benchmark_models != False:
             for model in benchmark_pool:
                 for step in steps_ahead:
                     kwargs['steps_ahead'] = step
@@ -241,6 +241,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
                     job = cluster.submit(deepcopy(model), None, train, test, **kwargs)
                     jobs.append(job)
 
+        partitioners_pool = []
         if partitioners_models is None:
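With the default flipped from None to False, sliding_window_benchmarks now skips the statistical baseline models entirely unless the caller opts in. A usage sketch, assuming a data series is already loaded (it mirrors the call in the test file further below):

from pyFTS.benchmarks import benchmarks as bchmk, arima
from pyFTS.models import pwfts

# FTS methods only: benchmark_models=False suppresses the baseline pool
bchmk.sliding_window_benchmarks(data, 1000, train=0.8,
                                methods=[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False)

# opt back in by naming the baseline classes and their parameters explicitly
bchmk.sliding_window_benchmarks(data, 1000, train=0.8,
                                benchmark_methods=[arima.ARIMA],
                                benchmark_methods_parameters=[{'order': (1, 0, 0)}])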

View File

@@ -26,12 +26,15 @@ class KNearestNeighbors(fts.FTS):
         self.order = kwargs.get("order", 1)
         self.lag = None
         self.k = kwargs.get("k", 30)
+        self.uod = None
 
     def train(self, data, **kwargs):
         if kwargs.get('order', None) is not None:
             self.order = kwargs.get('order', 1)
 
-        self.data = data
+        self.data = np.array(data)
+
+        self.original_max = max(data)
+        self.original_min = min(data)
 
         #self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep')
@@ -47,8 +50,8 @@ class KNearestNeighbors(fts.FTS):
             dist.append(sum([(self.data[k - kk] - sample[kk]) ** 2 for kk in range(self.order)]))
 
         ix = np.argsort(np.array(dist)) + self.order + 1
-        ix = np.clip(ix, 0, len(self.data)-1)
-        return self.data[ix[:self.k]]
+        ix2 = np.clip(ix[:self.k], 0, len(self.data)-1)
+        return self.data[ix2]
 
     def forecast_distribution(self, data, **kwargs):
         ret = []
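argsort here ranks the lag windows by distance, and adding self.order + 1 converts each window's rank into the index of the value that follows it, which can land past the end of the series for the most recent windows; clipping the k selected indices keeps the lookup in bounds. A toy reproduction of the indexing, with hypothetical sizes:

import numpy as np

data = np.arange(100, dtype=float)  # toy series
order, k = 2, 3
sample = data[-order:]              # most recent lags as the query window

dist = [sum((data[i - kk] - sample[kk]) ** 2 for kk in range(order))
        for i in range(order, len(data))]

ix = np.argsort(np.array(dist)) + order + 1  # window rank -> successor index
ix2 = np.clip(ix[:k], 0, len(data) - 1)      # clip only the k chosen indices
print(data[ix2])                             # the neighbors' successor values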

View File

@@ -3,6 +3,15 @@ import pandas as pd
 import numpy as np
 
+def get_data():
+    """
+    Get the univariate time series data.
+
+    :return: numpy array
+    """
+    dat = get_dataframe()
+    return np.array(dat["Avg"])
+
 def get_dataframe():
     """
     Get the complete multivariate time series data.
@@ -11,6 +20,5 @@ def get_dataframe():
     dat = common.get_dataframe('SP500.csv.bz2',
                                'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/SP500.csv.bz2',
                                sep=",", compression='bz2')
-    dat = np.array(dat["Avg"])
     return dat
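After the split, get_dataframe keeps every column of the dataset and the new get_data wrapper extracts only the "Avg" series, matching the two-entry-point pattern of the other pyFTS.data modules:

from pyFTS.data import SP500

y = SP500.get_data()        # numpy array of the "Avg" column only
df = SP500.get_dataframe()  # full multivariate DataFrame, untouched
print(type(y), len(y))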

View File

@@ -5,7 +5,7 @@ import numpy as np
 def get_data():
     """
-    :param field: the dataset field name to extract
+    Get the univariate time series data.
 
     :return: numpy array
     """
     dat = get_dataframe()

View File

@@ -88,7 +88,7 @@ class ProbabilityDistribution(object):
         for k in values:
             if self.type == "histogram":
                 v = self.bin_index.find_ge(k)
-                ret.append(self.distribution[v] / self.count)
+                ret.append(self.distribution[v] / (self.count + 1e-5))
             elif self.type == "KDE":
                 v = self.kde.probability(k, self.data)
                 ret.append(v)
@@ -119,7 +119,6 @@ class ProbabilityDistribution(object):
         self.cdf = None
         self.qtl = None
 
     def expected_value(self):
         return np.nansum([v * self.distribution[v] for v in self.bins])
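The 1e-5 term keeps the histogram density finite when the distribution has not been fed any samples yet (self.count == 0), trading a hard ZeroDivisionError for a negligible bias. A standalone illustration of the guard:

distribution = {10.0: 0}   # hypothetical bin -> frequency, nothing counted yet
count = sum(distribution.values())

# old: distribution[10.0] / count  -> ZeroDivisionError when count == 0
print(distribution[10.0] / (count + 1e-5))  # 0.0, safe to query before training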

View File

@@ -11,9 +11,11 @@ from pyFTS.common import Transformations
 tdiff = Transformations.Differential(1)
 
-from pyFTS.data import TAIEX
+from pyFTS.data import TAIEX, SP500
 
-dataset = TAIEX.get_data()
+#dataset = TAIEX.get_data()
+dataset = SP500.get_data()[11500:16000]
+#print(len(dataset))
 
 '''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformation=tdiff)
@@ -25,13 +27,17 @@ from pyFTS.models import pwfts, song, ifts
 from pyFTS.models.ensemble import ensemble
 '''
 
-model = knn.KNearestNeighbors("")
+#model = knn.KNearestNeighbors("")
 #model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)
 #model = arima.ARIMA("", order=(2,0,2))
 #model = quantreg.QuantileRegression("", order=2, dist=True)
 
 model.append_transformation(tdiff)
 model.fit(dataset[:800])
 
 Measures.get_distribution_statistics(dataset[800:1000], model)
+#tmp = model.predict(dataset[800:1000], type='distribution')
+#for tmp2 in tmp:
+#    print(tmp2)
 '''
-#'''
+'''
@@ -51,28 +57,16 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
 from pyFTS.benchmarks import arima, naive, quantreg
 
-bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
-                                #methods=[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=[],
-                                benchmark_methods=[arima.ARIMA for k in range(4)]
-                                                  + [quantreg.QuantileRegression for k in range(2)]
-                                                  + [knn.KNearestNeighbors],
-                                benchmark_methods_parameters=[
-                                    {'order': (1, 0, 0)},
-                                    {'order': (1, 0, 1)},
-                                    {'order': (2, 0, 1)},
-                                    {'order': (2, 0, 2)},
-                                    {'order': 1, 'dist': True},
-                                    {'order': 2, 'dist': True},
-                                    {}
-                                ],
-                                #transformations=[tdiff],
-                                orders=[1],
-                                partitions=np.arange(30, 80, 5),
-                                progress=False, type='distribution',
+bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
+                                methods=[pwfts.ProbabilisticWeightedFTS],
+                                benchmark_models=False,
+                                transformations=[tdiff],
+                                orders=[1,2,3],
+                                partitions=np.arange(3, 50, 2),
+                                progress=False, type='point',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                #distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
-                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
+                                distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                file="benchmarks.db", dataset="SP500", tag="partitioning")
 
 #'''
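The run now writes its results to benchmarks.db. Judging by the extension this is a SQLite file, so a quick inspection with the standard library should reveal the result tables; the table names are not given in this commit and must be discovered first:

import sqlite3

conn = sqlite3.connect("benchmarks.db")
for (name,) in conn.execute("SELECT name FROM sqlite_master WHERE type='table'"):
    print(name)  # discover the actual table name(s) before querying results
conn.close()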