Small bugfixes in benchmarks and benchmark models
This commit is contained in:
parent
95c6e90bfb
commit
191ddf90d8
@ -219,10 +219,14 @@ def brier_score(targets, densities):
|
||||
'''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3. '''
|
||||
ret = []
|
||||
for ct, d in enumerate(densities):
|
||||
v = d.bin_index.find_ge(targets[ct])
|
||||
score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
|
||||
score += (d.distribution[v] - 1) ** 2
|
||||
ret.append(score)
|
||||
try:
|
||||
v = d.bin_index.find_ge(targets[ct])
|
||||
|
||||
score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
|
||||
score += (d.distribution[v] - 1) ** 2
|
||||
ret.append(score)
|
||||
except ValueError as ex:
|
||||
ret.append(sum([d.distribution[k] ** 2 for k in d.bins]))
|
||||
return sum(ret)/len(ret)
|
||||
|
||||
|
||||
|
@ -168,15 +168,16 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
|
||||
mfts.order = 1
|
||||
pool.append(mfts)
|
||||
|
||||
benchmark_models = __pop("benchmark_models", None, kwargs)
|
||||
|
||||
benchmark_methods = __pop("benchmark_methods", None, kwargs)
|
||||
benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)
|
||||
|
||||
benchmark_pool = [] if benchmark_models is None else benchmark_models
|
||||
benchmark_models = __pop("benchmark_models", False, kwargs)
|
||||
|
||||
if benchmark_models != False:
|
||||
|
||||
benchmark_methods = __pop("benchmark_methods", None, kwargs)
|
||||
benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)
|
||||
|
||||
benchmark_pool = [] if ( benchmark_models is None or not isinstance(benchmark_models, list)) \
|
||||
else benchmark_models
|
||||
|
||||
if benchmark_models is None and benchmark_methods is None:
|
||||
if type == 'point'or type == 'partition':
|
||||
benchmark_methods = get_benchmark_point_methods()
|
||||
@ -228,20 +229,20 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
|
||||
if progress:
|
||||
progressbar.update(windowsize * inc)
|
||||
|
||||
if benchmark_models != False:
|
||||
for model in benchmark_pool:
|
||||
for step in steps_ahead:
|
||||
kwargs['steps_ahead'] = step
|
||||
|
||||
if not distributed:
|
||||
job = experiment_method(deepcopy(model), None, train, test, **kwargs)
|
||||
synthesis_method(dataset, tag, job, conn)
|
||||
else:
|
||||
job = cluster.submit(deepcopy(model), None, train, test, **kwargs)
|
||||
jobs.append(job)
|
||||
|
||||
partitioners_pool = []
|
||||
|
||||
for model in benchmark_pool:
|
||||
for step in steps_ahead:
|
||||
kwargs['steps_ahead'] = step
|
||||
|
||||
if not distributed:
|
||||
job = experiment_method(deepcopy(model), None, train, test, **kwargs)
|
||||
synthesis_method(dataset, tag, job, conn)
|
||||
else:
|
||||
job = cluster.submit(deepcopy(model), None, train, test, **kwargs)
|
||||
jobs.append(job)
|
||||
|
||||
|
||||
if partitioners_models is None:
|
||||
|
||||
for transformation in transformations:
|
||||
|
@ -26,12 +26,15 @@ class KNearestNeighbors(fts.FTS):
|
||||
self.order = kwargs.get("order", 1)
|
||||
self.lag = None
|
||||
self.k = kwargs.get("k", 30)
|
||||
self.uod = None
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('order', None) is not None:
|
||||
self.order = kwargs.get('order', 1)
|
||||
|
||||
self.data = data
|
||||
self.data = np.array(data)
|
||||
self.original_max = max(data)
|
||||
self.original_min = min(data)
|
||||
|
||||
#self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep')
|
||||
|
||||
@ -47,8 +50,8 @@ class KNearestNeighbors(fts.FTS):
|
||||
dist.append(sum([ (self.data[k - kk] - sample[kk])**2 for kk in range(self.order)]))
|
||||
ix = np.argsort(np.array(dist)) + self.order + 1
|
||||
|
||||
ix = np.clip(ix, 0, len(self.data)-1 )
|
||||
return self.data[ix[:self.k]]
|
||||
ix2 = np.clip(ix[:self.k], 0, len(self.data)-1)
|
||||
return self.data[ix2]
|
||||
|
||||
def forecast_distribution(self, data, **kwargs):
|
||||
ret = []
|
||||
|
@ -3,6 +3,15 @@ import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_data():
|
||||
"""
|
||||
Get the univariate time series data.
|
||||
:return: numpy array
|
||||
"""
|
||||
dat = get_dataframe()
|
||||
return np.array(dat["Avg"])
|
||||
|
||||
|
||||
def get_dataframe():
|
||||
"""
|
||||
Get the complete multivariate time series data.
|
||||
@ -11,6 +20,5 @@ def get_dataframe():
|
||||
dat = common.get_dataframe('SP500.csv.bz2',
|
||||
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/SP500.csv.bz2',
|
||||
sep=",", compression='bz2')
|
||||
dat = np.array(dat["Avg"])
|
||||
return dat
|
||||
|
||||
|
@ -5,7 +5,7 @@ import numpy as np
|
||||
|
||||
def get_data():
|
||||
"""
|
||||
:param field: the dataset field name to extract
|
||||
Get the univariate time series data.
|
||||
:return: numpy array
|
||||
"""
|
||||
dat = get_dataframe()
|
||||
|
@ -88,7 +88,7 @@ class ProbabilityDistribution(object):
|
||||
for k in values:
|
||||
if self.type == "histogram":
|
||||
v = self.bin_index.find_ge(k)
|
||||
ret.append(self.distribution[v] / self.count)
|
||||
ret.append(self.distribution[v] / (self.count + 1e-5))
|
||||
elif self.type == "KDE":
|
||||
v = self.kde.probability(k, self.data)
|
||||
ret.append(v)
|
||||
@ -119,7 +119,6 @@ class ProbabilityDistribution(object):
|
||||
self.cdf = None
|
||||
self.qtl = None
|
||||
|
||||
|
||||
def expected_value(self):
|
||||
return np.nansum([v * self.distribution[v] for v in self.bins])
|
||||
|
||||
|
@ -11,9 +11,11 @@ from pyFTS.common import Transformations
|
||||
|
||||
tdiff = Transformations.Differential(1)
|
||||
|
||||
from pyFTS.data import TAIEX
|
||||
from pyFTS.data import TAIEX, SP500
|
||||
|
||||
dataset = TAIEX.get_data()
|
||||
#dataset = TAIEX.get_data()
|
||||
dataset = SP500.get_data()[11500:16000]
|
||||
#print(len(dataset))
|
||||
'''
|
||||
from pyFTS.partitioners import Grid, Util as pUtil
|
||||
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformation=tdiff)
|
||||
@ -25,13 +27,17 @@ from pyFTS.models import pwfts, song, ifts
|
||||
from pyFTS.models.ensemble import ensemble
|
||||
|
||||
'''
|
||||
model = knn.KNearestNeighbors("")
|
||||
#model = knn.KNearestNeighbors("")
|
||||
#model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)
|
||||
#model = arima.ARIMA("", order=(2,0,2))
|
||||
#model = quantreg.QuantileRegression("", order=2, dist=True)
|
||||
model.append_transformation(tdiff)
|
||||
model.fit(dataset[:800])
|
||||
Measures.get_distribution_statistics(dataset[800:1000], model)
|
||||
#tmp = model.predict(dataset[800:1000], type='distribution')
|
||||
#for tmp2 in tmp:
|
||||
# print(tmp2)
|
||||
'''
|
||||
#'''
|
||||
|
||||
|
||||
'''
|
||||
@ -51,28 +57,16 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
|
||||
|
||||
from pyFTS.benchmarks import arima, naive, quantreg
|
||||
|
||||
bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
|
||||
#methods=[pwfts.ProbabilisticWeightedFTS],
|
||||
benchmark_models=[],
|
||||
benchmark_methods=[arima.ARIMA for k in range(4)]
|
||||
+ [quantreg.QuantileRegression for k in range(2)]
|
||||
+ [knn.KNearestNeighbors],
|
||||
benchmark_methods_parameters=[
|
||||
{'order': (1, 0, 0)},
|
||||
{'order': (1, 0, 1)},
|
||||
{'order': (2, 0, 1)},
|
||||
{'order': (2, 0, 2)},
|
||||
{'order': 1, 'dist': True},
|
||||
{'order': 2, 'dist': True},
|
||||
{}
|
||||
],
|
||||
#transformations=[tdiff],
|
||||
orders=[1],
|
||||
partitions=np.arange(30, 80, 5),
|
||||
progress=False, type='distribution',
|
||||
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
|
||||
methods=[pwfts.ProbabilisticWeightedFTS],
|
||||
benchmark_models=False,
|
||||
transformations=[tdiff],
|
||||
orders=[1,2,3],
|
||||
partitions=np.arange(3, 50, 2),
|
||||
progress=False, type='point',
|
||||
#steps_ahead=[1,4,7,10], #steps_ahead=[1]
|
||||
#distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
||||
file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
|
||||
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
||||
file="benchmarks.db", dataset="SP500", tag="partitioning")
|
||||
|
||||
|
||||
#'''
|
||||
|
Loading…
Reference in New Issue
Block a user