Bugfixes in Measures and optimizations on benchmarks

parent a446cec232
commit 95c6e90bfb
@@ -239,6 +239,9 @@ def pmf_to_cdf(density):
    return df


+def heavyside(bin, target):
+    return 1 if bin >= target else 0
+
def heavyside_cdf(bins, targets):
    ret = []
    for t in targets:
@@ -255,24 +258,13 @@ def crps(targets, densities):
    :return: float
    '''
    _crps = float(0.0)
-    if isinstance(densities, pd.DataFrame):
-        l = len(densities.columns)
-        n = len(densities.index)
-        Ff = pmf_to_cdf(densities)
-        Fa = heavyside_cdf(densities.columns, targets)
-        for k in densities.index:
-            _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
-    elif isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
-        l = len(densities.bins)
-        n = 1
-        Fa = heavyside_cdf(densities.bins, targets)
-        _crps = sum([(densities.cummulative(val) - Fa[val][0]) ** 2 for val in densities.bins])
-    elif isinstance(densities, list):
-        l = len(densities[0].bins)
-        n = len(densities)
-        Fa = heavyside_cdf(densities[0].bins, targets)
-        for df in densities:
-            _crps += sum([(df.cummulative(val) - Fa[val][0]) ** 2 for val in df.bins])
+    if isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
+        densities = [densities]
+
+    l = len(densities[0].bins)
+    n = len(densities)
+    for ct, df in enumerate(densities):
+        _crps += sum([(df.cummulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])

    return _crps / float(l * n)
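
For intuition, the simplified CRPS above compares the forecast's discrete CDF against the Heaviside step CDF of the observation, bin by bin. A minimal worked sketch, using a hypothetical DummyDist stand-in that exposes only the bins attribute and cummulative method (pyFTS spelling) that crps() actually touches:

# Minimal sketch of the simplified CRPS; DummyDist is a hypothetical
# stand-in for ProbabilityDistribution, not part of this commit.
class DummyDist:
    def __init__(self, bins, cdf):
        self.bins = bins    # ascending bin values
        self._cdf = cdf     # cumulative probability at each bin

    def cummulative(self, bin):
        return self._cdf[self.bins.index(bin)]

bins = [0, 1, 2, 3]
dist = DummyDist(bins, [0.1, 0.4, 0.8, 1.0])
target = 2  # the observed value

# Heaviside CDF of the observation: 0 below the target, 1 from it onwards
_crps = sum((dist.cummulative(b) - (1 if b >= target else 0)) ** 2 for b in bins)
print(_crps / (len(bins) * 1))  # (0.01 + 0.16 + 0.04 + 0.0) / 4 = 0.0525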
@@ -387,8 +379,9 @@ def get_distribution_statistics(data, model, **kwargs):
        _s1 = time.time()
        forecasts = model.predict(data, **kwargs)
        _e1 = time.time()
-        ret.append(round(crps(data[model.order:], forecasts), 3))
+        ret.append(round(crps(data[model.order:], forecasts[:-1]), 3))
        ret.append(round(_e1 - _s1, 3))
+        ret.append(round(brier_score(data[model.order:], forecasts[:-1]), 3))
    else:
        skip = kwargs.get('steps_ahead_sampler', 1)
        forecasts = []
@@ -402,6 +395,7 @@ def get_distribution_statistics(data, model, **kwargs):
        start = model.order + steps_ahead
        ret.append(round(crps(data[start:-1:skip], forecasts), 3))
        ret.append(round(_e1 - _s1, 3))
+        ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))

    return ret
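
get_distribution_statistics now returns three values, in order: CRPS, prediction time, and Brier score; run_probabilistic below unpacks them in exactly that order. The brier_score helper itself is not part of this diff; a hedged sketch of a mean multiclass Brier score over binned densities, assuming each forecast exposes bins and a density(bin) probability (interface assumed, not confirmed by the diff):

# Hedged sketch of a Brier score over binned densities; .density(bin) and
# .bins are assumed names, since brier_score's definition is outside this diff.
def brier_score_sketch(targets, densities):
    total = 0.0
    for ct, dist in enumerate(densities):
        # one-hot indicator: 1 only for the bin closest to the observation
        nearest = min(dist.bins, key=lambda b: abs(b - targets[ct]))
        total += sum((dist.density(b) - (1 if b == nearest else 0)) ** 2
                     for b in dist.bins)
    return total / len(densities)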
@@ -48,6 +48,38 @@ def __pop(key, default, kwargs):
    return default


+def get_benchmark_point_methods():
+    """Return all non FTS methods for point forecasting"""
+    return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
+
+
+def get_point_methods():
+    """Return all FTS methods for point forecasting"""
+    return [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
+            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
+            pwfts.ProbabilisticWeightedFTS]
+
+
+def get_benchmark_interval_methods():
+    """Return all non FTS methods for point_to_interval forecasting"""
+    return [arima.ARIMA, quantreg.QuantileRegression]
+
+
+def get_interval_methods():
+    """Return all FTS methods for point_to_interval forecasting"""
+    return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+
+def get_probabilistic_methods():
+    """Return all FTS methods for probabilistic forecasting"""
+    return [ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]
+
+
+def get_benchmark_probabilistic_methods():
+    """Return all non FTS methods for probabilistic forecasting"""
+    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
+
+
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.
@@ -141,6 +173,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    benchmark_methods = __pop("benchmark_methods", None, kwargs)
    benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)

+    benchmark_pool = [] if benchmark_models is None else benchmark_models
+
    if benchmark_models != False:

        if benchmark_models is None and benchmark_methods is None:
@@ -151,13 +185,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
        elif type == 'distribution':
            benchmark_methods = get_benchmark_probabilistic_methods()

-        if isinstance(benchmark_models, list) :
-            pool.extend(benchmark_models)
-        elif benchmark_methods is not None:
-            for count, model in enumerate(benchmark_methods, start=0):
-                par = benchmark_methods_parameters[count]
-                mfts = model("", **par)
-                pool.append(mfts)
+        if benchmark_methods is not None:
+            for transformation in transformations:
+                for count, model in enumerate(benchmark_methods, start=0):
+                    par = benchmark_methods_parameters[count]
+                    mfts = model("", **par)
+                    mfts.append_transformation(transformation)
+                    benchmark_pool.append(mfts)

    if type == 'point':
        experiment_method = run_point
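
benchmark_methods is paired positionally with benchmark_methods_parameters: the class at index i is instantiated with the kwargs dict at index i, once per transformation, and pushed into benchmark_pool. A minimal sketch of that contract (the lists shown are illustrative, mirroring the test script at the end of this commit):

from pyFTS.benchmarks import arima, quantreg

# the class at index i receives the parameter dict at index i,
# exactly as the enumerate() loop above does
benchmark_methods = [arima.ARIMA, quantreg.QuantileRegression]
benchmark_methods_parameters = [
    {'order': (1, 0, 0)},        # ARIMA(1,0,0)
    {'order': 1, 'dist': True},  # QAR(1) with distribution output
]
pool = [cls("", **par)
        for cls, par in zip(benchmark_methods, benchmark_methods_parameters)]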
@@ -184,6 +218,10 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):

    inc = __pop("inc", 0.1, kwargs)

+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
        experiments += 1
@@ -192,6 +230,18 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):

        partitioners_pool = []

+        for model in benchmark_pool:
+            for step in steps_ahead:
+                kwargs['steps_ahead'] = step
+
+                if not distributed:
+                    job = experiment_method(deepcopy(model), None, train, test, **kwargs)
+                    synthesis_method(dataset, tag, job, conn)
+                else:
+                    job = cluster.submit(deepcopy(model), None, train, test, **kwargs)
+                    jobs.append(job)
+
+
        if partitioners_models is None:

            for transformation in transformations:
@@ -210,10 +260,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
        if progress:
            rng1 = tqdm(steps_ahead, desc="Steps")

-        file = kwargs.get('file', "benchmarks.db")
-
-        conn = bUtil.open_benchmark_db(file)
-
        for step in rng1:
            rng2 = partitioners_pool
@@ -267,36 +313,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    conn.close()


-def get_benchmark_point_methods():
-    """Return all non FTS methods for point forecasting"""
-    return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
-
-
-def get_point_methods():
-    """Return all FTS methods for point forecasting"""
-    return [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
-            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
-            pwfts.ProbabilisticWeightedFTS]
-
-
-def get_benchmark_interval_methods():
-    """Return all non FTS methods for point_to_interval forecasting"""
-    return [ arima.ARIMA, quantreg.QuantileRegression]
-
-
-def get_interval_methods():
-    """Return all FTS methods for point_to_interval forecasting"""
-    return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
-
-
-def get_probabilistic_methods():
-    """Return all FTS methods for probabilistic forecasting"""
-    return [ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]
-
-
-def get_benchmark_probabilistic_methods():
-    """Return all FTS methods for probabilistic forecasting"""
-    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
-
-
def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
@@ -336,7 +352,6 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):

    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
-        mfts.append_transformation(partitioner.transformation)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
@@ -347,7 +362,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    _key += str(method) if method is not None else ""

    _start = time.time()
-    mfts.fit(train_data, order=mfts.order, **kwargs)
+    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

@@ -392,7 +407,6 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
-        mfts.append_transformation(partitioner.transformation)
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
@@ -404,7 +418,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    _key += str(method) if method is not None else ""

    _start = time.time()
-    mfts.fit(train_data, order=mfts.order, **kwargs)
+    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

@@ -456,7 +470,6 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):

    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
-        mfts.append_transformation(partitioner.transformation)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
@@ -469,20 +482,15 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    if mfts.has_seasonality:
        mfts.indexer = indexer

-    try:
-        _start = time.time()
-        mfts.fit(train_data, order=mfts.order)
-        _end = time.time()
-        times = _end - _start
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start

-        _crps1, _t1 = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
-        _t1 += times
-    except Exception as e:
-        print(e)
-        _crps1 = np.nan
-        _t1 = np.nan
+    _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+    _t1 += times

-    ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'window': window_key,
+    ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'brier': _brier, 'window': window_key,
           'steps': steps_ahead, 'method': method}

    return ret
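
With the try/except removed, a model that fails during fit or predict now raises instead of silently yielding NaN rows, so any guarding must happen in the caller. A hypothetical wrapper (not part of this commit) that restores the old NaN-recording behavior:

import numpy as np
from pyFTS.benchmarks import benchmarks as bchmk

def run_probabilistic_guarded(model, partitioner, train, test, **kwargs):
    # hypothetical guard, not in this commit: record NaNs for a failing
    # model instead of aborting the whole sliding window
    try:
        return bchmk.run_probabilistic(model, partitioner, train, test, **kwargs)
    except Exception as e:
        print(e)
        return {'key': 'failed', 'obj': model, 'CRPS': np.nan, 'time': np.nan,
                'brier': np.nan, 'window': kwargs.get('window_key'),
                'steps': kwargs.get('steps_ahead'), 'method': kwargs.get('method')}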
@@ -541,11 +549,14 @@ def process_probabilistic_jobs(dataset, tag, job, conn):
    data = bUtil.process_common_data(dataset, tag, 'density', job)

    crps = deepcopy(data)
-    crps.extend(["CRPS",job["CRPS"]])
+    crps.extend(["crps",job["CRPS"]])
    bUtil.insert_benchmark(crps, conn)
    time = deepcopy(data)
    time.extend(["time", job["time"]])
    bUtil.insert_benchmark(time, conn)
+    brier = deepcopy(data)
+    brier.extend(["brier", job["brier"]])
+    bUtil.insert_benchmark(brier, conn)


def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
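
The metric label changes from "CRPS" to lowercase "crps", consistent with "time" and "brier", and a third row now persists the Brier score per job. A hedged sketch of reading those rows back, assuming the default benchmarks.db file and a benchmarks table whose last two columns are Measure and Value (the schema lives in bUtil.open_benchmark_db and is not shown in this diff):

import sqlite3

# table and column names assumed, not confirmed by this diff
conn = sqlite3.connect("benchmarks.db")
for measure in ("crps", "time", "brier"):
    rows = conn.execute("SELECT Model, Value FROM benchmarks WHERE Measure = ?",
                        (measure,)).fetchall()
    print(measure, len(rows))
conn.close()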
@@ -6,6 +6,7 @@ from statsmodels.tsa.tsatools import lagmat
from pyFTS.common import fts
+from pyFTS.probabilistic import ProbabilityDistribution


class KNearestNeighbors(fts.FTS):
    """
    K-Nearest Neighbors
@@ -13,6 +14,7 @@ class KNearestNeighbors(fts.FTS):
    def __init__(self, name, **kwargs):
        super(KNearestNeighbors, self).__init__(1, "kNN"+name)
        self.name = "kNN"
+        self.shortname = "kNN"
        self.detail = "K-Nearest Neighbors"
        self.is_high_order = True
        self.has_point_forecasting = True
@@ -8,6 +8,7 @@ from statsmodels.tsa.tsatools import lagmat
from pyFTS.common import SortedCollection, fts
+from pyFTS.probabilistic import ProbabilityDistribution


class QuantileRegression(fts.FTS):
    """Façade for statsmodels.regression.quantile_regression"""
    def __init__(self, name, **kwargs):
@@ -26,10 +27,11 @@ class QuantileRegression(fts.FTS):
        self.mean_qt = None
        self.lower_qt = None
        self.dist_qt = None
+        self.order = kwargs.get('order', 1)
+        self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"

    def train(self, data, **kwargs):
-        if kwargs.get('order', None) is not None:
+        if 'order' in kwargs:
            self.order = kwargs.get('order', 1)

        if self.indexer is not None and isinstance(data, pd.DataFrame):
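
Capturing order in the constructor means the shortname reflects it immediately instead of only after train(). For example, constructing the model the way the benchmark pool does (parameters mirror the test script below; the default alpha is an assumption):

from pyFTS.benchmarks import quantreg

# built as the benchmark pool does: model_class("", **parameters)
model = quantreg.QuantileRegression("", order=2, dist=True)
print(model.shortname)  # "QAR(2,<alpha>)", e.g. "QAR(2,0.05)" if alpha defaults to 0.05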
@@ -246,9 +246,10 @@ class EnsembleFTS(fts.FTS):


class AllMethodEnsembleFTS(EnsembleFTS):
-    def __init__(self, **kwargs):
-        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS", **kwargs)
+    def __init__(self, name, **kwargs):
+        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS"+name, **kwargs)
        self.min_order = 3
+        self.shortname = "Ensemble FTS"

    def set_transformations(self, model):
        for t in self.transformations:
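
AllMethodEnsembleFTS now takes a required positional name, matching the model("", **par) convention the benchmark pool relies on. Older calls such as ensemble.AllMethodEnsembleFTS(partitioner=partitioner), still visible in the commented-out block of the test script below, would need the extra argument. A minimal sketch, with a synthetic series standing in for the script's dataset:

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models.ensemble import ensemble

dataset = np.random.normal(0, 1, 1000)  # stand-in for the series in the test script
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10)

# old: ensemble.AllMethodEnsembleFTS(partitioner=partitioner)  -> TypeError now
model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)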
@@ -20,15 +20,17 @@ partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformati
'''
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima

'''

from pyFTS.models import pwfts, song, ifts
from pyFTS.models.ensemble import ensemble

model = ensemble.AllMethodEnsembleFTS(partitioner=partitioner)
'''
model = knn.KNearestNeighbors("")
model.fit(dataset[:800])
tmp = model.predict(dataset[800:1000], type='distribution')
for tmp2 in tmp:
    print(tmp2)
Measures.get_distribution_statistics(dataset[800:1000], model)
#tmp = model.predict(dataset[800:1000], type='distribution')
#for tmp2 in tmp:
#    print(tmp2)
'''
@@ -49,11 +51,12 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead

from pyFTS.benchmarks import arima, naive, quantreg

-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
-                                #methods=[ifts.IntervalFTS], #[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False,
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
+                                benchmark_models=[],
                                benchmark_methods=[arima.ARIMA for k in range(4)]
-                                + [quantreg.QuantileRegression for k in range(2)],
+                                + [quantreg.QuantileRegression for k in range(2)]
+                                + [knn.KNearestNeighbors],
                                benchmark_methods_parameters=[
                                    {'order': (1, 0, 0)},
                                    {'order': (1, 0, 1)},
|
||||
{'order': (2, 0, 2)},
|
||||
{'order': 1, 'dist': True},
|
||||
{'order': 2, 'dist': True},
|
||||
{}
|
||||
],
|
||||
#transformations=[None, tdiff],
|
||||
orders=[1, 2, 3],
|
||||
#transformations=[tdiff],
|
||||
orders=[1],
|
||||
partitions=np.arange(30, 80, 5),
|
||||
progress=False, type='distribution',
|
||||
#steps_ahead=[1,4,7,10], #steps_ahead=[1]
|
||||
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
||||
file="benchmarks.db", dataset="TAIEX", tag="comparisons")
|
||||
#distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
||||
file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
|
||||
|
||||
|
||||
#'''
|
||||
|