Bugfixes in Measures and optimizations in benchmarks

Petrônio Cândido 2018-04-25 18:47:51 -03:00
parent a446cec232
commit 95c6e90bfb
6 changed files with 108 additions and 94 deletions

View File: pyFTS/benchmarks/Measures.py

@@ -239,6 +239,9 @@ def pmf_to_cdf(density):
     return df

+def heavyside(bin, target):
+    return 1 if bin >= target else 0
+
 def heavyside_cdf(bins, targets):
     ret = []
     for t in targets:
@@ -255,24 +258,13 @@ def crps(targets, densities):
     :return: float
     '''
     _crps = float(0.0)
-    if isinstance(densities, pd.DataFrame):
-        l = len(densities.columns)
-        n = len(densities.index)
-        Ff = pmf_to_cdf(densities)
-        Fa = heavyside_cdf(densities.columns, targets)
-        for k in densities.index:
-            _crps += sum([(Ff[col][k] - Fa[col][k]) ** 2 for col in densities.columns])
-    elif isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
-        l = len(densities.bins)
-        n = 1
-        Fa = heavyside_cdf(densities.bins, targets)
-        _crps = sum([(densities.cummulative(val) - Fa[val][0]) ** 2 for val in densities.bins])
-    elif isinstance(densities, list):
-        l = len(densities[0].bins)
-        n = len(densities)
-        Fa = heavyside_cdf(densities[0].bins, targets)
-        for df in densities:
-            _crps += sum([(df.cummulative(val) - Fa[val][0]) ** 2 for val in df.bins])
+    if isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
+        densities = [densities]
+
+    l = len(densities[0].bins)
+    n = len(densities)
+    for ct, df in enumerate(densities):
+        _crps += sum([(df.cummulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])

     return _crps / float(l * n)
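With the pd.DataFrame branch gone, crps() now normalizes its input to a list of ProbabilityDistribution objects and compares each distribution against its own target, inlining the Heaviside term instead of going through heavyside_cdf(). A minimal sketch of the new logic with a toy stand-in class (hypothetical; only the bins attribute and the cummulative() method of the real pyFTS class are assumed):

    # Toy stand-in for pyFTS.probabilistic.ProbabilityDistribution
    class ToyDistribution:
        def __init__(self, bins, cdf):
            self.bins = bins    # discretization points
            self._cdf = cdf     # maps bin -> cumulative probability

        def cummulative(self, bin):  # spelling follows the pyFTS method name
            return self._cdf[bin]

    def crps(targets, densities):
        _crps = 0.0
        l = len(densities[0].bins)
        n = len(densities)
        for ct, df in enumerate(densities):
            # squared gap between the forecast CDF and the step 1{bin >= target}
            _crps += sum((df.cummulative(b) - (1 if b >= targets[ct] else 0)) ** 2
                         for b in df.bins)
        return _crps / float(l * n)

    dist = ToyDistribution([0, 1, 2, 3], {0: 0.1, 1: 0.4, 2: 0.8, 3: 1.0})
    print(crps([2], [dist]))  # (0.01 + 0.16 + 0.04 + 0.0) / 4 = 0.0525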
@@ -387,8 +379,9 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
-        ret.append(round(crps(data[model.order:], forecasts), 3))
+        ret.append(round(crps(data[model.order:], forecasts[:-1]), 3))
         ret.append(round(_e1 - _s1, 3))
+        ret.append(round(brier_score(data[model.order:], forecasts[:-1]), 3))
     else:
         skip = kwargs.get('steps_ahead_sampler', 1)
         forecasts = []
@@ -402,6 +395,7 @@ def get_distribution_statistics(data, model, **kwargs):
         start = model.order + steps_ahead
         ret.append(round(crps(data[start:-1:skip], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
+        ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))

     return ret
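Callers should note that get_distribution_statistics() now returns three values — CRPS, elapsed time, and Brier score — instead of two; run_probabilistic below is updated accordingly. A usage sketch (test_data and model are assumed to be a held-out series and a fitted probabilistic model):

    # Return order follows the ret.append calls above: CRPS, time, Brier.
    _crps, _time, _brier = Measures.get_distribution_statistics(test_data, model)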

View File: pyFTS/benchmarks/benchmarks.py

@@ -48,6 +48,38 @@ def __pop(key, default, kwargs):
     return default

+
+def get_benchmark_point_methods():
+    """Return all non FTS methods for point forecasting"""
+    return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
+
+
+def get_point_methods():
+    """Return all FTS methods for point forecasting"""
+    return [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
+            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
+            pwfts.ProbabilisticWeightedFTS]
+
+
+def get_benchmark_interval_methods():
+    """Return all non FTS methods for interval forecasting"""
+    return [arima.ARIMA, quantreg.QuantileRegression]
+
+
+def get_interval_methods():
+    """Return all FTS methods for interval forecasting"""
+    return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+
+def get_probabilistic_methods():
+    """Return all FTS methods for probabilistic forecasting"""
+    return [ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]
+
+
+def get_benchmark_probabilistic_methods():
+    """Return all non FTS methods for probabilistic forecasting"""
+    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
+
+
 def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     """
     Sliding window benchmarks for FTS forecasters.
@@ -141,6 +173,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     benchmark_methods = __pop("benchmark_methods", None, kwargs)
     benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)

+    benchmark_pool = [] if benchmark_models is None else benchmark_models
+
     if benchmark_models != False:
         if benchmark_models is None and benchmark_methods is None:
@@ -151,13 +185,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         elif type == 'distribution':
             benchmark_methods = get_benchmark_probabilistic_methods()

-    if isinstance(benchmark_models, list):
-        pool.extend(benchmark_models)
-    elif benchmark_methods is not None:
-        for count, model in enumerate(benchmark_methods, start=0):
-            par = benchmark_methods_parameters[count]
-            mfts = model("", **par)
-            pool.append(mfts)
+    if benchmark_methods is not None:
+        for transformation in transformations:
+            for count, model in enumerate(benchmark_methods, start=0):
+                par = benchmark_methods_parameters[count]
+                mfts = model("", **par)
+                mfts.append_transformation(transformation)
+                benchmark_pool.append(mfts)

     if type == 'point':
         experiment_method = run_point
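benchmark_methods and benchmark_methods_parameters are paired positionally, and the rewritten block instantiates one copy of each benchmark method per transformation, attaching the transformation to the model itself rather than borrowing it from the partitioner (the run_* functions below drop that workaround). A sketch of the expected call shape, with illustrative parameter values:

    bchmk.sliding_window_benchmarks(
        dataset, 1000, train=0.8,
        benchmark_models=[],  # optional pre-built instances, seeds benchmark_pool
        benchmark_methods=[arima.ARIMA, quantreg.QuantileRegression],
        benchmark_methods_parameters=[
            {'order': (1, 0, 0)},        # kwargs for arima.ARIMA
            {'order': 1, 'dist': True},  # kwargs for quantreg.QuantileRegression
        ],
        type='distribution', dataset="TAIEX", tag="comparisons")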
@@ -184,6 +218,10 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     inc = __pop("inc", 0.1, kwargs)

+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
     for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
         experiments += 1
@@ -192,6 +230,18 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         partitioners_pool = []

+        for model in benchmark_pool:
+            for step in steps_ahead:
+                kwargs['steps_ahead'] = step
+
+                if not distributed:
+                    job = experiment_method(deepcopy(model), None, train, test, **kwargs)
+                    synthesis_method(dataset, tag, job, conn)
+                else:
+                    job = cluster.submit(deepcopy(model), None, train, test, **kwargs)
+                    jobs.append(job)
+
         if partitioners_models is None:
             for transformation in transformations:
@@ -210,10 +260,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         if progress:
             rng1 = tqdm(steps_ahead, desc="Steps")

-        file = kwargs.get('file', "benchmarks.db")
-
-        conn = bUtil.open_benchmark_db(file)
-
         for step in rng1:
             rng2 = partitioners_pool
@@ -267,36 +313,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     conn.close()

-def get_benchmark_point_methods():
-    """Return all non FTS methods for point forecasting"""
-    return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
-
-def get_point_methods():
-    """Return all FTS methods for point forecasting"""
-    return [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
-            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
-            pwfts.ProbabilisticWeightedFTS]
-
-def get_benchmark_interval_methods():
-    """Return all non FTS methods for point_to_interval forecasting"""
-    return [ arima.ARIMA, quantreg.QuantileRegression]
-
-def get_interval_methods():
-    """Return all FTS methods for point_to_interval forecasting"""
-    return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
-
-def get_probabilistic_methods():
-    """Return all FTS methods for probabilistic forecasting"""
-    return [ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]
-
-def get_benchmark_probabilistic_methods():
-    """Return all FTS methods for probabilistic forecasting"""
-    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
-

 def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
@@ -336,7 +352,6 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
-        mfts.append_transformation(partitioner.transformation)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
@@ -347,7 +362,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     _key += str(method) if method is not None else ""

     _start = time.time()
-    mfts.fit(train_data, order=mfts.order, **kwargs)
+    mfts.fit(train_data, **kwargs)
     _end = time.time()
     times = _end - _start
@@ -392,7 +407,6 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     method = kwargs.get('method', None)

     if mfts.benchmark_only:
-        mfts.append_transformation(partitioner.transformation)
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
@@ -404,7 +418,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     _key += str(method) if method is not None else ""

     _start = time.time()
-    mfts.fit(train_data, order=mfts.order, **kwargs)
+    mfts.fit(train_data, **kwargs)
     _end = time.time()
     times = _end - _start
@@ -456,7 +470,6 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
-        mfts.append_transformation(partitioner.transformation)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
@@ -469,20 +482,15 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     if mfts.has_seasonality:
         mfts.indexer = indexer

-    try:
-        _start = time.time()
-        mfts.fit(train_data, order=mfts.order)
-        _end = time.time()
-        times = _end - _start
-        _crps1, _t1 = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
-        _t1 += times
-    except Exception as e:
-        print(e)
-        _crps1 = np.nan
-        _t1 = np.nan
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+    _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+    _t1 += times

-    ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'window': window_key,
+    ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'brier': _brier, 'window': window_key,
            'steps': steps_ahead, 'method': method}

     return ret
@@ -541,11 +549,14 @@ def process_probabilistic_jobs(dataset, tag, job, conn):
     data = bUtil.process_common_data(dataset, tag, 'density', job)

     crps = deepcopy(data)
-    crps.extend(["CRPS", job["CRPS"]])
+    crps.extend(["crps", job["CRPS"]])
     bUtil.insert_benchmark(crps, conn)
     time = deepcopy(data)
     time.extend(["time", job["time"]])
     bUtil.insert_benchmark(time, conn)
+    brier = deepcopy(data)
+    brier.extend(["brier", job["brier"]])
+    bUtil.insert_benchmark(brier, conn)


 def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
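process_probabilistic_jobs() now fans each job out into three database rows, one per measure, all sharing the common columns built by bUtil.process_common_data. A self-contained sketch of that fan-out (the column values are illustrative; the real layout comes from benchmarks.Util):

    from copy import deepcopy

    def insert_measures(data, job, insert):
        # one row per measure, mirroring the crps/time/brier blocks above
        for measure, key in [('crps', 'CRPS'), ('time', 'time'), ('brier', 'brier')]:
            row = deepcopy(data)
            row.extend([measure, job[key]])
            insert(row)  # stands in for bUtil.insert_benchmark(row, conn)

    insert_measures(['TAIEX', 'comparisons', 'density'],
                    {'CRPS': 0.042, 'time': 1.3, 'brier': 0.11},
                    print)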

View File: pyFTS/benchmarks/knn.py

@@ -6,6 +6,7 @@ from statsmodels.tsa.tsatools import lagmat
 from pyFTS.common import fts
 from pyFTS.probabilistic import ProbabilityDistribution

 class KNearestNeighbors(fts.FTS):
     """
+    K-Nearest Neighbors
@@ -13,6 +14,7 @@ class KNearestNeighbors(fts.FTS):
     def __init__(self, name, **kwargs):
         super(KNearestNeighbors, self).__init__(1, "kNN"+name)
         self.name = "kNN"
+        self.shortname = "kNN"
         self.detail = "K-Nearest Neighbors"
         self.is_high_order = True
         self.has_point_forecasting = True
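The explicit shortname matters because the run_* functions in benchmarks.py build their result keys from it (e.g. _key = mfts.shortname + ...), so kNN rows are labeled consistently in benchmarks.db:

    model = knn.KNearestNeighbors("")
    print(model.shortname)  # "kNN"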

View File: pyFTS/benchmarks/quantreg.py

@@ -8,6 +8,7 @@ from statsmodels.tsa.tsatools import lagmat
 from pyFTS.common import SortedCollection, fts
 from pyFTS.probabilistic import ProbabilityDistribution

+
 class QuantileRegression(fts.FTS):
     """Façade for statsmodels.regression.quantile_regression"""
     def __init__(self, name, **kwargs):
@@ -26,10 +27,11 @@ class QuantileRegression(fts.FTS):
         self.mean_qt = None
         self.lower_qt = None
         self.dist_qt = None
+        self.order = kwargs.get('order', 1)
         self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"

     def train(self, data, **kwargs):
-        if kwargs.get('order', None) is not None:
+        if 'order' in kwargs:
             self.order = kwargs.get('order', 1)

         if self.indexer is not None and isinstance(data, pd.DataFrame):
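Setting self.order in __init__ fixes the shortname, which is built on the very next line and previously could not reflect an order passed to the constructor (order was only picked up later, in train()). A usage sketch; alpha is passed explicitly since its default is outside this diff:

    model = quantreg.QuantileRegression("", order=2, alpha=0.25)
    print(model.shortname)  # "QAR(2,0.25)" — the requested order, not a stale default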

View File: pyFTS/models/ensemble/ensemble.py

@@ -246,9 +246,10 @@ class EnsembleFTS(fts.FTS):

 class AllMethodEnsembleFTS(EnsembleFTS):
-    def __init__(self, **kwargs):
-        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS", **kwargs)
+    def __init__(self, name, **kwargs):
+        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS"+name, **kwargs)
         self.min_order = 3
+        self.shortname = "Ensemble FTS"

     def set_transformations(self, model):
         for t in self.transformations:
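Note the small breaking change: AllMethodEnsembleFTS now takes a required positional name, matching the other model constructors so the benchmark loop can instantiate every method uniformly as model("", **par). Existing call sites need a small fix:

    # before: model = ensemble.AllMethodEnsembleFTS(partitioner=partitioner)
    model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)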

View File

@@ -20,15 +20,17 @@ partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformati
 '''

 from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima

+'''
 from pyFTS.models import pwfts, song, ifts
 from pyFTS.models.ensemble import ensemble

-model = ensemble.AllMethodEnsembleFTS(partitioner=partitioner)
+'''
+model = knn.KNearestNeighbors("")

 model.fit(dataset[:800])

-tmp = model.predict(dataset[800:1000], type='distribution')
-for tmp2 in tmp:
-    print(tmp2)
+Measures.get_distribution_statistics(dataset[800:1000], model)
+
+#tmp = model.predict(dataset[800:1000], type='distribution')
+#for tmp2 in tmp:
+#    print(tmp2)

 '''
@@ -49,11 +51,12 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
 from pyFTS.benchmarks import arima, naive, quantreg

-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
-                                #methods=[ifts.IntervalFTS], #[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False,
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
+                                benchmark_models=[],
                                 benchmark_methods=[arima.ARIMA for k in range(4)]
-                                    + [quantreg.QuantileRegression for k in range(2)],
+                                    + [quantreg.QuantileRegression for k in range(2)]
+                                    + [knn.KNearestNeighbors],
                                 benchmark_methods_parameters=[
                                     {'order': (1, 0, 0)},
                                     {'order': (1, 0, 1)},
@@ -61,14 +64,15 @@ bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                     {'order': (2, 0, 2)},
                                     {'order': 1, 'dist': True},
                                     {'order': 2, 'dist': True},
+                                    {}
                                 ],
-                                #transformations=[None, tdiff],
-                                orders=[1, 2, 3],
+                                #transformations=[tdiff],
+                                orders=[1],
                                 partitions=np.arange(30, 80, 5),
                                 progress=False, type='distribution',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="comparisons")
+                                #distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")

 #'''