Improvements on benchmarks
commit 654ddec218 (parent ecfc9db862)
@@ -297,16 +297,17 @@ def get_point_statistics(data, model, **kwargs):
         ret.append(np.round(smape(ndata, nforecasts), 2))
         ret.append(np.round(UStatistic(ndata, nforecasts), 2))
     else:
+        steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1)
         nforecasts = []
-        for k in np.arange(model.order, len(ndata)-steps_ahead):
+        for k in np.arange(model.order, len(ndata)-steps_ahead, steps_ahead_sampler):
             sample = ndata[k - model.order: k]
             tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
             nforecasts.append(tmp[-1])
 
         start = model.order + steps_ahead
-        ret.append(np.round(rmse(ndata[start:], nforecasts), 2))
-        ret.append(np.round(smape(ndata[start:], nforecasts), 2))
-        ret.append(np.round(UStatistic(ndata[start:], nforecasts), 2))
+        ret.append(np.round(rmse(ndata[start::steps_ahead_sampler], nforecasts), 2))
+        ret.append(np.round(smape(ndata[start::steps_ahead_sampler], nforecasts), 2))
+        ret.append(np.round(UStatistic(ndata[start::steps_ahead_sampler], nforecasts), 2))
 
     return ret
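The steps_ahead_sampler keyword trades precision of the error estimate for speed: forecasts are produced from every n-th origin only, and the target series is strided by the same step so the metric functions receive equal-length arrays. A minimal standalone sketch of that alignment (illustrative values, not pyFTS code):

import numpy as np

order, steps_ahead, sampler = 2, 3, 4
ndata = np.arange(30)

# forecast origins: every sampler-th index from which a sample is taken
origins = np.arange(order, len(ndata) - steps_ahead, sampler)

# targets: the observed values the steps-ahead forecasts are scored against
start = order + steps_ahead
targets = ndata[start::sampler]

# one target per origin, so rmse()/smape() receive equal-length arrays
assert len(origins) == len(targets)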
@@ -371,16 +372,17 @@ def get_distribution_statistics(data, model, **kwargs):
         ret.append(round(crps(data, forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
     else:
+        skip = kwargs.get('steps_ahead_sampler', 1)
         forecasts = []
         _s1 = time.time()
-        for k in np.arange(model.order, len(data) - steps_ahead):
+        for k in np.arange(model.order, len(data) - steps_ahead, skip):
             sample = data[k - model.order: k]
             tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
             forecasts.append(tmp[-1])
         _e1 = time.time()
 
         start = model.order + steps_ahead
-        ret.append(round(crps(data[start:], forecasts), 3))
+        ret.append(round(crps(data[start::skip], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
     return ret
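Assuming these statistics functions live in pyFTS.benchmarks.Measures (the module name is not shown in this diff) and that model is an already-trained probabilistic FTS model, a hypothetical call exercising the new sampler:

from pyFTS.benchmarks import Measures

# hypothetical usage: score 10-steps-ahead distribution forecasts, but only
# from every 10th forecasting origin, to cut the evaluation time
crps_value, elapsed = Measures.get_distribution_statistics(
    test_data, model, steps_ahead=10, steps_ahead_sampler=10)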
@@ -45,10 +45,47 @@ def find_best(dataframe, criteria, ascending):
 
     return ret
 
 
+def analytic_tabular_dataframe(dataframe):
+    experiments = len(dataframe.columns) - len(base_dataframe_columns()) - 1
+    models = dataframe.Model.unique()
+    orders = dataframe.Order.unique()
+    schemes = dataframe.Scheme.unique()
+    partitions = dataframe.Partitions.unique()
+    steps = dataframe.Steps.unique()
+    measures = dataframe.Measure.unique()
+    data_columns = analytical_data_columns(experiments)
+
+    ret = []
+
+    for m in models:
+        for o in orders:
+            for s in schemes:
+                for p in partitions:
+                    for st in steps:
+                        for ms in measures:
+                            df = dataframe[(dataframe.Model == m) & (dataframe.Order == o)
+                                           & (dataframe.Scheme == s) & (dataframe.Partitions == p)
+                                           & (dataframe.Steps == st) & (dataframe.Measure == ms)]
+
+                            if not df.empty:
+                                for col in data_columns:
+                                    mod = [m, o, s, p, st, ms, df[col].values[0]]
+                                    ret.append(mod)
+
+    dat = pd.DataFrame(ret, columns=tabular_dataframe_columns())
+    return dat
+
+
+def tabular_dataframe_columns():
+    return ["Model", "Order", "Scheme", "Partitions", "Steps", "Measure", "Value"]
+
+
+def base_dataframe_columns():
+    return ["Model", "Order", "Scheme", "Partitions", "Size", "Steps", "Method"]
+
 
 def point_dataframe_synthetic_columns():
-    return ["Model", "Order", "Scheme", "Partitions", "Size", "Steps", "Method", "RMSEAVG", "RMSESTD",
-            "SMAPEAVG", "SMAPESTD", "UAVG","USTD", "TIMEAVG", "TIMESTD"]
+    return base_dataframe_columns() + ["RMSEAVG", "RMSESTD",
+            "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG", "TIMESTD"]
 
 
 def point_dataframe_analytic_columns(experiments):
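analytic_tabular_dataframe unpivots the analytic results, which hold one column per experiment, into a long Model/Order/Scheme/Partitions/Steps/Measure/Value layout. A self-contained sketch of the same reshaping on a toy frame, using pandas.melt instead of the nested loops above (the string experiment column names '0' and '1' are an assumption about analytical_data_columns):

import pandas as pd

# toy analytic results: two experiment columns ('0' and '1') for one model
df = pd.DataFrame({
    'Model': ['PWFTS'], 'Order': [1], 'Scheme': ['Grid'], 'Partitions': [30],
    'Steps': [1], 'Measure': ['rmse'], '0': [12.3], '1': [11.8],
})

# melt the experiment columns into rows, matching tabular_dataframe_columns()
long = df.melt(id_vars=['Model', 'Order', 'Scheme', 'Partitions', 'Steps', 'Measure'],
               value_vars=['0', '1'], value_name='Value').drop(columns='variable')
print(long)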
@@ -50,30 +50,43 @@ def __pop(key, default, kwargs):
 
 def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     """
-    Sliding window benchmarks for FTS point forecasters
+    Sliding window benchmarks for FTS forecasters.
+
+    For each data window, the data is split into a train and a test dataset. For each train split, a
+    partitioner model is created for each number of partitions and partitioning method, and for each
+    partitioner, order, steps ahead and FTS method a forecasting model is trained.
+
+    All trained models are then benchmarked on the test data and the metrics are stored in a dataframe
+    for later analysis.
+
+    The number of experiments is determined by windowsize and inc.
 
     :param data: test data
     :param windowsize: size of sliding window
-    :param train: percentual of sliding window data used to train the models
+    :param train: percentage of the sliding window data used to train the models
     :param kwargs: dict, optional arguments
 
     :keyword
-    models: FTS point forecasters
-    partitioners: Universe of Discourse partitioner
-    partitions: the max number of partitions on the Universe of Discourse
-    max_order: the max order of the models (for high order models)
-    type: the forecasting type, one of these values: point(default), interval or distribution.
-    steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
-    start: in the multi step forecasting, the index of the data where to start forecasting
-    transformation: data transformation
-    indexer: seasonal indexer
-    progress: If true a progress bar will be displayed during the benchmarks
-    distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
-    nodes: a list with the dispy cluster nodes addresses
-    benchmark_methods: Non FTS models to benchmark
-    benchmark_methods_parameters: Non FTS models parameters
-    save: save results
-    file: file path to save the results
-    sintetic: if true only the average and standard deviation of the results
+    inc: a float in the interval [0,1] indicating the percentage of the windowsize to move the window
+    models: a list with prebuilt FTS objects. The default is None.
+    methods: a list with FTS class names. The default depends on the forecasting type and contains all FTS methods.
+    partitioners_models: a list with prebuilt Universe of Discourse partitioner objects. The default is None.
+    partitioners_methods: a list with Universe of Discourse partitioner class names. The default is [partitioners.Grid.GridPartitioner].
+    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
+    orders: a list with the orders of the models (for high order models). The default is [1,2,3].
+    type: the forecasting type, one of these values: point (default), interval or distribution.
+    steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is [1].
+    start: in multi step forecasting, the index of the data where forecasting starts. The default is 0.
+    transformation: data transformation. The default is None.
+    indexer: seasonal indexer. The default is None.
+    progress: if true, a progress bar is displayed during the benchmarks. The default is False.
+    distributed: a boolean indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
+    nodes: a list with the dispy cluster node addresses. The default is ['127.0.0.1'].
+    benchmark_methods: a list with non-FTS models to benchmark. The default is None.
+    benchmark_methods_parameters: a list with non-FTS model parameters. The default is None.
+    save: save results. The default is False.
+    file: file path to save the results. The default is None.
+    sintetic: if true, only the average and standard deviation of the results are returned. The default is False.
 
     :return: DataFrame with the benchmark results
     """
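A hypothetical minimal invocation under the rewritten keyword set; the values are illustrative, and the call mirrors the test script later in this commit:

from pyFTS.data import TAIEX
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import pwfts

dataset = TAIEX.get_data()

# point forecasts on 1000-point windows moved by 20% of the window,
# evaluated locally and saved to a CSV for later analysis
bchmk.sliding_window_benchmarks(dataset[:3000], 1000, train=0.8, inc=0.2,
                                methods=[pwfts.ProbabilisticWeightedFTS],
                                orders=[1, 2], partitions=[30, 50],
                                type='point', progress=True,
                                save=True, file='benchmarks.csv')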
@@ -235,7 +248,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
                 if job.status == dispy.DispyJob.Finished and job is not None:
                     tmp = job()
                     jobs2.append(tmp)
-                    print(tmp)
                 else:
                     print("status",job.status)
                     print("result",job.result)
@@ -249,8 +261,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     file = kwargs.get('file', None)
     sintetic = kwargs.get('sintetic', False)
 
-    print(jobs)
-
     return synthesis_method(jobs, experiments, save, file, sintetic)
 
 
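The job-collection loop above follows dispy's usual submit/collect pattern; a minimal standalone sketch with a toy compute function and a localhost node (not pyFTS code):

import dispy

def compute(n):
    return n * n

cluster = dispy.JobCluster(compute, nodes=['127.0.0.1'])
jobs = [cluster.submit(i) for i in range(4)]

results = []
for job in jobs:
    result = job()  # blocks until the job finishes and returns its result
    if job.status == dispy.DispyJob.Finished:
        results.append(result)
    else:
        print("status", job.status)
cluster.close()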
@@ -15,17 +15,22 @@ from pyFTS.data import TAIEX
 
 dataset = TAIEX.get_data()
 
-from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
 
 from pyFTS.models import pwfts
 
-#'''
-bchmk.sliding_window_benchmarks(dataset[:2000], 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False, orders=[1,2,3], partitions=[30,50,70], #np.arange(10,100,2),
-                                progress=False, type='distribution', steps_ahead=[1,4,7,10],
+'''
+bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
+                                benchmark_models=False, orders=[1,2,3], partitions=np.arange(10,100,5),
+                                progress=False, type='point',
+                                #steps_ahead=[1,4,7,10], steps_ahead_sampler=10,
                                 distributed=True, nodes=['192.168.0.102','192.168.0.106','192.168.0.110'],
-                                save=True, file="pwfts_taiex_distribution.csv")
-#'''
+                                save=True, file="pwfts_taiex_partitioning.csv")
+'''
+
+dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
+print(bUtil.analytic_tabular_dataframe(dat))
+#print(dat["Size"].values[0])
 
 '''
 train_split = 2000
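Once in tabular form the results are plain pandas; for example (hypothetical, assuming the CSV produced by the commented-out benchmark run above exists):

import pandas as pd
from pyFTS.benchmarks import Util as bUtil

dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
tab = bUtil.analytic_tabular_dataframe(dat)

# average value of each measure per number of partitions, across experiments
print(tab.groupby(['Measure', 'Partitions']).Value.mean())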