Improvements on benchmarks

Petrônio Cândido 2018-04-13 15:23:54 -03:00
parent ff23d874bc
commit 3db6e69119
10 changed files with 268 additions and 210 deletions

img/logo_medium.png         new binary file (29 KiB, not shown)
img/minds_logo.jpeg         new binary file (39 KiB, not shown)
img/minds_logo_medium.jpeg  new binary file (15 KiB, not shown)

View File

@@ -271,6 +271,8 @@ def crps(targets, densities):
 def get_point_statistics(data, model, **kwargs):
     """Condensate all measures for point forecasters"""
 
+    steps_ahead = kwargs.get('steps_ahead',1)
+
     indexer = kwargs.get('indexer', None)
 
     if indexer is not None:
@@ -278,46 +280,89 @@ def get_point_statistics(data, model, **kwargs):
     else:
         ndata = np.array(data[model.order:])
 
-    forecasts = model.predict(data, **kwargs)
-    try:
-        if model.has_seasonality:
-            nforecasts = np.array(forecasts)
-        else:
-            nforecasts = np.array(forecasts[:-1])
-    except Exception as ex:
-        print(ex)
-        return [np.nan,np.nan,np.nan]
-
-    ret = list()
-
-    ret.append(np.round(rmse(ndata, nforecasts), 2))
-    ret.append(np.round(smape(ndata, nforecasts), 2))
-    ret.append(np.round(UStatistic(ndata, nforecasts), 2))
+    ret = list()
+
+    if steps_ahead == 1:
+        forecasts = model.forecast(data, **kwargs)
+        if model.has_seasonality:
+            nforecasts = np.array(forecasts)
+        else:
+            nforecasts = np.array(forecasts[:-1])
+        ret.append(np.round(rmse(ndata, nforecasts), 2))
+        ret.append(np.round(smape(ndata, nforecasts), 2))
+        ret.append(np.round(UStatistic(ndata, nforecasts), 2))
+    else:
+        nforecasts = []
+        for k in np.arange(model.order, len(ndata)-steps_ahead):
+            sample = ndata[k - model.order: k]
+            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            nforecasts.append(tmp[-1])
+
+        start = model.order + steps_ahead
+        ret.append(np.round(rmse(ndata[start:], nforecasts), 2))
+        ret.append(np.round(smape(ndata[start:], nforecasts), 2))
+        ret.append(np.round(UStatistic(ndata[start:], nforecasts), 2))
 
     return ret
 
 
-def get_interval_statistics(original, model, **kwargs):
+def get_interval_statistics(data, model, **kwargs):
     """Condensate all measures for point_to_interval forecasters"""
 
+    steps_ahead = kwargs.get('steps_ahead', 1)
+
     ret = list()
 
-    forecasts = model.predict(original, **kwargs)
-    ret.append(round(sharpness(forecasts), 2))
-    ret.append(round(resolution(forecasts), 2))
-    ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.05, original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.25, original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.75, original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.95, original[model.order:], forecasts[:-1]), 2))
+    if steps_ahead == 1:
+        forecasts = model.forecast_interval(data, **kwargs)
+        ret.append(round(sharpness(forecasts), 2))
+        ret.append(round(resolution(forecasts), 2))
+        ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.05, data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2))
+    else:
+        forecasts = []
+        for k in np.arange(model.order, len(data) - steps_ahead):
+            sample = data[k - model.order: k]
+            tmp = model.forecast_ahead_interval(sample, steps_ahead, **kwargs)
+            forecasts.append(tmp[-1])
+
+        start = model.order + steps_ahead
+        ret.append(round(sharpness(forecasts), 2))
+        ret.append(round(resolution(forecasts), 2))
+        ret.append(round(coverage(data[model.order:], forecasts), 2))
+        ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
+        ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
+        ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
+        ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2))
 
     return ret
 
 
-def get_distribution_statistics(original, model, **kwargs):
+def get_distribution_statistics(data, model, **kwargs):
+    steps_ahead = kwargs.get('steps_ahead', 1)
+
     ret = list()
-    _s1 = time.time()
-    densities1 = model.predict(original, **kwargs)
-    _e1 = time.time()
-    ret.append(round(crps(original, densities1), 3))
-    ret.append(round(_e1 - _s1, 3))
+
+    if steps_ahead == 1:
+        _s1 = time.time()
+        forecasts = model.forecast_distribution(data, **kwargs)
+        _e1 = time.time()
+        ret.append(round(crps(data, forecasts), 3))
+        ret.append(round(_e1 - _s1, 3))
+    else:
+        forecasts = []
+        _s1 = time.time()
+        for k in np.arange(model.order, len(data) - steps_ahead):
+            sample = data[k - model.order: k]
+            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            forecasts.append(tmp[-1])
+        _e1 = time.time()
+
+        start = model.order + steps_ahead
+        ret.append(round(crps(data[start:], forecasts), 3))
+        ret.append(round(_e1 - _s1, 3))
+
     return ret
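
For reference, a minimal sketch of how the new steps_ahead path in get_point_statistics could be exercised. The stub model and toy series below are assumptions made only for illustration; the measure signature itself comes from this diff:

import numpy as np
from pyFTS.benchmarks import Measures

class NaiveModel:
    """Stand-in exposing just the attributes the measure functions touch."""
    order = 1
    has_seasonality = False

    def forecast(self, data, **kwargs):
        # persistence: the forecast at each step is the current observation
        return list(data)

    def forecast_ahead(self, sample, steps, **kwargs):
        # flat multi-step forecast: repeat the last value of the sample
        return [sample[-1]] * steps

data = list(10 + np.sin(np.arange(0, 20, 0.1)))

print(Measures.get_point_statistics(data, NaiveModel()))                 # one-step: [RMSE, SMAPE, U]
print(Measures.get_point_statistics(data, NaiveModel(), steps_ahead=4))  # new rolling 4-steps-ahead path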

View File

@@ -15,7 +15,7 @@ from copy import deepcopy
 from pyFTS.common import Util
 
 
-def extract_measure(dataframe,measure,data_columns):
+def extract_measure(dataframe, measure, data_columns):
     if not dataframe.empty:
         df = dataframe[(dataframe.Measure == measure)][data_columns]
         tmp = df.to_dict(orient="records")[0]
@@ -92,12 +92,12 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
                 mod.append(mfts.partitioner.name)
                 mod.append(mfts.partitioner.partitions)
                 mod.append(len(mfts))
-                mod.append(steps)
-                mod.append(method)
             else:
                 mod.append('-')
                 mod.append('-')
                 mod.append('-')
+            mod.append(steps[k])
+            mod.append(method[k])
             mod.append(np.round(np.nanmean(rmse[k]), 2))
             mod.append(np.round(np.nanstd(rmse[k]), 2))
             mod.append(np.round(np.nanmean(smape[k]), 2))
@@ -126,17 +126,18 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
                 s = '-'
                 p = '-'
                 l = '-'
-            print([n, o, s, p, l, steps, method])
-            tmp = [n, o, s, p, l, steps, method, 'RMSE']
+            st = steps[k]
+            mt = method[k]
+            tmp = [n, o, s, p, l, st, mt, 'RMSE']
             tmp.extend(rmse[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'SMAPE']
+            tmp = [n, o, s, p, l, st, mt, 'SMAPE']
             tmp.extend(smape[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'U']
+            tmp = [n, o, s, p, l, st, mt, 'U']
             tmp.extend(u[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'TIME']
+            tmp = [n, o, s, p, l, st, mt, 'TIME']
             tmp.extend(times[k])
             ret.append(deepcopy(tmp))
         except Exception as ex:
@@ -154,13 +155,30 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
     print(ret)
 
 
-def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
-    columns = point_dataframe_analytic_columns(experiments)
+def cast_dataframe_to_synthetic(infile, outfile, experiments, type):
+    if type == 'point':
+        analytic_columns = point_dataframe_analytic_columns
+        synthetic_columns = point_dataframe_synthetic_columns
+        synthetize_measures = cast_dataframe_to_synthetic_point
+    elif type == 'interval':
+        analytic_columns = interval_dataframe_analytic_columns
+        synthetic_columns = interval_dataframe_synthetic_columns
+        synthetize_measures = cast_dataframe_to_synthetic_interval
+    elif type == 'distribution':
+        analytic_columns = probabilistic_dataframe_analytic_columns
+        synthetic_columns = probabilistic_dataframe_synthetic_columns
+        synthetize_measures = cast_dataframe_to_synthetic_probabilistic
+    else:
+        raise ValueError("Type parameter has an unknown value!")
+
+    columns = analytic_columns(experiments)
+
     dat = pd.read_csv(infile, sep=";", usecols=columns)
     models = dat.Model.unique()
     orders = dat.Order.unique()
     schemes = dat.Scheme.unique()
     partitions = dat.Partitions.unique()
+    steps = dat.Steps.unique()
+    methods = dat.Method.unique()
     data_columns = analytical_data_columns(experiments)
@@ -170,39 +188,48 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
         for o in orders:
             for s in schemes:
                 for p in partitions:
-                    mod = []
-                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
-                    if not df.empty:
-                        rmse = extract_measure(df, 'RMSE', data_columns)
-                        smape = extract_measure(df, 'SMAPE', data_columns)
-                        u = extract_measure(df, 'U', data_columns)
-                        times = extract_measure(df, 'TIME', data_columns)
-                        mod.append(m)
-                        mod.append(o)
-                        mod.append(s)
-                        mod.append(p)
-                        mod.append(extract_measure(df, 'RMSE', ['Size'])[0])
-                        mod.append(np.round(np.nanmean(rmse), 2))
-                        mod.append(np.round(np.nanstd(rmse), 2))
-                        mod.append(np.round(np.nanmean(smape), 2))
-                        mod.append(np.round(np.nanstd(smape), 2))
-                        mod.append(np.round(np.nanmean(u), 2))
-                        mod.append(np.round(np.nanstd(u), 2))
-                        mod.append(np.round(np.nanmean(times), 4))
-                        mod.append(np.round(np.nanstd(times), 4))
-                        ret.append(mod)
-
-    dat = pd.DataFrame(ret, columns=point_dataframe_synthetic_columns())
+                    for st in steps:
+                        for mt in methods:
+                            df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) &
+                                     (dat.Partitions == p) & (dat.Steps == st) & (dat.Method == mt)]
+                            if not df.empty:
+                                mod = synthetize_measures(df, data_columns)
+                                mod.insert(0, m)
+                                mod.insert(1, o)
+                                mod.insert(2, s)
+                                mod.insert(3, p)
+                                mod.insert(4, df.iat[0,5])
+                                mod.insert(5, st)
+                                mod.insert(6, mt)
+                                ret.append(mod)
+
+    dat = pd.DataFrame(ret, columns=synthetic_columns())
     dat.to_csv(outfile, sep=";", index=False)
 
 
+def cast_dataframe_to_synthetic_point(df, data_columns):
+    ret = []
+    rmse = extract_measure(df, 'RMSE', data_columns)
+    smape = extract_measure(df, 'SMAPE', data_columns)
+    u = extract_measure(df, 'U', data_columns)
+    times = extract_measure(df, 'TIME', data_columns)
+    ret.append(np.round(np.nanmean(rmse), 2))
+    ret.append(np.round(np.nanstd(rmse), 2))
+    ret.append(np.round(np.nanmean(smape), 2))
+    ret.append(np.round(np.nanstd(smape), 2))
+    ret.append(np.round(np.nanmean(u), 2))
+    ret.append(np.round(np.nanstd(u), 2))
+    ret.append(np.round(np.nanmean(times), 4))
+    ret.append(np.round(np.nanstd(times), 4))
+    return ret
+
+
 def analytical_data_columns(experiments):
     data_columns = [str(k) for k in np.arange(0, experiments)]
     return data_columns
 
 
 def scale_params(data):
     vmin = np.nanmin(data)
     vlen = np.nanmax(data) - vmin
@@ -215,12 +242,10 @@ def scale(data, params):
     return ndata
 
 
 def stats(measure, data):
     print(measure, np.nanmean(data), np.nanstd(data))
 
 
 def unified_scaled_point(experiments, tam, save=False, file=None,
                          sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                          sort_ascend=[1, 1, 1, 1],save_best=False,
@@ -330,7 +355,6 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
     Util.show_and_save_image(fig, file, save)
 
 
 def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
                          sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                          sort_ascend=[1, 1, 1, 1],save_best=False,
@@ -419,14 +443,12 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
                 mod.append(mfts.partitioner.name)
                 mod.append(mfts.partitioner.partitions)
                 mod.append(l)
-                mod.append(steps)
-                mod.append(method)
             else:
                 mod.append('-')
                 mod.append('-')
                 mod.append('-')
-            mod.append(steps)
-            mod.append(method)
+            mod.append(steps[k])
+            mod.append(method[k])
             mod.append(round(np.nanmean(sharpness[k]), 2))
             mod.append(round(np.nanstd(sharpness[k]), 2))
             mod.append(round(np.nanmean(resolution[k]), 2))
@@ -461,29 +483,30 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
                 s = '-'
                 p = '-'
                 l = '-'
 
-            tmp = [n, o, s, p, l, steps, method, 'Sharpness']
+            st = steps[k]
+            mt = method[k]
+
+            tmp = [n, o, s, p, l, st, mt, 'Sharpness']
             tmp.extend(sharpness[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Resolution']
+            tmp = [n, o, s, p, l, st, mt, 'Resolution']
             tmp.extend(resolution[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Coverage']
+            tmp = [n, o, s, p, l, st, mt, 'Coverage']
             tmp.extend(coverage[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'TIME']
+            tmp = [n, o, s, p, l, st, mt, 'TIME']
             tmp.extend(times[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q05']
+            tmp = [n, o, s, p, l, st, mt, 'Q05']
             tmp.extend(q05[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q25']
+            tmp = [n, o, s, p, l, st, mt, 'Q25']
             tmp.extend(q25[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q75']
+            tmp = [n, o, s, p, l, st, mt, 'Q75']
             tmp.extend(q75[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q95']
+            tmp = [n, o, s, p, l, st, mt, 'Q95']
             tmp.extend(q95[k])
             ret.append(deepcopy(tmp))
         except Exception as ex:
@@ -515,57 +538,34 @@ def interval_dataframe_synthetic_columns():
     return columns
 
 
-def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
-    columns = interval_dataframe_analytic_columns(experiments)
-    dat = pd.read_csv(infile, sep=";", usecols=columns)
-    models = dat.Model.unique()
-    orders = dat.Order.unique()
-    schemes = dat.Scheme.unique()
-    partitions = dat.Partitions.unique()
-
-    data_columns = analytical_data_columns(experiments)
-
+def cast_dataframe_to_synthetic_interval(df, data_columns):
+    sharpness = extract_measure(df, 'Sharpness', data_columns)
+    resolution = extract_measure(df, 'Resolution', data_columns)
+    coverage = extract_measure(df, 'Coverage', data_columns)
+    times = extract_measure(df, 'TIME', data_columns)
+    q05 = extract_measure(df, 'Q05', data_columns)
+    q25 = extract_measure(df, 'Q25', data_columns)
+    q75 = extract_measure(df, 'Q75', data_columns)
+    q95 = extract_measure(df, 'Q95', data_columns)
     ret = []
-
-    for m in models:
-        for o in orders:
-            for s in schemes:
-                for p in partitions:
-                    mod = []
-                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
-                    if not df.empty:
-                        sharpness = extract_measure(df, 'Sharpness', data_columns)
-                        resolution = extract_measure(df, 'Resolution', data_columns)
-                        coverage = extract_measure(df, 'Coverage', data_columns)
-                        times = extract_measure(df, 'TIME', data_columns)
-                        q05 = extract_measure(df, 'Q05', data_columns)
-                        q25 = extract_measure(df, 'Q25', data_columns)
-                        q75 = extract_measure(df, 'Q75', data_columns)
-                        q95 = extract_measure(df, 'Q95', data_columns)
-                        mod.append(m)
-                        mod.append(o)
-                        mod.append(s)
-                        mod.append(p)
-                        mod.append(np.round(np.nanmean(sharpness), 2))
-                        mod.append(np.round(np.nanstd(sharpness), 2))
-                        mod.append(np.round(np.nanmean(resolution), 2))
-                        mod.append(np.round(np.nanstd(resolution), 2))
-                        mod.append(np.round(np.nanmean(coverage), 2))
-                        mod.append(np.round(np.nanstd(coverage), 2))
-                        mod.append(np.round(np.nanmean(times), 4))
-                        mod.append(np.round(np.nanstd(times), 4))
-                        mod.append(np.round(np.nanmean(q05), 4))
-                        mod.append(np.round(np.nanstd(q05), 4))
-                        mod.append(np.round(np.nanmean(q25), 4))
-                        mod.append(np.round(np.nanstd(q25), 4))
-                        mod.append(np.round(np.nanmean(q75), 4))
-                        mod.append(np.round(np.nanstd(q75), 4))
-                        mod.append(np.round(np.nanmean(q95), 4))
-                        mod.append(np.round(np.nanstd(q95), 4))
-                        ret.append(mod)
-
-    dat = pd.DataFrame(ret, columns=interval_dataframe_synthetic_columns())
-    dat.to_csv(outfile, sep=";", index=False)
+    ret.append(np.round(np.nanmean(sharpness), 2))
+    ret.append(np.round(np.nanstd(sharpness), 2))
+    ret.append(np.round(np.nanmean(resolution), 2))
+    ret.append(np.round(np.nanstd(resolution), 2))
+    ret.append(np.round(np.nanmean(coverage), 2))
+    ret.append(np.round(np.nanstd(coverage), 2))
+    ret.append(np.round(np.nanmean(times), 4))
+    ret.append(np.round(np.nanstd(times), 4))
+    ret.append(np.round(np.nanmean(q05), 4))
+    ret.append(np.round(np.nanstd(q05), 4))
+    ret.append(np.round(np.nanmean(q25), 4))
+    ret.append(np.round(np.nanstd(q25), 4))
+    ret.append(np.round(np.nanmean(q75), 4))
+    ret.append(np.round(np.nanstd(q75), 4))
+    ret.append(np.round(np.nanmean(q95), 4))
+    ret.append(np.round(np.nanstd(q95), 4))
+    return ret
@@ -905,17 +905,14 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn
                 mod.append(mfts.partitioner.name)
                 mod.append(mfts.partitioner.partitions)
                 mod.append(len(mfts))
-                mod.append(steps)
-                mod.append(method)
             else:
                 mod.append('-')
                 mod.append('-')
                 mod.append('-')
-            mod.append(steps)
-            mod.append(method)
+            mod.append(steps[k])
+            mod.append(method[k])
             mod.append(np.round(np.nanmean(crps[k]), 2))
             mod.append(np.round(np.nanstd(crps[k]), 2))
-            mod.append(l)
             mod.append(np.round(np.nanmean(times[k]), 4))
             mod.append(np.round(np.nanstd(times[k]), 4))
             ret.append(mod)
@@ -940,10 +937,12 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn
                 s = '-'
                 p = '-'
                 l = '-'
-            tmp = [n, o, s, p, l, steps, method, 'CRPS']
+            st = steps[k]
+            mt = method[k]
+            tmp = [n, o, s, p, l, st, mt, 'CRPS']
             tmp.extend(crps[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'TIME']
+            tmp = [n, o, s, p, l, st, mt, 'TIME']
             tmp.extend(times[k])
             ret.append(deepcopy(tmp))
         except Exception as ex:
@@ -974,40 +973,15 @@ def probabilistic_dataframe_synthetic_columns():
     return columns
 
 
-def cast_dataframe_to_synthetic_probabilistic(infile, outfile, experiments):
-    columns = probabilistic_dataframe_analytic_columns(experiments)
-    dat = pd.read_csv(infile, sep=";", usecols=columns)
-    models = dat.Model.unique()
-    orders = dat.Order.unique()
-    schemes = dat.Scheme.unique()
-    partitions = dat.Partitions.unique()
-
-    data_columns = analytical_data_columns(experiments)
-
+def cast_dataframe_to_synthetic_probabilistic(df, data_columns):
+    crps1 = extract_measure(df, 'CRPS', data_columns)
+    times1 = extract_measure(df, 'TIME', data_columns)
     ret = []
-
-    for m in models:
-        for o in orders:
-            for s in schemes:
-                for p in partitions:
-                    mod = []
-                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
-                    if not df.empty:
-                        crps1 = extract_measure(df, 'CRPS', data_columns)
-                        times1 = extract_measure(df, 'TIME', data_columns)
-                        mod.append(m)
-                        mod.append(o)
-                        mod.append(s)
-                        mod.append(p)
-                        mod.append(np.round(np.nanmean(crps1), 2))
-                        mod.append(np.round(np.nanstd(crps1), 2))
-                        mod.append(np.round(np.nanmean(times1), 2))
-                        mod.append(np.round(np.nanstd(times1), 2))
-                        ret.append(mod)
-
-    dat = pd.DataFrame(ret, columns=probabilistic_dataframe_synthetic_columns())
-    dat.to_csv(outfile, sep=";", index=False)
+    ret.append(np.round(np.nanmean(crps1), 2))
+    ret.append(np.round(np.nanstd(crps1), 2))
+    ret.append(np.round(np.nanmean(times1), 2))
+    ret.append(np.round(np.nanstd(times1), 2))
+    return ret
 
 
 def unified_scaled_probabilistic(experiments, tam, save=False, file=None,
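
Assuming an analytic CSV produced by an earlier benchmark run (the file name and experiment count below are placeholders), the new dispatcher would then be called once per result type:

from pyFTS.benchmarks import Util as bUtil

# 'point.csv' is assumed to come from a previous sliding_window_benchmarks run
# saved with file='point.csv' over 5 experiment windows.
bUtil.cast_dataframe_to_synthetic(infile='point.csv', outfile='point_synthetic.csv',
                                  experiments=5, type='point')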

View File

@@ -81,6 +81,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
     partitions = __pop("partitions", [10], kwargs)
 
+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+
     methods = __pop('methods', None, kwargs)
     models = __pop('models', None, kwargs)
@@ -178,27 +180,34 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
             else:
                 partitioners_pool = partitioners_models
 
-    rng1 = partitioners_pool
+    rng1 = steps_ahead
 
     if progress:
-        rng1 = tqdm(partitioners_pool, desc="Partitioners")
+        rng1 = tqdm(steps_ahead, desc="Steps")
 
-    for partitioner in rng1:
+    for step in rng1:
+        rng2 = partitioners_pool
 
-        rng2 = enumerate(pool,start=0)
+        if progress:
+            rng2 = tqdm(partitioners_pool, desc="Partitioners")
 
-        if progress:
-            rng2 = enumerate(tqdm(pool, desc="Models"),start=0)
+        for partitioner in rng2:
 
-        for _id, model in rng2:
+            rng3 = enumerate(pool,start=0)
 
-            if not distributed:
-                job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
-                jobs.append(job)
-            else:
-                job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
-                job.id = id  # associate an ID to identify jobs (if needed later)
-                jobs.append(job)
+            if progress:
+                rng3 = enumerate(tqdm(pool, desc="Models"),start=0)
+
+            for _id, model in rng3:
+
+                kwargs['steps_ahead'] = step
+
+                if not distributed:
+                    job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
+                    jobs.append(job)
+                else:
+                    job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
+                    job.id = id  # associate an ID to identify jobs (if needed later)
+                    jobs.append(job)
 
     if progress:
         progressbar.close()
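
Note that the extra outer loop multiplies the number of submitted jobs: one job per (step horizon, partitioner, model) triple in every data window. A quick back-of-the-envelope check with assumed sizes:

steps_ahead = [1, 4, 7, 10]   # assumed forecasting horizons
partitions  = [10, 20, 30]    # assumed partition counts (one partitioning method)
n_models    = 2               # assumed number of FTS methods under test

print(len(steps_ahead) * len(partitions) * n_models)   # -> 24 jobs per data window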
@@ -303,6 +312,9 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwarg
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
         mfts.partitioner = partitioner
 
+    _key += str(steps_ahead)
+    _key += str(method) if method is not None else ""
+
     if transformation is not None:
         mfts.append_transformation(transformation)
@@ -363,6 +375,9 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw
     if transformation is not None:
         mfts.append_transformation(transformation)
 
+    _key += str(steps_ahead)
+    _key += str(method) if method is not None else ""
+
     _start = time.time()
     mfts.fit(train_data, order=mfts.order, **kwargs)
     _end = time.time()
@@ -421,6 +436,9 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
         mfts.partitioner = partitioner
 
+    _key += str(steps_ahead)
+    _key += str(method) if method is not None else ""
+
     if transformation is not None:
         mfts.append_transformation(transformation)
@@ -478,8 +496,8 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
     smape = {}
     u = {}
    times = {}
-    steps = None
-    method = None
+    steps = {}
+    method = {}
 
     for job in jobs:
         _key = job['key']
@@ -489,14 +507,16 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
             smape[_key] = []
             u[_key] = []
             times[_key] = []
-            steps[_key] = job['steps']
-            method[_key] = job['method']
+            steps[_key] = []
+            method[_key] = []
+
+        steps[_key] = job['steps']
+        method[_key] = job['method']
 
         rmse[_key].append(job['rmse'])
         smape[_key].append(job['smape'])
         u[_key].append(job['u'])
         times[_key].append(job['time'])
 
-    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
+    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u, steps, method)
 
 
 def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False):
@@ -509,6 +529,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
     q75 = {}
     q95 = {}
     times = {}
+    steps = {}
+    method = {}
 
     for job in jobs:
         _key = job['key']
@@ -522,6 +544,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
             q25[_key] = []
             q75[_key] = []
             q95[_key] = []
+            steps[_key] = []
+            method[_key] = []
 
         sharpness[_key].append(job['sharpness'])
         resolution[_key].append(job['resolution'])
@@ -531,16 +555,18 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
         q25[_key].append(job['Q25'])
         q75[_key].append(job['Q75'])
         q95[_key].append(job['Q95'])
+        steps[_key] = job['steps']
+        method[_key] = job['method']
 
     return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
-                                         times, q05, q25, q75, q95)
+                                         times, q05, q25, q75, q95, steps, method)
 
 
 def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False):
     objs = {}
     crps = {}
     times = {}
+    steps = {}
+    method = {}
 
     for job in jobs:
         _key = job['key']
@@ -548,11 +574,15 @@ def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sinteti
             objs[_key] = job['obj']
             crps[_key] = []
            times[_key] = []
+            steps[_key] = []
+            method[_key] = []
 
         crps[_key].append(job['CRPS'])
         times[_key].append(job['time'])
+        steps[_key] = job['steps']
+        method[_key] = job['method']
 
-    return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic)
+    return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic, steps, method)

View File

@@ -89,19 +89,22 @@ class FTS(object):
         steps_ahead = kwargs.get("steps_ahead", None)
 
-        if type == 'point' and (steps_ahead == None or steps_ahead == 1):
-            ret = self.forecast(ndata, **kwargs)
-        elif type == 'point' and steps_ahead > 1:
-            ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
-        elif type == 'interval' and (steps_ahead == None or steps_ahead == 1):
-            ret = self.forecast_interval(ndata, **kwargs)
-        elif type == 'interval' and steps_ahead > 1:
-            ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
-        elif type == 'distribution' and (steps_ahead == None or steps_ahead == 1):
-            ret = self.forecast_distribution(ndata, **kwargs)
-        elif type == 'distribution' and steps_ahead > 1:
-            ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
-        else:
+        if steps_ahead == None or steps_ahead == 1:
+            if type == 'point':
+                ret = self.forecast(ndata, **kwargs)
+            elif type == 'interval':
+                ret = self.forecast_interval(ndata, **kwargs)
+            elif type == 'distribution':
+                ret = self.forecast_distribution(ndata, **kwargs)
+        elif steps_ahead > 1:
+            if type == 'point':
+                ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
+            elif type == 'interval':
+                ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
+            elif type == 'distribution':
+                ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
+
+        if not ['point', 'interval', 'distribution'].__contains__(type):
             raise ValueError('The argument \'type\' has an unknown value.')
 
         else:
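
Read as a dispatch table, the rewritten block routes the same predict call to six methods. The fragment below is illustration only (model would be any fitted forecaster and test a series, so it is not runnable on its own):

model.predict(test, type='point')                         # -> forecast(test)
model.predict(test, type='point', steps_ahead=10)         # -> forecast_ahead(test, 10)
model.predict(test, type='interval')                      # -> forecast_interval(test)
model.predict(test, type='interval', steps_ahead=10)      # -> forecast_ahead_interval(test, 10)
model.predict(test, type='distribution')                  # -> forecast_distribution(test)
model.predict(test, type='distribution', steps_ahead=10)  # -> forecast_ahead_distribution(test, 10)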

View File

@@ -372,6 +372,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             flrgs = self.generate_lhs_flrg(sample)
 
+            if 'type' in kwargs:
+                kwargs.pop('type')
+
             dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
 
             for bin in _bins:
@@ -409,7 +412,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 ret.append(ret[-1])
             else:
                 mp = self.forecast([ret[x] for x in np.arange(k - self.order, k)], **kwargs)
-                ret.append(mp)
+                ret.append(mp[0])
 
         return ret[self.order:]
@@ -427,13 +430,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         start = kwargs.get('start', self.order)
 
-        sample = data[start - (self.order - 1): start + 1]
+        sample = data[start - self.order: start]
 
         ret = [[k, k] for k in sample]
 
         for k in np.arange(self.order, steps+self.order):
-            if self.__check_interval_bounds(ret[-1]):
+            if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
                 ret.append(ret[-1])
             else:
                 lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.order, k)], **kwargs)
@@ -460,9 +463,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         start = kwargs.get('start', self.order)
 
-        sample = ndata[start - (self.order - 1): start + 1]
+        sample = ndata[start - self.order: start]
 
         for dat in sample:
+            if 'type' in kwargs:
+                kwargs.pop('type')
             tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
             tmp.set(dat, 1.0)
             ret.append(tmp)
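
The two sample = ... edits change which window is used to seed the multi-step methods; a standalone check of what each slice selects (values here are purely illustrative):

data = [10, 20, 30, 40, 50]
order, start = 3, 3

print(data[start - (order - 1): start + 1])   # old slice -> [20, 30, 40], still includes position 'start'
print(data[start - order: start])             # new slice -> [10, 20, 30], the 'order' values before 'start'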

View File

@@ -11,7 +11,7 @@ class ProbabilityDistribution(object):
     If type is histogram, the PDF is discrete
     If type is KDE the PDF is continuous
     """
-    def __init__(self,type = "KDE", **kwargs):
+    def __init__(self, type = "KDE", **kwargs):
         self.uod = kwargs.get("uod", None)
 
         self.type = type

View File

@@ -19,14 +19,15 @@ from pyFTS.benchmarks import benchmarks as bchmk
 from pyFTS.models import pwfts
 
-'''
+#'''
 bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False, orders=[1], partitions=[10], #np.arange(10,100,2),
-                                progress=False, type='distribution',
-                                distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
-                                save=True, file="pwfts_taiex_interval.csv")
-'''
+                                progress=False, type='distribution', steps_ahead=[1,4,7,10],
+                                #distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
+                                save=True, file="pwfts_taiex_distribution.csv")
+#'''
 
+'''
 train_split = 2000
 test_length = 200
@@ -55,7 +56,7 @@ tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
 print(tmp)
+'''
 
 '''
 tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='di