Improvements on benchmarks

Petrônio Cândido 2018-04-13 15:23:54 -03:00
parent ff23d874bc
commit 3db6e69119
10 changed files with 268 additions and 210 deletions

BIN  img/logo_medium.png          (new file, 29 KiB)
BIN  img/minds_logo.jpeg          (new file, 39 KiB)
BIN  img/minds_logo_medium.jpeg   (new file, 15 KiB)

@@ -271,6 +271,8 @@ def crps(targets, densities):

 def get_point_statistics(data, model, **kwargs):
     """Condensate all measures for point forecasters"""
+    steps_ahead = kwargs.get('steps_ahead',1)
+
     indexer = kwargs.get('indexer', None)

     if indexer is not None:
@@ -278,45 +280,88 @@ def get_point_statistics(data, model, **kwargs):
     else:
         ndata = np.array(data[model.order:])

-    forecasts = model.predict(data, **kwargs)
-    try:
+    ret = list()
+
+    if steps_ahead == 1:
+        forecasts = model.forecast(data, **kwargs)
         if model.has_seasonality:
             nforecasts = np.array(forecasts)
         else:
             nforecasts = np.array(forecasts[:-1])
-    except Exception as ex:
-        print(ex)
-        return [np.nan,np.nan,np.nan]
-    ret = list()
         ret.append(np.round(rmse(ndata, nforecasts), 2))
         ret.append(np.round(smape(ndata, nforecasts), 2))
         ret.append(np.round(UStatistic(ndata, nforecasts), 2))
+    else:
+        nforecasts = []
+        for k in np.arange(model.order, len(ndata)-steps_ahead):
+            sample = ndata[k - model.order: k]
+            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            nforecasts.append(tmp[-1])
+
+        start = model.order + steps_ahead
+        ret.append(np.round(rmse(ndata[start:], nforecasts), 2))
+        ret.append(np.round(smape(ndata[start:], nforecasts), 2))
+        ret.append(np.round(UStatistic(ndata[start:], nforecasts), 2))

     return ret


-def get_interval_statistics(original, model, **kwargs):
+def get_interval_statistics(data, model, **kwargs):
     """Condensate all measures for point_to_interval forecasters"""
+    steps_ahead = kwargs.get('steps_ahead', 1)
     ret = list()
-    forecasts = model.predict(original, **kwargs)
+
+    if steps_ahead == 1:
+        forecasts = model.forecast_interval(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
-    ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.05, original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.25, original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.75, original[model.order:], forecasts[:-1]), 2))
-    ret.append(round(pinball_mean(0.95, original[model.order:], forecasts[:-1]), 2))
+        ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.05, data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2))
+        ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2))
+    else:
+        forecasts = []
+        for k in np.arange(model.order, len(data) - steps_ahead):
+            sample = data[k - model.order: k]
+            tmp = model.forecast_ahead_interval(sample, steps_ahead, **kwargs)
+            forecasts.append(tmp[-1])
+
+        start = model.order + steps_ahead
+        ret.append(round(sharpness(forecasts), 2))
+        ret.append(round(resolution(forecasts), 2))
+        ret.append(round(coverage(data[model.order:], forecasts), 2))
+        ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
+        ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
+        ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
+        ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2))

     return ret


-def get_distribution_statistics(original, model, **kwargs):
+def get_distribution_statistics(data, model, **kwargs):
+    steps_ahead = kwargs.get('steps_ahead', 1)
     ret = list()
+
+    if steps_ahead == 1:
         _s1 = time.time()
-    densities1 = model.predict(original, **kwargs)
+        forecasts = model.forecast_distribution(data, **kwargs)
         _e1 = time.time()
-    ret.append(round(crps(original, densities1), 3))
+        ret.append(round(crps(data, forecasts), 3))
+        ret.append(round(_e1 - _s1, 3))
+    else:
+        forecasts = []
+        _s1 = time.time()
+        for k in np.arange(model.order, len(data) - steps_ahead):
+            sample = data[k - model.order: k]
+            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            forecasts.append(tmp[-1])
+        _e1 = time.time()
+
+        start = model.order + steps_ahead
+        ret.append(round(crps(data[start:], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
     return ret
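When steps_ahead > 1, the new branches above run a rolling-origin evaluation: each window of `order` past values is fed to forecast_ahead() and only the forecast made steps_ahead points later is scored against the matching observation. A minimal self-contained sketch of that loop, with a dummy persistence model standing in for a fitted pyFTS object (DummyModel, rolling_multi_step_rmse and the sine series are illustrative, not library code):

import numpy as np

class DummyModel:
    """Stand-in for a fitted model exposing .order and .forecast_ahead()."""
    order = 2

    def forecast_ahead(self, sample, steps, **kwargs):
        # Persistence forecast: repeat the last observed value 'steps' times.
        return [sample[-1]] * steps

def rolling_multi_step_rmse(data, model, steps_ahead):
    """Mimics the evaluation loop used by get_point_statistics when steps_ahead > 1."""
    forecasts = []
    for k in np.arange(model.order, len(data) - steps_ahead):
        sample = data[k - model.order: k]          # the 'order' values before position k
        tmp = model.forecast_ahead(sample, steps_ahead)
        forecasts.append(tmp[-1])                  # keep only the h-step-ahead value
    start = model.order + steps_ahead              # align targets with forecasts
    targets = np.array(data[start:])
    forecasts = np.array(forecasts)
    n = min(len(targets), len(forecasts))
    return np.sqrt(np.nanmean((targets[:n] - forecasts[:n]) ** 2))

series = np.sin(np.arange(100) / 5.0)
print(rolling_multi_step_rmse(series, DummyModel(), steps_ahead=4))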


@@ -92,12 +92,12 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
                 mod.append(mfts.partitioner.name)
                 mod.append(mfts.partitioner.partitions)
                 mod.append(len(mfts))
-                mod.append(steps)
-                mod.append(method)
             else:
                 mod.append('-')
                 mod.append('-')
                 mod.append('-')
+            mod.append(steps[k])
+            mod.append(method[k])
             mod.append(np.round(np.nanmean(rmse[k]), 2))
             mod.append(np.round(np.nanstd(rmse[k]), 2))
             mod.append(np.round(np.nanmean(smape[k]), 2))
@@ -126,17 +126,18 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
                 s = '-'
                 p = '-'
                 l = '-'
-            print([n, o, s, p, l, steps, method])
-            tmp = [n, o, s, p, l, steps, method, 'RMSE']
+            st = steps[k]
+            mt = method[k]
+            tmp = [n, o, s, p, l, st, mt, 'RMSE']
             tmp.extend(rmse[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'SMAPE']
+            tmp = [n, o, s, p, l, st, mt, 'SMAPE']
             tmp.extend(smape[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'U']
+            tmp = [n, o, s, p, l, st, mt, 'U']
             tmp.extend(u[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'TIME']
+            tmp = [n, o, s, p, l, st, mt, 'TIME']
             tmp.extend(times[k])
             ret.append(deepcopy(tmp))
         except Exception as ex:
@@ -154,13 +155,30 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
     print(ret)


-def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
-    columns = point_dataframe_analytic_columns(experiments)
+def cast_dataframe_to_synthetic(infile, outfile, experiments, type):
+    if type == 'point':
+        analytic_columns = point_dataframe_analytic_columns
+        synthetic_columns = point_dataframe_synthetic_columns
+        synthetize_measures = cast_dataframe_to_synthetic_point
+    elif type == 'interval':
+        analytic_columns = interval_dataframe_analytic_columns
+        synthetic_columns = interval_dataframe_synthetic_columns
+        synthetize_measures = cast_dataframe_to_synthetic_interval
+    elif type == 'distribution':
+        analytic_columns = probabilistic_dataframe_analytic_columns
+        synthetic_columns = probabilistic_dataframe_synthetic_columns
+        synthetize_measures = cast_dataframe_to_synthetic_probabilistic
+    else:
+        raise ValueError("Type parameter has an unknown value!")
+
+    columns = analytic_columns(experiments)
     dat = pd.read_csv(infile, sep=";", usecols=columns)
     models = dat.Model.unique()
     orders = dat.Order.unique()
     schemes = dat.Scheme.unique()
     partitions = dat.Partitions.unique()
+    steps = dat.Steps.unique()
+    methods = dat.Method.unique()

     data_columns = analytical_data_columns(experiments)
@@ -170,31 +188,41 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
         for o in orders:
             for s in schemes:
                 for p in partitions:
-                    mod = []
-                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
-                    if not df.empty:
-                        rmse = extract_measure(df, 'RMSE', data_columns)
-                        smape = extract_measure(df, 'SMAPE', data_columns)
-                        u = extract_measure(df, 'U', data_columns)
-                        times = extract_measure(df, 'TIME', data_columns)
-                        mod.append(m)
-                        mod.append(o)
-                        mod.append(s)
-                        mod.append(p)
-                        mod.append(extract_measure(df, 'RMSE', ['Size'])[0])
-                        mod.append(np.round(np.nanmean(rmse), 2))
-                        mod.append(np.round(np.nanstd(rmse), 2))
-                        mod.append(np.round(np.nanmean(smape), 2))
-                        mod.append(np.round(np.nanstd(smape), 2))
-                        mod.append(np.round(np.nanmean(u), 2))
-                        mod.append(np.round(np.nanstd(u), 2))
-                        mod.append(np.round(np.nanmean(times), 4))
-                        mod.append(np.round(np.nanstd(times), 4))
-                        ret.append(mod)
-
-    dat = pd.DataFrame(ret, columns=point_dataframe_synthetic_columns())
-    dat.to_csv(outfile, sep=";", index=False)
+                    for st in steps:
+                        for mt in methods:
+                            df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) &
+                                     (dat.Partitions == p) & (dat.Steps == st) & (dat.Method == mt)]
+                            if not df.empty:
+                                mod = synthetize_measures(df, data_columns)
+                                mod.insert(0, m)
+                                mod.insert(1, o)
+                                mod.insert(2, s)
+                                mod.insert(3, p)
+                                mod.insert(4, df.iat[0,5])
+                                mod.insert(5, st)
+                                mod.insert(6, mt)
+                                ret.append(mod)
+
+    dat = pd.DataFrame(ret, columns=synthetic_columns())
+    dat.to_csv(outfile, sep=";", index=False)
+
+
+def cast_dataframe_to_synthetic_point(df, data_columns):
+    ret = []
+    rmse = extract_measure(df, 'RMSE', data_columns)
+    smape = extract_measure(df, 'SMAPE', data_columns)
+    u = extract_measure(df, 'U', data_columns)
+    times = extract_measure(df, 'TIME', data_columns)
+    ret.append(np.round(np.nanmean(rmse), 2))
+    ret.append(np.round(np.nanstd(rmse), 2))
+    ret.append(np.round(np.nanmean(smape), 2))
+    ret.append(np.round(np.nanstd(smape), 2))
+    ret.append(np.round(np.nanmean(u), 2))
+    ret.append(np.round(np.nanstd(u), 2))
+    ret.append(np.round(np.nanmean(times), 4))
+    ret.append(np.round(np.nanstd(times), 4))
+    return ret
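The refactor above turns the old per-type casting functions into small aggregators that receive an already filtered dataframe, while cast_dataframe_to_synthetic dispatches on the type argument and adds the Steps and Method columns to the grouping. A usage sketch, assuming the helpers live in pyFTS.benchmarks.Util as elsewhere in this commit; the CSV file names are placeholders:

from pyFTS.benchmarks import Util as bUtil

# Collapse an analytic benchmark CSV into one aggregated row per
# (Model, Order, Scheme, Partitions, Steps, Method) combination.
bUtil.cast_dataframe_to_synthetic(infile="point_analytic.csv",
                                  outfile="point_synthetic.csv",
                                  experiments=30,
                                  type='point')   # or 'interval' / 'distribution'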
 def analytical_data_columns(experiments):
@@ -202,7 +230,6 @@ def analytical_data_columns(experiments):
     return data_columns


 def scale_params(data):
     vmin = np.nanmin(data)
     vlen = np.nanmax(data) - vmin
@@ -215,12 +242,10 @@ def scale(data, params):
     return ndata


 def stats(measure, data):
     print(measure, np.nanmean(data), np.nanstd(data))


 def unified_scaled_point(experiments, tam, save=False, file=None,
                          sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                          sort_ascend=[1, 1, 1, 1],save_best=False,
@@ -330,7 +355,6 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
     Util.show_and_save_image(fig, file, save)


 def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
                          sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                          sort_ascend=[1, 1, 1, 1],save_best=False,
@@ -419,14 +443,12 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
                 mod.append(mfts.partitioner.name)
                 mod.append(mfts.partitioner.partitions)
                 mod.append(l)
-                mod.append(steps)
-                mod.append(method)
             else:
                 mod.append('-')
                 mod.append('-')
                 mod.append('-')
-            mod.append(steps)
-            mod.append(method)
+            mod.append(steps[k])
+            mod.append(method[k])
             mod.append(round(np.nanmean(sharpness[k]), 2))
             mod.append(round(np.nanstd(sharpness[k]), 2))
             mod.append(round(np.nanmean(resolution[k]), 2))
@@ -461,29 +483,30 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
                 s = '-'
                 p = '-'
                 l = '-'
-            tmp = [n, o, s, p, l, steps, method, 'Sharpness']
+            st = steps[k]
+            mt = method[k]
+            tmp = [n, o, s, p, l, st, mt, 'Sharpness']
             tmp.extend(sharpness[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Resolution']
+            tmp = [n, o, s, p, l, st, mt, 'Resolution']
             tmp.extend(resolution[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Coverage']
+            tmp = [n, o, s, p, l, st, mt, 'Coverage']
             tmp.extend(coverage[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'TIME']
+            tmp = [n, o, s, p, l, st, mt, 'TIME']
             tmp.extend(times[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q05']
+            tmp = [n, o, s, p, l, st, mt, 'Q05']
             tmp.extend(q05[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q25']
+            tmp = [n, o, s, p, l, st, mt, 'Q25']
             tmp.extend(q25[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q75']
+            tmp = [n, o, s, p, l, st, mt, 'Q75']
             tmp.extend(q75[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'Q95']
+            tmp = [n, o, s, p, l, st, mt, 'Q95']
             tmp.extend(q95[k])
             ret.append(deepcopy(tmp))
         except Exception as ex:
@@ -515,25 +538,7 @@ def interval_dataframe_synthetic_columns():
     return columns


-def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
-    columns = interval_dataframe_analytic_columns(experiments)
-    dat = pd.read_csv(infile, sep=";", usecols=columns)
-    models = dat.Model.unique()
-    orders = dat.Order.unique()
-    schemes = dat.Scheme.unique()
-    partitions = dat.Partitions.unique()
-
-    data_columns = analytical_data_columns(experiments)
-
-    ret = []
-
-    for m in models:
-        for o in orders:
-            for s in schemes:
-                for p in partitions:
-                    mod = []
-                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
-                    if not df.empty:
+def cast_dataframe_to_synthetic_interval(df, data_columns):
     sharpness = extract_measure(df, 'Sharpness', data_columns)
     resolution = extract_measure(df, 'Resolution', data_columns)
     coverage = extract_measure(df, 'Coverage', data_columns)
@@ -542,30 +547,25 @@ def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
     q25 = extract_measure(df, 'Q25', data_columns)
     q75 = extract_measure(df, 'Q75', data_columns)
     q95 = extract_measure(df, 'Q95', data_columns)
-                        mod.append(m)
-                        mod.append(o)
-                        mod.append(s)
-                        mod.append(p)
-                        mod.append(np.round(np.nanmean(sharpness), 2))
-                        mod.append(np.round(np.nanstd(sharpness), 2))
-                        mod.append(np.round(np.nanmean(resolution), 2))
-                        mod.append(np.round(np.nanstd(resolution), 2))
-                        mod.append(np.round(np.nanmean(coverage), 2))
-                        mod.append(np.round(np.nanstd(coverage), 2))
-                        mod.append(np.round(np.nanmean(times), 4))
-                        mod.append(np.round(np.nanstd(times), 4))
-                        mod.append(np.round(np.nanmean(q05), 4))
-                        mod.append(np.round(np.nanstd(q05), 4))
-                        mod.append(np.round(np.nanmean(q25), 4))
-                        mod.append(np.round(np.nanstd(q25), 4))
-                        mod.append(np.round(np.nanmean(q75), 4))
-                        mod.append(np.round(np.nanstd(q75), 4))
-                        mod.append(np.round(np.nanmean(q95), 4))
-                        mod.append(np.round(np.nanstd(q95), 4))
-                        ret.append(mod)
-
-    dat = pd.DataFrame(ret, columns=interval_dataframe_synthetic_columns())
-    dat.to_csv(outfile, sep=";", index=False)
+    ret = []
+    ret.append(np.round(np.nanmean(sharpness), 2))
+    ret.append(np.round(np.nanstd(sharpness), 2))
+    ret.append(np.round(np.nanmean(resolution), 2))
+    ret.append(np.round(np.nanstd(resolution), 2))
+    ret.append(np.round(np.nanmean(coverage), 2))
+    ret.append(np.round(np.nanstd(coverage), 2))
+    ret.append(np.round(np.nanmean(times), 4))
+    ret.append(np.round(np.nanstd(times), 4))
+    ret.append(np.round(np.nanmean(q05), 4))
+    ret.append(np.round(np.nanstd(q05), 4))
+    ret.append(np.round(np.nanmean(q25), 4))
+    ret.append(np.round(np.nanstd(q25), 4))
+    ret.append(np.round(np.nanmean(q75), 4))
+    ret.append(np.round(np.nanstd(q75), 4))
+    ret.append(np.round(np.nanmean(q95), 4))
+    ret.append(np.round(np.nanstd(q95), 4))
+    return ret
@@ -905,17 +905,14 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn
                 mod.append(mfts.partitioner.name)
                 mod.append(mfts.partitioner.partitions)
                 mod.append(len(mfts))
-                mod.append(steps)
-                mod.append(method)
             else:
                 mod.append('-')
                 mod.append('-')
                 mod.append('-')
-            mod.append(steps)
-            mod.append(method)
+            mod.append(steps[k])
+            mod.append(method[k])
             mod.append(np.round(np.nanmean(crps[k]), 2))
             mod.append(np.round(np.nanstd(crps[k]), 2))
-            mod.append(l)
             mod.append(np.round(np.nanmean(times[k]), 4))
             mod.append(np.round(np.nanstd(times[k]), 4))
             ret.append(mod)
@@ -940,10 +937,12 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn
                 s = '-'
                 p = '-'
                 l = '-'
-            tmp = [n, o, s, p, l, steps, method, 'CRPS']
+            st = steps[k]
+            mt = method[k]
+            tmp = [n, o, s, p, l, st, mt, 'CRPS']
             tmp.extend(crps[k])
             ret.append(deepcopy(tmp))
-            tmp = [n, o, s, p, l, steps, method, 'TIME']
+            tmp = [n, o, s, p, l, st, mt, 'TIME']
             tmp.extend(times[k])
             ret.append(deepcopy(tmp))
         except Exception as ex:
@@ -974,40 +973,15 @@ def probabilistic_dataframe_synthetic_columns():
     return columns


-def cast_dataframe_to_synthetic_probabilistic(infile, outfile, experiments):
-    columns = probabilistic_dataframe_analytic_columns(experiments)
-    dat = pd.read_csv(infile, sep=";", usecols=columns)
-    models = dat.Model.unique()
-    orders = dat.Order.unique()
-    schemes = dat.Scheme.unique()
-    partitions = dat.Partitions.unique()
-
-    data_columns = analytical_data_columns(experiments)
-
-    ret = []
-
-    for m in models:
-        for o in orders:
-            for s in schemes:
-                for p in partitions:
-                    mod = []
-                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
-                    if not df.empty:
+def cast_dataframe_to_synthetic_probabilistic(df, data_columns):
     crps1 = extract_measure(df, 'CRPS', data_columns)
     times1 = extract_measure(df, 'TIME', data_columns)
-                        mod.append(m)
-                        mod.append(o)
-                        mod.append(s)
-                        mod.append(p)
-                        mod.append(np.round(np.nanmean(crps1), 2))
-                        mod.append(np.round(np.nanstd(crps1), 2))
-                        mod.append(np.round(np.nanmean(times1), 2))
-                        mod.append(np.round(np.nanstd(times1), 2))
-                        ret.append(mod)
-
-    dat = pd.DataFrame(ret, columns=probabilistic_dataframe_synthetic_columns())
-    dat.to_csv(outfile, sep=";", index=False)
+    ret = []
+    ret.append(np.round(np.nanmean(crps1), 2))
+    ret.append(np.round(np.nanstd(crps1), 2))
+    ret.append(np.round(np.nanmean(times1), 2))
+    ret.append(np.round(np.nanstd(times1), 2))
+    return ret


 def unified_scaled_probabilistic(experiments, tam, save=False, file=None,


@@ -81,6 +81,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
     partitions = __pop("partitions", [10], kwargs)

+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+
     methods = __pop('methods', None, kwargs)
     models = __pop('models', None, kwargs)
@@ -178,19 +180,26 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         else:
             partitioners_pool = partitioners_models

-        rng1 = partitioners_pool
+        rng1 = steps_ahead
+
+        if progress:
+            rng1 = tqdm(steps_ahead, desc="Steps")
+
+        for step in rng1:
+
+            rng2 = partitioners_pool

             if progress:
-                rng1 = tqdm(partitioners_pool, desc="Partitioners")
+                rng2 = tqdm(partitioners_pool, desc="Partitioners")

-            for partitioner in rng1:
+            for partitioner in rng2:

-                rng2 = enumerate(pool,start=0)
+                rng3 = enumerate(pool,start=0)

                 if progress:
-                    rng2 = enumerate(tqdm(pool, desc="Models"),start=0)
+                    rng3 = enumerate(tqdm(pool, desc="Models"),start=0)

-                for _id, model in rng2:
+                for _id, model in rng3:
+                    kwargs['steps_ahead'] = step

                     if not distributed:
                         job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
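Because steps_ahead is now popped as a list, the benchmark grid gains an outer loop over forecasting horizons, and the current horizon is forwarded to each experiment through kwargs['steps_ahead']. A small sketch of the resulting nesting (the print call stands in for experiment_method; the pools are illustrative):

import itertools

steps_ahead = [1, 4, 7, 10]                     # horizons, as in the test script below
partitioners_pool = ['Grid q=10', 'Grid q=20']  # placeholder partitioners
pool = ['PWFTS order=1', 'PWFTS order=2']       # placeholder models

# Same nesting as sliding_window_benchmarks: Steps -> Partitioners -> Models.
for step, partitioner, model in itertools.product(steps_ahead, partitioners_pool, pool):
    kwargs = {'steps_ahead': step}
    print("run", model, "|", partitioner, "|", kwargs)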
@@ -303,6 +312,9 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwarg
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)

     mfts.partitioner = partitioner
+    _key += str(steps_ahead)
+    _key += str(method) if method is not None else ""
+
     if transformation is not None:
         mfts.append_transformation(transformation)
@@ -363,6 +375,9 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw
     if transformation is not None:
         mfts.append_transformation(transformation)

+    _key += str(steps_ahead)
+    _key += str(method) if method is not None else ""
+
     _start = time.time()
     mfts.fit(train_data, order=mfts.order, **kwargs)
     _end = time.time()
@@ -421,6 +436,9 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)

     mfts.partitioner = partitioner
+    _key += str(steps_ahead)
+    _key += str(method) if method is not None else ""
+
     if transformation is not None:
         mfts.append_transformation(transformation)
@@ -478,8 +496,8 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
     smape = {}
     u = {}
     times = {}
-    steps = None
-    method = None
+    steps = {}
+    method = {}

     for job in jobs:
         _key = job['key']
@@ -489,6 +507,8 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
             smape[_key] = []
             u[_key] = []
             times[_key] = []
+            steps[_key] = []
+            method[_key] = []
         steps[_key] = job['steps']
         method[_key] = job['method']
         rmse[_key].append(job['rmse'])
@@ -496,7 +516,7 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
         u[_key].append(job['u'])
         times[_key].append(job['time'])

-    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
+    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u, steps, method)
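process_point_jobs now tracks steps and method per experiment key, so every job record produced by run_point is expected to carry those two fields next to the error measures. A hedged illustration of such a record (field names follow the keys read above; the values are made up):

# Shape of the records appended by run_point and consumed by process_point_jobs
# (field names taken from the keys accessed in this commit; values illustrative).
job = {
    'key': 'PWFTS n = 1 Grid q = 10 4',   # model + partitioner + steps_ahead (+ method)
    'obj': None,                          # the fitted model object in real runs
    'rmse': 18.32,
    'smape': 2.41,
    'u': 0.97,
    'time': 0.85,
    'steps': 4,
    'method': None,
}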
 def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False):
@@ -509,6 +529,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
     q75 = {}
     q95 = {}
     times = {}
+    steps = {}
+    method = {}

     for job in jobs:
         _key = job['key']
@@ -522,6 +544,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
             q25[_key] = []
             q75[_key] = []
             q95[_key] = []
+            steps[_key] = []
+            method[_key] = []

         sharpness[_key].append(job['sharpness'])
         resolution[_key].append(job['resolution'])
@@ -531,16 +555,18 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
         q25[_key].append(job['Q25'])
         q75[_key].append(job['Q75'])
         q95[_key].append(job['Q95'])
+        steps[_key] = job['steps']
+        method[_key] = job['method']

     return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
-                                         times, q05, q25, q75, q95)
+                                         times, q05, q25, q75, q95, steps, method)
 def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False):
     objs = {}
     crps = {}
     times = {}
+    steps = {}
+    method = {}

     for job in jobs:
         _key = job['key']
@@ -548,11 +574,15 @@ def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sinteti
             objs[_key] = job['obj']
             crps[_key] = []
             times[_key] = []
+            steps[_key] = []
+            method[_key] = []

         crps[_key].append(job['CRPS'])
         times[_key].append(job['time'])
+        steps[_key] = job['steps']
+        method[_key] = job['method']

-    return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic)
+    return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic, steps, method)


@@ -89,19 +89,22 @@ class FTS(object):
         steps_ahead = kwargs.get("steps_ahead", None)

-        if type == 'point' and (steps_ahead == None or steps_ahead == 1):
-            ret = self.forecast(ndata, **kwargs)
-        elif type == 'point' and steps_ahead > 1:
-            ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
-        elif type == 'interval' and (steps_ahead == None or steps_ahead == 1):
-            ret = self.forecast_interval(ndata, **kwargs)
-        elif type == 'interval' and steps_ahead > 1:
-            ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
-        elif type == 'distribution' and (steps_ahead == None or steps_ahead == 1):
-            ret = self.forecast_distribution(ndata, **kwargs)
-        elif type == 'distribution' and steps_ahead > 1:
-            ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
-        else:
+        if steps_ahead == None or steps_ahead == 1:
+            if type == 'point':
+                ret = self.forecast(ndata, **kwargs)
+            elif type == 'interval':
+                ret = self.forecast_interval(ndata, **kwargs)
+            elif type == 'distribution':
+                ret = self.forecast_distribution(ndata, **kwargs)
+        elif steps_ahead > 1:
+            if type == 'point':
+                ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
+            elif type == 'interval':
+                ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
+            elif type == 'distribution':
+                ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
+
+        if not ['point', 'interval', 'distribution'].__contains__(type):
             raise ValueError('The argument \'type\' has an unknown value.')

         else:
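The restructured dispatch branches first on the horizon and then on the forecast type, so a single predict() call covers one-step and multi-step forecasting for points, intervals and distributions. A minimal usage sketch, assuming an already fitted FTS model (the helper name and the horizon value are illustrative, not part of the library):

def summarize_forecasts(model, test_data, horizon=4):
    """Exercise the refactored dispatch: one entry point, three forecast types,
    plus a multi-step call routed to forecast_ahead()."""
    return {
        'point': model.predict(test_data, type='point'),
        'interval': model.predict(test_data, type='interval'),
        'distribution': model.predict(test_data, type='distribution'),
        'point_ahead': model.predict(test_data, type='point', steps_ahead=horizon),
    }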


@@ -372,6 +372,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             flrgs = self.generate_lhs_flrg(sample)

+            if 'type' in kwargs:
+                kwargs.pop('type')
+
             dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)

             for bin in _bins:
@@ -409,7 +412,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 ret.append(ret[-1])
             else:
                 mp = self.forecast([ret[x] for x in np.arange(k - self.order, k)], **kwargs)
-                ret.append(mp)
+                ret.append(mp[0])

         return ret[self.order:]
@@ -427,13 +430,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         start = kwargs.get('start', self.order)

-        sample = data[start - (self.order - 1): start + 1]
+        sample = data[start - self.order: start]

         ret = [[k, k] for k in sample]

         for k in np.arange(self.order, steps+self.order):

-            if self.__check_interval_bounds(ret[-1]):
+            if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
                 ret.append(ret[-1])
             else:
                 lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.order, k)], **kwargs)
@@ -460,9 +463,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         start = kwargs.get('start', self.order)

-        sample = ndata[start - (self.order - 1): start + 1]
+        sample = ndata[start - self.order: start]

         for dat in sample:
+            if 'type' in kwargs:
+                kwargs.pop('type')
             tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
             tmp.set(dat, 1.0)
             ret.append(tmp)
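Besides unwrapping the single-value list returned by forecast() (ret.append(mp[0])), the slicing changes above make forecast_ahead_interval and forecast_ahead_distribution seed themselves with the `order` values strictly before `start`, instead of a window that ends at `start` itself. A tiny sketch of the difference, using plain lists rather than pyFTS objects:

data = [10, 20, 30, 40, 50]
order, start = 3, 3

old_sample = data[start - (order - 1): start + 1]   # [20, 30, 40]: includes data[start]
new_sample = data[start - order: start]             # [10, 20, 30]: only values before start

print(old_sample, new_sample)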


@@ -19,14 +19,15 @@ from pyFTS.benchmarks import benchmarks as bchmk
 from pyFTS.models import pwfts

-'''
+#'''
 bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False, orders=[1], partitions=[10], #np.arange(10,100,2),
-                                progress=False, type='distribution',
-                                distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
-                                save=True, file="pwfts_taiex_interval.csv")
-'''
+                                progress=False, type='distribution', steps_ahead=[1,4,7,10],
+                                #distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
+                                save=True, file="pwfts_taiex_distribution.csv")
+#'''
+'''
 train_split = 2000
 test_length = 200
@@ -55,7 +56,7 @@ tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
 print(tmp)
+'''
 '''
 tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='di