Improvements on benchmarks

Petrônio Cândido 2018-04-13 15:23:54 -03:00
parent ff23d874bc
commit 3db6e69119
10 changed files with 268 additions and 210 deletions

BIN img/logo_medium.png (new file, binary not shown, 29 KiB)
BIN img/minds_logo.jpeg (new file, binary not shown, 39 KiB)
BIN img/minds_logo_medium.jpeg (new file, binary not shown, 15 KiB)

View File

@ -271,6 +271,8 @@ def crps(targets, densities):
def get_point_statistics(data, model, **kwargs):
"""Condensate all measures for point forecasters"""
steps_ahead = kwargs.get('steps_ahead',1)
indexer = kwargs.get('indexer', None)
if indexer is not None:
@ -278,45 +280,88 @@ def get_point_statistics(data, model, **kwargs):
else:
ndata = np.array(data[model.order:])
forecasts = model.predict(data, **kwargs)
ret = list()
try:
if steps_ahead == 1:
forecasts = model.forecast(data, **kwargs)
if model.has_seasonality:
nforecasts = np.array(forecasts)
else:
nforecasts = np.array(forecasts[:-1])
except Exception as ex:
print(ex)
return [np.nan,np.nan,np.nan]
ret = list()
ret.append(np.round(rmse(ndata, nforecasts), 2))
ret.append(np.round(smape(ndata, nforecasts), 2))
ret.append(np.round(UStatistic(ndata, nforecasts), 2))
else:
nforecasts = []
for k in np.arange(model.order, len(ndata)-steps_ahead):
sample = ndata[k - model.order: k]
tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
nforecasts.append(tmp[-1])
start = model.order + steps_ahead
ret.append(np.round(rmse(ndata[start:], nforecasts), 2))
ret.append(np.round(smape(ndata[start:], nforecasts), 2))
ret.append(np.round(UStatistic(ndata[start:], nforecasts), 2))
return ret
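For reference, a minimal usage sketch of the extended point measures, assuming an already fitted pyFTS forecaster; the names test_data and model below are placeholders and not part of this commit:

from pyFTS.benchmarks import Measures

# model: an already trained pyFTS forecaster (placeholder); test_data: a held-out series.
# One-step evaluation returns [RMSE, SMAPE, U], each rounded to two decimals.
point_one_step = Measures.get_point_statistics(test_data, model)

# Multi-step evaluation: every forecast is issued steps_ahead points in advance,
# so the metrics are computed against test_data[model.order + steps_ahead:].
point_multi_step = Measures.get_point_statistics(test_data, model, steps_ahead=5)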
def get_interval_statistics(original, model, **kwargs):
def get_interval_statistics(data, model, **kwargs):
"""Condensate all measures for point_to_interval forecasters"""
steps_ahead = kwargs.get('steps_ahead', 1)
ret = list()
forecasts = model.predict(original, **kwargs)
if steps_ahead == 1:
forecasts = model.forecast_interval(data, **kwargs)
ret.append(round(sharpness(forecasts), 2))
ret.append(round(resolution(forecasts), 2))
ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.05, original[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.25, original[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.75, original[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.95, original[model.order:], forecasts[:-1]), 2))
ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.05, data[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2))
else:
forecasts = []
for k in np.arange(model.order, len(data) - steps_ahead):
sample = data[k - model.order: k]
tmp = model.forecast_ahead_interval(sample, steps_ahead, **kwargs)
forecasts.append(tmp[-1])
start = model.order + steps_ahead
ret.append(round(sharpness(forecasts), 2))
ret.append(round(resolution(forecasts), 2))
ret.append(round(coverage(data[model.order:], forecasts), 2))
ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2))
return ret
def get_distribution_statistics(original, model, **kwargs):
def get_distribution_statistics(data, model, **kwargs):
steps_ahead = kwargs.get('steps_ahead', 1)
ret = list()
if steps_ahead == 1:
_s1 = time.time()
densities1 = model.predict(original, **kwargs)
forecasts = model.forecast_distribution(data, **kwargs)
_e1 = time.time()
ret.append(round(crps(original, densities1), 3))
ret.append(round(crps(data, forecasts), 3))
ret.append(round(_e1 - _s1, 3))
else:
forecasts = []
_s1 = time.time()
for k in np.arange(model.order, len(data) - steps_ahead):
sample = data[k - model.order: k]
tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
forecasts.append(tmp[-1])
_e1 = time.time()
start = model.order + steps_ahead
ret.append(round(crps(data[start:], forecasts), 3))
ret.append(round(_e1 - _s1, 3))
return ret
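The interval and distribution measures follow the same calling pattern; another hedged sketch under the same assumptions (already fitted model, placeholder names):

from pyFTS.benchmarks import Measures

# Interval metrics: sharpness, resolution, coverage and the pinball loss at the
# 0.05, 0.25, 0.75 and 0.95 quantiles.
interval_stats = Measures.get_interval_statistics(test_data, model, steps_ahead=5)

# Distribution metrics: CRPS plus the elapsed forecasting time in seconds.
crps_and_time = Measures.get_distribution_statistics(test_data, model, steps_ahead=5)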

View File

@ -15,7 +15,7 @@ from copy import deepcopy
from pyFTS.common import Util
def extract_measure(dataframe,measure,data_columns):
def extract_measure(dataframe, measure, data_columns):
if not dataframe.empty:
df = dataframe[(dataframe.Measure == measure)][data_columns]
tmp = df.to_dict(orient="records")[0]
@ -92,12 +92,12 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(len(mfts))
mod.append(steps)
mod.append(method)
else:
mod.append('-')
mod.append('-')
mod.append('-')
mod.append(steps[k])
mod.append(method[k])
mod.append(np.round(np.nanmean(rmse[k]), 2))
mod.append(np.round(np.nanstd(rmse[k]), 2))
mod.append(np.round(np.nanmean(smape[k]), 2))
@ -126,17 +126,18 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
s = '-'
p = '-'
l = '-'
print([n, o, s, p, l, steps, method])
tmp = [n, o, s, p, l, steps, method, 'RMSE']
st = steps[k]
mt = method[k]
tmp = [n, o, s, p, l, st, mt, 'RMSE']
tmp.extend(rmse[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'SMAPE']
tmp = [n, o, s, p, l, st, mt, 'SMAPE']
tmp.extend(smape[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'U']
tmp = [n, o, s, p, l, st, mt, 'U']
tmp.extend(u[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'TIME']
tmp = [n, o, s, p, l, st, mt, 'TIME']
tmp.extend(times[k])
ret.append(deepcopy(tmp))
except Exception as ex:
@ -154,13 +155,30 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
print(ret)
def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
columns = point_dataframe_analytic_columns(experiments)
def cast_dataframe_to_synthetic(infile, outfile, experiments, type):
if type == 'point':
analytic_columns = point_dataframe_analytic_columns
synthetic_columns = point_dataframe_synthetic_columns
synthetize_measures = cast_dataframe_to_synthetic_point
elif type == 'interval':
analytic_columns = interval_dataframe_analytic_columns
synthetic_columns = interval_dataframe_synthetic_columns
synthetize_measures = cast_dataframe_to_synthetic_interval
elif type == 'distribution':
analytic_columns = probabilistic_dataframe_analytic_columns
synthetic_columns = probabilistic_dataframe_synthetic_columns
synthetize_measures = cast_dataframe_to_synthetic_probabilistic
else:
raise ValueError("Type parameter has an unknown value!")
columns = analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique()
orders = dat.Order.unique()
schemes = dat.Scheme.unique()
partitions = dat.Partitions.unique()
steps = dat.Steps.unique()
methods = dat.Method.unique()
data_columns = analytical_data_columns(experiments)
@ -170,31 +188,41 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
for o in orders:
for s in schemes:
for p in partitions:
mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
for st in steps:
for mt in methods:
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) &
(dat.Partitions == p) & (dat.Steps == st) & (dat.Method == mt)]
if not df.empty:
mod = synthetize_measures(df, data_columns)
mod.insert(0, m)
mod.insert(1, o)
mod.insert(2, s)
mod.insert(3, p)
mod.insert(4, df.iat[0,5])
mod.insert(5, st)
mod.insert(6, mt)
ret.append(mod)
dat = pd.DataFrame(ret, columns=synthetic_columns())
dat.to_csv(outfile, sep=";", index=False)
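A hedged example of the new unified casting entry point; the module alias, file names and the experiments value are illustrative assumptions, not taken from this commit:

from pyFTS.benchmarks import Util as bUtil

# Collapse a per-window (analytic) benchmark CSV into the aggregated (synthetic)
# CSV, with mean/std grouped by model, order, scheme, partitions, steps and method.
# type may be 'point', 'interval' or 'distribution'.
bUtil.cast_dataframe_to_synthetic("point_analytic.csv", "point_synthetic.csv",
                                  experiments=30, type='point')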
def cast_dataframe_to_synthetic_point(df, data_columns):
ret = []
rmse = extract_measure(df, 'RMSE', data_columns)
smape = extract_measure(df, 'SMAPE', data_columns)
u = extract_measure(df, 'U', data_columns)
times = extract_measure(df, 'TIME', data_columns)
mod.append(m)
mod.append(o)
mod.append(s)
mod.append(p)
mod.append(extract_measure(df, 'RMSE', ['Size'])[0])
mod.append(np.round(np.nanmean(rmse), 2))
mod.append(np.round(np.nanstd(rmse), 2))
mod.append(np.round(np.nanmean(smape), 2))
mod.append(np.round(np.nanstd(smape), 2))
mod.append(np.round(np.nanmean(u), 2))
mod.append(np.round(np.nanstd(u), 2))
mod.append(np.round(np.nanmean(times), 4))
mod.append(np.round(np.nanstd(times), 4))
ret.append(mod)
dat = pd.DataFrame(ret, columns=point_dataframe_synthetic_columns())
dat.to_csv(outfile, sep=";", index=False)
ret.append(np.round(np.nanmean(rmse), 2))
ret.append(np.round(np.nanstd(rmse), 2))
ret.append(np.round(np.nanmean(smape), 2))
ret.append(np.round(np.nanstd(smape), 2))
ret.append(np.round(np.nanmean(u), 2))
ret.append(np.round(np.nanstd(u), 2))
ret.append(np.round(np.nanmean(times), 4))
ret.append(np.round(np.nanstd(times), 4))
return ret
def analytical_data_columns(experiments):
@ -202,7 +230,6 @@ def analytical_data_columns(experiments):
return data_columns
def scale_params(data):
vmin = np.nanmin(data)
vlen = np.nanmax(data) - vmin
@ -215,12 +242,10 @@ def scale(data, params):
return ndata
def stats(measure, data):
print(measure, np.nanmean(data), np.nanstd(data))
def unified_scaled_point(experiments, tam, save=False, file=None,
sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
sort_ascend=[1, 1, 1, 1],save_best=False,
@ -330,7 +355,6 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
Util.show_and_save_image(fig, file, save)
def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
sort_ascend=[1, 1, 1, 1],save_best=False,
@ -419,14 +443,12 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(l)
mod.append(steps)
mod.append(method)
else:
mod.append('-')
mod.append('-')
mod.append('-')
mod.append(steps)
mod.append(method)
mod.append(steps[k])
mod.append(method[k])
mod.append(round(np.nanmean(sharpness[k]), 2))
mod.append(round(np.nanstd(sharpness[k]), 2))
mod.append(round(np.nanmean(resolution[k]), 2))
@ -461,29 +483,30 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
s = '-'
p = '-'
l = '-'
tmp = [n, o, s, p, l, steps, method, 'Sharpness']
st = steps[k]
mt = method[k]
tmp = [n, o, s, p, l, st, mt, 'Sharpness']
tmp.extend(sharpness[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'Resolution']
tmp = [n, o, s, p, l, st, mt, 'Resolution']
tmp.extend(resolution[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'Coverage']
tmp = [n, o, s, p, l, st, mt, 'Coverage']
tmp.extend(coverage[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'TIME']
tmp = [n, o, s, p, l, st, mt, 'TIME']
tmp.extend(times[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'Q05']
tmp = [n, o, s, p, l, st, mt, 'Q05']
tmp.extend(q05[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'Q25']
tmp = [n, o, s, p, l, st, mt, 'Q25']
tmp.extend(q25[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'Q75']
tmp = [n, o, s, p, l, st, mt, 'Q75']
tmp.extend(q75[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'Q95']
tmp = [n, o, s, p, l, st, mt, 'Q95']
tmp.extend(q95[k])
ret.append(deepcopy(tmp))
except Exception as ex:
@ -515,25 +538,7 @@ def interval_dataframe_synthetic_columns():
return columns
def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
columns = interval_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique()
orders = dat.Order.unique()
schemes = dat.Scheme.unique()
partitions = dat.Partitions.unique()
data_columns = analytical_data_columns(experiments)
ret = []
for m in models:
for o in orders:
for s in schemes:
for p in partitions:
mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
if not df.empty:
def cast_dataframe_to_synthetic_interval(df, data_columns):
sharpness = extract_measure(df, 'Sharpness', data_columns)
resolution = extract_measure(df, 'Resolution', data_columns)
coverage = extract_measure(df, 'Coverage', data_columns)
@ -542,30 +547,25 @@ def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
q25 = extract_measure(df, 'Q25', data_columns)
q75 = extract_measure(df, 'Q75', data_columns)
q95 = extract_measure(df, 'Q95', data_columns)
mod.append(m)
mod.append(o)
mod.append(s)
mod.append(p)
mod.append(np.round(np.nanmean(sharpness), 2))
mod.append(np.round(np.nanstd(sharpness), 2))
mod.append(np.round(np.nanmean(resolution), 2))
mod.append(np.round(np.nanstd(resolution), 2))
mod.append(np.round(np.nanmean(coverage), 2))
mod.append(np.round(np.nanstd(coverage), 2))
mod.append(np.round(np.nanmean(times), 4))
mod.append(np.round(np.nanstd(times), 4))
mod.append(np.round(np.nanmean(q05), 4))
mod.append(np.round(np.nanstd(q05), 4))
mod.append(np.round(np.nanmean(q25), 4))
mod.append(np.round(np.nanstd(q25), 4))
mod.append(np.round(np.nanmean(q75), 4))
mod.append(np.round(np.nanstd(q75), 4))
mod.append(np.round(np.nanmean(q95), 4))
mod.append(np.round(np.nanstd(q95), 4))
ret.append(mod)
ret = []
ret.append(np.round(np.nanmean(sharpness), 2))
ret.append(np.round(np.nanstd(sharpness), 2))
ret.append(np.round(np.nanmean(resolution), 2))
ret.append(np.round(np.nanstd(resolution), 2))
ret.append(np.round(np.nanmean(coverage), 2))
ret.append(np.round(np.nanstd(coverage), 2))
ret.append(np.round(np.nanmean(times), 4))
ret.append(np.round(np.nanstd(times), 4))
ret.append(np.round(np.nanmean(q05), 4))
ret.append(np.round(np.nanstd(q05), 4))
ret.append(np.round(np.nanmean(q25), 4))
ret.append(np.round(np.nanstd(q25), 4))
ret.append(np.round(np.nanmean(q75), 4))
ret.append(np.round(np.nanstd(q75), 4))
ret.append(np.round(np.nanmean(q95), 4))
ret.append(np.round(np.nanstd(q95), 4))
return ret
dat = pd.DataFrame(ret, columns=interval_dataframe_synthetic_columns())
dat.to_csv(outfile, sep=";", index=False)
@ -905,17 +905,14 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(len(mfts))
mod.append(steps)
mod.append(method)
else:
mod.append('-')
mod.append('-')
mod.append('-')
mod.append(steps)
mod.append(method)
mod.append(steps[k])
mod.append(method[k])
mod.append(np.round(np.nanmean(crps[k]), 2))
mod.append(np.round(np.nanstd(crps[k]), 2))
mod.append(l)
mod.append(np.round(np.nanmean(times[k]), 4))
mod.append(np.round(np.nanstd(times[k]), 4))
ret.append(mod)
@ -940,10 +937,12 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn
s = '-'
p = '-'
l = '-'
tmp = [n, o, s, p, l, steps, method, 'CRPS']
st = steps[k]
mt = method[k]
tmp = [n, o, s, p, l, st, mt, 'CRPS']
tmp.extend(crps[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, steps, method, 'TIME']
tmp = [n, o, s, p, l, st, mt, 'TIME']
tmp.extend(times[k])
ret.append(deepcopy(tmp))
except Exception as ex:
@ -974,40 +973,15 @@ def probabilistic_dataframe_synthetic_columns():
return columns
def cast_dataframe_to_synthetic_probabilistic(infile, outfile, experiments):
columns = probabilistic_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique()
orders = dat.Order.unique()
schemes = dat.Scheme.unique()
partitions = dat.Partitions.unique()
data_columns = analytical_data_columns(experiments)
ret = []
for m in models:
for o in orders:
for s in schemes:
for p in partitions:
mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
if not df.empty:
def cast_dataframe_to_synthetic_probabilistic(df, data_columns):
crps1 = extract_measure(df, 'CRPS', data_columns)
times1 = extract_measure(df, 'TIME', data_columns)
mod.append(m)
mod.append(o)
mod.append(s)
mod.append(p)
mod.append(np.round(np.nanmean(crps1), 2))
mod.append(np.round(np.nanstd(crps1), 2))
mod.append(np.round(np.nanmean(times1), 2))
mod.append(np.round(np.nanstd(times1), 2))
ret.append(mod)
dat = pd.DataFrame(ret, columns=probabilistic_dataframe_synthetic_columns())
dat.to_csv(outfile, sep=";", index=False)
ret = []
ret.append(np.round(np.nanmean(crps1), 2))
ret.append(np.round(np.nanstd(crps1), 2))
ret.append(np.round(np.nanmean(times1), 2))
ret.append(np.round(np.nanstd(times1), 2))
return ret
def unified_scaled_probabilistic(experiments, tam, save=False, file=None,

View File

@ -81,6 +81,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
partitions = __pop("partitions", [10], kwargs)
steps_ahead = __pop('steps_ahead', [1], kwargs)
methods = __pop('methods', None, kwargs)
models = __pop('models', None, kwargs)
@ -178,19 +180,26 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
else:
partitioners_pool = partitioners_models
rng1 = partitioners_pool
rng1 = steps_ahead
if progress:
rng1 = tqdm(steps_ahead, desc="Steps")
for step in rng1:
rng2 = partitioners_pool
if progress:
rng1 = tqdm(partitioners_pool, desc="Partitioners")
rng2 = tqdm(partitioners_pool, desc="Partitioners")
for partitioner in rng1:
for partitioner in rng2:
rng2 = enumerate(pool,start=0)
rng3 = enumerate(pool,start=0)
if progress:
rng2 = enumerate(tqdm(pool, desc="Models"),start=0)
rng3 = enumerate(tqdm(pool, desc="Models"),start=0)
for _id, model in rng2:
for _id, model in rng3:
kwargs['steps_ahead'] = step
if not distributed:
job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
@ -303,6 +312,9 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwarg
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
_key += str(steps_ahead)
_key += str(method) if method is not None else ""
if transformation is not None:
mfts.append_transformation(transformation)
@ -363,6 +375,9 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw
if transformation is not None:
mfts.append_transformation(transformation)
_key += str(steps_ahead)
_key += str(method) if method is not None else ""
_start = time.time()
mfts.fit(train_data, order=mfts.order, **kwargs)
_end = time.time()
@ -421,6 +436,9 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
_key += str(steps_ahead)
_key += str(method) if method is not None else ""
if transformation is not None:
mfts.append_transformation(transformation)
@ -478,8 +496,8 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
smape = {}
u = {}
times = {}
steps = None
method = None
steps = {}
method = {}
for job in jobs:
_key = job['key']
@ -489,6 +507,8 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
smape[_key] = []
u[_key] = []
times[_key] = []
steps[_key] = []
method[_key] = []
steps[_key] = job['steps']
method[_key] = job['method']
rmse[_key].append(job['rmse'])
@ -496,7 +516,7 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False)
u[_key].append(job['u'])
times[_key].append(job['time'])
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u, steps, method)
def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False):
@ -509,6 +529,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
q75 = {}
q95 = {}
times = {}
steps = {}
method = {}
for job in jobs:
_key = job['key']
@ -522,6 +544,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
q25[_key] = []
q75[_key] = []
q95[_key] = []
steps[_key] = []
method[_key] = []
sharpness[_key].append(job['sharpness'])
resolution[_key].append(job['resolution'])
@ -531,16 +555,18 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal
q25[_key].append(job['Q25'])
q75[_key].append(job['Q75'])
q95[_key].append(job['Q95'])
steps[_key] = job['steps']
method[_key] = job['method']
return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
times, q05, q25, q75, q95)
times, q05, q25, q75, q95, steps, method)
def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False):
objs = {}
crps = {}
times = {}
steps = {}
method = {}
for job in jobs:
_key = job['key']
@ -548,11 +574,15 @@ def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sinteti
objs[_key] = job['obj']
crps[_key] = []
times[_key] = []
steps[_key] = []
method[_key] = []
crps[_key].append(job['CRPS'])
times[_key].append(job['time'])
steps[_key] = job['steps']
method[_key] = job['method']
return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic)
return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic, steps, method)

View File

@ -89,19 +89,22 @@ class FTS(object):
steps_ahead = kwargs.get("steps_ahead", None)
if type == 'point' and (steps_ahead == None or steps_ahead == 1):
if steps_ahead == None or steps_ahead == 1:
if type == 'point':
ret = self.forecast(ndata, **kwargs)
elif type == 'point' and steps_ahead > 1:
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
elif type == 'interval' and (steps_ahead == None or steps_ahead == 1):
elif type == 'interval':
ret = self.forecast_interval(ndata, **kwargs)
elif type == 'interval' and steps_ahead > 1:
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
elif type == 'distribution' and (steps_ahead == None or steps_ahead == 1):
elif type == 'distribution':
ret = self.forecast_distribution(ndata, **kwargs)
elif type == 'distribution' and steps_ahead > 1:
elif steps_ahead > 1:
if type == 'point':
ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
elif type == 'interval':
ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
elif type == 'distribution':
ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
else:
if not ['point', 'interval', 'distribution'].__contains__(type):
raise ValueError('The argument \'type\' has an unknown value.')
else:
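As a hedged illustration of the refactored dispatch above, a single predict call now routes one-step and multi-step requests for every forecast type; model and test_data are placeholders for a fitted forecaster and a held-out series:

# One-step forecasts (steps_ahead omitted or equal to 1).
points = model.predict(test_data, type='point')
intervals = model.predict(test_data, type='interval')

# Multi-step forecasts are routed to the corresponding forecast_ahead_* method.
distributions = model.predict(test_data, type='distribution', steps_ahead=10)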

View File

@ -372,6 +372,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
flrgs = self.generate_lhs_flrg(sample)
if 'type' in kwargs:
kwargs.pop('type')
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
for bin in _bins:
@ -409,7 +412,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret.append(ret[-1])
else:
mp = self.forecast([ret[x] for x in np.arange(k - self.order, k)], **kwargs)
ret.append(mp)
ret.append(mp[0])
return ret[self.order:]
@ -427,13 +430,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
start = kwargs.get('start', self.order)
sample = data[start - (self.order - 1): start + 1]
sample = data[start - self.order: start]
ret = [[k, k] for k in sample]
for k in np.arange(self.order, steps+self.order):
if self.__check_interval_bounds(ret[-1]):
if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
ret.append(ret[-1])
else:
lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.order, k)], **kwargs)
@ -460,9 +463,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
start = kwargs.get('start', self.order)
sample = ndata[start - (self.order - 1): start + 1]
sample = ndata[start - self.order: start]
for dat in sample:
if 'type' in kwargs:
kwargs.pop('type')
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
tmp.set(dat, 1.0)
ret.append(tmp)

View File

@ -11,7 +11,7 @@ class ProbabilityDistribution(object):
If type is histogram, the PDF is discrete
If type is KDE the PDF is continuous
"""
def __init__(self,type = "KDE", **kwargs):
def __init__(self, type = "KDE", **kwargs):
self.uod = kwargs.get("uod", None)
self.type = type

View File

@ -19,14 +19,15 @@ from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import pwfts
'''
#'''
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False, orders=[1], partitions=[10], #np.arange(10,100,2),
progress=False, type='distribution',
distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
save=True, file="pwfts_taiex_interval.csv")
'''
progress=False, type='distribution', steps_ahead=[1,4,7,10],
#distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
save=True, file="pwfts_taiex_distribution.csv")
#'''
'''
train_split = 2000
test_length = 200
@ -55,7 +56,7 @@ tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
print(tmp)
'''
'''
tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='di