- Bug fixes and improvements on benchmarks

Petrônio Cândido de Lima e Silva 2017-05-24 00:31:05 -03:00
parent 849cd74bff
commit 9af879e195
9 changed files with 495 additions and 160 deletions

View File

@ -65,27 +65,30 @@ def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
:return:
"""
fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
- c = 0
- for mfts in models:
+ for c, mfts in enumerate(models):
+ if len(models) > 1:
+ ax = axes[c]
+ else:
+ ax = axes
forecasts = mfts.forecast(targets)
res = residuals(targets,forecasts,mfts.order)
mu = np.mean(res)
sig = np.std(res)
- axes[c][0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
- axes[c][0].set_ylabel('E')
- axes[c][0].set_xlabel('T')
- axes[c][0].plot(res)
+ ax[0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
+ ax[0].set_ylabel('E')
+ ax[0].set_xlabel('T')
+ ax[0].plot(res)
- axes[c][1].set_title("Residuals Autocorrelation")
- axes[c][1].set_ylabel('ACS')
- axes[c][1].set_xlabel('Lag')
- axes[c][1].acorr(res)
+ ax[1].set_title("Residuals Autocorrelation")
+ ax[1].set_ylabel('ACS')
+ ax[1].set_xlabel('Lag')
+ ax[1].acorr(res)
- axes[c][2].set_title("Residuals Histogram")
- axes[c][2].set_ylabel('Freq')
- axes[c][2].set_xlabel('Bins')
- axes[c][2].hist(res)
+ ax[2].set_title("Residuals Histogram")
+ ax[2].set_ylabel('Freq')
+ ax[2].set_xlabel('Bins')
+ ax[2].hist(res)
- c += 1
@ -98,25 +101,29 @@ def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
for c, mfts in enumerate(models, start=0):
+ if len(models) > 1:
+ ax = axes[c]
+ else:
+ ax = axes
forecasts = mfts.forecast(targets)
res = residuals(targets, forecasts, mfts.order)
mu = np.mean(res)
sig = np.std(res)
- if c == 0: axes[c][0].set_title("Residuals", size='large')
- axes[c][0].set_ylabel(mfts.shortname, size='large')
- axes[c][0].set_xlabel(' ')
- axes[c][0].plot(res)
+ if c == 0: ax[0].set_title("Residuals", size='large')
+ ax[0].set_ylabel(mfts.shortname, size='large')
+ ax[0].set_xlabel(' ')
+ ax[0].plot(res)
- if c == 0: axes[c][1].set_title("Residuals Autocorrelation", size='large')
- axes[c][1].set_ylabel('ACS')
- axes[c][1].set_xlabel('Lag')
- axes[c][1].acorr(res)
+ if c == 0: ax[1].set_title("Residuals Autocorrelation", size='large')
+ ax[1].set_ylabel('ACS')
+ ax[1].set_xlabel('Lag')
+ ax[1].acorr(res)
- if c == 0: axes[c][2].set_title("Residuals Histogram", size='large')
- axes[c][2].set_ylabel('Freq')
- axes[c][2].set_xlabel('Bins')
- axes[c][2].hist(res)
+ if c == 0: ax[2].set_title("Residuals Histogram", size='large')
+ ax[2].set_ylabel('Freq')
+ ax[2].set_xlabel('Bins')
+ ax[2].hist(res)
plt.tight_layout()
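The refactor above replaces direct axes[c][...] indexing with a per-row ax handle. A minimal standalone sketch (not pyFTS code, based on standard matplotlib behaviour) of why the guard is needed: with a single model, plt.subplots(nrows=1, ncols=3) returns a one-dimensional axes array, so axes[c][0] fails, while ax = axes[c] if len(models) > 1 else axes works for both shapes.

# Standalone sketch, not pyFTS code: why the single-row case needs "ax = axes"
import matplotlib.pyplot as plt
import numpy as np

def plot_rows(series_list, tam=(8, 8)):
    fig, axes = plt.subplots(nrows=len(series_list), ncols=3, figsize=tam)
    for c, res in enumerate(series_list):
        # axes is a 2-D array when there are several rows, 1-D when there is only one
        ax = axes[c] if len(series_list) > 1 else axes
        ax[0].plot(res)   # residuals over time
        ax[1].acorr(res)  # autocorrelation
        ax[2].hist(res)   # histogram
    plt.tight_layout()
    return fig

plot_rows([np.random.randn(100)])      # one row: axes is 1-D
plot_rows([np.random.randn(100)] * 2)  # two rows: axes is 2-D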

View File

@ -9,7 +9,7 @@ import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser
- from mpl_toolkits.mplot3d import Axes3D
+ #from mpl_toolkits.mplot3d import Axes3D
import numpy as np
@ -20,8 +20,9 @@ from pyFTS.common import Util
def extract_measure(dataframe,measure,data_columns):
if not dataframe.empty:
- tmp = dataframe[(dataframe.Measure == measure)][data_columns].to_dict(orient="records")[0]
- ret = [k for k in tmp.values()]
+ df = dataframe[(dataframe.Measure == measure)][data_columns]
+ tmp = df.to_dict(orient="records")[0]
+ ret = [k for k in tmp.values() if not np.isnan(k)]
return ret
else:
return None
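For reference, a self-contained sketch of the patched extract_measure logic on a toy dataframe (the toy data is hypothetical; the real input comes from the benchmark CSV files). NaN entries produced by failed or skipped experiment windows are now dropped before the values reach the boxplots.

# Toy illustration of the NaN filtering added above (same logic, hypothetical data)
import numpy as np
import pandas as pd

def extract_measure(dataframe, measure, data_columns):
    if not dataframe.empty:
        df = dataframe[(dataframe.Measure == measure)][data_columns]
        tmp = df.to_dict(orient="records")[0]
        return [k for k in tmp.values() if not np.isnan(k)]
    return None

toy = pd.DataFrame({"Measure": ["RMSE"], "0": [1.5], "1": [np.nan], "2": [2.0]})
print(extract_measure(toy, "RMSE", ["0", "1", "2"]))  # [1.5, 2.0] -- the NaN window is dropped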
@ -191,7 +192,7 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
ret.append(mod)
dat = pd.DataFrame(ret, columns=point_dataframe_synthetic_columns())
- dat.to_csv(Util.uniquefilename(outfile), sep=";", index=False)
+ dat.to_csv(outfile, sep=";", index=False)
def analytical_data_columns(experiments):
@ -199,23 +200,29 @@ def analytical_data_columns(experiments):
return data_columns
- def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
+ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
+ sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
+ sort_ascend=[1, 1, 1, 1],save_best=False,
+ ignore=None,replace=None):
- fig, axes = plt.subplots(nrows=4, ncols=1, figsize=tam)
+ fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
axes[0].set_title('RMSE')
axes[1].set_title('SMAPE')
axes[2].set_title('U Statistic')
- axes[3].set_title('Execution Time')
dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_synthetic_columns())
- bests = find_best(dat_syn, ['UAVG','RMSEAVG','USTD','RMSESTD'], [1,1,1,1])
+ bests = find_best(dat_syn, sort_columns, sort_ascend)
dat_ana = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments))
data_columns = analytical_data_columns(experiments)
if save_best:
dat = pd.DataFrame.from_dict(bests, orient='index')
dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
rmse = []
smape = []
u = []
@ -223,6 +230,9 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
labels = []
for b in sorted(bests.keys()):
if check_ignore_list(b, ignore):
continue
best = bests[b]
tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
& (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
@ -230,14 +240,36 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
smape.append(extract_measure(tmp, 'SMAPE', data_columns))
u.append(extract_measure(tmp, 'U', data_columns))
times.append(extract_measure(tmp, 'TIME', data_columns))
- labels.append(best["Model"] + " " + str(best["Order"]))
- axes[0].boxplot(rmse, labels=labels, showmeans=True)
- axes[1].boxplot(smape, labels=labels, showmeans=True)
- axes[2].boxplot(u, labels=labels, showmeans=True)
- axes[3].boxplot(times, labels=labels, showmeans=True)
+ labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]),replace))
- plt.show()
+ axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
+ axes[0].set_title("RMSE")
+ axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
+ axes[1].set_title("SMAPE")
+ axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
+ axes[2].set_title("U Statistic")
+ plt.tight_layout()
+ Util.showAndSaveImage(fig, file, save)
def check_replace_list(m, replace):
if replace is not None:
for r in replace:
if r[0] in m:
return r[1]
return m
def check_ignore_list(b, ignore):
flag = False
if ignore is not None:
for i in ignore:
if i in b:
flag = True
return flag
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times, q05, q25, q75, q95):
@ -336,10 +368,170 @@ def interval_dataframe_analytic_columns(experiments):
def interval_dataframe_synthetic_columns():
columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
"COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD", "SIZE"]
"COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD"]
return columns
def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
columns = interval_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique()
orders = dat.Order.unique()
schemes = dat.Scheme.unique()
partitions = dat.Partitions.unique()
data_columns = analytical_data_columns(experiments)
ret = []
for m in models:
for o in orders:
for s in schemes:
for p in partitions:
mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
if not df.empty:
sharpness = extract_measure(df, 'Sharpness', data_columns)
resolution = extract_measure(df, 'Resolution', data_columns)
coverage = extract_measure(df, 'Coverage', data_columns)
times = extract_measure(df, 'TIME', data_columns)
q05 = extract_measure(df, 'Q05', data_columns)
q25 = extract_measure(df, 'Q25', data_columns)
q75 = extract_measure(df, 'Q75', data_columns)
q95 = extract_measure(df, 'Q95', data_columns)
mod.append(m)
mod.append(o)
mod.append(s)
mod.append(p)
mod.append(np.round(np.nanmean(sharpness), 2))
mod.append(np.round(np.nanstd(sharpness), 2))
mod.append(np.round(np.nanmean(resolution), 2))
mod.append(np.round(np.nanstd(resolution), 2))
mod.append(np.round(np.nanmean(coverage), 2))
mod.append(np.round(np.nanstd(coverage), 2))
mod.append(np.round(np.nanmean(times), 4))
mod.append(np.round(np.nanstd(times), 4))
mod.append(np.round(np.nanmean(q05), 4))
mod.append(np.round(np.nanstd(q05), 4))
mod.append(np.round(np.nanmean(q25), 4))
mod.append(np.round(np.nanstd(q25), 4))
mod.append(np.round(np.nanmean(q75), 4))
mod.append(np.round(np.nanstd(q75), 4))
mod.append(np.round(np.nanmean(q95), 4))
mod.append(np.round(np.nanstd(q95), 4))
ret.append(mod)
dat = pd.DataFrame(ret, columns=interval_dataframe_synthetic_columns())
dat.to_csv(outfile, sep=";", index=False)
def plot_dataframe_interval(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
sort_ascend=[True, False, True, True],save_best=False,
ignore=None, replace=None):
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
axes[0].set_title('Sharpness')
axes[1].set_title('Resolution')
axes[2].set_title('Coverage')
dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=interval_dataframe_synthetic_columns())
bests = find_best(dat_syn, sort_columns, sort_ascend)
dat_ana = pd.read_csv(file_analytic, sep=";", usecols=interval_dataframe_analytic_columns(experiments))
data_columns = analytical_data_columns(experiments)
if save_best:
dat = pd.DataFrame.from_dict(bests, orient='index')
dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
sharpness = []
resolution = []
coverage = []
times = []
labels = []
bounds_shp = []
for b in sorted(bests.keys()):
if check_ignore_list(b, ignore):
continue
best = bests[b]
df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
& (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
sharpness.append( extract_measure(df,'Sharpness',data_columns) )
resolution.append(extract_measure(df, 'Resolution', data_columns))
coverage.append(extract_measure(df, 'Coverage', data_columns))
times.append(extract_measure(df, 'TIME', data_columns))
labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
axes[0].set_title("Sharpness")
axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
axes[1].set_title("Resolution")
axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
axes[2].set_title("Coverage")
axes[2].set_ylim([0, 1.1])
plt.tight_layout()
Util.showAndSaveImage(fig, file, save)
def plot_dataframe_interval_pinball(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'],
sort_ascend=[True, False, True, True], save_best=False,
ignore=None, replace=None):
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
axes[0].set_title(r'$\tau=0.05$')
axes[1].set_title(r'$\tau=0.25$')
axes[2].set_title(r'$\tau=0.75$')
axes[3].set_title(r'$\tau=0.95$')
dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=interval_dataframe_synthetic_columns())
bests = find_best(dat_syn, sort_columns, sort_ascend)
dat_ana = pd.read_csv(file_analytic, sep=";", usecols=interval_dataframe_analytic_columns(experiments))
data_columns = analytical_data_columns(experiments)
if save_best:
dat = pd.DataFrame.from_dict(bests, orient='index')
dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
q05 = []
q25 = []
q75 = []
q95 = []
labels = []
for b in sorted(bests.keys()):
if check_ignore_list(b, ignore):
continue
best = bests[b]
df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
& (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
q05.append(extract_measure(df, 'Q05', data_columns))
q25.append(extract_measure(df, 'Q25', data_columns))
q75.append(extract_measure(df, 'Q75', data_columns))
q95.append(extract_measure(df, 'Q95', data_columns))
labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
axes[0].boxplot(q05, labels=labels, vert=False, autorange=True, showmeans=True)
axes[1].boxplot(q25, labels=labels, vert=False, autorange=True, showmeans=True)
axes[2].boxplot(q75, labels=labels, vert=False, autorange=True, showmeans=True)
axes[3].boxplot(q95, labels=labels, vert=False, autorange=True, showmeans=True)
plt.tight_layout()
Util.showAndSaveImage(fig, file, save)
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic):
"""
Save benchmark results for m-step ahead probabilistic forecasters
@ -438,5 +630,90 @@ def ahead_dataframe_analytic_columns(experiments):
def ahead_dataframe_synthetic_columns():
columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
"SIZE", "TIME1AVG", "TIME2AVG"]
"TIME1AVG", "TIME1STD", "TIME2AVG", "TIME2STD"]
return columns
def cast_dataframe_to_synthetic_ahead(infile, outfile, experiments):
columns = ahead_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique()
orders = dat.Order.unique()
schemes = dat.Scheme.unique()
partitions = dat.Partitions.unique()
data_columns = analytical_data_columns(experiments)
ret = []
for m in models:
for o in orders:
for s in schemes:
for p in partitions:
mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
if not df.empty:
crps1 = extract_measure(df, 'CRPS_Interval', data_columns)
crps2 = extract_measure(df, 'CRPS_Distribution', data_columns)
times1 = extract_measure(df, 'TIME_Interval', data_columns)
times2 = extract_measure(df, 'TIME_Distribution', data_columns)
mod.append(m)
mod.append(o)
mod.append(s)
mod.append(p)
mod.append(np.round(np.nanmean(crps1), 2))
mod.append(np.round(np.nanstd(crps1), 2))
mod.append(np.round(np.nanmean(crps2), 2))
mod.append(np.round(np.nanstd(crps2), 2))
mod.append(np.round(np.nanmean(times1), 2))
mod.append(np.round(np.nanstd(times1), 2))
mod.append(np.round(np.nanmean(times2), 4))
mod.append(np.round(np.nanstd(times2), 4))
ret.append(mod)
dat = pd.DataFrame(ret, columns=ahead_dataframe_synthetic_columns())
dat.to_csv(outfile, sep=";", index=False)
def plot_dataframe_ahead(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
sort_ascend=[True, True, True, True],save_best=False,
ignore=None, replace=None):
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
axes[0].set_title('CRPS Interval Ahead')
axes[1].set_title('CRPS Distribution Ahead')
dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=ahead_dataframe_synthetic_columns())
bests = find_best(dat_syn, sort_columns, sort_ascend)
dat_ana = pd.read_csv(file_analytic, sep=";", usecols=ahead_dataframe_analytic_columns(experiments))
data_columns = analytical_data_columns(experiments)
if save_best:
dat = pd.DataFrame.from_dict(bests, orient='index')
dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
crps1 = []
crps2 = []
labels = []
for b in sorted(bests.keys()):
if check_ignore_list(b, ignore):
continue
best = bests[b]
df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
& (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
crps1.append( extract_measure(df,'CRPS_Interval',data_columns) )
crps2.append(extract_measure(df, 'CRPS_Distribution', data_columns))
labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)
plt.tight_layout()
Util.showAndSaveImage(fig, file, save)
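The new ignore/replace hooks used by the plot_dataframe_* functions are plain substring filters over the model labels. A short standalone sketch, copying the two helpers above and exercising them on made-up labels:

# Same logic as check_ignore_list / check_replace_list above, on made-up labels
def check_replace_list(m, replace):
    if replace is not None:
        for r in replace:
            if r[0] in m:
                return r[1]
    return m

def check_ignore_list(b, ignore):
    flag = False
    if ignore is not None:
        for i in ignore:
            if i in b:
                flag = True
    return flag

print(check_ignore_list("ARIMA (2, 0, 2)", ["ARIMA"]))             # True -> model skipped in the plot
print(check_replace_list("PWFTS 3", [("PWFTS", "Prob. WFTS 3")]))  # relabelled to "Prob. WFTS 3"
print(check_replace_list("HOFTS 2", [("PWFTS", "Prob. WFTS 3")]))  # no match -> label unchanged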

View File

@ -14,7 +14,7 @@ import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
- from mpl_toolkits.mplot3d import Axes3D
+ #from mpl_toolkits.mplot3d import Axes3D
from pyFTS.probabilistic import ProbabilityDistribution
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
@ -213,10 +213,35 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
pool = []
if models is None:
models = get_point_methods()
for model in models:
mfts = model("")
if mfts.is_high_order:
for order in np.arange(1, max_order + 1):
if order >= mfts.min_order:
mfts = model("")
mfts.order = order
pool.append(mfts)
else:
mfts.order = 1
pool.append(mfts)
if benchmark_models is not None:
for count, model in enumerate(benchmark_models, start=0):
par = benchmark_models_parameters[count]
mfts = model(str(par if par is not None else ""))
mfts.order = par
pool.append(mfts)
return pool
def all_point_forecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True,
series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None,
- distributions=False):
+ distributions=False, benchmark_models=None, benchmark_models_parameters=None):
"""
Fixed data benchmark for FTS point forecasters
:param data_train: data used to train the models
@ -234,8 +259,7 @@ def all_point_forecasters(data_train, data_test, partitions, max_order=3, statis
:param distributions: plot distributions
:return:
"""
- if models is None:
- models = get_point_methods()
+ models = build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters)
objs = []
@ -247,22 +271,11 @@ def all_point_forecasters(data_train, data_test, partitions, max_order=3, statis
for count, model in enumerate(models, start=0):
#print(model)
- mfts = model("")
- if not mfts.is_high_order:
- if transformation is not None:
- mfts.appendTransformation(transformation)
- mfts.train(data_train, data_train_fs.sets)
- objs.append(mfts)
+ model.appendTransformation(transformation)
+ model.train(data_train, data_train_fs.sets, order=model.order)
+ objs.append(model)
lcolors.append( colors[count % ncol] )
- else:
- for order in np.arange(1,max_order+1):
- if order >= mfts.min_order:
- mfts = model(" n = " + str(order))
- if transformation is not None:
- mfts.appendTransformation(transformation)
- mfts.train(data_train, data_train_fs.sets, order=order)
- objs.append(mfts)
- lcolors.append(colors[(count + order) % ncol])
if statistics:
print_point_statistics(data_test, objs)
@ -421,38 +434,55 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times)
- def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
- models=None, transformation=None):
+ def build_model_pool_interval(models, max_order, benchmark_models, benchmark_models_parameters):
+ pool = []
if models is None:
models = get_interval_methods()
+ for model in models:
+ mfts = model("")
- objs = []
+ if mfts.is_high_order:
+ for order in np.arange(1, max_order + 1):
+ if order >= mfts.min_order:
+ mfts = model("")
+ mfts.order = order
+ pool.append(mfts)
+ else:
+ mfts.order = 1
+ pool.append(mfts)
+ alphas = [0.05, 0.25]
+ if benchmark_models is not None:
+ for count, model in enumerate(benchmark_models, start=0):
+ par = benchmark_models_parameters[count]
+ for alpha in alphas:
+ mfts = model(str(alpha), alpha=alpha)
+ mfts.order = par
+ pool.append(mfts)
+ return pool
- data_train_fs = Grid.GridPartitioner(data_train,partitions, transformation=transformation).sets
+ def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
+ statistics=False, models=None, transformation=None,
+ benchmark_models=None, benchmark_models_parameters=None):
+ models = build_model_pool_interval(models, max_order, benchmark_models, benchmark_models_parameters)
+ data_train_fs = Grid.GridPartitioner(data_train, partitions, transformation=transformation).sets
lcolors = []
+ objs = []
for count, model in Util.enumerate2(models, start=0, step=2):
- mfts = model("")
- if not mfts.is_high_order:
- if transformation is not None:
- mfts.appendTransformation(transformation)
- mfts.train(data_train, data_train_fs)
- objs.append(mfts)
+ model.appendTransformation(transformation)
+ model.train(data_train, data_train_fs, order=model.order)
+ objs.append(model)
lcolors.append( colors[count % ncol] )
- else:
- for order in np.arange(1,max_order+1):
- if order >= mfts.min_order:
- mfts = model(" n = " + str(order))
- if transformation is not None:
- mfts.appendTransformation(transformation)
- mfts.train(data_train, data_train_fs, order=order)
- objs.append(mfts)
- lcolors.append(colors[count % ncol])
+ if statistics:
print_interval_statistics(data_test, objs)
- plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True)
+ plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
+ points=False, intervals=True)
def print_interval_statistics(original, models):
@ -467,15 +497,6 @@ def print_interval_statistics(original, models):
print(ret)
- def plot_distribution(dist):
- for k in dist.index:
- alpha = np.array([dist[x][k] for x in dist]) * 100
- x = [k for x in np.arange(0, len(alpha))]
- y = dist.columns
- plt.scatter(x, y, c=alpha, marker='s', linewidths=0, cmap='Oranges', norm=pltcolors.Normalize(vmin=0, vmax=1),
- vmin=0, vmax=1, edgecolors=None)
def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
points=True, intervals=True, linewidth=1.5):
"""
@ -506,11 +527,13 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
for count, fts in enumerate(models, start=0):
if fts.has_point_forecasting and points:
forecasted = fts.forecast(original)
if isinstance(forecasted, np.ndarray):
forecasted = forecasted.tolist()
mi.append(min(forecasted) * 0.95)
ma.append(max(forecasted) * 1.05)
for k in np.arange(0, fts.order):
forecasted.insert(0, None)
- lbl = fts.shortname
+ lbl = fts.shortname + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
if typeonlegend: lbl += " (Point)"
ax.plot(forecasted, color=colors[count], label=lbl, ls="-",linewidth=linewidth)
@ -523,7 +546,7 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
for k in np.arange(0, fts.order):
lower.insert(0, None)
upper.insert(0, None)
- lbl = fts.shortname
+ lbl = fts.shortname + " " + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
if typeonlegend: lbl += " (Interval)"
if not points and intervals:
ls = "-"
@ -556,8 +579,6 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
ax.legend(handles0, labels0)
def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
save=False, file=None, synthetic=False):
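A hypothetical call to the new build_model_pool_point helper added in this file. The module path (pyFTS.benchmarks.benchmarks) and the FTS class names are assumptions based on the imports shown in this diff, not confirmed by it.

# Hypothetical usage sketch; module path and class names are assumptions
from pyFTS import chen, hofts
from pyFTS.benchmarks import arima, quantreg
from pyFTS.benchmarks import benchmarks as bchmk

pool = bchmk.build_model_pool_point(
    models=[chen.ConventionalFTS, hofts.HighOrderFTS],  # None falls back to get_point_methods()
    max_order=3,                                        # high-order models get one instance per order >= min_order
    benchmark_models=[arima.ARIMA, quantreg.QuantileRegression],
    benchmark_models_parameters=[(1, 0, 0), 1])         # one parameter entry per benchmark model

for mfts in pool:
    print(type(mfts).__name__, mfts.order)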

View File

@ -101,13 +101,6 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
:return: DataFrame with the results
"""
- if benchmark_models is None and models is None:
- benchmark_models = [naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
- quantreg.QuantileRegression, quantreg.QuantileRegression]
- if benchmark_models_parameters is None:
- benchmark_models_parameters = [1, (1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
http_server = dispy.httpd.DispyHTTPServer(cluster)
@ -116,7 +109,7 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))
- pool = []
jobs = []
objs = {}
rmse = {}
@ -124,28 +117,7 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
u = {}
times = {}
- if models is None:
- models = benchmarks.get_point_methods()
- for model in models:
- mfts = model("")
- if mfts.is_high_order:
- for order in np.arange(1, max_order + 1):
- if order >= mfts.min_order:
- mfts = model("")
- mfts.order = order
- pool.append(mfts)
- else:
- mfts.order = 1
- pool.append(mfts)
- if benchmark_models is not None:
- for count, model in enumerate(benchmark_models, start=0):
- par = benchmark_models_parameters[count]
- mfts = model(str(par if par is not None else ""))
- mfts.order = par
- pool.append(mfts)
+ pool = build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters)
experiments = 0
for ct, train, test in Util.sliding_window(data, windowsize, train, inc):
@ -204,6 +176,40 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
pool = []
if benchmark_models is None and models is None:
benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
quantreg.QuantileRegression, quantreg.QuantileRegression]
if benchmark_models_parameters is None:
benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
if models is None:
models = benchmarks.get_point_methods()
for model in models:
mfts = model("")
if mfts.is_high_order:
for order in np.arange(1, max_order + 1):
if order >= mfts.min_order:
mfts = model("")
mfts.order = order
pool.append(mfts)
else:
mfts.order = 1
pool.append(mfts)
if benchmark_models is not None:
for count, model in enumerate(benchmark_models, start=0):
par = benchmark_models_parameters[count]
mfts = model(str(par if par is not None else ""))
mfts.order = par
pool.append(mfts)
return pool
def run_interval(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):
"""
Interval forecast benchmark function to be executed on cluster nodes

View File

@ -26,9 +26,9 @@ def showAndSaveImage(fig,file,flag,lgd=None):
if flag:
plt.show()
if lgd is not None:
- fig.savefig(uniquefilename(file), additional_artists=lgd,bbox_inches='tight') #bbox_extra_artists=(lgd,), )
+ fig.savefig(file, additional_artists=lgd,bbox_inches='tight') #bbox_extra_artists=(lgd,), )
else:
- fig.savefig(uniquefilename(file))
+ fig.savefig(file)
plt.close(fig)

View File

@ -15,7 +15,7 @@ class IntervalFTS(hofts.HighOrderFTS):
self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
self.flrgs = {}
- self.has_point_forecasting = False
+ self.has_point_forecasting = True
self.has_interval_forecasting = True
self.is_high_order = True
def getUpper(self, flrg):

View File

@ -537,9 +537,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
[intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)])
grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid))
- tmp = np.array([grid[k] for k in sorted(grid)])
+ tmp = np.array([grid[k] for k in sorted(grid) if not np.isnan(grid[k])])
+ try:
ret.append(tmp / sum(tmp))
+ except Exception as ex:
+ ret.append(0)
else:
ret = []
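Without the isnan filter a single NaN bin would propagate into every normalised probability. A standalone sketch of the patched logic (simplified; not the pwfts code itself):

# Standalone sketch of the guarded normalisation (same idea, simplified)
import numpy as np

def normalize_distribution(grid):
    # keep only finite bin counts, as in the patched list comprehension
    tmp = np.array([grid[k] for k in sorted(grid) if not np.isnan(grid[k])])
    try:
        return tmp / sum(tmp)
    except Exception:
        return 0  # same fallback value as the patch when normalisation fails

print(normalize_distribution({1: 2.0, 2: np.nan, 3: 2.0}))  # [0.5 0.5]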

View File

@ -62,7 +62,7 @@ class ExponentialyWeightedFTS(fts.FTS):
flrgs[flr.LHS.name].append(flr.RHS)
return (flrgs)
- def train(self, data, sets,order=1,parameters=2):
+ def train(self, data, sets,order=1,parameters=1.05):
self.c = parameters
self.sets = sets
ndata = self.doTransformations(data)
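For context, a small sketch of how the default weight base changes the behaviour, assuming an exponential weighting scheme with weights proportional to c**i (the actual formula lives in the FLRG class elsewhere in sadaei.py and is an assumption here): with c = 2 the later weights dominate, while c = 1.05 gives nearly uniform weights.

# Assumed weighting scheme w_i ~ c**i, for illustration only; see sadaei.py for the real formula
import numpy as np

def exponential_weights(c, n):
    w = np.array([c ** i for i in np.arange(n)], dtype=float)
    return w / w.sum()

print(np.round(exponential_weights(2.0, 5), 3))   # [0.032 0.065 0.129 0.258 0.516] -> strongly skewed
print(np.round(exponential_weights(1.05, 5), 3))  # roughly [0.18 0.19 0.20 0.21 0.22] -> almost uniform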

View File

@ -6,7 +6,7 @@ import numpy as np
import pandas as pd
import matplotlib as plt
import matplotlib.pyplot as plt
- from mpl_toolkits.mplot3d import Axes3D
+ #from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
@ -41,28 +41,30 @@ DATASETS
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex = np.array(taiexpd["avg"][:5000])
#del(taiexpd)
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq = np.array(nasdaqpd["avg"][0:5000])
#del(nasdaqpd)
#sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",")
#sp500 = np.array(sp500pd["Avg"][11000:])
#del(sp500pd)
sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
sondapd = sondapd.dropna(axis=0, how='any')
sonda = np.array(sondapd["glo_avg"])
del(sondapd)
#sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
#sondapd = sondapd.dropna(axis=0, how='any')
#sonda = np.array(sondapd["glo_avg"])
#del(sondapd)
#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
#best = np.array(bestpd["Anomaly"])
#del(bestpd)
bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
best = np.array(bestpd["Anomaly"])
del(bestpd)
#print(lag)
#print(a)
#from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import distributed_benchmarks as bchmk
from pyFTS.benchmarks import benchmarks as bchmk
#from pyFTS.benchmarks import distributed_benchmarks as bchmk
#from pyFTS.benchmarks import parallel_benchmarks as bchmk
from pyFTS.benchmarks import Util
from pyFTS.benchmarks import arima, quantreg, Measures
@ -102,7 +104,7 @@ bchmk.plot_compared_series(enrollments,[tmp], ['blue','red'], points=False, inte
#kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)
#print(kk)
#"""
"""
"""
@ -120,9 +122,9 @@ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.Weighted
dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
"""
"""
bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
@ -131,28 +133,48 @@ bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.Weighte
"_interval_analytic.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
- bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.WeightedFTS], # #
+ bchmk.interval_sliding_window(taiex, 2000, train=0.8, inc=0.1, #models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
partitions= np.arange(3,20,step=2), transformation=diff,
dump=True, save=True, file="experiments/sp500_analytic_diff.csv",
dump=True, save=True, file="experiments/taiex_interval_analytic_diff.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.2,
partitioners=[Grid.GridPartitioner],
partitions= np.arange(10,200,step=10), indexer=ix,
dump=True, save=True, file="experiments/sondawind_ahead_analytic.csv",
nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.2,
partitioners=[Grid.GridPartitioner],
partitions= np.arange(3,20,step=2), transformation=diff, indexer=ix,
dump=True, save=True, file="experiments/sondawind_ahead_analytic_diff.csv",
nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
"""
#"""
from pyFTS import pwfts
from pyFTS.common import Transformations
from pyFTS.partitioners import Grid
bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.5,
partitioners=[Grid.GridPartitioner],
partitions= np.arange(10,200,step=10), indexer=ix,
dump=True, save=True, file="experiments/sondasolar_ahead_analytic.csv",
nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
diff = Transformations.Differential(1)
fs = Grid.GridPartitioner(best, 190) #, transformation=diff)
bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.5,
partitioners=[Grid.GridPartitioner],
partitions= np.arange(3,20,step=2), transformation=diff, indexer=ix,
dump=True, save=True, file="experiments/sondasolar_ahead_analytic_diff.csv",
nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
model = pwfts.ProbabilisticWeightedFTS("FTS 1")
#model.appendTransformation(diff)
model.train(best[0:1600],fs.sets, order=3)
bchmk.plot_compared_intervals_ahead(best[1600:1700],[model], ['blue','red'],
distributions=[True], save=True, file="pictures/best_ahead_forecasts",
time_from=40, time_to=60, resolution=100)
"""
from pyFTS.partitioners import Grid