- Bugfixes and improvements on benchmarks

Petrônio Cândido de Lima e Silva 2017-05-24 00:31:05 -03:00
parent 849cd74bff
commit 9af879e195
9 changed files with 495 additions and 160 deletions

View File

@@ -65,27 +65,30 @@ def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
     :return:
     """
     fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
-    c = 0
-    for mfts in models:
+    for c, mfts in enumerate(models):
+        if len(models) > 1:
+            ax = axes[c]
+        else:
+            ax = axes
         forecasts = mfts.forecast(targets)
         res = residuals(targets,forecasts,mfts.order)
         mu = np.mean(res)
         sig = np.std(res)
-        axes[c][0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
-        axes[c][0].set_ylabel('E')
-        axes[c][0].set_xlabel('T')
-        axes[c][0].plot(res)
-        axes[c][1].set_title("Residuals Autocorrelation")
-        axes[c][1].set_ylabel('ACS')
-        axes[c][1].set_xlabel('Lag')
-        axes[c][1].acorr(res)
-        axes[c][2].set_title("Residuals Histogram")
-        axes[c][2].set_ylabel('Freq')
-        axes[c][2].set_xlabel('Bins')
-        axes[c][2].hist(res)
+        ax[0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
+        ax[0].set_ylabel('E')
+        ax[0].set_xlabel('T')
+        ax[0].plot(res)
+        ax[1].set_title("Residuals Autocorrelation")
+        ax[1].set_ylabel('ACS')
+        ax[1].set_xlabel('Lag')
+        ax[1].acorr(res)
+        ax[2].set_title("Residuals Histogram")
+        ax[2].set_ylabel('Freq')
+        ax[2].set_xlabel('Bins')
+        ax[2].hist(res)
         c += 1
@@ -98,25 +101,29 @@ def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
     fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
     for c, mfts in enumerate(models, start=0):
+        if len(models) > 1:
+            ax = axes[c]
+        else:
+            ax = axes
         forecasts = mfts.forecast(targets)
         res = residuals(targets, forecasts, mfts.order)
         mu = np.mean(res)
         sig = np.std(res)
-        if c == 0: axes[c][0].set_title("Residuals", size='large')
-        axes[c][0].set_ylabel(mfts.shortname, size='large')
-        axes[c][0].set_xlabel(' ')
-        axes[c][0].plot(res)
-        if c == 0: axes[c][1].set_title("Residuals Autocorrelation", size='large')
-        axes[c][1].set_ylabel('ACS')
-        axes[c][1].set_xlabel('Lag')
-        axes[c][1].acorr(res)
-        if c == 0: axes[c][2].set_title("Residuals Histogram", size='large')
-        axes[c][2].set_ylabel('Freq')
-        axes[c][2].set_xlabel('Bins')
-        axes[c][2].hist(res)
+        if c == 0: ax[0].set_title("Residuals", size='large')
+        ax[0].set_ylabel(mfts.shortname, size='large')
+        ax[0].set_xlabel(' ')
+        ax[0].plot(res)
+        if c == 0: ax[1].set_title("Residuals Autocorrelation", size='large')
+        ax[1].set_ylabel('ACS')
+        ax[1].set_xlabel('Lag')
+        ax[1].acorr(res)
+        if c == 0: ax[2].set_title("Residuals Histogram", size='large')
+        ax[2].set_ylabel('Freq')
+        ax[2].set_xlabel('Bins')
+        ax[2].hist(res)
     plt.tight_layout()
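Note on the change above: plt.subplots squeezes singleton dimensions by default, so with a single model (nrows=1) axes is already a 1-D array of three axes and the old axes[c][0] indexing fails. The new ax = axes[c] / ax = axes guard in both plotResiduals and plot_residuals handles that case. A standalone sketch of the same pattern, using hypothetical random data instead of pyFTS models:

    import matplotlib.pyplot as plt
    import numpy as np

    def plot_rows(series_list, tam=[8, 8]):
        # With nrows=1 the default squeeze=True yields a 1-D axes array,
        # so per-row indexing must be guarded exactly as in plot_residuals.
        fig, axes = plt.subplots(nrows=len(series_list), ncols=3, figsize=tam)
        for c, res in enumerate(series_list):
            ax = axes[c] if len(series_list) > 1 else axes
            ax[0].plot(res)    # residuals over time
            ax[1].acorr(res)   # autocorrelation
            ax[2].hist(res)    # histogram
        plt.tight_layout()
        return fig

    plot_rows([np.random.normal(size=100)])      # one row: axes is 1-D
    plot_rows([np.random.normal(size=100)] * 3)  # three rows: axes is 2-D

An alternative is plt.subplots(..., squeeze=False), which always returns a 2-D array and removes the need for the guard.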

View File

@@ -9,7 +9,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser
-from mpl_toolkits.mplot3d import Axes3D
+#from mpl_toolkits.mplot3d import Axes3D
 import numpy as np
@@ -20,8 +20,9 @@ from pyFTS.common import Util

 def extract_measure(dataframe,measure,data_columns):
     if not dataframe.empty:
-        tmp = dataframe[(dataframe.Measure == measure)][data_columns].to_dict(orient="records")[0]
-        ret = [k for k in tmp.values()]
+        df = dataframe[(dataframe.Measure == measure)][data_columns]
+        tmp = df.to_dict(orient="records")[0]
+        ret = [k for k in tmp.values() if not np.isnan(k)]
         return ret
     else:
         return None
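Note: the rewritten extract_measure now drops NaN cells. These appear in the analytic CSV whenever one experiment ran fewer sliding windows than another, and they would otherwise leak into the boxplot data downstream. A minimal illustration with a hypothetical results row:

    import numpy as np
    import pandas as pd

    # One row per measure; columns "0".."2" are experiment windows.
    df = pd.DataFrame({"Measure": ["RMSE"], "0": [1.2], "1": [0.9], "2": [np.nan]})

    record = df[df.Measure == "RMSE"][["0", "1", "2"]].to_dict(orient="records")[0]
    values = [v for v in record.values() if not np.isnan(v)]
    print(values)  # [1.2, 0.9]: the NaN window is dropped, as in extract_measure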
@@ -191,7 +192,7 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
             ret.append(mod)

     dat = pd.DataFrame(ret, columns=point_dataframe_synthetic_columns())
-    dat.to_csv(Util.uniquefilename(outfile), sep=";", index=False)
+    dat.to_csv(outfile, sep=";", index=False)


 def analytical_data_columns(experiments):
@@ -199,23 +200,29 @@ def analytical_data_columns(experiments):
     return data_columns


-def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
+def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
+                         sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
+                         sort_ascend=[1, 1, 1, 1],save_best=False,
+                         ignore=None,replace=None):

-    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=tam)
+    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)

     axes[0].set_title('RMSE')
     axes[1].set_title('SMAPE')
     axes[2].set_title('U Statistic')
-    axes[3].set_title('Execution Time')

     dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_synthetic_columns())

-    bests = find_best(dat_syn, ['UAVG','RMSEAVG','USTD','RMSESTD'], [1,1,1,1])
+    bests = find_best(dat_syn, sort_columns, sort_ascend)

     dat_ana = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments))

     data_columns = analytical_data_columns(experiments)

+    if save_best:
+        dat = pd.DataFrame.from_dict(bests, orient='index')
+        dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)

     rmse = []
     smape = []
     u = []
@@ -223,6 +230,9 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, ...):
     labels = []

     for b in sorted(bests.keys()):
+        if check_ignore_list(b, ignore):
+            continue
         best = bests[b]
         tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                       & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
@@ -230,14 +240,36 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, ...):
         smape.append(extract_measure(tmp, 'SMAPE', data_columns))
         u.append(extract_measure(tmp, 'U', data_columns))
         times.append(extract_measure(tmp, 'TIME', data_columns))
-        labels.append(best["Model"] + " " + str(best["Order"]))
+        labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]),replace))

-    axes[0].boxplot(rmse, labels=labels, showmeans=True)
-    axes[1].boxplot(smape, labels=labels, showmeans=True)
-    axes[2].boxplot(u, labels=labels, showmeans=True)
-    axes[3].boxplot(times, labels=labels, showmeans=True)
-
-    plt.show()
+    axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
+    axes[0].set_title("RMSE")
+    axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
+    axes[1].set_title("SMAPE")
+    axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
+    axes[2].set_title("U Statistic")
+
+    plt.tight_layout()
+
+    Util.showAndSaveImage(fig, file, save)
+
+
+def check_replace_list(m, replace):
+    if replace is not None:
+        for r in replace:
+            if r[0] in m:
+                return r[1]
+    return m
+
+
+def check_ignore_list(b, ignore):
+    flag = False
+    if ignore is not None:
+        for i in ignore:
+            if i in b:
+                flag = True
+    return flag


 def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times, q05, q25, q75, q95):
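Note: the two new helpers drive which best models actually reach the plots. check_ignore_list skips any key containing one of the ignore substrings, and check_replace_list swaps a matched label for a display name given as (pattern, replacement) pairs. Their behavior in isolation, with hypothetical labels:

    def check_replace_list(m, replace):
        if replace is not None:
            for r in replace:
                if r[0] in m:
                    return r[1]
        return m

    def check_ignore_list(b, ignore):
        flag = False
        if ignore is not None:
            for i in ignore:
                if i in b:
                    flag = True
        return flag

    print(check_ignore_list("ARIMA(2,0,2)", ["ARIMA"]))                    # True: model is skipped
    print(check_replace_list("PWFTS 2", [("PWFTS", "PWFTS (proposed)")]))  # 'PWFTS (proposed)'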
@@ -336,10 +368,170 @@ def interval_dataframe_analytic_columns(experiments):

 def interval_dataframe_synthetic_columns():
     columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
-               "COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD", "SIZE"]
+               "COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD"]
     return columns


+def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
+    columns = interval_dataframe_analytic_columns(experiments)
+    dat = pd.read_csv(infile, sep=";", usecols=columns)
+    models = dat.Model.unique()
+    orders = dat.Order.unique()
+    schemes = dat.Scheme.unique()
+    partitions = dat.Partitions.unique()
+    data_columns = analytical_data_columns(experiments)
+
+    ret = []
+
+    for m in models:
+        for o in orders:
+            for s in schemes:
+                for p in partitions:
+                    mod = []
+                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
+                    if not df.empty:
+                        sharpness = extract_measure(df, 'Sharpness', data_columns)
+                        resolution = extract_measure(df, 'Resolution', data_columns)
+                        coverage = extract_measure(df, 'Coverage', data_columns)
+                        times = extract_measure(df, 'TIME', data_columns)
+                        q05 = extract_measure(df, 'Q05', data_columns)
+                        q25 = extract_measure(df, 'Q25', data_columns)
+                        q75 = extract_measure(df, 'Q75', data_columns)
+                        q95 = extract_measure(df, 'Q95', data_columns)
+                        mod.append(m)
+                        mod.append(o)
+                        mod.append(s)
+                        mod.append(p)
+                        mod.append(np.round(np.nanmean(sharpness), 2))
+                        mod.append(np.round(np.nanstd(sharpness), 2))
+                        mod.append(np.round(np.nanmean(resolution), 2))
+                        mod.append(np.round(np.nanstd(resolution), 2))
+                        mod.append(np.round(np.nanmean(coverage), 2))
+                        mod.append(np.round(np.nanstd(coverage), 2))
+                        mod.append(np.round(np.nanmean(times), 4))
+                        mod.append(np.round(np.nanstd(times), 4))
+                        mod.append(np.round(np.nanmean(q05), 4))
+                        mod.append(np.round(np.nanstd(q05), 4))
+                        mod.append(np.round(np.nanmean(q25), 4))
+                        mod.append(np.round(np.nanstd(q25), 4))
+                        mod.append(np.round(np.nanmean(q75), 4))
+                        mod.append(np.round(np.nanstd(q75), 4))
+                        mod.append(np.round(np.nanmean(q95), 4))
+                        mod.append(np.round(np.nanstd(q95), 4))
+                        ret.append(mod)
+
+    dat = pd.DataFrame(ret, columns=interval_dataframe_synthetic_columns())
+    dat.to_csv(outfile, sep=";", index=False)
+
+
+def plot_dataframe_interval(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
+                            sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
+                            sort_ascend=[True, False, True, True],save_best=False,
+                            ignore=None, replace=None):
+
+    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
+
+    axes[0].set_title('Sharpness')
+    axes[1].set_title('Resolution')
+    axes[2].set_title('Coverage')
+
+    dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=interval_dataframe_synthetic_columns())
+
+    bests = find_best(dat_syn, sort_columns, sort_ascend)
+
+    dat_ana = pd.read_csv(file_analytic, sep=";", usecols=interval_dataframe_analytic_columns(experiments))
+
+    data_columns = analytical_data_columns(experiments)
+
+    if save_best:
+        dat = pd.DataFrame.from_dict(bests, orient='index')
+        dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
+
+    sharpness = []
+    resolution = []
+    coverage = []
+    times = []
+    labels = []
+    bounds_shp = []
+
+    for b in sorted(bests.keys()):
+        if check_ignore_list(b, ignore):
+            continue
+        best = bests[b]
+        df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
+                     & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
+        sharpness.append( extract_measure(df,'Sharpness',data_columns) )
+        resolution.append(extract_measure(df, 'Resolution', data_columns))
+        coverage.append(extract_measure(df, 'Coverage', data_columns))
+        times.append(extract_measure(df, 'TIME', data_columns))
+        labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
+
+    axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
+    axes[0].set_title("Sharpness")
+    axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
+    axes[1].set_title("Resolution")
+    axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
+    axes[2].set_title("Coverage")
+    axes[2].set_ylim([0, 1.1])
+
+    plt.tight_layout()
+
+    Util.showAndSaveImage(fig, file, save)
+
+
+def plot_dataframe_interval_pinball(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
+                                    sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'],
+                                    sort_ascend=[True, False, True, True], save_best=False,
+                                    ignore=None, replace=None):
+
+    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
+
+    axes[0].set_title(r'$\tau=0.05$')
+    axes[1].set_title(r'$\tau=0.25$')
+    axes[2].set_title(r'$\tau=0.75$')
+    axes[3].set_title(r'$\tau=0.95$')
+
+    dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=interval_dataframe_synthetic_columns())
+
+    bests = find_best(dat_syn, sort_columns, sort_ascend)
+
+    dat_ana = pd.read_csv(file_analytic, sep=";", usecols=interval_dataframe_analytic_columns(experiments))
+
+    data_columns = analytical_data_columns(experiments)
+
+    if save_best:
+        dat = pd.DataFrame.from_dict(bests, orient='index')
+        dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
+
+    q05 = []
+    q25 = []
+    q75 = []
+    q95 = []
+    labels = []
+
+    for b in sorted(bests.keys()):
+        if check_ignore_list(b, ignore):
+            continue
+        best = bests[b]
+        df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
+                     & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
+        q05.append(extract_measure(df, 'Q05', data_columns))
+        q25.append(extract_measure(df, 'Q25', data_columns))
+        q75.append(extract_measure(df, 'Q75', data_columns))
+        q95.append(extract_measure(df, 'Q95', data_columns))
+        labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
+
+    axes[0].boxplot(q05, labels=labels, vert=False, autorange=True, showmeans=True)
+    axes[1].boxplot(q25, labels=labels, vert=False, autorange=True, showmeans=True)
+    axes[2].boxplot(q75, labels=labels, vert=False, autorange=True, showmeans=True)
+    axes[3].boxplot(q95, labels=labels, vert=False, autorange=True, showmeans=True)
+
+    plt.tight_layout()
+
+    Util.showAndSaveImage(fig, file, save)
+

 def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic):
     """
     Save benchmark results for m-step ahead probabilistic forecasters
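Note: for readers of the new interval functions above, the three quality measures they aggregate are, in the usual formulation: sharpness is the mean interval width, resolution is the mean absolute deviation of widths around that mean, and coverage is the fraction of observations that fall inside their forecast interval. A compact sketch under those definitions (treat the exact formulas as assumptions, not pyFTS code):

    import numpy as np

    def interval_metrics(targets, intervals):
        # intervals: list of (lower, upper) forecasts aligned with targets
        widths = np.array([u - l for l, u in intervals])
        sharpness = widths.mean()                           # narrower is better
        resolution = np.abs(widths - widths.mean()).mean()  # width variability
        coverage = np.mean([l <= t <= u for t, (l, u) in zip(targets, intervals)])
        return sharpness, resolution, coverage

    targets = [10.0, 12.0, 11.0]
    intervals = [(9.0, 11.0), (10.0, 13.0), (12.0, 14.0)]
    print(interval_metrics(targets, intervals))  # (2.33..., 0.44..., 0.66...)

The Q05/Q25/Q75/Q95 columns feed plot_dataframe_interval_pinball: the pinball (quantile) loss at level tau penalizes under- and over-prediction asymmetrically, which is why the four tau panels are plotted separately.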
@@ -438,5 +630,90 @@ def ahead_dataframe_analytic_columns(experiments):

 def ahead_dataframe_synthetic_columns():
     columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
-               "SIZE", "TIME1AVG", "TIME2AVG"]
+               "TIME1AVG", "TIME1STD", "TIME2AVG", "TIME2STD"]
     return columns
+
+
+def cast_dataframe_to_synthetic_ahead(infile, outfile, experiments):
+    columns = ahead_dataframe_analytic_columns(experiments)
+    dat = pd.read_csv(infile, sep=";", usecols=columns)
+    models = dat.Model.unique()
+    orders = dat.Order.unique()
+    schemes = dat.Scheme.unique()
+    partitions = dat.Partitions.unique()
+    data_columns = analytical_data_columns(experiments)
+
+    ret = []
+
+    for m in models:
+        for o in orders:
+            for s in schemes:
+                for p in partitions:
+                    mod = []
+                    df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
+                    if not df.empty:
+                        crps1 = extract_measure(df, 'CRPS_Interval', data_columns)
+                        crps2 = extract_measure(df, 'CRPS_Distribution', data_columns)
+                        times1 = extract_measure(df, 'TIME_Interval', data_columns)
+                        times2 = extract_measure(df, 'TIME_Distribution', data_columns)
+                        mod.append(m)
+                        mod.append(o)
+                        mod.append(s)
+                        mod.append(p)
+                        mod.append(np.round(np.nanmean(crps1), 2))
+                        mod.append(np.round(np.nanstd(crps1), 2))
+                        mod.append(np.round(np.nanmean(crps2), 2))
+                        mod.append(np.round(np.nanstd(crps2), 2))
+                        mod.append(np.round(np.nanmean(times1), 2))
+                        mod.append(np.round(np.nanstd(times1), 2))
+                        mod.append(np.round(np.nanmean(times2), 4))
+                        mod.append(np.round(np.nanstd(times2), 4))
+                        ret.append(mod)
+
+    dat = pd.DataFrame(ret, columns=ahead_dataframe_synthetic_columns())
+    dat.to_csv(outfile, sep=";", index=False)
+
+
+def plot_dataframe_ahead(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
+                         sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
+                         sort_ascend=[True, True, True, True],save_best=False,
+                         ignore=None, replace=None):
+
+    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
+
+    axes[0].set_title('CRPS Interval Ahead')
+    axes[1].set_title('CRPS Distribution Ahead')
+
+    dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=ahead_dataframe_synthetic_columns())
+
+    bests = find_best(dat_syn, sort_columns, sort_ascend)
+
+    dat_ana = pd.read_csv(file_analytic, sep=";", usecols=ahead_dataframe_analytic_columns(experiments))
+
+    data_columns = analytical_data_columns(experiments)
+
+    if save_best:
+        dat = pd.DataFrame.from_dict(bests, orient='index')
+        dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
+
+    crps1 = []
+    crps2 = []
+    labels = []
+
+    for b in sorted(bests.keys()):
+        if check_ignore_list(b, ignore):
+            continue
+        best = bests[b]
+        df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
+                     & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
+        crps1.append( extract_measure(df,'CRPS_Interval',data_columns) )
+        crps2.append(extract_measure(df, 'CRPS_Distribution', data_columns))
+        labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
+
+    axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
+    axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)
+
+    plt.tight_layout()
+
+    Util.showAndSaveImage(fig, file, save)
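Note: the CRPS1*/CRPS2* columns summarize the continuous ranked probability score of the interval and distribution forecasts respectively. For a sample-based forecast distribution, the score for one observation y reduces to E|X - y| - 0.5 E|X - X'|. A minimal sketch of that estimator (generic CRPS, not the pyFTS Measures code):

    import numpy as np

    def crps_sample(y, samples):
        # CRPS(F, y) = E|X - y| - 0.5 * E|X - X'| with X, X' drawn from F
        samples = np.asarray(samples, dtype=float)
        term1 = np.mean(np.abs(samples - y))
        term2 = 0.5 * np.mean(np.abs(samples[:, None] - samples[None, :]))
        return term1 - term2

    forecast = np.random.normal(loc=10.0, scale=1.0, size=500)
    print(crps_sample(10.0, forecast))  # small: forecast centered on the truth
    print(crps_sample(15.0, forecast))  # large: forecast far from the truth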

View File

@@ -14,7 +14,7 @@ import matplotlib.colors as pltcolors
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from mpl_toolkits.mplot3d import Axes3D
+#from mpl_toolkits.mplot3d import Axes3D

 from pyFTS.probabilistic import ProbabilityDistribution
 from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
@@ -213,10 +213,35 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
     return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)


+def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
+    pool = []
+    if models is None:
+        models = get_point_methods()
+
+    for model in models:
+        mfts = model("")
+
+        if mfts.is_high_order:
+            for order in np.arange(1, max_order + 1):
+                if order >= mfts.min_order:
+                    mfts = model("")
+                    mfts.order = order
+                    pool.append(mfts)
+        else:
+            mfts.order = 1
+            pool.append(mfts)
+
+    if benchmark_models is not None:
+        for count, model in enumerate(benchmark_models, start=0):
+            par = benchmark_models_parameters[count]
+            mfts = model(str(par if par is not None else ""))
+            mfts.order = par
+            pool.append(mfts)
+
+    return pool
+
+
 def all_point_forecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True,
                           series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None,
-                          distributions=False):
+                          distributions=False, benchmark_models=None, benchmark_models_parameters=None):
     """
     Fixed data benchmark for FTS point forecasters

     :param data_train: data used to train the models
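Note: build_model_pool_point centralizes model instantiation that was previously duplicated inside the benchmark loops. For intuition, here is a standalone mock of the expansion it performs: high-order classes are instantiated once per order from min_order up to max_order, first-order classes once. The Mock* classes below are hypothetical stand-ins, not pyFTS types:

    class MockFTS:
        is_high_order, min_order = False, 1
        def __init__(self, name=""): self.order = 1

    class MockHighOrderFTS:
        is_high_order, min_order = True, 1
        def __init__(self, name=""): self.order = 1

    def build_pool(models, max_order):
        pool = []
        for model in models:
            mfts = model("")
            if mfts.is_high_order:
                for order in range(1, max_order + 1):
                    if order >= mfts.min_order:
                        mfts = model("")
                        mfts.order = order
                        pool.append(mfts)
            else:
                mfts.order = 1
                pool.append(mfts)
        return pool

    pool = build_pool([MockFTS, MockHighOrderFTS], max_order=3)
    print([(type(m).__name__, m.order) for m in pool])
    # [('MockFTS', 1), ('MockHighOrderFTS', 1), ('MockHighOrderFTS', 2), ('MockHighOrderFTS', 3)]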
@@ -234,8 +259,7 @@ def all_point_forecasters(data_train, data_test, partitions, max_order=3, statis
     :param distributions: plot distributions
     :return:
     """
-    if models is None:
-        models = get_point_methods()
+    models = build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters)

     objs = []
@@ -247,22 +271,11 @@ def all_point_forecasters(data_train, data_test, partitions, max_order=3, statis
     for count, model in enumerate(models, start=0):
         #print(model)
-        mfts = model("")
-        if not mfts.is_high_order:
-            if transformation is not None:
-                mfts.appendTransformation(transformation)
-            mfts.train(data_train, data_train_fs.sets)
-            objs.append(mfts)
-            lcolors.append( colors[count % ncol] )
-        else:
-            for order in np.arange(1,max_order+1):
-                if order >= mfts.min_order:
-                    mfts = model(" n = " + str(order))
-                    if transformation is not None:
-                        mfts.appendTransformation(transformation)
-                    mfts.train(data_train, data_train_fs.sets, order=order)
-                    objs.append(mfts)
-                    lcolors.append(colors[(count + order) % ncol])
+        if transformation is not None:
+            model.appendTransformation(transformation)
+        model.train(data_train, data_train_fs.sets, order=model.order)
+        objs.append(model)
+        lcolors.append( colors[count % ncol] )

     if statistics:
         print_point_statistics(data_test, objs)
@@ -421,38 +434,55 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
     return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times)


-def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
-                             models=None, transformation=None):
+def build_model_pool_interval(models, max_order, benchmark_models, benchmark_models_parameters):
+    pool = []
     if models is None:
         models = get_interval_methods()
+    for model in models:
+        mfts = model("")
+        if mfts.is_high_order:
+            for order in np.arange(1, max_order + 1):
+                if order >= mfts.min_order:
+                    mfts = model("")
+                    mfts.order = order
+                    pool.append(mfts)
+        else:
+            mfts.order = 1
+            pool.append(mfts)
+
+    alphas = [0.05, 0.25]
+
+    if benchmark_models is not None:
+        for count, model in enumerate(benchmark_models, start=0):
+            par = benchmark_models_parameters[count]
+            for alpha in alphas:
+                mfts = model(str(alpha), alpha=alpha)
+                mfts.order = par
+                pool.append(mfts)
+
+    return pool

-    objs = []
-
-    data_train_fs = Grid.GridPartitioner(data_train,partitions, transformation=transformation).sets
+def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
+                             statistics=False, models=None, transformation=None,
+                             benchmark_models=None, benchmark_models_parameters=None):
+    models = build_model_pool_interval(models, max_order, benchmark_models, benchmark_models_parameters)
+
+    data_train_fs = Grid.GridPartitioner(data_train, partitions, transformation=transformation).sets

     lcolors = []
+    objs = []

     for count, model in Util.enumerate2(models, start=0, step=2):
-        mfts = model("")
-        if not mfts.is_high_order:
-            if transformation is not None:
-                mfts.appendTransformation(transformation)
-            mfts.train(data_train, data_train_fs)
-            objs.append(mfts)
-            lcolors.append( colors[count % ncol] )
-        else:
-            for order in np.arange(1,max_order+1):
-                if order >= mfts.min_order:
-                    mfts = model(" n = " + str(order))
-                    if transformation is not None:
-                        mfts.appendTransformation(transformation)
-                    mfts.train(data_train, data_train_fs, order=order)
-                    objs.append(mfts)
-                    lcolors.append(colors[count % ncol])
+        if transformation is not None:
+            model.appendTransformation(transformation)
+        model.train(data_train, data_train_fs, order=model.order)
+        objs.append(model)
+        lcolors.append( colors[count % ncol] )

-    print_interval_statistics(data_test, objs)
+    if statistics:
+        print_interval_statistics(data_test, objs)

-    plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True)
+    plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
+                         points=False, intervals=True)


 def print_interval_statistics(original, models):
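Note: Util.enumerate2 is a pyFTS helper used above so that color indices advance two at a time, keeping adjacent models visually distinct in the comparison plot. A plausible equivalent, assuming it simply strides the counter:

    def enumerate2(iterable, start=0, step=1):
        # Like enumerate(), but the counter advances by `step` per item.
        n = start
        for item in iterable:
            yield n, item
            n += step

    colors = ['blue', 'green', 'red', 'cyan', 'magenta']
    for count, model in enumerate2(['PWFTS', 'HOFTS', 'ARIMA'], start=0, step=2):
        print(model, colors[count % len(colors)])
    # PWFTS blue / HOFTS red / ARIMA magenta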
@@ -467,15 +497,6 @@ def print_interval_statistics(original, models):
     print(ret)


-def plot_distribution(dist):
-    for k in dist.index:
-        alpha = np.array([dist[x][k] for x in dist]) * 100
-        x = [k for x in np.arange(0, len(alpha))]
-        y = dist.columns
-        plt.scatter(x, y, c=alpha, marker='s', linewidths=0, cmap='Oranges', norm=pltcolors.Normalize(vmin=0, vmax=1),
-                    vmin=0, vmax=1, edgecolors=None)
-
-
 def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
                          points=True, intervals=True, linewidth=1.5):
     """
@@ -506,11 +527,13 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
     for count, fts in enumerate(models, start=0):
         if fts.has_point_forecasting and points:
             forecasted = fts.forecast(original)
+            if isinstance(forecasted, np.ndarray):
+                forecasted = forecasted.tolist()
             mi.append(min(forecasted) * 0.95)
             ma.append(max(forecasted) * 1.05)
             for k in np.arange(0, fts.order):
                 forecasted.insert(0, None)
-            lbl = fts.shortname
+            lbl = fts.shortname + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
             if typeonlegend: lbl += " (Point)"
             ax.plot(forecasted, color=colors[count], label=lbl, ls="-",linewidth=linewidth)
@@ -523,7 +546,7 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
             for k in np.arange(0, fts.order):
                 lower.insert(0, None)
                 upper.insert(0, None)
-            lbl = fts.shortname
+            lbl = fts.shortname + " " + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
             if typeonlegend: lbl += " (Interval)"
             if not points and intervals:
                 ls = "-"
@@ -556,8 +579,6 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
     ax.legend(handles0, labels0)

-
-
 def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
                          partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
                          save=False, file=None, synthetic=False):

View File

@@ -101,13 +101,6 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
     :return: DataFrame with the results
     """

-    if benchmark_models is None and models is None:
-        benchmark_models = [naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
-                            quantreg.QuantileRegression, quantreg.QuantileRegression]
-
-    if benchmark_models_parameters is None:
-        benchmark_models_parameters = [1, (1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
-
     cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
     http_server = dispy.httpd.DispyHTTPServer(cluster)
@@ -116,7 +109,7 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
     print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

-    pool = []
     jobs = []
     objs = {}
     rmse = {}
@@ -124,28 +117,7 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
     u = {}
     times = {}

-    if models is None:
-        models = benchmarks.get_point_methods()
-
-    for model in models:
-        mfts = model("")
-
-        if mfts.is_high_order:
-            for order in np.arange(1, max_order + 1):
-                if order >= mfts.min_order:
-                    mfts = model("")
-                    mfts.order = order
-                    pool.append(mfts)
-        else:
-            mfts.order = 1
-            pool.append(mfts)
-
-    if benchmark_models is not None:
-        for count, model in enumerate(benchmark_models, start=0):
-            par = benchmark_models_parameters[count]
-            mfts = model(str(par if par is not None else ""))
-            mfts.order = par
-            pool.append(mfts)
+    pool = build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters)

     experiments = 0
     for ct, train, test in Util.sliding_window(data, windowsize, train, inc):
@@ -204,6 +176,40 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
     return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)


+def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
+    pool = []
+
+    if benchmark_models is None and models is None:
+        benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
+                            quantreg.QuantileRegression, quantreg.QuantileRegression]
+
+    if benchmark_models_parameters is None:
+        benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
+
+    if models is None:
+        models = benchmarks.get_point_methods()
+
+    for model in models:
+        mfts = model("")
+
+        if mfts.is_high_order:
+            for order in np.arange(1, max_order + 1):
+                if order >= mfts.min_order:
+                    mfts = model("")
+                    mfts.order = order
+                    pool.append(mfts)
+        else:
+            mfts.order = 1
+            pool.append(mfts)
+
+    if benchmark_models is not None:
+        for count, model in enumerate(benchmark_models, start=0):
+            par = benchmark_models_parameters[count]
+            mfts = model(str(par if par is not None else ""))
+            mfts.order = par
+            pool.append(mfts)
+
+    return pool
+
+
 def run_interval(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):
     """
     Interval forecast benchmark function to be executed on cluster nodes
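Note: the default benchmark pool pairs each class positionally with one entry of benchmark_models_parameters: the ARIMA variants get (p, d, q) tuples and the quantile regressions get autoregressive orders. A sketch of that pairing with hypothetical stand-in classes (not the pyFTS arima/quantreg types):

    class ARIMA:
        def __init__(self, name=""): self.name, self.order = name, None

    class QuantileRegression:
        def __init__(self, name=""): self.name, self.order = name, None

    benchmark_models = [ARIMA, ARIMA, QuantileRegression]
    benchmark_models_parameters = [(1, 0, 0), (2, 0, 2), 2]  # positional pairing

    pool = []
    for model, par in zip(benchmark_models, benchmark_models_parameters):
        mfts = model(str(par))
        mfts.order = par  # ARIMA keeps the full (p, d, q) tuple as its "order"
        pool.append(mfts)

    print([(type(m).__name__, m.order) for m in pool])
    # [('ARIMA', (1, 0, 0)), ('ARIMA', (2, 0, 2)), ('QuantileRegression', 2)]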

View File

@@ -26,9 +26,9 @@ def showAndSaveImage(fig,file,flag,lgd=None):
     if flag:
         plt.show()
         if lgd is not None:
-            fig.savefig(uniquefilename(file), additional_artists=lgd,bbox_inches='tight')  #bbox_extra_artists=(lgd,), )
+            fig.savefig(file, additional_artists=lgd,bbox_inches='tight')  #bbox_extra_artists=(lgd,), )
         else:
-            fig.savefig(uniquefilename(file))
+            fig.savefig(file)
         plt.close(fig)
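Note: dropping uniquefilename makes saving deterministic: rerunning a benchmark now overwrites the previous image instead of accumulating numbered copies. For contrast, a helper with uniquefilename-like behavior might look as follows (an illustrative guess at its semantics, not the pyFTS implementation):

    import os

    def uniquefilename(name):
        # Append a counter until the name is unused, so repeated
        # runs never overwrite an existing file.
        base, ext = os.path.splitext(name)
        n, candidate = 1, name
        while os.path.exists(candidate):
            candidate = "{0}_{1}{2}".format(base, n, ext)
            n += 1
        return candidate

With the change above, fig.savefig(file) is called directly, trading the old plot_1.png, plot_2.png, ... accumulation for reproducible output paths.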

View File

@@ -15,7 +15,7 @@ class IntervalFTS(hofts.HighOrderFTS):
         self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
         self.flrgs = {}
         self.has_point_forecasting = False
-        self.has_point_forecasting = True
+        self.has_interval_forecasting = True
         self.is_high_order = True

     def getUpper(self, flrg):

View File

@@ -537,9 +537,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     [intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)])
                 grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid))

-            tmp = np.array([grid[k] for k in sorted(grid)])
-            ret.append(tmp / sum(tmp))
+            tmp = np.array([grid[k] for k in sorted(grid) if not np.isnan(grid[k])])
+            try:
+                ret.append(tmp / sum(tmp))
+            except Exception as ex:
+                ret.append(0)
         else:
             ret = []
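Note: after the NaN filtering above, tmp can end up empty or all-zero, so sum(tmp) may be 0 and the normalization would raise or emit NaNs; the try/except keeps the forecasting loop alive by appending 0 for such degenerate windows. The same guard in isolation:

    import numpy as np

    def normalize_histogram(grid):
        # Keep only non-NaN cells, then normalize to a probability vector.
        vals = np.array([grid[k] for k in sorted(grid) if not np.isnan(grid[k])])
        s = vals.sum()
        if s > 0:
            return vals / s
        return 0  # degenerate window: no mass to normalize, mirroring the commit

    print(normalize_histogram({0: 2.0, 1: 2.0, 2: float("nan")}))  # [0.5 0.5]
    print(normalize_histogram({0: 0.0, 1: float("nan")}))          # 0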

View File

@@ -62,7 +62,7 @@ class ExponentialyWeightedFTS(fts.FTS):
                 flrgs[flr.LHS.name].append(flr.RHS)
         return (flrgs)

-    def train(self, data, sets,order=1,parameters=2):
+    def train(self, data, sets,order=1,parameters=1.05):
         self.c = parameters
         self.sets = sets
         ndata = self.doTransformations(data)
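Note: this lowers the default weighting base c of the exponentially weighted FTS from 2 to 1.05. With rule weights growing like c**i before normalization, c=2 lets the most recent rule dominate the forecast almost entirely, while 1.05 gives a much gentler recency bias. A quick comparison, assuming that weighting form:

    import numpy as np

    def exp_weights(n, c):
        # i-th weight proportional to c**i, normalized to sum to 1
        w = np.array([c ** i for i in range(n)], dtype=float)
        return w / w.sum()

    print(np.round(exp_weights(5, 2.0), 3))   # [0.032 0.065 0.129 0.258 0.516]
    print(np.round(exp_weights(5, 1.05), 3))  # roughly [0.181 0.19 0.2 0.209 0.22]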

View File

@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 import matplotlib as plt
 import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
+#from mpl_toolkits.mplot3d import Axes3D
 import pandas as pd

 from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
@@ -41,28 +41,30 @@ DATASETS

 #taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
 #taiex = np.array(taiexpd["avg"][:5000])
+#del(taiexpd)

 #nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
 #nasdaq = np.array(nasdaqpd["avg"][0:5000])
+#del(nasdaqpd)

 #sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",")
 #sp500 = np.array(sp500pd["Avg"][11000:])
 #del(sp500pd)

-sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
-sondapd = sondapd.dropna(axis=0, how='any')
-sonda = np.array(sondapd["glo_avg"])
-del(sondapd)
+#sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
+#sondapd = sondapd.dropna(axis=0, how='any')
+#sonda = np.array(sondapd["glo_avg"])
+#del(sondapd)

-#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
-#best = np.array(bestpd["Anomaly"])
-#del(bestpd)
+bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
+best = np.array(bestpd["Anomaly"])
+del(bestpd)

 #print(lag)
 #print(a)

-#from pyFTS.benchmarks import benchmarks as bchmk
-from pyFTS.benchmarks import distributed_benchmarks as bchmk
+from pyFTS.benchmarks import benchmarks as bchmk
+#from pyFTS.benchmarks import distributed_benchmarks as bchmk
 #from pyFTS.benchmarks import parallel_benchmarks as bchmk
 from pyFTS.benchmarks import Util
 from pyFTS.benchmarks import arima, quantreg, Measures
@@ -102,7 +104,7 @@ bchmk.plot_compared_series(enrollments,[tmp], ['blue','red'], points=False, inte
 #kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)

 #print(kk)
-#"""
+"""

 """
@@ -120,9 +122,9 @@ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.Weighted
                       dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv",
                       nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
+"""

-"""
 bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.WeightedFTS], # #
                     partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
@@ -131,28 +133,48 @@ bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.Weighte
                      "_interval_analytic.csv",
                      nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])

-bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.WeightedFTS], # #
+bchmk.interval_sliding_window(taiex, 2000, train=0.8, inc=0.1, #models=[yu.WeightedFTS], # #
                     partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
                     partitions= np.arange(3,20,step=2), transformation=diff,
-                    dump=True, save=True, file="experiments/sp500_analytic_diff.csv",
+                    dump=True, save=True, file="experiments/taiex_interval_analytic_diff.csv",
                     nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])

+bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.2,
+                     partitioners=[Grid.GridPartitioner],
+                     partitions= np.arange(10,200,step=10), indexer=ix,
+                     dump=True, save=True, file="experiments/sondawind_ahead_analytic.csv",
+                     nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
+
+bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.2,
+                     partitioners=[Grid.GridPartitioner],
+                     partitions= np.arange(3,20,step=2), transformation=diff, indexer=ix,
+                     dump=True, save=True, file="experiments/sondawind_ahead_analytic_diff.csv",
+                     nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
 """

-#"""
-bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.5,
-                     partitioners=[Grid.GridPartitioner],
-                     partitions= np.arange(10,200,step=10), indexer=ix,
-                     dump=True, save=True, file="experiments/sondasolar_ahead_analytic.csv",
-                     nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
-
-bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc=0.5,
-                     partitioners=[Grid.GridPartitioner],
-                     partitions= np.arange(3,20,step=2), transformation=diff, indexer=ix,
-                     dump=True, save=True, file="experiments/sondasolar_ahead_analytic_diff.csv",
-                     nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
-"""
+from pyFTS import pwfts
+from pyFTS.common import Transformations
+from pyFTS.partitioners import Grid
+
+diff = Transformations.Differential(1)
+fs = Grid.GridPartitioner(best, 190) #, transformation=diff)
+
+model = pwfts.ProbabilisticWeightedFTS("FTS 1")
+#model.appendTransformation(diff)
+model.train(best[0:1600],fs.sets, order=3)
+
+bchmk.plot_compared_intervals_ahead(best[1600:1700],[model], ['blue','red'],
+                        distributions=[True], save=True, file="pictures/best_ahead_forecasts",
+                        time_from=40, time_to=60, resolution=100)

 from pyFTS.partitioners import Grid