- Benchmark refactoring to expose a unified and simpler interface (the method sliding_window_benchmarks)

- Improvements at FTS.fit and FTS.predict
 - PWFTS bugfixes and improvements
This commit is contained in:
Petrônio Cândido 2018-04-09 11:09:28 -03:00
parent 8ac8fec14c
commit 1d7801bdbf
9 changed files with 755 additions and 870 deletions

View File

@ -5,12 +5,14 @@ pyFTS module for common benchmark metrics
""" """
import time import time
import numba
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pyFTS.common import FuzzySet,SortedCollection from pyFTS.common import FuzzySet,SortedCollection
from pyFTS.probabilistic import ProbabilityDistribution from pyFTS.probabilistic import ProbabilityDistribution
def acf(data, k): def acf(data, k):
""" """
Autocorrelation function estimative Autocorrelation function estimative
@ -28,6 +30,7 @@ def acf(data, k):
return 1/((n-k)*sigma)*s return 1/((n-k)*sigma)*s
def rmse(targets, forecasts): def rmse(targets, forecasts):
""" """
Root Mean Squared Error Root Mean Squared Error
@ -42,6 +45,7 @@ def rmse(targets, forecasts):
return np.sqrt(np.nanmean((targets - forecasts) ** 2)) return np.sqrt(np.nanmean((targets - forecasts) ** 2))
def rmse_interval(targets, forecasts): def rmse_interval(targets, forecasts):
""" """
Root Mean Squared Error Root Mean Squared Error
@ -53,6 +57,7 @@ def rmse_interval(targets, forecasts):
return np.sqrt(np.nanmean((fmean - targets) ** 2)) return np.sqrt(np.nanmean((fmean - targets) ** 2))
def mape(targets, forecasts): def mape(targets, forecasts):
""" """
Mean Average Percentual Error Mean Average Percentual Error
@ -67,6 +72,7 @@ def mape(targets, forecasts):
return np.mean(np.abs(targets - forecasts) / targets) * 100 return np.mean(np.abs(targets - forecasts) / targets) * 100
def smape(targets, forecasts, type=2): def smape(targets, forecasts, type=2):
""" """
Symmetric Mean Average Percentual Error Symmetric Mean Average Percentual Error
@ -87,11 +93,13 @@ def smape(targets, forecasts, type=2):
return sum(np.abs(forecasts - targets)) / sum(forecasts + targets) return sum(np.abs(forecasts - targets)) / sum(forecasts + targets)
def mape_interval(targets, forecasts): def mape_interval(targets, forecasts):
fmean = [np.mean(i) for i in forecasts] fmean = [np.mean(i) for i in forecasts]
return np.mean(abs(fmean - targets) / fmean) * 100 return np.mean(abs(fmean - targets) / fmean) * 100
def UStatistic(targets, forecasts): def UStatistic(targets, forecasts):
""" """
Theil's U Statistic Theil's U Statistic
@ -108,11 +116,12 @@ def UStatistic(targets, forecasts):
naive = [] naive = []
y = [] y = []
for k in np.arange(0,l-1): for k in np.arange(0,l-1):
y.append((forecasts[k ] - targets[k ]) ** 2) y.append((forecasts[k ] - targets[k]) ** 2)
naive.append((targets[k + 1] - targets[k]) ** 2) naive.append((targets[k + 1] - targets[k]) ** 2)
return np.sqrt(sum(y) / sum(naive)) return np.sqrt(sum(y) / sum(naive))
def TheilsInequality(targets, forecasts): def TheilsInequality(targets, forecasts):
""" """
Theils Inequality Coefficient Theils Inequality Coefficient
@ -128,6 +137,7 @@ def TheilsInequality(targets, forecasts):
return us / (ys + fs) return us / (ys + fs)
def BoxPierceStatistic(data, h): def BoxPierceStatistic(data, h):
""" """
Q Statistic for Box-Pierce test Q Statistic for Box-Pierce test
@ -204,12 +214,15 @@ def pinball_mean(tau, targets, forecasts):
:param forecasts: list of prediction intervals :param forecasts: list of prediction intervals
:return: :return:
""" """
preds = [] try:
if tau <= 0.5: if tau <= 0.5:
preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))] preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))]
else: else:
preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))] preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))]
return np.nanmean(preds) return np.nanmean(preds)
except Exception as ex:
print(ex)
def pmf_to_cdf(density): def pmf_to_cdf(density):
@ -259,18 +272,17 @@ def crps(targets, densities):
return _crps / float(l * n) return _crps / float(l * n)
def get_point_statistics(data, model, indexer=None): def get_point_statistics(data, model, **kwargs):
"""Condensate all measures for point forecasters""" """Condensate all measures for point forecasters"""
indexer = kwargs.get('indexer', None)
if indexer is not None: if indexer is not None:
ndata = np.array(indexer.get_data(data)) ndata = np.array(indexer.get_data(data))
else: else:
ndata = np.array(data[model.order:]) ndata = np.array(data[model.order:])
if model.is_multivariate or indexer is None: forecasts = model.predict(data, **kwargs)
forecasts = model.forecast(data)
elif not model.is_multivariate and indexer is not None:
forecasts = model.forecast(indexer.get_data(data))
try: try:
if model.has_seasonality: if model.has_seasonality:
@ -281,29 +293,18 @@ def get_point_statistics(data, model, indexer=None):
print(ex) print(ex)
return [np.nan,np.nan,np.nan] return [np.nan,np.nan,np.nan]
ret = list() ret = list()
try:
ret.append(np.round(rmse(ndata, nforecasts), 2)) ret.append(np.round(rmse(ndata, nforecasts), 2))
except Exception as ex: ret.append(np.round(smape(ndata, nforecasts), 2))
print('Error in RMSE: {}'.format(ex)) ret.append(np.round(UStatistic(ndata, nforecasts), 2))
ret.append(np.nan)
try:
ret.append(np.round(smape(ndata, nforecasts), 2))
except Exception as ex:
print('Error in SMAPE: {}'.format(ex))
ret.append(np.nan)
try:
ret.append(np.round(UStatistic(ndata, nforecasts), 2))
except Exception as ex:
print('Error in U: {}'.format(ex))
ret.append(np.nan)
return ret return ret
def get_interval_statistics(original, model): def get_interval_statistics(original, model, **kwargs):
"""Condensate all measures for point_to_interval forecasters""" """Condensate all measures for point_to_interval forecasters"""
ret = list() ret = list()
forecasts = model.forecast_interval(original) forecasts = model.predict(original, **kwargs)
ret.append(round(sharpness(forecasts), 2)) ret.append(round(sharpness(forecasts), 2))
ret.append(round(resolution(forecasts), 2)) ret.append(round(resolution(forecasts), 2))
ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2)) ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
@ -314,27 +315,13 @@ def get_interval_statistics(original, model):
return ret return ret
def get_distribution_statistics(original, model, steps, resolution): def get_distribution_statistics(original, model, **kwargs):
ret = list() ret = list()
try: _s1 = time.time()
_s1 = time.time() densities1 = model.predict(original, **kwargs)
densities1 = model.forecast_ahead_distribution(original, steps, parameters=3) _e1 = time.time()
_e1 = time.time() ret.append(round(crps(original, densities1), 3))
ret.append(round(crps(original, densities1), 3)) ret.append(round(_e1 - _s1, 3))
ret.append(round(_e1 - _s1, 3))
except Exception as e:
print('Erro: ', e)
ret.append(np.nan)
ret.append(np.nan)
try:
_s2 = time.time()
densities2 = model.forecast_ahead_distribution(original, steps, parameters=2)
_e2 = time.time()
ret.append( round(crps(original, densities2), 3))
ret.append(round(_e2 - _s2, 3))
except:
ret.append(np.nan)
ret.append(np.nan)
return ret return ret

View File

@ -2,6 +2,7 @@
Benchmark utility functions Benchmark utility functions
""" """
import numba
import matplotlib as plt import matplotlib as plt
import matplotlib.cm as cmx import matplotlib.cm as cmx
import matplotlib.colors as pltcolors import matplotlib.colors as pltcolors
@ -11,8 +12,6 @@ import pandas as pd
#from mpl_toolkits.mplot3d import Axes3D #from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
from copy import deepcopy from copy import deepcopy
from pyFTS.common import Util from pyFTS.common import Util
@ -124,7 +123,7 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape,
s = '-' s = '-'
p = '-' p = '-'
l = '-' l = '-'
print([n, o, s, p, l])
tmp = [n, o, s, p, l, 'RMSE'] tmp = [n, o, s, p, l, 'RMSE']
tmp.extend(rmse[k]) tmp.extend(rmse[k])
ret.append(deepcopy(tmp)) ret.append(deepcopy(tmp))
@ -194,23 +193,31 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments):
dat.to_csv(outfile, sep=";", index=False) dat.to_csv(outfile, sep=";", index=False)
def analytical_data_columns(experiments): def analytical_data_columns(experiments):
data_columns = [str(k) for k in np.arange(0, experiments)] data_columns = [str(k) for k in np.arange(0, experiments)]
return data_columns return data_columns
def scale_params(data): def scale_params(data):
vmin = np.nanmin(data) vmin = np.nanmin(data)
vlen = np.nanmax(data) - vmin vlen = np.nanmax(data) - vmin
return (vmin, vlen) return (vmin, vlen)
def scale(data, params): def scale(data, params):
ndata = [(k-params[0])/params[1] for k in data] ndata = [(k-params[0])/params[1] for k in data]
return ndata return ndata
def stats(measure, data): def stats(measure, data):
print(measure, np.nanmean(data), np.nanstd(data)) print(measure, np.nanmean(data), np.nanstd(data))
def unified_scaled_point(experiments, tam, save=False, file=None, def unified_scaled_point(experiments, tam, save=False, file=None,
sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
sort_ascend=[1, 1, 1, 1],save_best=False, sort_ascend=[1, 1, 1, 1],save_best=False,
@ -320,6 +327,7 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None, def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
sort_ascend=[1, 1, 1, 1],save_best=False, sort_ascend=[1, 1, 1, 1],save_best=False,
@ -375,6 +383,7 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=F
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def check_replace_list(m, replace): def check_replace_list(m, replace):
if replace is not None: if replace is not None:
for r in replace: for r in replace:
@ -383,6 +392,7 @@ def check_replace_list(m, replace):
return m return m
def check_ignore_list(b, ignore): def check_ignore_list(b, ignore):
flag = False flag = False
if ignore is not None: if ignore is not None:
@ -475,6 +485,7 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
if save: dat.to_csv(Util.uniquefilename(file), sep=";") if save: dat.to_csv(Util.uniquefilename(file), sep=";")
return dat return dat
def interval_dataframe_analytic_columns(experiments): def interval_dataframe_analytic_columns(experiments):
columns = [str(k) for k in np.arange(0, experiments)] columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model") columns.insert(0, "Model")
@ -486,12 +497,14 @@ def interval_dataframe_analytic_columns(experiments):
return columns return columns
def interval_dataframe_synthetic_columns(): def interval_dataframe_synthetic_columns():
columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG", columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
"COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD"] "COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD"]
return columns return columns
def cast_dataframe_to_synthetic_interval(infile, outfile, experiments): def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
columns = interval_dataframe_analytic_columns(experiments) columns = interval_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns) dat = pd.read_csv(infile, sep=";", usecols=columns)
@ -545,6 +558,7 @@ def cast_dataframe_to_synthetic_interval(infile, outfile, experiments):
dat.to_csv(outfile, sep=";", index=False) dat.to_csv(outfile, sep=";", index=False)
def unified_scaled_interval(experiments, tam, save=False, file=None, def unified_scaled_interval(experiments, tam, save=False, file=None,
sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
sort_ascend=[True, False, True, True],save_best=False, sort_ascend=[True, False, True, True],save_best=False,
@ -643,6 +657,7 @@ def unified_scaled_interval(experiments, tam, save=False, file=None,
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def plot_dataframe_interval(file_synthetic, file_analytic, experiments, tam, save=False, file=None, def plot_dataframe_interval(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
sort_ascend=[True, False, True, True],save_best=False, sort_ascend=[True, False, True, True],save_best=False,
@ -698,6 +713,7 @@ def plot_dataframe_interval(file_synthetic, file_analytic, experiments, tam, sav
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def unified_scaled_interval_pinball(experiments, tam, save=False, file=None, def unified_scaled_interval_pinball(experiments, tam, save=False, file=None,
sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'], sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'],
sort_ascend=[True, False, True, True], save_best=False, sort_ascend=[True, False, True, True], save_best=False,
@ -795,6 +811,8 @@ def unified_scaled_interval_pinball(experiments, tam, save=False, file=None,
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def plot_dataframe_interval_pinball(file_synthetic, file_analytic, experiments, tam, save=False, file=None, def plot_dataframe_interval_pinball(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'], sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'],
sort_ascend=[True, False, True, True], save_best=False, sort_ascend=[True, False, True, True], save_best=False,
@ -846,7 +864,7 @@ def plot_dataframe_interval_pinball(file_synthetic, file_analytic, experiments,
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic): def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, synthetic):
""" """
Save benchmark results for m-step ahead probabilistic forecasters Save benchmark results for m-step ahead probabilistic forecasters
:param experiments: :param experiments:
@ -854,7 +872,7 @@ def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, tim
:param objs: :param objs:
:param crps_interval: :param crps_interval:
:param crps_distr: :param crps_distr:
:param times1: :param times:
:param times2: :param times2:
:param save: :param save:
:param synthetic: :param synthetic:
@ -881,13 +899,11 @@ def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, tim
mod.append('-') mod.append('-')
mod.append('-') mod.append('-')
l = '-' l = '-'
mod.append(np.round(np.nanmean(crps_interval[k]), 2)) mod.append(np.round(np.nanmean(crps[k]), 2))
mod.append(np.round(np.nanstd(crps_interval[k]), 2)) mod.append(np.round(np.nanstd(crps[k]), 2))
mod.append(np.round(np.nanmean(crps_distr[k]), 2))
mod.append(np.round(np.nanstd(crps_distr[k]), 2))
mod.append(l) mod.append(l)
mod.append(np.round(np.nanmean(times1[k]), 4)) mod.append(np.round(np.nanmean(times[k]), 4))
mod.append(np.round(np.nanmean(times2[k]), 4)) mod.append(np.round(np.nanstd(times[k]), 4))
ret.append(mod) ret.append(mod)
except Exception as e: except Exception as e:
print('Erro: %s' % e) print('Erro: %s' % e)
@ -895,7 +911,7 @@ def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, tim
print("Erro ao salvar ", k) print("Erro ao salvar ", k)
print("Exceção ", ex) print("Exceção ", ex)
columns = ahead_dataframe_synthetic_columns() columns = probabilistic_dataframe_synthetic_columns()
else: else:
for k in sorted(objs.keys()): for k in sorted(objs.keys()):
try: try:
@ -910,28 +926,23 @@ def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, tim
s = '-' s = '-'
p = '-' p = '-'
l = '-' l = '-'
tmp = [n, o, s, p, l, 'CRPS_Interval'] tmp = [n, o, s, p, l, 'CRPS']
tmp.extend(crps_interval[k]) tmp.extend(crps[k])
ret.append(deepcopy(tmp)) ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'CRPS_Distribution'] tmp = [n, o, s, p, l, 'TIME']
tmp.extend(crps_distr[k]) tmp.extend(times[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'TIME_Interval']
tmp.extend(times1[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'TIME_Distribution']
tmp.extend(times2[k])
ret.append(deepcopy(tmp)) ret.append(deepcopy(tmp))
except Exception as ex: except Exception as ex:
print("Erro ao salvar ", k) print("Erro ao salvar ", k)
print("Exceção ", ex) print("Exceção ", ex)
columns = ahead_dataframe_analytic_columns(experiments) columns = probabilistic_dataframe_analytic_columns(experiments)
dat = pd.DataFrame(ret, columns=columns) dat = pd.DataFrame(ret, columns=columns)
if save: dat.to_csv(Util.uniquefilename(file), sep=";") if save: dat.to_csv(Util.uniquefilename(file), sep=";")
return dat return dat
def ahead_dataframe_analytic_columns(experiments):
def probabilistic_dataframe_analytic_columns(experiments):
columns = [str(k) for k in np.arange(0, experiments)] columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model") columns.insert(0, "Model")
columns.insert(1, "Order") columns.insert(1, "Order")
@ -942,14 +953,14 @@ def ahead_dataframe_analytic_columns(experiments):
return columns return columns
def ahead_dataframe_synthetic_columns(): def probabilistic_dataframe_synthetic_columns():
columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD", columns = ["Model", "Order", "Scheme", "Partitions", "CRPSAVG", "CRPSSTD",
"TIME1AVG", "TIME1STD", "TIME2AVG", "TIME2STD"] "TIMEAVG", "TIMESTD"]
return columns return columns
def cast_dataframe_to_synthetic_ahead(infile, outfile, experiments): def cast_dataframe_to_synthetic_probabilistic(infile, outfile, experiments):
columns = ahead_dataframe_analytic_columns(experiments) columns = probabilistic_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns) dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique() models = dat.Model.unique()
orders = dat.Order.unique() orders = dat.Order.unique()
@ -967,36 +978,31 @@ def cast_dataframe_to_synthetic_ahead(infile, outfile, experiments):
mod = [] mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)] df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
if not df.empty: if not df.empty:
crps1 = extract_measure(df, 'CRPS_Interval', data_columns) crps1 = extract_measure(df, 'CRPS', data_columns)
crps2 = extract_measure(df, 'CRPS_Distribution', data_columns) times1 = extract_measure(df, 'TIME', data_columns)
times1 = extract_measure(df, 'TIME_Interval', data_columns)
times2 = extract_measure(df, 'TIME_Distribution', data_columns)
mod.append(m) mod.append(m)
mod.append(o) mod.append(o)
mod.append(s) mod.append(s)
mod.append(p) mod.append(p)
mod.append(np.round(np.nanmean(crps1), 2)) mod.append(np.round(np.nanmean(crps1), 2))
mod.append(np.round(np.nanstd(crps1), 2)) mod.append(np.round(np.nanstd(crps1), 2))
mod.append(np.round(np.nanmean(crps2), 2))
mod.append(np.round(np.nanstd(crps2), 2))
mod.append(np.round(np.nanmean(times1), 2)) mod.append(np.round(np.nanmean(times1), 2))
mod.append(np.round(np.nanstd(times1), 2)) mod.append(np.round(np.nanstd(times1), 2))
mod.append(np.round(np.nanmean(times2), 4))
mod.append(np.round(np.nanstd(times2), 4))
ret.append(mod) ret.append(mod)
dat = pd.DataFrame(ret, columns=ahead_dataframe_synthetic_columns()) dat = pd.DataFrame(ret, columns=probabilistic_dataframe_synthetic_columns())
dat.to_csv(outfile, sep=";", index=False) dat.to_csv(outfile, sep=";", index=False)
def unified_scaled_ahead(experiments, tam, save=False, file=None,
sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
sort_ascend=[True, True, True, True], save_best=False,
ignore=None, replace=None):
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
axes[0].set_title('CRPS Interval Ahead') def unified_scaled_probabilistic(experiments, tam, save=False, file=None,
axes[1].set_title('CRPS Distribution Ahead') sort_columns=['CRPSAVG', 'CRPSSTD'],
sort_ascend=[True, True], save_best=False,
ignore=None, replace=None):
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)
axes.set_title('CRPS')
#axes[1].set_title('CRPS Distribution Ahead')
models = {} models = {}
@ -1006,11 +1012,11 @@ def unified_scaled_ahead(experiments, tam, save=False, file=None,
mdl = {} mdl = {}
dat_syn = pd.read_csv(experiment[0], sep=";", usecols=ahead_dataframe_synthetic_columns()) dat_syn = pd.read_csv(experiment[0], sep=";", usecols=probabilistic_dataframe_synthetic_columns())
bests = find_best(dat_syn, sort_columns, sort_ascend) bests = find_best(dat_syn, sort_columns, sort_ascend)
dat_ana = pd.read_csv(experiment[1], sep=";", usecols=ahead_dataframe_analytic_columns(experiment[2])) dat_ana = pd.read_csv(experiment[1], sep=";", usecols=probabilistic_dataframe_analytic_columns(experiment[2]))
crps1 = [] crps1 = []
crps2 = [] crps2 = []
@ -1070,21 +1076,22 @@ def unified_scaled_ahead(experiments, tam, save=False, file=None,
Util.show_and_save_image(fig, file, save) Util.show_and_save_image(fig, file, save)
def plot_dataframe_ahead(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'], def plot_dataframe_probabilistic(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
sort_ascend=[True, True, True, True],save_best=False, sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
ignore=None, replace=None): sort_ascend=[True, True, True, True], save_best=False,
ignore=None, replace=None):
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam) fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
axes[0].set_title('CRPS Interval Ahead') axes[0].set_title('CRPS')
axes[1].set_title('CRPS Distribution Ahead') axes[1].set_title('CRPS')
dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=ahead_dataframe_synthetic_columns()) dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=probabilistic_dataframe_synthetic_columns())
bests = find_best(dat_syn, sort_columns, sort_ascend) bests = find_best(dat_syn, sort_columns, sort_ascend)
dat_ana = pd.read_csv(file_analytic, sep=";", usecols=ahead_dataframe_analytic_columns(experiments)) dat_ana = pd.read_csv(file_analytic, sep=";", usecols=probabilistic_dataframe_analytic_columns(experiments))
data_columns = analytical_data_columns(experiments) data_columns = analytical_data_columns(experiments)

View File

@ -6,6 +6,7 @@
import datetime import datetime
import time import time
import numba
from copy import deepcopy from copy import deepcopy
import matplotlib as plt import matplotlib as plt
@ -20,15 +21,15 @@ from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ift
from pyFTS.models.ensemble import ensemble from pyFTS.models.ensemble import ensemble
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
from pyFTS.benchmarks import Util as bUtil from pyFTS.benchmarks import Util as bUtil
from pyFTS.common import Util from pyFTS.common import Util as cUtil
# from sklearn.cross_validation import KFold # from sklearn.cross_validation import KFold
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
from matplotlib import rc from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) #rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use: ## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']}) #rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True) #rc('text', usetex=True)
colors = ['grey', 'darkgrey', 'rosybrown', 'maroon', 'red','orange', 'gold', 'yellow', 'olive', 'green', colors = ['grey', 'darkgrey', 'rosybrown', 'maroon', 'red','orange', 'gold', 'yellow', 'olive', 'green',
'darkgreen', 'cyan', 'lightblue','blue', 'darkblue', 'purple', 'darkviolet' ] 'darkgreen', 'cyan', 'lightblue','blue', 'darkblue', 'purple', 'darkviolet' ]
@ -40,6 +41,193 @@ styles = ['-','--','-.',':','.']
nsty = len(styles) nsty = len(styles)
def __pop(key, default, kwargs):
if key in kwargs:
return kwargs.pop(key)
else:
return default
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
"""
Sliding window benchmarks for FTS point forecasters
:param data:
:param windowsize: size of sliding window
:param train: percentual of sliding window data used to train the models
:param models: FTS point forecasters
:param partitioners: Universe of Discourse partitioner
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param transformation: data transformation
:param indexer: seasonal indexer
:param dump:
:param benchmark_methods: Non FTS models to benchmark
:param benchmark_methods_parameters: Non FTS models parameters
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
"""
distributed = __pop('distributed', False, kwargs)
save = __pop('save', False, kwargs)
transformation = kwargs.get('transformation', None)
progress = kwargs.get('progress', None)
type = kwargs.get("type", 'point')
orders = __pop("orders", [1,2,3], kwargs)
partitioners_models = __pop("partitioners_models", None, kwargs)
partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
partitions = __pop("partitions", [10], kwargs)
methods = __pop('methods', None, kwargs)
models = __pop('models', None, kwargs)
pool = [] if models is None else models
if models is None and methods is None:
if type == 'point':
methods = get_point_methods()
elif type == 'interval':
methods = get_interval_methods()
elif type == 'distribution':
methods = get_probabilistic_methods()
if models is None:
for method in methods:
mfts = method("")
if mfts.is_high_order:
for order in orders:
if order >= mfts.min_order:
mfts = method("")
mfts.order = order
pool.append(mfts)
else:
mfts.order = 1
pool.append(mfts)
benchmark_models = __pop("benchmark_models", None, kwargs)
benchmark_methods = __pop("benchmark_methods", None, kwargs)
benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)
if benchmark_models != False:
if benchmark_models is None and benchmark_methods is None:
if type == 'point'or type == 'partition':
benchmark_methods = get_benchmark_point_methods()
elif type == 'interval':
benchmark_methods = get_benchmark_interval_methods()
elif type == 'distribution':
benchmark_methods = get_benchmark_probabilistic_methods()
if benchmark_models is not None:
pool.extend(benchmark_models)
elif benchmark_methods is not None:
for count, model in enumerate(benchmark_methods, start=0):
par = benchmark_methods_parameters[count]
mfts = model(str(par if par is not None else ""))
mfts.order = par
pool.append(mfts)
if type == 'point':
experiment_method = run_point
synthesis_method = process_point_jobs
elif type == 'interval':
experiment_method = run_interval
synthesis_method = process_interval_jobs
elif type == 'distribution':
experiment_method = run_probabilistic
synthesis_method = process_probabilistic_jobs
if distributed:
import dispy, dispy.httpd
nodes = kwargs.get("nodes", ['127.0.0.1'])
cluster, http_server = cUtil.start_dispy_cluster(experiment_method, nodes)
experiments = 0
jobs = []
if progress:
from tqdm import tqdm
progressbar = tqdm(total=len(data), desc="Sliding Window:")
inc = __pop("inc", 0.1, kwargs)
for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
experiments += 1
if progress:
progressbar.update(windowsize * inc)
partitioners_pool = []
if partitioners_models is None:
for partition in partitions:
for partitioner in partitioners_methods:
data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
partitioners_pool.append(data_train_fs)
else:
partitioners_pool = partitioners_models
rng1 = partitioners_pool
if progress:
rng1 = tqdm(partitioners_pool, desc="Partitioners")
for partitioner in rng1:
rng2 = enumerate(pool,start=0)
if progress:
rng2 = enumerate(tqdm(pool, desc="Models"),start=0)
for _id, model in rng2:
if not distributed:
job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
jobs.append(job)
else:
job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
job.id = id # associate an ID to identify jobs (if needed later)
jobs.append(job)
if progress:
progressbar.close()
if distributed:
jobs2 = []
rng = jobs
cluster.wait() # wait for all jobs to finish
if progress:
rng = tqdm(jobs)
for job in rng:
if job.status == dispy.DispyJob.Finished and job is not None:
tmp = job()
jobs2.append(tmp)
jobs = deepcopy(jobs2)
cUtil.stop_dispy_cluster(cluster, http_server)
file = kwargs.get('file', None)
sintetic = kwargs.get('sintetic', False)
return synthesis_method(jobs, experiments, save, file, sintetic)
def get_benchmark_point_methods(): def get_benchmark_point_methods():
"""Return all non FTS methods for point forecasting""" """Return all non FTS methods for point forecasting"""
return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression] return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
@ -64,12 +252,17 @@ def get_interval_methods():
def get_probabilistic_methods(): def get_probabilistic_methods():
"""Return all FTS methods for probabilistic forecasting""" """Return all FTS methods for probabilistic forecasting"""
return [arima.ARIMA, ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS] return [ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]
def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None): def get_benchmark_probabilistic_methods():
"""Return all FTS methods for probabilistic forecasting"""
return [arima.ARIMA, quantreg.QuantileRegression]
def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
""" """
Point forecast benchmark function to be executed on sliding window Point forecast benchmark function to be executed on cluster nodes
:param mfts: FTS model :param mfts: FTS model
:param partitioner: Universe of Discourse partitioner :param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model :param train_data: data used to train the model
@ -79,6 +272,26 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
:param indexer: seasonal indexer :param indexer: seasonal indexer
:return: a dictionary with the benchmark results :return: a dictionary with the benchmark results
""" """
import time
from pyFTS.models import yu, chen, hofts, pwfts,ismailefendi,sadaei, song, cheng, hwang
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, naive, arima, quantreg
from pyFTS.common import Transformations
tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
pwfts.ProbabilisticWeightedFTS]
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
tmp3 = [Measures.get_point_statistics]
tmp5 = [Transformations.Differential]
transformation = kwargs.get('transformation', None)
indexer = kwargs.get('indexer', None)
if mfts.benchmark_only: if mfts.benchmark_only:
_key = mfts.shortname + str(mfts.order if mfts.order is not None else "") _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
@ -86,16 +299,17 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
pttr = str(partitioner.__module__).split('.')[-1] pttr = str(partitioner.__module__).split('.')[-1]
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions) _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner mfts.partitioner = partitioner
if transformation is not None:
mfts.append_transformation(transformation) if transformation is not None:
mfts.append_transformation(transformation)
_start = time.time() _start = time.time()
mfts.train(train_data, sets=partitioner.sets, order=mfts.order) mfts.fit(train_data, order=mfts.order, **kwargs)
_end = time.time() _end = time.time()
times = _end - _start times = _end - _start
_start = time.time() _start = time.time()
_rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, indexer) _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
_end = time.time() _end = time.time()
times += _end - _start times += _end - _start
@ -104,113 +318,120 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
return ret return ret
def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner], def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
benchmark_models=None, benchmark_models_parameters = None,
save=False, file=None, sintetic=False):
""" """
Sliding window benchmarks for FTS point forecasters Interval forecast benchmark function to be executed on cluster nodes
:param data: :param mfts: FTS model
:param windowsize: size of sliding window :param partitioner: Universe of Discourse partitioner
:param train: percentual of sliding window data used to train the models :param train_data: data used to train the model
:param models: FTS point forecasters :param test_data: ata used to test the model
:param partitioners: Universe of Discourse partitioner :param window_key: id of the sliding window
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param transformation: data transformation :param transformation: data transformation
:param indexer: seasonal indexer :param indexer: seasonal indexer
:param dump: :return: a dictionary with the benchmark results
:param benchmark_models: Non FTS models to benchmark
:param benchmark_models_parameters: Non FTS models parameters
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
""" """
import time
from pyFTS.models import hofts,ifts,pwfts
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, arima, quantreg
if benchmark_models is None: # and models is None: tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
benchmark_models = [naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
quantreg.QuantileRegression, quantreg.QuantileRegression]
if benchmark_models_parameters is None: tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
benchmark_models_parameters = [1, (1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
_process_start = time.time() tmp4 = [arima.ARIMA, quantreg.QuantileRegression]
print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now())) tmp3 = [Measures.get_interval_statistics]
pool = [] transformation = kwargs.get('transformation', None)
jobs = [] indexer = kwargs.get('indexer', None)
objs = {}
rmse = {}
smape = {}
u = {}
times = {}
if models is None: if mfts.benchmark_only:
models = get_point_methods() _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
else:
pttr = str(partitioner.__module__).split('.')[-1]
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
for model in models: if transformation is not None:
mfts = model("") mfts.append_transformation(transformation)
if mfts.is_high_order: _start = time.time()
for order in np.arange(1, max_order + 1): mfts.fit(train_data, order=mfts.order, **kwargs)
if order >= mfts.min_order: _end = time.time()
mfts = model("") times = _end - _start
mfts.order = order
pool.append(mfts)
else:
mfts.order = 1
pool.append(mfts)
if benchmark_models is not None: _start = time.time()
for count, model in enumerate(benchmark_models, start=0): _sharp, _res, _cov, _q05, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, mfts, **kwargs)
par = benchmark_models_parameters[count] _end = time.time()
mfts = model(str(par if par is not None else "")) times += _end - _start
mfts.order = par
pool.append(mfts)
experiments = 0 ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times,
for ct, train, test in Util.sliding_window(data, windowsize, train): 'Q05': _q05, 'Q25': _q25, 'Q75': _q75, 'Q95': _q95, 'window': window_key}
experiments += 1
benchmarks_only = {} return ret
if dump: print('\nWindow: {0}\n'.format(ct))
for partition in partitions: def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
"""
Probabilistic forecast benchmark function to be executed on cluster nodes
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param steps:
:param resolution:
:param window_key: id of the sliding window
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
import time
import numpy as np
from pyFTS.models import hofts, ifts, pwfts
from pyFTS.models.ensemble import ensemble
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, arima
from pyFTS.models.seasonal import SeasonalIndexer
for partitioner in partitioners: tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, ensemble.AllMethodEnsembleFTS]
data_train_fs = partitioner(data=train, npart=partition, transformation=transformation) tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
for _id, m in enumerate(pool,start=0): tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]
if m.benchmark_only and m.shortname in benchmarks_only:
continue
else:
benchmarks_only[m.shortname] = m
tmp = run_point(deepcopy(m), data_train_fs, train, test, ct, transformation) transformation = kwargs.get('transformation', None)
indexer = kwargs.get('indexer', None)
if tmp['key'] not in objs: if mfts.benchmark_only:
objs[tmp['key']] = tmp['obj'] _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
rmse[tmp['key']] = [] else:
smape[tmp['key']] = [] pttr = str(partitioner.__module__).split('.')[-1]
u[tmp['key']] = [] _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
times[tmp['key']] = [] mfts.partitioner = partitioner
rmse[tmp['key']].append_rhs(tmp['rmse'])
smape[tmp['key']].append_rhs(tmp['smape'])
u[tmp['key']].append_rhs(tmp['u'])
times[tmp['key']].append_rhs(tmp['time'])
print(tmp['key'], tmp['window'])
_process_end = time.time() if transformation is not None:
mfts.append_transformation(transformation)
print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now())) if mfts.has_seasonality:
mfts.indexer = indexer
print("Process Duration: {0}".format(_process_end - _process_start)) try:
_start = time.time()
mfts.fit(train_data, order=mfts.order)
_end = time.time()
times = _end - _start
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u) _crps1, _t1 = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
_t1 += times
except Exception as e:
print(e)
_crps1 = np.nan
_t1 = np.nan
ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'window': window_key}
return ret
def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters): def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
@ -239,71 +460,84 @@ def build_model_pool_point(models, max_order, benchmark_models, benchmark_models
return pool return pool
def all_point_forecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True, def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False):
series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None, objs = {}
distributions=False, benchmark_models=None, benchmark_models_parameters=None): rmse = {}
""" smape = {}
Fixed data benchmark for FTS point forecasters u = {}
:param data_train: data used to train the models times = {}
:param data_test: data used to test the models
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param statistics: print statistics
:param residuals: print and plot residuals
:param series: plot time series
:param save: save results
:param file: file path to save the results
:param tam: figure dimensions to plot the graphs
:param models: list of models to benchmark
:param transformation: data transformation
:param distributions: plot distributions
:return:
"""
models = build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters)
objs = [] for job in jobs:
_key = job['key']
if _key not in objs:
objs[_key] = job['obj']
rmse[_key] = []
smape[_key] = []
u[_key] = []
times[_key] = []
rmse[_key].append(job['rmse'])
smape[_key].append(job['smape'])
u[_key].append(job['u'])
times[_key].append(job['time'])
data_train_fs = Grid.GridPartitioner(data=data_train, npart=partitions, transformation=transformation) return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
count = 1
lcolors = [] def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False):
objs = {}
sharpness = {}
resolution = {}
coverage = {}
q05 = {}
q25 = {}
q75 = {}
q95 = {}
times = {}
for count, model in enumerate(models, start=0): for job in jobs:
#print(model) _key = job['key']
if transformation is not None: if _key not in objs:
model.append_transformation(transformation) objs[_key] = job['obj']
model.train(data_train, sets=data_train_fs.sets, order=model.order) sharpness[_key] = []
objs.append(model) resolution[_key] = []
lcolors.append( colors[count % ncol] ) coverage[_key] = []
times[_key] = []
q05[_key] = []
q25[_key] = []
q75[_key] = []
q95[_key] = []
if statistics: sharpness[_key].append(job['sharpness'])
print_point_statistics(data_test, objs) resolution[_key].append(job['resolution'])
coverage[_key].append(job['coverage'])
times[_key].append(job['time'])
q05[_key].append(job['Q05'])
q25[_key].append(job['Q25'])
q75[_key].append(job['Q75'])
q95[_key].append(job['Q95'])
if residuals:
print(ResidualAnalysis.compare_residuals(data_test, objs))
ResidualAnalysis.plot_residuals(data_test, objs, save=save, file=file, tam=tam)
if series: return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, times, q05, q25, q75, q95)
intervals=False)
if distributions:
lcolors.insert(0,'black')
pmfs = []
pmfs.append(
ProbabilityDistribution.ProbabilityDistribution("Original", 100, [min(data_test), max(data_test)], data=data_test) )
for m in objs: def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False):
forecasts = m.forecast(data_test) objs = {}
pmfs.append( crps = {}
ProbabilityDistribution.ProbabilityDistribution(m.shortname, 100, [min(data_test), max(data_test)], times = {}
data=forecasts))
print(getProbabilityDistributionStatistics(pmfs,data_test))
plot_probability_distributions(pmfs, lcolors, tam=tam) for job in jobs:
_key = job['key']
if _key not in objs:
objs[_key] = job['obj']
crps[_key] = []
times[_key] = []
crps[_key].append(job['CRPS'])
times[_key].append(job['time'])
return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic)
return models
def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None): def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
@ -330,163 +564,6 @@ def print_point_statistics(data, models, externalmodels = None, externalforecast
def getProbabilityDistributionStatistics(pmfs, data):
ret = "Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\\\ \n"
for k in pmfs:
ret += k.name + " & "
ret += str(k.entropy()) + " & "
ret += str(k.empiricalloglikelihood())+ " & "
ret += str(k.pseudologlikelihood(data))
ret += " \\\\ \n"
return ret
def interval_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
save=False, file=None, synthetic=True):
if models is None:
models = get_interval_methods()
objs = {}
lcolors = {}
sharpness = {}
resolution = {}
coverage = {}
times = {}
experiments = 0
for ct, training,test in Util.sliding_window(data, windowsize, train):
experiments += 1
for partition in partitions:
for partitioner in partitioners:
pttr = str(partitioner.__module__).split('.')[-1]
data_train_fs = partitioner(data=training, npart=partition, transformation=transformation)
for count, model in enumerate(models, start=0):
mfts = model("")
_key = mfts.shortname + " " + pttr+ " q = " +str(partition)
mfts.partitioner = data_train_fs
if not mfts.is_high_order:
if dump: print(ct,_key)
if _key not in objs:
objs[_key] = mfts
lcolors[_key] = colors[count % ncol]
sharpness[_key] = []
resolution[_key] = []
coverage[_key] = []
times[_key] = []
if transformation is not None:
mfts.append_transformation(transformation)
_start = time.time()
mfts.train(training, sets=data_train_fs.sets)
_end = time.time()
_tdiff = _end - _start
_start = time.time()
_sharp, _res, _cov = Measures.get_interval_statistics(test, mfts)
_end = time.time()
_tdiff += _end - _start
sharpness[_key].append_rhs(_sharp)
resolution[_key].append_rhs(_res)
coverage[_key].append_rhs(_cov)
times[_key].append_rhs(_tdiff)
else:
for order in np.arange(1, max_order + 1):
if order >= mfts.min_order:
mfts = model("")
_key = mfts.shortname + " n = " + str(order) + " " + pttr + " q = " + str(partition)
mfts.partitioner = data_train_fs
if dump: print(ct,_key)
if _key not in objs:
objs[_key] = mfts
lcolors[_key] = colors[count % ncol]
sharpness[_key] = []
resolution[_key] = []
coverage[_key] = []
times[_key] = []
if transformation is not None:
mfts.append_transformation(transformation)
_start = time.time()
mfts.train(training, sets=data_train_fs.sets, order=order)
_end = time.time()
_tdiff = _end - _start
_start = time.time()
_sharp, _res, _cov = Measures.get_interval_statistics(test, mfts)
_end = time.time()
_tdiff += _end - _start
sharpness[_key].append_rhs(_sharp)
resolution[_key].append_rhs(_res)
coverage[_key].append_rhs(_cov)
times[_key].append_rhs(_tdiff)
return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times)
def build_model_pool_interval(models, max_order, benchmark_models, benchmark_models_parameters):
pool = []
if models is None:
models = get_interval_methods()
for model in models:
mfts = model("")
if mfts.is_high_order:
for order in np.arange(1, max_order + 1):
if order >= mfts.min_order:
mfts = model("")
mfts.order = order
pool.append(mfts)
else:
mfts.order = 1
pool.append(mfts)
alphas = [0.05, 0.25]
if benchmark_models is not None:
for count, model in enumerate(benchmark_models, start=0):
par = benchmark_models_parameters[count]
for alpha in alphas:
mfts = model(str(alpha), alpha=alpha)
mfts.order = par
pool.append(mfts)
return pool
def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
statistics=False, models=None, transformation=None,
benchmark_models=None, benchmark_models_parameters=None):
models = build_model_pool_interval(models, max_order, benchmark_models, benchmark_models_parameters)
data_train_fs = Grid.GridPartitioner(data=data_train, npart=partitions, transformation=transformation).sets
lcolors = []
objs = []
for count, model in Util.enumerate2(models, start=0, step=2):
if transformation is not None:
model.append_transformation(transformation)
model.train(data_train, sets=data_train_fs, order=model.order)
objs.append(model)
lcolors.append( colors[count % ncol] )
if statistics:
print_interval_statistics(data_test, objs)
plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
points=False, intervals=True)
def print_interval_statistics(original, models): def print_interval_statistics(original, models):
ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n" ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
for fts in models: for fts in models:
@ -503,6 +580,7 @@ def print_interval_statistics(original, models):
print(ret) print(ret)
def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1): def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1):
lower = [kk[0] for kk in intervals] lower = [kk[0] for kk in intervals]
upper = [kk[1] for kk in intervals] upper = [kk[1] for kk in intervals]
@ -517,6 +595,7 @@ def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False
return [mi, ma] return [mi, ma]
def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5], def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
points=True, intervals=True, linewidth=1.5): points=True, intervals=True, linewidth=1.5):
""" """
@ -596,6 +675,7 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
ax.legend(handles0, labels0) ax.legend(handles0, labels0)
def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner], def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False, partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
save=False, file=None, synthetic=False): save=False, file=None, synthetic=False):
@ -610,7 +690,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
times2 = {} times2 = {}
experiments = 0 experiments = 0
for ct, train,test in Util.sliding_window(data, windowsize, train): for ct, train,test in cUtil.sliding_window(data, windowsize, train):
experiments += 1 experiments += 1
for partition in partitions: for partition in partitions:
for partitioner in partitioners: for partitioner in partitioners:
@ -692,6 +772,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic) return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic)
def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5], def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
models=None, transformation=None, option=2): models=None, transformation=None, option=2):
if models is None: if models is None:
@ -704,7 +785,7 @@ def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resol
data_train_fs = Grid.GridPartitioner(data=data_train, npart=partitions, transformation=transformation).sets data_train_fs = Grid.GridPartitioner(data=data_train, npart=partitions, transformation=transformation).sets
lcolors = [] lcolors = []
for count, model in Util.enumerate2(models, start=0, step=2): for count, model in cUtil.enumerate2(models, start=0, step=2):
mfts = model("") mfts = model("")
if not mfts.is_high_order: if not mfts.is_high_order:
if transformation is not None: if transformation is not None:
@ -746,6 +827,7 @@ def print_distribution_statistics(original, models, steps, resolution):
print(ret) print(ret)
def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True, def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True,
save=False, file=None, tam=[20, 5], resolution=None, save=False, file=None, tam=[20, 5], resolution=None,
cmap='Blues', linewidth=1.5): cmap='Blues', linewidth=1.5):
@ -820,7 +902,8 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
ax.set_xlabel('T') ax.set_xlabel('T')
ax.set_xlim([0, len(original)]) ax.set_xlim([0, len(original)])
Util.show_and_save_image(fig, file, save, lgd=lgd) cUtil.show_and_save_image(fig, file, save, lgd=lgd)
def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to): def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to):
@ -845,6 +928,7 @@ def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to
from pyFTS.common import Transformations from pyFTS.common import Transformations
def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from): def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from):
from matplotlib.patches import Rectangle from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection from matplotlib.collections import PatchCollection
@ -1102,86 +1186,11 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N
# plt.tight_layout() # plt.tight_layout()
Util.show_and_save_image(fig, file, save) cUtil.show_and_save_image(fig, file, save)
return ret return ret
def sliding_window_simple_search(data, windowsize, model, partitions, orders, save=False, file=None, tam=[10, 15],
plotforecasts=False, elev=30, azim=144, intervals=False, parameters=None):
_3d = len(orders) > 1
ret = []
errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
forecasted_best = []
fig = plt.figure(figsize=tam)
# fig.suptitle("Comparação de modelos ")
if plotforecasts:
ax0 = fig.add_axes([0, 0.4, 0.9, 0.5]) # left, bottom, width, height
ax0.set_xlim([0, len(data)])
ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
ax0.set_title('Forecasts')
ax0.set_ylabel('F(T)')
ax0.set_xlabel('T')
min_rmse = 1000000.0
best = None
for pc, p in enumerate(partitions, start=0):
sets = Grid.GridPartitioner(data=data, npart=p).sets
for oc, o in enumerate(orders, start=0):
_error = []
for ct, train, test in Util.sliding_window(data, windowsize, 0.8):
fts = model("q = " + str(p) + " n = " + str(o))
fts.train(data, sets=sets, order=o, parameters=parameters)
if not intervals:
forecasted = fts.forecast(test)
if not fts.has_seasonality:
_error.append( Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) )
else:
_error.append( Measures.rmse(np.array(test[o:]), np.array(forecasted)) )
for kk in range(o):
forecasted.insert(0, None)
if plotforecasts: ax0.plot(forecasted, label=fts.name)
else:
forecasted = fts.forecast_interval(test)
_error.append( 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])) )
error = np.nanmean(_error)
errors[oc, pc] = error
if error < min_rmse:
min_rmse = error
best = fts
forecasted_best = forecasted
# print(min_rmse)
if plotforecasts:
# handles0, labels0 = ax0.get_legend_handles_labels()
# ax0.legend(handles0, labels0)
ax0.plot(test, label="Original", linewidth=3.0, color="black")
if _3d: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
if not plotforecasts: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
# ax1 = fig.add_axes([0.6, 0.5, 0.45, 0.45], projection='3d')
if _3d:
ax1.set_title('Error Surface')
ax1.set_ylabel('Model order')
ax1.set_xlabel('Number of partitions')
ax1.set_zlabel('RMSE')
X, Y = np.meshgrid(partitions, orders)
surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
else:
ax1 = fig.add_axes([0, 1, 0.9, 0.9])
ax1.set_title('Error Curve')
ax1.set_ylabel('Number of partitions')
ax1.set_xlabel('RMSE')
ax0.plot(errors,partitions)
ret.append(best)
ret.append(forecasted_best)
# plt.tight_layout()
Util.show_and_save_image(fig, file, save)
return ret
def pftsExploreOrderAndPartitions(data,save=False, file=None): def pftsExploreOrderAndPartitions(data,save=False, file=None):
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8]) fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
@ -1242,5 +1251,5 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None):
plt.tight_layout() plt.tight_layout()
Util.show_and_save_image(fig, file, save) cUtil.show_and_save_image(fig, file, save)

View File

@ -8,6 +8,7 @@ python3 /usr/local/bin/dispynode.py -i [local IP] -d
import datetime import datetime
import time import time
import numba
import dispy import dispy
import dispy.httpd import dispy.httpd
@ -18,61 +19,7 @@ from pyFTS.common import Util
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None): @numba.jit()
"""
Point forecast benchmark function to be executed on cluster nodes
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param window_key: id of the sliding window
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
import time
from pyFTS.models import yu, chen, hofts, pwfts,ismailefendi,sadaei, song, cheng, hwang
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, naive, arima, quantreg
from pyFTS.common import Transformations
tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
pwfts.ProbabilisticWeightedFTS]
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
tmp3 = [Measures.get_point_statistics]
tmp5 = [Transformations.Differential]
if mfts.benchmark_only:
_key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
else:
pttr = str(partitioner.__module__).split('.')[-1]
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
if transformation is not None:
mfts.append_transformation(transformation)
_start = time.time()
mfts.train(train_data, partitioner.sets, order=mfts.order)
_end = time.time()
times = _end - _start
_start = time.time()
_rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, indexer)
_end = time.time()
times += _end - _start
ret = {'key': _key, 'obj': mfts, 'rmse': _rmse, 'smape': _smape, 'u': _u, 'time': times, 'window': window_key}
return ret
def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner], def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False, partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
benchmark_models=None, benchmark_models_parameters = None, benchmark_models=None, benchmark_models_parameters = None,
@ -100,7 +47,7 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
:return: DataFrame with the results :return: DataFrame with the results
""" """
cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies) cluster = dispy.JobCluster(benchmarks.run_point, nodes=nodes) #, depends=dependencies)
http_server = dispy.httpd.DispyHTTPServer(cluster) http_server = dispy.httpd.DispyHTTPServer(cluster)
@ -174,7 +121,7 @@ def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, part
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u) return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
@numba.jit()
def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters): def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
pool = [] pool = []
@ -209,57 +156,7 @@ def build_model_pool_point(models, max_order, benchmark_models, benchmark_models
return pool return pool
def run_interval(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None): @numba.jit()
"""
Interval forecast benchmark function to be executed on cluster nodes
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param window_key: id of the sliding window
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
import time
from pyFTS.models import hofts,ifts,pwfts
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, arima, quantreg
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
tmp4 = [arima.ARIMA, quantreg.QuantileRegression]
tmp3 = [Measures.get_interval_statistics]
if mfts.benchmark_only:
_key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
else:
pttr = str(partitioner.__module__).split('.')[-1]
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
if transformation is not None:
mfts.append_transformation(transformation)
_start = time.time()
mfts.train(train_data, partitioner.sets, order=mfts.order)
_end = time.time()
times = _end - _start
_start = time.time()
_sharp, _res, _cov, _q05, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, mfts)
_end = time.time()
times += _end - _start
ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times,
'Q05': _q05, 'Q25': _q25, 'Q75': _q75, 'Q95': _q95, 'window': window_key}
return ret
def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner], def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False, partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
benchmark_models=None, benchmark_models_parameters = None, benchmark_models=None, benchmark_models_parameters = None,
@ -296,7 +193,7 @@ def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None,
if benchmark_models_parameters is None: if benchmark_models_parameters is None:
benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2] benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
cluster = dispy.JobCluster(run_interval, nodes=nodes) #, depends=dependencies) cluster = dispy.JobCluster(benchmarks.run_interval, nodes=nodes) #, depends=dependencies)
http_server = dispy.httpd.DispyHTTPServer(cluster) http_server = dispy.httpd.DispyHTTPServer(cluster)
@ -407,70 +304,7 @@ def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None,
times, q05, q25, q75, q95) times, q05, q25, q75, q95)
def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, window_key=None, transformation=None, indexer=None): @numba.jit()
"""
Probabilistic m-step ahead forecast benchmark function to be executed on cluster nodes
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param steps:
:param resolution:
:param window_key: id of the sliding window
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
import time
import numpy as np
from pyFTS.models import hofts, ifts, pwfts
from pyFTS.models.ensemble import ensemble
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, arima
from pyFTS.models.seasonal import SeasonalIndexer
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, ensemble.AllMethodEnsembleFTS]
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]
if mfts.benchmark_only:
_key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
else:
pttr = str(partitioner.__module__).split('.')[-1]
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
if transformation is not None:
mfts.append_transformation(transformation)
if mfts.has_seasonality:
mfts.indexer = indexer
try:
_start = time.time()
mfts.train(train_data, partitioner.sets, order=mfts.order)
_end = time.time()
times = _end - _start
_crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test_data, mfts, steps=steps,
resolution=resolution)
_t1 += times
_t2 += times
except Exception as e:
print(e)
_crps1 = np.nan
_crps2 = np.nan
_t1 = np.nan
_t2 = np.nan
ret = {'key': _key, 'obj': mfts, 'CRPS_Interval': _crps1, 'CRPS_Distribution': _crps2, 'TIME_Interval': _t1,
'TIME_Distribution': _t2, 'window': window_key}
return ret
def ahead_sliding_window(data, windowsize, steps, resolution, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner], def ahead_sliding_window(data, windowsize, steps, resolution, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False, partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
benchmark_models=None, benchmark_models_parameters = None, benchmark_models=None, benchmark_models_parameters = None,
@ -505,7 +339,7 @@ def ahead_sliding_window(data, windowsize, steps, resolution, train=0.8, inc=0.1
if benchmark_models_parameters is None: if benchmark_models_parameters is None:
benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 0), (2, 0, 1), (2, 0, 2)] benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 0), (2, 0, 1), (2, 0, 2)]
cluster = dispy.JobCluster(run_ahead, nodes=nodes) # , depends=dependencies) cluster = dispy.JobCluster(benchmarks.run_ahead, nodes=nodes) # , depends=dependencies)
http_server = dispy.httpd.DispyHTTPServer(cluster) http_server = dispy.httpd.DispyHTTPServer(cluster)

View File

@ -1,4 +1,5 @@
import time import time
import numba
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import dill import dill
import numpy as np import numpy as np
@ -15,6 +16,7 @@ def uniquefilename(name):
return name + str(current_milli_time()) return name + str(current_milli_time())
def show_and_save_image(fig, file, flag, lgd=None): def show_and_save_image(fig, file, flag, lgd=None):
""" """
Show and image and save on file Show and image and save on file
@ -38,7 +40,7 @@ def enumerate2(xs, start=0, step=1):
start += step start += step
def sliding_window(data, windowsize, train=0.8, inc=0.1): def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
""" """
Sliding window method of cross validation for time series Sliding window method of cross validation for time series
:param data: the entire dataset :param data: the entire dataset
@ -50,7 +52,16 @@ def sliding_window(data, windowsize, train=0.8, inc=0.1):
l = len(data) l = len(data)
ttrain = int(round(windowsize * train, 0)) ttrain = int(round(windowsize * train, 0))
ic = int(round(windowsize * inc, 0)) ic = int(round(windowsize * inc, 0))
for count in np.arange(0,l-windowsize+ic,ic):
progressbar = kwargs.get('progress', None)
rng = np.arange(0,l-windowsize+ic,ic)
if progressbar:
from tqdm import tqdm
rng = tqdm(rng)
for count in rng:
if count + windowsize > l: if count + windowsize > l:
_end = l _end = l
else: else:
@ -91,11 +102,34 @@ def load_env(file):
dill.load_session(file) dill.load_session(file)
def start_dispy_cluster(method, nodes):
import dispy, dispy.httpd, logging
cluster = dispy.JobCluster(method, nodes=nodes, loglevel=logging.DEBUG, ping_interval=1000)
http_server = dispy.httpd.DispyHTTPServer(cluster)
return cluster, http_server
def stop_dispy_cluster(cluster, http_server):
cluster.wait() # wait for all jobs to finish
cluster.print_status()
http_server.shutdown() # this waits until browser gets all updates
cluster.close()
def simple_model_train(model, data, parameters): def simple_model_train(model, data, parameters):
model.train(data, **parameters) model.train(data, **parameters)
return model return model
def distributed_train(model, train_method, nodes, fts_method, data, num_batches=10, def distributed_train(model, train_method, nodes, fts_method, data, num_batches=10,
train_parameters={}, **kwargs): train_parameters={}, **kwargs):
import dispy, dispy.httpd, datetime import dispy, dispy.httpd, datetime
@ -106,9 +140,7 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches=
file_path = kwargs.get('file_path', None) file_path = kwargs.get('file_path', None)
cluster = dispy.JobCluster(train_method, nodes=nodes) # , depends=dependencies) cluster, http_server = start_dispy_cluster(train_method, nodes)
http_server = dispy.httpd.DispyHTTPServer(cluster)
print("[{0: %H:%M:%S}] Distrituted Train Started".format(datetime.datetime.now())) print("[{0: %H:%M:%S}] Distrituted Train Started".format(datetime.datetime.now()))
@ -149,26 +181,21 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches=
print("[{0: %H:%M:%S}] Distrituted Train Finished".format(datetime.datetime.now())) print("[{0: %H:%M:%S}] Distrituted Train Finished".format(datetime.datetime.now()))
cluster.wait() # wait for all jobs to finish stop_dispy_cluster(cluster, http_server)
cluster.print_status()
http_server.shutdown() # this waits until browser gets all updates
cluster.close()
return model return model
def simple_model_predict(model, data, parameters): def simple_model_predict(model, data, parameters):
return model.predict(data, **parameters) return model.predict(data, **parameters)
def distributed_predict(model, parameters, nodes, data, num_batches): def distributed_predict(model, parameters, nodes, data, num_batches):
import dispy, dispy.httpd import dispy, dispy.httpd
cluster = dispy.JobCluster(simple_model_predict, nodes=nodes) # , depends=dependencies) cluster, http_server = start_dispy_cluster(simple_model_predict, nodes)
http_server = dispy.httpd.DispyHTTPServer(cluster)
jobs = [] jobs = []
n = len(data) n = len(data)
@ -199,11 +226,6 @@ def distributed_predict(model, parameters, nodes, data, num_batches):
print(job.exception) print(job.exception)
print(job.stdout) print(job.stdout)
cluster.wait() # wait for all jobs to finish stop_dispy_cluster(cluster, http_server)
cluster.print_status()
http_server.shutdown() # this waits until browser gets all updates
cluster.close()
return ret return ret

View File

@ -5,7 +5,7 @@ from pyFTS.common import FuzzySet, SortedCollection, tree, Util
class FTS(object): class FTS(object):
""" """
Fuzzy Time Series Fuzzy Time Series object model
""" """
def __init__(self, order, name, **kwargs): def __init__(self, order, name, **kwargs):
""" """
@ -60,7 +60,14 @@ class FTS(object):
Forecast using trained model Forecast using trained model
:param data: time series with minimal length to the order of the model :param data: time series with minimal length to the order of the model
:param kwargs: :param kwargs:
:return:
:keyword
type: the forecasting type, one of these values: point(default), interval or distribution.
steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
start: in the multi step forecasting, the index of the data where to start forecasting
distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
nodes: a list with the dispy cluster nodes addresses
:return: a numpy array with the forecasted data
""" """
if 'distributed' in kwargs: if 'distributed' in kwargs:
@ -181,7 +188,7 @@ class FTS(object):
def fit(self, data, **kwargs): def fit(self, data, **kwargs):
""" """
:param data: :param data: the training time series
:param kwargs: :param kwargs:
:keyword :keyword
@ -189,12 +196,17 @@ class FTS(object):
save_model: save final model on disk save_model: save final model on disk
batch_save: save the model between each batch batch_save: save the model between each batch
file_path: path to save the model file_path: path to save the model
distributed: boolean, indicate if the training procedure will be distributed in a dispy cluster
nodes: a list with the dispy cluster nodes addresses
:return: :return:
""" """
import datetime import datetime
num_batches = kwargs.get('num_batches', 10) dump = kwargs.get('dump', None)
num_batches = kwargs.get('num_batches', None)
save = kwargs.get('save_model', False) # save model on disk save = kwargs.get('save_model', False) # save model on disk
@ -214,14 +226,24 @@ class FTS(object):
batch_save_interval=batch_save_interval) batch_save_interval=batch_save_interval)
else: else:
print("[{0: %H:%M:%S}] Start training".format(datetime.datetime.now())) if dump == 'time':
print("[{0: %H:%M:%S}] Start training".format(datetime.datetime.now()))
if num_batches is not None: if num_batches is not None:
n = len(data) n = len(data)
batch_size = int(n / num_batches) batch_size = int(n / num_batches)
bcount = 1 bcount = 1
for ct in range(self.order, n, batch_size):
print("[{0: %H:%M:%S}] Starting batch ".format(datetime.datetime.now()) + str(bcount)) rng = range(self.order, n, batch_size)
if dump == 'tqdm':
from tqdm import tqdm
rng = tqdm(rng)
for ct in rng:
if dump == 'time':
print("[{0: %H:%M:%S}] Starting batch ".format(datetime.datetime.now()) + str(bcount))
if self.is_multivariate: if self.is_multivariate:
ndata = data.iloc[ct - self.order:ct + batch_size] ndata = data.iloc[ct - self.order:ct + batch_size]
else: else:
@ -232,14 +254,16 @@ class FTS(object):
if batch_save: if batch_save:
Util.persist_obj(self,file_path) Util.persist_obj(self,file_path)
print("[{0: %H:%M:%S}] Finish batch ".format(datetime.datetime.now()) + str(bcount)) if dump == 'time':
print("[{0: %H:%M:%S}] Finish batch ".format(datetime.datetime.now()) + str(bcount))
bcount += 1 bcount += 1
else: else:
self.train(data, **kwargs) self.train(data, **kwargs)
print("[{0: %H:%M:%S}] Finish training".format(datetime.datetime.now())) if dump == 'time':
print("[{0: %H:%M:%S}] Finish training".format(datetime.datetime.now()))
if save: if save:
Util.persist_obj(self, file_path) Util.persist_obj(self, file_path)

View File

@ -201,8 +201,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
if flrg.get_key() in self.flrgs: if flrg.get_key() in self.flrgs:
return self.flrgs[flrg.get_key()].frequency_count / self.global_frequency_count return self.flrgs[flrg.get_key()].frequency_count / self.global_frequency_count
else: else:
self.add_new_PWFLGR(flrg) return 0.0
return self.flrg_lhs_unconditional_probability(flrg) #self.add_new_PWFLGR(flrg)
#return self.flrg_lhs_unconditional_probability(flrg)
def flrg_lhs_conditional_probability(self, x, flrg): def flrg_lhs_conditional_probability(self, x, flrg):
mv = flrg.get_membership(x, self.sets) mv = flrg.get_membership(x, self.sets)
@ -214,8 +215,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
tmp = self.flrgs[flrg.get_key()] tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_midpoint(self.sets) #sum(np.array([tmp.rhs_unconditional_probability(s) * self.setsDict[s].centroid for s in tmp.RHS])) ret = tmp.get_midpoint(self.sets) #sum(np.array([tmp.rhs_unconditional_probability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
else: else:
pi = 1 / len(flrg.LHS) if len(flrg.LHS) > 0:
ret = sum(np.array([pi * self.sets[s].centroid for s in flrg.LHS])) pi = 1 / len(flrg.LHS)
ret = sum(np.array([pi * self.sets[s].centroid for s in flrg.LHS]))
else:
ret = np.nan
return ret return ret
def flrg_rhs_conditional_probability(self, x, flrg): def flrg_rhs_conditional_probability(self, x, flrg):
@ -241,8 +245,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
tmp = self.flrgs[flrg.get_key()] tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_upper(self.sets) ret = tmp.get_upper(self.sets)
else: else:
pi = 1 / len(flrg.LHS) ret = 0
ret = sum(np.array([pi * self.sets[s].upper for s in flrg.LHS]))
return ret return ret
def get_lower(self, flrg): def get_lower(self, flrg):
@ -250,8 +253,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
tmp = self.flrgs[flrg.get_key()] tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_lower(self.sets) ret = tmp.get_lower(self.sets)
else: else:
pi = 1 / len(flrg.LHS) ret = 0
ret = sum(np.array([pi * self.sets[s].lower for s in flrg.LHS]))
return ret return ret
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
@ -322,92 +324,31 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret.append_rhs([lo_qt, up_qt]) ret.append_rhs([lo_qt, up_qt])
def interval_extremum(self, k, ndata, ret): def interval_extremum(self, k, ndata, ret):
affected_flrgs = []
affected_flrgs_memberships = [] sample = ndata[k - (self.order - 1): k + 1]
norms = []
flrgs = self.generate_lhs_flrg(sample)
up = [] up = []
lo = [] lo = []
# Find the sets which membership > 0 for each lag norms = []
count = 0 for flrg in flrgs:
lags = {} norm = self.flrg_lhs_conditional_probability(sample, flrg)
if self.order > 1:
subset = ndata[k - (self.order - 1): k + 1]
for instance in subset:
mb = FuzzySet.fuzzyfy_instance(instance, self.sets)
tmp = np.argwhere(mb)
idx = np.ravel(tmp) # flatten the array
if idx.size == 0: # the element is out of the bounds of the Universe of Discourse
if math.isclose(instance, self.sets[0].lower) or instance < self.sets[0].lower:
idx = [0]
elif math.isclose(instance, self.sets[-1].upper) or instance > self.sets[-1].upper:
idx = [len(self.sets) - 1]
else:
raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
(self.sets[0].lower, self.sets[-1].upper, instance))
lags[count] = idx
count += 1
# Build the tree with all possible paths
root = tree.FLRGTreeNode(None)
self.build_tree(root, lags, 0)
# Trace the possible paths and build the PFLRG's
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
flrg = hofts.HighOrderFLRG(self.order)
for kk in path: flrg.append_lhs(self.sets[kk])
assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])
##
affected_flrgs.append(flrg)
# Find the general membership of FLRG
affected_flrgs_memberships.append(min(self.get_sequence_membership(subset, flrg.LHS)))
else:
mv = FuzzySet.fuzzyfy_instance(ndata[k], self.sets) # get all membership values
tmp = np.argwhere(mv) # get the indices of values > 0
idx = np.ravel(tmp) # flatten the array
if idx.size == 0: # the element is out of the bounds of the Universe of Discourse
if math.isclose(ndata[k], self.sets[0].lower) or ndata[k] < self.sets[0].lower:
idx = [0]
elif math.isclose(ndata[k], self.sets[-1].upper) or ndata[k] > self.sets[-1].upper:
idx = [len(self.sets) - 1]
else:
raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
(self.sets[0].lower, self.sets[-1].upper, ndata[k]))
for kk in idx:
flrg = hofts.HighOrderFLRG(self.order)
flrg.append_lhs(self.sets[kk])
affected_flrgs.append(flrg)
affected_flrgs_memberships.append(mv[kk])
for count, flrg in enumerate(affected_flrgs):
# achar o os bounds de cada FLRG, ponderados pela probabilidade e pertinência
norm = self.flrg_lhs_unconditional_probability(flrg) * affected_flrgs_memberships[count]
if norm == 0: if norm == 0:
norm = self.flrg_lhs_unconditional_probability(flrg) # * 0.001 norm = self.flrg_lhs_unconditional_probability(flrg) # * 0.001
up.append(norm * self.get_upper(flrg)) up.append(norm * self.get_upper(flrg))
lo.append(norm * self.get_lower(flrg)) lo.append(norm * self.get_lower(flrg))
norms.append(norm) norms.append(norm)
# gerar o intervalo # gerar o intervalo
norm = sum(norms) norm = sum(norms)
if norm == 0: if norm == 0:
ret.append_rhs([0, 0]) ret.append([0, 0])
else: else:
lo_ = sum(lo) / norm lo_ = sum(lo) / norm
up_ = sum(up) / norm up_ = sum(up) / norm
ret.append_rhs([lo_, up_]) ret.append([lo_, up_])
def forecast_distribution(self, data, **kwargs): def forecast_distribution(self, data, **kwargs):
@ -415,15 +356,18 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
data = [data] data = [data]
smooth = kwargs.get("smooth", "none") smooth = kwargs.get("smooth", "none")
nbins = kwargs.get("num_bins", 100)
ndata = np.array(self.apply_transformations(data)) ndata = np.array(self.apply_transformations(data))
l = len(ndata) l = len(ndata)
uod = self.get_UoD()
if 'bins' in kwargs:
_bins = kwargs.pop('bins')
else:
nbins = kwargs.get("num_bins", 100)
_bins = np.linspace(uod[0], uod[1], nbins)
ret = [] ret = []
uod = self.get_UoD()
_bins = np.linspace(uod[0], uod[1], nbins)
for k in np.arange(self.order - 1, l): for k in np.arange(self.order - 1, l):
sample = ndata[k - (self.order - 1): k + 1] sample = ndata[k - (self.order - 1): k + 1]
@ -487,120 +431,63 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = [] ret = []
method = kwargs.get('method', 2) smooth = kwargs.get("smooth", "none")
smooth = "KDE" if method != 4 else "none"
nbins = kwargs.get("num_bins", 100) ndata = np.array(self.apply_transformations(data))
uod = self.get_UoD() uod = self.get_UoD()
_bins = np.linspace(uod[0], uod[1], nbins).tolist()
if method != 4: if 'bins' in kwargs:
intervals = self.forecast_ahead_interval(data, steps) _bins = kwargs.pop('bins')
else: else:
l = len(data) nbins = kwargs.get("num_bins", 100)
for k in np.arange(l - self.order, l): _bins = np.linspace(uod[0], uod[1], nbins)
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
dist.set(data[k], 1.0)
ret.append(dist)
for k in np.arange(self.order, steps + self.order): start = kwargs.get('start', self.order)
data = [] sample = ndata[start - (self.order - 1): start + 1]
if method == 1: for dat in sample:
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
tmp.set(dat, 1.0)
ret.append(tmp)
lags = {} dist = self.forecast_distribution(sample, bins=_bins)
cc = 0 ret.append(dist)
for i in intervals[k - self.order : k]: for k in np.arange(self.order, steps+self.order):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
quantiles = [] lags = {}
for qt in np.arange(0, 50, 2): # Find all bins of past distributions with probability greater than zero
quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100))
quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100))
quantiles.append(i[0] + ((i[1] - i[0]) / 2))
quantiles = list(set(quantiles)) for ct, dd in enumerate(ret[k - self.order: k]):
vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0]
lags[ct] = sorted(vals)
quantiles.sort() root = tree.FLRGTreeNode(None)
lags[cc] = quantiles self.build_tree_without_order(root, lags, 0)
cc += 1 # Trace all possible combinations between the bins of past distributions
# Build the tree with all possible paths for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
root = tree.FLRGTreeNode(None) # get the combined probabilities for this path
self.build_tree_without_order(root, lags, 0) pk = np.prod([ret[k - self.order + o].density(path[o])
for o in np.arange(0, self.order)])
# Trace the possible paths
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
qtle = np.ravel(self.forecast_interval(path)) d = self.forecast_distribution(path)[0]
data.extend(np.linspace(qtle[0],qtle[1],100).tolist()) for bin in _bins:
dist.set(bin, dist.density(bin) + pk * d.density(bin))
elif method == 2: ret = ret[self.order:]
for qt in np.arange(0, 50, 1):
# print(qt)
qtle_lower = self.forecast_interval(
[intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
np.arange(k - self.order, k)])
qtle_lower = np.ravel(qtle_lower)
data.extend(np.linspace(qtle_lower[0], qtle_lower[1], 100).tolist())
qtle_upper = self.forecast_interval(
[intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
np.arange(k - self.order, k)])
qtle_upper = np.ravel(qtle_upper)
data.extend(np.linspace(qtle_upper[0], qtle_upper[1], 100).tolist())
qtle_mid = self.forecast_interval(
[intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)])
qtle_mid = np.ravel(qtle_mid)
data.extend(np.linspace(qtle_mid[0], qtle_mid[1], 100).tolist())
elif method == 3:
i = intervals[k]
data = np.linspace(i[0],i[1],100).tolist()
else:
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, bins=_bins,
uod=uod, **kwargs)
lags = {}
cc = 0
for dd in ret[k - self.order: k]:
vals = [float(v) for v in dd.bins if round(dd.density(v),4) > 0]
lags[cc] = sorted(vals)
cc += 1
root = tree.FLRGTreeNode(None)
self.build_tree_without_order(root, lags, 0)
# Trace the possible paths
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
pk = np.prod([ret[k - self.order + o].density(path[o])
for o in np.arange(0,self.order)])
d = self.forecast_distribution(path)[0]
for bin in _bins:
dist.set(bin, dist.density(bin) + pk * d.density(bin))
if method != 4:
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, bins=_bins, data=data,
uod=uod, **kwargs)
ret.append(dist)
return ret return ret

View File

@ -1,9 +1,12 @@
import numba
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib as plt import matplotlib as plt
import matplotlib.colors as pltcolors import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D #from mpl_toolkits.mplot3d import Axes3D
from pyFTS.benchmarks import Measures
from pyFTS.common import Membership, Util from pyFTS.common import Membership, Util
from pyFTS.partitioners import Grid,Huarng,FCM,Entropy from pyFTS.partitioners import Grid,Huarng,FCM,Entropy
@ -12,6 +15,107 @@ all_methods = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitio
mfs = [Membership.trimf, Membership.gaussmf, Membership.trapmf] mfs = [Membership.trimf, Membership.gaussmf, Membership.trapmf]
@numba.jit()
def sliding_window_simple_search(data, windowsize, model, partitions, orders, **kwargs):
_3d = len(orders) > 1
ret = []
errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
forecasted_best = []
figsize = kwargs.get('figsize', [10, 15])
fig = plt.figure(figsize=figsize)
plotforecasts = kwargs.get('plotforecasts',False)
if plotforecasts:
ax0 = fig.add_axes([0, 0.4, 0.9, 0.5]) # left, bottom, width, height
ax0.set_xlim([0, len(data)])
ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
ax0.set_title('Forecasts')
ax0.set_ylabel('F(T)')
ax0.set_xlabel('T')
min_rmse = 1000000.0
best = None
intervals = kwargs.get('intervals',False)
threshold = kwargs.get('threshold',0.5)
progressbar = kwargs.get('progressbar', None)
rng1 = enumerate(partitions, start=0)
if progressbar:
from tqdm import tqdm
rng1 = enumerate(tqdm(partitions), start=0)
for pc, p in rng1:
fs = Grid.GridPartitioner(data=data, npart=p)
rng2 = enumerate(orders, start=0)
if progressbar:
rng2 = enumerate(tqdm(orders), start=0)
for oc, o in rng2:
_error = []
for ct, train, test in Util.sliding_window(data, windowsize, 0.8, **kwargs):
fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
fts.fit(train, order=o)
if not intervals:
forecasted = fts.forecast(test)
if not fts.has_seasonality:
_error.append( Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) )
else:
_error.append( Measures.rmse(np.array(test[o:]), np.array(forecasted)) )
for kk in range(o):
forecasted.insert(0, None)
if plotforecasts: ax0.plot(forecasted, label=fts.name)
else:
forecasted = fts.forecast_interval(test)
_error.append( 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])) )
error = np.nanmean(_error)
errors[oc, pc] = error
if (min_rmse - error) > threshold:
min_rmse = error
best = fts
forecasted_best = forecasted
# print(min_rmse)
if plotforecasts:
# handles0, labels0 = ax0.get_legend_handles_labels()
# ax0.legend(handles0, labels0)
elev = kwargs.get('elev', 30)
azim = kwargs.get('azim', 144)
ax0.plot(test, label="Original", linewidth=3.0, color="black")
if _3d: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
if not plotforecasts: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
# ax1 = fig.add_axes([0.6, 0.5, 0.45, 0.45], projection='3d')
if _3d:
ax1.set_title('Error Surface')
ax1.set_ylabel('Model order')
ax1.set_xlabel('Number of partitions')
ax1.set_zlabel('RMSE')
X, Y = np.meshgrid(partitions, orders)
surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
else:
ax1 = fig.add_axes([0, 1, 0.9, 0.9])
ax1.set_title('Error Curve')
ax1.set_ylabel('Number of partitions')
ax1.set_xlabel('RMSE')
ax0.plot(errors,partitions)
ret.append(best)
ret.append(forecasted_best)
# plt.tight_layout()
file = kwargs.get('file', None)
save = kwargs.get('save', False)
Util.show_and_save_image(fig, file, save)
return ret
def plot_sets(data, sets, titles, tam=[12, 10], save=False, file=None): def plot_sets(data, sets, titles, tam=[12, 10], save=False, file=None):
num = len(sets) num = len(sets)
#fig = plt.figure(figsize=tam) #fig = plt.figure(figsize=tam)

View File

@ -9,6 +9,17 @@ import numpy as np
import pandas as pd import pandas as pd
from pyFTS.common import Transformations from pyFTS.common import Transformations
from pyFTS.data import INMET from pyFTS.data import TAIEX
print(INMET.get_dataframe()) dataset = TAIEX.get_data()
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import pwfts
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False, orders=[1], partitions=[10], #np.arange(10,100,2),
progress=False, type='distribution',
distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
save=True, file="pwfts_taiex_interval.csv")