From 5c8c80cd8d91adf93f3d77bb83124562479aba3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Fri, 3 Mar 2017 08:53:55 -0300 Subject: [PATCH] - new sliding window benchmarks - statsmodels ARIMA wrapper for benchmarks - method refactoring at PWFTS - auto_update at PWFTS - method refactoring at ResidualAnalysis --- benchmarks/Measures.py | 44 +- benchmarks/ProbabilityDistribution.py | 6 + benchmarks/ResidualAnalysis.py | 2 +- benchmarks/arima.py | 52 ++ benchmarks/benchmarks.py | 916 ++++++++++++++++++++------ benchmarks/naive.py | 4 +- fts.py | 2 + partitioners/Grid.py | 2 +- pfts.py | 487 -------------- pwfts.py | 48 +- tests/general.py | 57 +- 11 files changed, 881 insertions(+), 739 deletions(-) create mode 100644 benchmarks/arima.py delete mode 100644 pfts.py diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py index ff5ff2a..d0a7a6d 100644 --- a/benchmarks/Measures.py +++ b/benchmarks/Measures.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +from pyFTS.common import FuzzySet,SortedCollection # Autocorrelation function estimative @@ -32,7 +33,6 @@ def mape(targets, forecasts): def smape(targets, forecasts, type=2): - return mape(targets, forecasts) if type == 1: return np.mean(np.abs(forecasts - targets) / ((forecasts + targets)/2)) elif type == 2: @@ -52,10 +52,8 @@ def UStatistic(targets, forecasts): naive = [] y = [] for k in np.arange(0,l-1): - #y.append((forecasts[k ] - targets[k ]) ** 2) - y.append(((forecasts[k + 1] - targets[k + 1]) / targets[k]) ** 2) - #naive.append((targets[k + 1] - targets[k]) ** 2) - naive.append(((targets[k + 1] - targets[k]) / targets[k]) ** 2) + y.append((forecasts[k ] - targets[k ]) ** 2) + naive.append((targets[k + 1] - targets[k]) ** 2) return np.sqrt(sum(y) / sum(naive)) @@ -111,3 +109,39 @@ def coverage(targets, forecasts): else: preds.append(0) return np.mean(preds) + + +def pmf_to_cdf(density): + ret = [] + for row in density.index: + tmp = [] + prev = 0 + for col in density.columns: + prev += density[col][row] + tmp.append( prev ) + ret.append(tmp) + df = pd.DataFrame(ret, columns=density.columns) + return df + + +def heavyside_cdf(bins, targets): + ret = [] + for t in targets: + result = [1 if b >= t else 0 for b in bins] + ret.append(result) + df = pd.DataFrame(ret, columns=bins) + return df + + +# Continuous Ranked Probability Score +def crps(targets, densities): + l = len(densities.columns) + n = len(densities.index) + Ff = pmf_to_cdf(densities) + Fa = heavyside_cdf(densities.columns, targets) + + _crps = float(0.0) + for k in densities.index: + _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) + + return _crps / float(l * n) diff --git a/benchmarks/ProbabilityDistribution.py b/benchmarks/ProbabilityDistribution.py index 29bdad0..c42d73c 100644 --- a/benchmarks/ProbabilityDistribution.py +++ b/benchmarks/ProbabilityDistribution.py @@ -38,6 +38,12 @@ class ProbabilityDistribution(object): ret.append(self.distribution[v] / self.count) return ret + def cummulative(self, values): + pass + + def quantile(self, qt): + pass + def entropy(self): h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0 for k in self.bins]) diff --git a/benchmarks/ResidualAnalysis.py b/benchmarks/ResidualAnalysis.py index 87620f6..4334a7f 100644 --- a/benchmarks/ResidualAnalysis.py +++ b/benchmarks/ResidualAnalysis.py @@ -71,7 +71,7 @@ def plotResiduals(targets, models, tam=[8, 8], save=False, file=None): Util.showAndSaveImage(fig, 
file, save) -def plotResiduals2(targets, models, tam=[8, 8], save=False, file=None): +def plot_residuals(targets, models, tam=[8, 8], save=False, file=None): fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam) for c, mfts in enumerate(models, start=0): diff --git a/benchmarks/arima.py b/benchmarks/arima.py new file mode 100644 index 0000000..961277b --- /dev/null +++ b/benchmarks/arima.py @@ -0,0 +1,52 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +import numpy as np +from statsmodels.tsa.arima_model import ARIMA as stats_arima +from pyFTS import fts + + +class ARIMA(fts.FTS): + def __init__(self, name): + super(ARIMA, self).__init__(1, "ARIMA") + self.name = "ARIMA" + self.detail = "Auto Regressive Integrated Moving Average" + self.isHighOrder = True + self.model = None + self.model_fit = None + self.trained_data = None + self.p = 1 + self.d = 0 + self.q = 0 + self.benchmark_only = True + self.minOrder = 1 + + def train(self, data, sets, order=1, parameters=None): + if parameters is not None: + self.p = parameters[0] + self.d = parameters[1] + self.q = parameters[2] + self.order = max([self.p, self.d, self.q]) + self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")" + + old_fit = self.model_fit + self.model = stats_arima(data, order=(self.p, self.d, self.q)) + try: + self.model_fit = self.model.fit(disp=0) + except: + try: + self.model = stats_arima(data, order=(self.p, self.d, self.q)) + self.model_fit = self.model.fit(disp=1) + except: + self.model_fit = old_fit + + self.trained_data = data #.tolist() + + def forecast(self, data): + ret = [] + for t in data: + output = self.model_fit.forecast() + ret.append( output[0] ) + self.trained_data = np.append(self.trained_data, t) #.append(t) + self.train(self.trained_data,None,order=self.order, parameters=(self.p, self.d, self.q)) + return ret \ No newline at end of file diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index ad51d01..4814647 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -3,15 +3,17 @@ import numpy as np import pandas as pd +import time import matplotlib as plt import matplotlib.colors as pltcolors +import matplotlib.cm as cmx import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D # from sklearn.cross_validation import KFold -from pyFTS.benchmarks import Measures, naive, ResidualAnalysis +from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution from pyFTS.partitioners import Grid from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util -from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts +from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green', 'cyan', 'blue', 'darkblue', 'purple', 'darkviolet'] @@ -22,21 +24,208 @@ styles = ['-','--','-.',':','.'] nsty = len(styles) -def allPointForecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True, - series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None): +def get_benchmark_point_methods(): + return [naive.Naive, arima.ARIMA] + +def get_point_methods(): + return [chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, + sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, pwfts.ProbabilisticWeightedFTS] + +def get_interval_methods(): + return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS] + +def 
external_point_sliding_window(models, parameters, data, windowsize,train=0.8, dump=False, save=False, file=None): + objs = {} + lcolors = {} + rmse = {} + smape = {} + u = {} + times = {} + + for ct, train, test in Util.sliding_window(data, windowsize, train): + for count, method in enumerate(models, start=0): + model = method("") + + _start = time.time() + model.train(train, None, parameters=parameters[count]) + _end = time.time() + + _key = model.shortname + + if dump: print(ct, _key) + + if _key not in objs: + objs[_key] = model + lcolors[_key] = colors[count % ncol] + rmse[_key] = [] + smape[_key] = [] + u[_key] = [] + times[_key] = [] + + + times[_key].append(_end - _start) + + _start = time.time() + _rmse, _smape, _u = get_point_statistics(test, model, None) + _end = time.time() + rmse[_key].append(_rmse) + smape[_key].append(_smape) + u[_key].append(_u) + times[_key].append(_end - _start) + + if dump: print(_rmse, _smape, _u) + + ret = [] + for k in sorted(objs.keys()): + mod = [] + mfts = objs[k] + mod.append(mfts.shortname) + mod.append(round(np.nanmean(rmse[k]), 2)) + mod.append(round(np.nanstd(rmse[k]), 2)) + mod.append(round(np.nanmean(smape[k]), 2)) + mod.append(round(np.nanstd(smape[k]), 2)) + mod.append(round(np.nanmean(u[k]), 2)) + mod.append(round(np.nanstd(u[k]), 2)) + mod.append(round(np.nanmean(times[k]), 4)) + ret.append(mod) + + columns = ["Model", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG"] + + dat = pd.DataFrame(ret, columns=columns) + + if save: dat.to_csv(Util.uniquefilename(file), sep=";") + + return dat + + +def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner], + partitions=[10], max_order=3,transformation=None,indexer=None,dump=False, + save=False, file=None): + if models is None: + models = get_point_methods() + + + objs = {} + lcolors = {} + rmse = {} + smape = {} + u = {} + times = {} + + for ct, train,test in Util.sliding_window(data, windowsize, train): + mocks = {} + for partition in partitions: + for partitioner in partitioners: + pttr = str(partitioner.__module__).split('.')[-1] + data_train_fs = partitioner(train, partition, transformation=transformation) + + for count, model in enumerate(models, start=0): + + mfts = model("") + + _key = mfts.shortname + " " + pttr + " q = " + str(partition) + + mfts.partitioner = data_train_fs + if not mfts.isHighOrder: + + if dump: print(ct,_key) + + if _key not in objs: + objs[_key] = mfts + lcolors[_key] = colors[count % ncol] + rmse[_key] = [] + smape[_key] = [] + u[_key] = [] + times[_key] = [] + + if transformation is not None: + mfts.appendTransformation(transformation) + + + _start = time.time() + mfts.train(train, data_train_fs.sets) + _end = time.time() + times[_key].append(_end - _start) + + _start = time.time() + _rmse, _smape, _u = get_point_statistics(test, mfts, indexer) + _end = time.time() + rmse[_key].append(_rmse) + smape[_key].append(_smape) + u[_key].append(_u) + times[_key].append(_end - _start) + else: + for order in np.arange(1, max_order + 1): + if order >= mfts.minOrder: + mfts = model("") + + _key = mfts.shortname + " n = " + str(order) + " " + pttr + " q = " + str(partition) + + mfts.partitioner = data_train_fs + + if dump: print(ct,_key) + + if _key not in objs: + objs[_key] = mfts + lcolors[_key] = colors[count % ncol] + rmse[_key] = [] + smape[_key] = [] + u[_key] = [] + times[_key] = [] + + if transformation is not None: + mfts.appendTransformation(transformation) + + _start = time.time() + mfts.train(train, 
data_train_fs.sets, order=order) + _end = time.time() + times[_key].append(_end - _start) + + _start = time.time() + _rmse, _smape, _u = get_point_statistics(test, mfts, indexer) + _end = time.time() + rmse[_key].append(_rmse) + smape[_key].append(_smape) + u[_key].append(_u) + times[_key].append(_end - _start) + ret = [] + for k in sorted(objs.keys()): + mod = [] + mfts = objs[k] + mod.append(mfts.shortname) + mod.append(mfts.order ) + mod.append(mfts.partitioner.name) + mod.append(mfts.partitioner.partitions) + mod.append(round(np.nanmean(rmse[k]),2)) + mod.append(round(np.nanstd(rmse[k]), 2)) + mod.append(round(np.nanmean(smape[k]), 2)) + mod.append(round(np.nanstd(smape[k]), 2)) + mod.append(round(np.nanmean(u[k]), 2)) + mod.append(round(np.nanstd(u[k]), 2)) + mod.append(len(mfts)) + mod.append(round(np.nanmean(times[k]),4)) + ret.append(mod) + + columns = ["Model","Order","Scheme","Partitions","RMSEAVG","RMSESTD","SMAPEAVG","SMAPESTD","UAVG","USTD","SIZE","TIMEAVG"] + + dat = pd.DataFrame(ret,columns=columns) + + if save: dat.to_csv(Util.uniquefilename(file),sep=";") + + return dat + + + +def all_point_forecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True, + series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None, + distributions=False): if models is None: - models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, - sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, pfts.ProbabilisticFTS] + models = get_point_methods() objs = [] - data_train_fs = Grid.GridPartitioner(data_train, partitions, transformation=transformation).sets - -# if transformation is not None: -# data_train_fs = Grid.GridPartitionerTrimf(transformation.apply(data_train),partitions) -# else: -# data_train_fs = Grid.GridPartitionerTrimf(data_train, partitions) + data_train_fs = Grid.GridPartitioner(data_train, partitions, transformation=transformation) count = 1 @@ -48,7 +237,7 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti if not mfts.isHighOrder: if transformation is not None: mfts.appendTransformation(transformation) - mfts.train(data_train, data_train_fs) + mfts.train(data_train, data_train_fs.sets) objs.append(mfts) lcolors.append( colors[count % ncol] ) else: @@ -57,31 +246,78 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti mfts = model(" n = " + str(order)) if transformation is not None: mfts.appendTransformation(transformation) - mfts.train(data_train, data_train_fs, order=order) + mfts.train(data_train, data_train_fs.sets, order=order) objs.append(mfts) - lcolors.append(colors[count % ncol]) + lcolors.append(colors[(count + order) % ncol]) if statistics: - print(getPointStatistics(data_test, objs)) + print_point_statistics(data_test, objs) if residuals: print(ResidualAnalysis.compareResiduals(data_test, objs)) - ResidualAnalysis.plotResiduals2(data_test, objs, save=save, file=file, tam=tam) + ResidualAnalysis.plot_residuals(data_test, objs, save=save, file=file, tam=tam) if series: - plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, - intervals=False) + plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, + intervals=False) + + if distributions: + lcolors.insert(0,'black') + pmfs = [] + pmfs.append( + ProbabilityDistribution.ProbabilityDistribution("Original", 100, [min(data_test), max(data_test)], data=data_test) ) + + for m in objs: + forecasts = 
m.forecast(data_test) + pmfs.append( + ProbabilityDistribution.ProbabilityDistribution(m.shortname, 100, [min(data_test), max(data_test)], + data=forecasts)) + print(getProbabilityDistributionStatistics(pmfs,data_test)) + + plot_probability_distributions(pmfs, lcolors, tam=tam) -def getPointStatistics(data, models, externalmodels = None, externalforecasts = None): - ret = "Model & Order & RMSE & MAPE & Theil's U \\\\ \n" - for fts in models: - forecasts = fts.forecast(data) - ret += fts.shortname + " & " - ret += str(fts.order) + " & " - ret += str(round(Measures.rmse(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + " & " - ret += str(round(Measures.smape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))+ " & " - ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) +def get_point_statistics(data, model, indexer=None): + if indexer is not None: + ndata = np.array(indexer.get_data(data[model.order:])) + else: + ndata = np.array(data[model.order:]) + + if model.isMultivariate or indexer is None: + forecasts = model.forecast(data) + elif not model.isMultivariate and indexer is not None: + forecasts = model.forecast(indexer.get_data(data)) + + if model.hasSeasonality: + nforecasts = np.array(forecasts) + else: + nforecasts = np.array(forecasts[:-1]) + ret = list() + try: + ret.append(np.round(Measures.rmse(ndata, nforecasts), 2)) + except: + ret.append(np.nan) + try: + ret.append(np.round(Measures.smape(ndata, nforecasts), 2)) + except: + ret.append(np.nan) + try: + ret.append(np.round(Measures.UStatistic(ndata, nforecasts), 2)) + except: + ret.append(np.nan) + + return ret + + +def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None): + ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n" + for count,model in enumerate(models,start=0): + _rmse, _smape, _u = get_point_statistics(data, model, indexers) + ret += model.shortname + " & " + ret += str(model.order) + " & " + ret += str(_rmse) + " & " + ret += str(_smape)+ " & " + ret += str(_u) #ret += str(round(Measures.TheilsInequality(np.array(data[fts.order:]), np.array(forecasts[:-1])), 4)) ret += " \\\\ \n" if externalmodels is not None: @@ -89,24 +325,130 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts = for k in np.arange(0,l): ret += externalmodels[k] + " & " ret += " 1 & " - ret += str(round(Measures.rmse(data[fts.order:], externalforecasts[k][:-1]), 2)) + " & " - ret += str(round(Measures.smape(data[fts.order:], externalforecasts[k][:-1]), 2))+ " & " - ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + ret += str(round(Measures.rmse(data, externalforecasts[k][:-1]), 2)) + " & " + ret += str(round(Measures.smape(data, externalforecasts[k][:-1]), 2))+ " & " + ret += str(round(Measures.UStatistic(data, externalforecasts[k][:-1]), 2)) ret += " \\\\ \n" + print(ret) + + + +def getProbabilityDistributionStatistics(pmfs, data): + ret = "Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\\\ \n" + for k in pmfs: + ret += k.name + " & " + ret += str(k.entropy()) + " & " + ret += str(k.empiricalloglikelihood())+ " & " + ret += str(k.pseudologlikelihood(data)) + ret += " \\\\ \n" return ret -def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5], +def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner], + partitions=[10], 
max_order=3,transformation=None,indexer=None,dump=False, + save=False, file=None): + if models is None: + models = get_interval_methods() + + objs = {} + lcolors = {} + sharpness = {} + resolution = {} + coverage = {} + + for ct, train,test in Util.sliding_window(data, windowsize, train): + for partition in partitions: + for partitioner in partitioners: + pttr = str(partitioner.__module__).split('.')[-1] + data_train_fs = partitioner(train, partition, transformation=transformation) + + for count, model in enumerate(models, start=0): + + mfts = model("") + _key = mfts.shortname + " " + pttr+ " q = " +str(partition) + + mfts.partitioner = data_train_fs + if not mfts.isHighOrder: + + if dump: print(ct,_key) + + if _key not in objs: + objs[_key] = mfts + lcolors[_key] = colors[count % ncol] + sharpness[_key] = [] + resolution[_key] = [] + coverage[_key] = [] + + if transformation is not None: + mfts.appendTransformation(transformation) + + mfts.train(train, data_train_fs.sets) + + _sharp, _res, _cov = get_interval_statistics(test, mfts) + sharpness[_key].append(_sharp) + resolution[_key].append(_res) + coverage[_key].append(_cov) + + else: + for order in np.arange(1, max_order + 1): + if order >= mfts.minOrder: + mfts = model("") + _key = mfts.shortname + " n = " + str(order) + " " + pttr + " q = " + str(partition) + mfts.partitioner = data_train_fs + + if dump: print(ct,_key) + + if _key not in objs: + objs[_key] = mfts + lcolors[_key] = colors[count % ncol] + sharpness[_key] = [] + resolution[_key] = [] + coverage[_key] = [] + + if transformation is not None: + mfts.appendTransformation(transformation) + + mfts.train(train, data_train_fs.sets, order=order) + + _sharp, _res, _cov = get_interval_statistics(test, mfts) + sharpness[_key].append(_sharp) + resolution[_key].append(_res) + coverage[_key].append(_cov) + + ret = [] + for k in sorted(objs.keys()): + mod = [] + mfts = objs[k] + mod.append(mfts.shortname) + mod.append(mfts.order ) + mod.append(mfts.partitioner.name) + mod.append(mfts.partitioner.partitions) + mod.append(round(np.nanmean(sharpness[k]),2)) + mod.append(round(np.nanstd(sharpness[k]), 2)) + mod.append(round(np.nanmean(resolution[k]), 2)) + mod.append(round(np.nanstd(resolution[k]), 2)) + mod.append(round(np.nanmean(coverage[k]), 2)) + mod.append(round(np.nanstd(coverage[k]), 2)) + mod.append(len(mfts)) + ret.append(mod) + + columns = ["Model","Order","Scheme","Partitions","SHARPAVG","SHARPSTD","RESAVG","RESSTD","COVAVG","COVSTD","SIZE"] + + dat = pd.DataFrame(ret,columns=columns) + + if save: dat.to_csv(Util.uniquefilename(file),sep=";") + + return dat + + +def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5], models=None, transformation=None): if models is None: - models = [ifts.IntervalFTS, pfts.ProbabilisticFTS] + models = get_interval_methods() objs = [] - if transformation is not None: - data_train_fs = Grid.GridPartitionerTrimf(transformation.apply(data_train),partitions) - else: - data_train_fs = Grid.GridPartitionerTrimf(data_train, partitions) + data_train_fs = Grid.GridPartitioner(data_train,partitions, transformation=transformation).sets lcolors = [] @@ -128,24 +470,33 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F objs.append(mfts) lcolors.append(colors[count % ncol]) - print(getIntervalStatistics(data_test, objs)) + print_interval_statistics(data_test, objs) - plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, 
intervals=True) + plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True) -def getIntervalStatistics(original, models): - ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n" - for fts in models: - forecasts = fts.forecastInterval(original) - ret += fts.shortname + " & " - ret += str(fts.order) + " & " - ret += str(round(Measures.sharpness(forecasts), 2)) + " & " - ret += str(round(Measures.resolution(forecasts), 2)) + " & " - ret += str(round(Measures.coverage(original[fts.order:], forecasts[:-1]), 2)) + " \\\\ \n" +def get_interval_statistics(original, model): + ret = list() + forecasts = model.forecastInterval(original) + ret.append(round(Measures.sharpness(forecasts), 2)) + ret.append(round(Measures.resolution(forecasts), 2)) + ret.append(round(Measures.coverage(original[model.order:], forecasts[:-1]), 2)) return ret -def plotDistribution(dist): +def print_interval_statistics(original, models): + ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n" + for fts in models: + _sharp, _res, _cov = get_interval_statistics(original, fts) + ret += fts.shortname + " & " + ret += str(fts.order) + " & " + ret += str(_sharp) + " & " + ret += str(_res) + " & " + ret += str(_cov) + " \\\\ \n" + print(ret) + + +def plot_distribution(dist): for k in dist.index: alpha = np.array([dist[x][k] for x in dist]) * 100 x = [k for x in np.arange(0, len(alpha))] @@ -154,8 +505,8 @@ def plotDistribution(dist): vmin=0, vmax=1, edgecolors=None) -def plotComparedSeries(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5], - intervals=True): +def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5], + points=True, intervals=True, linewidth=1.5): fig = plt.figure(figsize=tam) ax = fig.add_subplot(111) @@ -164,10 +515,10 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False, legends = [] - ax.plot(original, color='black', label="Original", linewidth=1.5) + ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5) for count, fts in enumerate(models, start=0): - if fts.hasPointForecasting and not intervals: + if fts.hasPointForecasting and points: forecasted = fts.forecast(original) mi.append(min(forecasted) * 0.95) ma.append(max(forecasted) * 1.05) @@ -175,7 +526,7 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False, forecasted.insert(0, None) lbl = fts.shortname if typeonlegend: lbl += " (Point)" - ax.plot(forecasted, color=colors[count], label=lbl, ls="-") + ax.plot(forecasted, color=colors[count], label=lbl, ls="-",linewidth=linewidth) if fts.hasIntervalForecasting and intervals: forecasted = fts.forecastInterval(original) @@ -188,8 +539,8 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False, upper.insert(0, None) lbl = fts.shortname if typeonlegend: lbl += " (Interval)" - ax.plot(lower, color=colors[count], label=lbl, ls="-") - ax.plot(upper, color=colors[count], ls="-") + ax.plot(lower, color=colors[count], label=lbl, ls="--",linewidth=linewidth) + ax.plot(upper, color=colors[count], ls="--",linewidth=linewidth) handles0, labels0 = ax.get_legend_handles_labels() lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1)) @@ -204,11 +555,176 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False, Util.showAndSaveImage(fig, file, save, lgd=legends) -def plotComparedIntervalsAhead(original, models, colors, distributions, 
time_from, time_to, - interpol=False, save=False, file=None, tam=[20, 5], resolution=None): +def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]): fig = plt.figure(figsize=tam) ax = fig.add_subplot(111) + for k,m in enumerate(pmfs,start=0): + m.plot(ax, color=lcolors[k]) + + handles0, labels0 = ax.get_legend_handles_labels() + ax.legend(handles0, labels0) + + +def ahead_sliding_window(data, windowsize, train=0.9,models=None, resolution = None, partitioners=[Grid.GridPartitioner], + partitions=[10], max_order=3,transformation=None,indexer=None,dump=False, + save=False, file=None): + if models is None: + models = [pwfts.ProbabilisticWeightedFTS] + + objs = {} + lcolors = {} + crps_interval = {} + crps_distr = {} + + steps = int(round(windowsize*(1.0-train),0)) + + for ct, train,test in Util.sliding_window(data, windowsize, train): + for partition in partitions: + for partitioner in partitioners: + pttr = str(partitioner.__module__).split('.')[-1] + data_train_fs = partitioner(train, partition, transformation=transformation) + + for count, model in enumerate(models, start=0): + + mfts = model("") + _key = mfts.shortname + " " + pttr+ " q = " +str(partition) + + mfts.partitioner = data_train_fs + if not mfts.isHighOrder: + + if dump: print(ct,_key) + + if _key not in objs: + objs[_key] = mfts + lcolors[_key] = colors[count % ncol] + crps_interval[_key] = [] + crps_distr[_key] = [] + + if transformation is not None: + mfts.appendTransformation(transformation) + + mfts.train(train, data_train_fs.sets) + + _crps1, _crps2 = get_distribution_statistics(test,mfts,steps=steps,resolution=resolution) + crps_interval[_key].append(_crps1) + crps_distr[_key].append(_crps2) + + else: + for order in np.arange(1, max_order + 1): + if order >= mfts.minOrder: + mfts = model("") + _key = mfts.shortname + " n = " + str(order) + " " + pttr + " q = " + str(partition) + mfts.partitioner = data_train_fs + + if dump: print(ct,_key) + + if _key not in objs: + objs[_key] = mfts + lcolors[_key] = colors[count % ncol] + crps_interval[_key] = [] + crps_distr[_key] = [] + + if transformation is not None: + mfts.appendTransformation(transformation) + + mfts.train(train, data_train_fs.sets, order=order) + + _crps1, _crps2 = get_distribution_statistics(test,mfts,steps=steps,resolution=resolution) + crps_interval[_key].append(_crps1) + crps_distr[_key].append(_crps2) + + ret = [] + for k in sorted(objs.keys()): + mod = [] + mfts = objs[k] + mod.append(mfts.shortname) + mod.append(mfts.order ) + mod.append(mfts.partitioner.name) + mod.append(mfts.partitioner.partitions) + mod.append(round(np.nanmean(crps_interval[k]),2)) + mod.append(round(np.nanstd(crps_interval[k]), 2)) + mod.append(round(np.nanmean(crps_distr[k]), 2)) + mod.append(round(np.nanstd(crps_distr[k]), 2)) + mod.append(len(mfts)) + ret.append(mod) + + columns = ["Model","Order","Scheme","Partitions","CRPS1AVG","CRPS1STD","CRPS2AVG","CRPS2STD","SIZE"] + + dat = pd.DataFrame(ret,columns=columns) + + if save: dat.to_csv(Util.uniquefilename(file),sep=";") + + return dat + + +def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5], + models=None, transformation=None, option=2): + if models is None: + models = [pwfts.ProbabilisticWeightedFTS] + + if resolution is None: resolution = (max(data_train) - min(data_train)) / 100 + + objs = [] + + data_train_fs = Grid.GridPartitioner(data_train, partitions, transformation=transformation).sets + lcolors = [] + + for count, model in 
Util.enumerate2(models, start=0, step=2): + mfts = model("") + if not mfts.isHighOrder: + if transformation is not None: + mfts.appendTransformation(transformation) + mfts.train(data_train, data_train_fs) + objs.append(mfts) + lcolors.append( colors[count % ncol] ) + else: + for order in np.arange(1,max_order+1): + if order >= mfts.minOrder: + mfts = model(" n = " + str(order)) + if transformation is not None: + mfts.appendTransformation(transformation) + mfts.train(data_train, data_train_fs, order=order) + objs.append(mfts) + lcolors.append(colors[count % ncol]) + + distributions = [False for k in objs] + + distributions[0] = True + + print_distribution_statistics(data_test[start:], objs, steps, resolution) + + #plotComparedIntervalsAhead(data_test, objs, lcolors, distributions=, save=save, file=file, tam=tam, intervals=True) + +def get_distribution_statistics(original, model, steps, resolution): + ret = list() + densities1 = model.forecastAheadDistribution(original,steps,resolution, parameters=3) + densities2 = model.forecastAheadDistribution(original, steps, resolution, parameters=2) + ret.append( round(Measures.crps(original, densities1), 3)) + ret.append( round(Measures.crps(original, densities2), 3)) + return ret + +def print_distribution_statistics(original, models, steps, resolution): + ret = "Model & Order & Interval & Distribution \\\\ \n" + for fts in models: + _crps1, _crps2 = get_distribution_statistics(original, fts, steps, resolution) + ret += fts.shortname + " & " + ret += str(fts.order) + " & " + ret += str(_crps1) + " & " + ret += str(_crps2) + " \\\\ \n" + print(ret) + + +def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to, + interpol=False, save=False, file=None, tam=[20, 5], resolution=None, + cmap='Blues',option=2): + fig = plt.figure(figsize=tam) + ax = fig.add_subplot(111) + + cm = plt.get_cmap(cmap) + cNorm = pltcolors.Normalize(vmin=0, vmax=1) + scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm) + if resolution is None: resolution = (max(original) - min(original)) / 100 mi = [] @@ -217,26 +733,44 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro for count, fts in enumerate(models, start=0): if fts.hasDistributionForecasting and distributions[count]: density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], - time_to, resolution, parameters=True) + time_to, resolution, parameters=option) + Y = [] + X = [] + C = [] + S = [] y = density.columns t = len(y) + ss = time_to ** 2 + for k in density.index: - alpha = np.array([density[q][k] for q in density]) * 100 + #alpha = [scalarMap.to_rgba(density[col][k]) for col in density.columns] + col = [density[col][k]*5 for col in density.columns] x = [time_from + k for x in np.arange(0, t)] - for cc in np.arange(0, resolution, 5): - ax.scatter(x, y + cc, c=alpha, marker='s', linewidths=0, cmap='Oranges', edgecolors=None) - if interpol and k < max(density.index): - diffs = [(density[q][k + 1] - density[q][k]) / 50 for q in density] - for p in np.arange(0, 50): - xx = [time_from + k + 0.02 * p for q in np.arange(0, t)] - alpha2 = np.array( - [density[density.columns[q]][k] + diffs[q] * p for q in np.arange(0, t)]) * 100 - ax.scatter(xx, y, c=alpha2, marker='s', linewidths=0, cmap='Oranges', - norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None) + s = [ss for x in np.arange(0, t)] + + ic = resolution/10 + + for cc in np.arange(0, resolution, ic): + Y.append(y + cc) + X.append(x) + C.append(col) + S.append(s) + + 
Y = np.hstack(Y) + X = np.hstack(X) + C = np.hstack(C) + S = np.hstack(S) + + s = ax.scatter(X, Y, c=C, marker='s',s=S, linewidths=0, edgecolors=None, cmap=cmap) + s.set_clim([0, 1]) + cb = fig.colorbar(s) + + cb.set_label('Density') + if fts.hasIntervalForecasting: forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to) @@ -278,6 +812,8 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro ax.set_xlabel('T') ax.set_xlim([0, len(original)]) + #plt.colorbar() + Util.showAndSaveImage(fig, file, save) @@ -295,128 +831,6 @@ def plotCompared(original, forecasts, labels, title): ax.set_xlim([0, len(original)]) ax.set_ylim([min(original), max(original)]) - -def SelecaoKFold_MenorRMSE(original, parameters, modelo): - nfolds = 5 - ret = [] - errors = np.array([[0 for k in parameters] for z in np.arange(0, nfolds)]) - forecasted_best = [] - print("Série Original") - fig = plt.figure(figsize=[18, 10]) - fig.suptitle("Comparação de modelos ") - ax0 = fig.add_axes([0, 0.5, 0.65, 0.45]) # left, bottom, width, height - ax0.set_xlim([0, len(original)]) - ax0.set_ylim([min(original), max(original)]) - ax0.set_title('Série Temporal') - ax0.set_ylabel('F(T)') - ax0.set_xlabel('T') - ax0.plot(original, label="Original") - min_rmse_fold = 100000.0 - best = None - fc = 0 # Fold count - kf = KFold(len(original), n_folds=nfolds) - for train_ix, test_ix in kf: - train = original[train_ix] - test = original[test_ix] - min_rmse = 100000.0 - best_fold = None - forecasted_best_fold = [] - errors_fold = [] - pc = 0 # Parameter count - for p in parameters: - sets = Grid.GridPartitionerTrimf(train, p) - fts = modelo(str(p) + " particoes") - fts.train(train, sets) - forecasted = [fts.forecast(xx) for xx in test] - error = Measures.rmse(np.array(forecasted), np.array(test)) - errors_fold.append(error) - print(fc, p, error) - errors[fc, pc] = error - if error < min_rmse: - min_rmse = error - best_fold = fts - forecasted_best_fold = forecasted - pc = pc + 1 - forecasted_best_fold = [best_fold.forecast(xx) for xx in original] - ax0.plot(forecasted_best_fold, label=best_fold.name) - if np.mean(errors_fold) < min_rmse_fold: - min_rmse_fold = np.mean(errors) - best = best_fold - forecasted_best = forecasted_best_fold - fc = fc + 1 - handles0, labels0 = ax0.get_legend_handles_labels() - ax0.legend(handles0, labels0) - ax1 = Axes3D(fig, rect=[0.7, 0.5, 0.3, 0.45], elev=30, azim=144) - # ax1 = fig.add_axes([0.6, 0.0, 0.45, 0.45], projection='3d') - ax1.set_title('Comparação dos Erros Quadráticos Médios') - ax1.set_zlabel('RMSE') - ax1.set_xlabel('K-fold') - ax1.set_ylabel('Partições') - X, Y = np.meshgrid(np.arange(0, nfolds), parameters) - surf = ax1.plot_surface(X, Y, errors.T, rstride=1, cstride=1, antialiased=True) - ret.append(best) - ret.append(forecasted_best) - - # Modelo diferencial - print("\nSérie Diferencial") - errors = np.array([[0 for k in parameters] for z in np.arange(0, nfolds)]) - forecastedd_best = [] - ax2 = fig.add_axes([0, 0, 0.65, 0.45]) # left, bottom, width, height - ax2.set_xlim([0, len(original)]) - ax2.set_ylim([min(original), max(original)]) - ax2.set_title('Série Temporal') - ax2.set_ylabel('F(T)') - ax2.set_xlabel('T') - ax2.plot(original, label="Original") - min_rmse = 100000.0 - min_rmse_fold = 100000.0 - bestd = None - fc = 0 - diff = Transformations.differential(original) - kf = KFold(len(original), n_folds=nfolds) - for train_ix, test_ix in kf: - train = diff[train_ix] - test = diff[test_ix] - min_rmse = 100000.0 - best_fold = None 
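
The Continuous Ranked Probability Score added in benchmarks/Measures.py is what get_distribution_statistics reports: pmf_to_cdf accumulates each row of the forecast density DataFrame into a CDF, heavyside_cdf (the patch's spelling of Heaviside) builds the observation's step CDF, and crps averages the squared CDF differences over all bins and steps. A minimal self-contained sketch of that computation, with made-up densities (two steps, four bins; rows sum to 1):

import pandas as pd

bins = [10.0, 20.0, 30.0, 40.0]                    # density support (columns)
densities = pd.DataFrame([[0.1, 0.4, 0.4, 0.1],    # step 1 pmf (illustrative)
                          [0.0, 0.2, 0.5, 0.3]],   # step 2 pmf (illustrative)
                         columns=bins)
targets = [25.0, 35.0]                             # observed values (illustrative)

Ff = densities.cumsum(axis=1)                      # forecast CDF, as pmf_to_cdf does
Fa = pd.DataFrame([[1.0 if b >= t else 0.0 for b in bins] for t in targets],
                  columns=bins)                    # Heaviside step CDF, as heavyside_cdf does

# mean squared CDF difference over all bins and steps, as Measures.crps does
crps = ((Ff.values - Fa.values) ** 2).sum() / float(len(bins) * len(targets))
print(round(crps, 3))
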
- forecasted_best_fold = [] - errors_fold = [] - pc = 0 - for p in parameters: - sets = Grid.GridPartitionerTrimf(train, p) - fts = modelo(str(p) + " particoes") - fts.train(train, sets) - forecasted = [fts.forecastDiff(test, xx) for xx in np.arange(len(test))] - error = Measures.rmse(np.array(forecasted), np.array(test)) - print(fc, p, error) - errors[fc, pc] = error - errors_fold.append(error) - if error < min_rmse: - min_rmse = error - best_fold = fts - pc = pc + 1 - forecasted_best_fold = [best_fold.forecastDiff(original, xx) for xx in np.arange(len(original))] - ax2.plot(forecasted_best_fold, label=best_fold.name) - if np.mean(errors_fold) < min_rmse_fold: - min_rmse_fold = np.mean(errors) - best = best_fold - forecasted_best = forecasted_best_fold - fc = fc + 1 - handles0, labels0 = ax2.get_legend_handles_labels() - ax2.legend(handles0, labels0) - ax3 = Axes3D(fig, rect=[0.7, 0, 0.3, 0.45], elev=30, azim=144) - # ax1 = fig.add_axes([0.6, 0.0, 0.45, 0.45], projection='3d') - ax3.set_title('Comparação dos Erros Quadráticos Médios') - ax3.set_zlabel('RMSE') - ax3.set_xlabel('K-fold') - ax3.set_ylabel('Partições') - X, Y = np.meshgrid(np.arange(0, nfolds), parameters) - surf = ax3.plot_surface(X, Y, errors.T, rstride=1, cstride=1, antialiased=True) - ret.append(best) - ret.append(forecasted_best) - return ret - - def SelecaoSimples_MenorRMSE(original, parameters, modelo): ret = [] errors = [] @@ -563,7 +977,8 @@ def compareModelsTable(original, models_fo, models_ho): def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15], - plotforecasts=False, elev=30, azim=144, intervals=False): + plotforecasts=False, elev=30, azim=144, intervals=False,parameters=None): + _3d = len(orders) > 1 ret = [] errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))]) forecasted_best = [] @@ -581,16 +996,19 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N for pc, p in enumerate(partitions, start=0): - sets = Grid.GridPartitionerTrimf(train, p) + sets = Grid.GridPartitioner(train, p).sets for oc, o in enumerate(orders, start=0): fts = model("q = " + str(p) + " n = " + str(o)) - fts.train(train, sets, o) + fts.train(train, sets, o,parameters=parameters) if not intervals: forecasted = fts.forecast(test) - error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) + if not fts.hasSeasonality: + error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) + else: + error = Measures.rmse(np.array(test[o:]), np.array(forecasted)) for kk in range(o): forecasted.insert(0, None) - if plotforecasts: ax0.plot(forecasted, label=fts.name) + if plotforecasts: ax0.plot(forecasted, label=fts.name) else: forecasted = fts.forecastInterval(test) error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])) @@ -605,15 +1023,98 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N # handles0, labels0 = ax0.get_legend_handles_labels() # ax0.legend(handles0, labels0) ax0.plot(test, label="Original", linewidth=3.0, color="black") - ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim) + if _3d: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim) if not plotforecasts: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim) # ax1 = fig.add_axes([0.6, 0.5, 0.45, 0.45], projection='3d') - ax1.set_title('Error Surface') - ax1.set_ylabel('Model order') - ax1.set_xlabel('Number of partitions') - ax1.set_zlabel('RMSE') - X, Y = 
np.meshgrid(partitions, orders) - surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True) + if _3d: + ax1.set_title('Error Surface') + ax1.set_ylabel('Model order') + ax1.set_xlabel('Number of partitions') + ax1.set_zlabel('RMSE') + X, Y = np.meshgrid(partitions, orders) + surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True) + else: + ax1 = fig.add_axes([0, 1, 0.9, 0.9]) + ax1.set_title('Error Curve') + ax1.set_ylabel('Number of partitions') + ax1.set_xlabel('RMSE') + ax0.plot(errors,partitions) + ret.append(best) + ret.append(forecasted_best) + + # plt.tight_layout() + + Util.showAndSaveImage(fig, file, save) + + return ret + + +def sliding_window_simple_search(data, windowsize, model, partitions, orders, save=False, file=None, tam=[10, 15], + plotforecasts=False, elev=30, azim=144, intervals=False, parameters=None): + _3d = len(orders) > 1 + ret = [] + errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))]) + forecasted_best = [] + fig = plt.figure(figsize=tam) + # fig.suptitle("Comparação de modelos ") + if plotforecasts: + ax0 = fig.add_axes([0, 0.4, 0.9, 0.5]) # left, bottom, width, height + ax0.set_xlim([0, len(data)]) + ax0.set_ylim([min(data) * 0.9, max(data) * 1.1]) + ax0.set_title('Forecasts') + ax0.set_ylabel('F(T)') + ax0.set_xlabel('T') + min_rmse = 1000000.0 + best = None + + for pc, p in enumerate(partitions, start=0): + + sets = Grid.GridPartitioner(data, p).sets + for oc, o in enumerate(orders, start=0): + _error = [] + for ct, train, test in Util.sliding_window(data, windowsize, 0.8): + fts = model("q = " + str(p) + " n = " + str(o)) + fts.train(data, sets, o, parameters=parameters) + if not intervals: + forecasted = fts.forecast(test) + if not fts.hasSeasonality: + _error.append( Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) ) + else: + _error.append( Measures.rmse(np.array(test[o:]), np.array(forecasted)) ) + for kk in range(o): + forecasted.insert(0, None) + if plotforecasts: ax0.plot(forecasted, label=fts.name) + else: + forecasted = fts.forecastInterval(test) + _error.append( 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])) ) + error = np.nanmean(_error) + errors[oc, pc] = error + if error < min_rmse: + min_rmse = error + best = fts + forecasted_best = forecasted + + # print(min_rmse) + if plotforecasts: + # handles0, labels0 = ax0.get_legend_handles_labels() + # ax0.legend(handles0, labels0) + ax0.plot(test, label="Original", linewidth=3.0, color="black") + if _3d: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim) + if not plotforecasts: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim) + # ax1 = fig.add_axes([0.6, 0.5, 0.45, 0.45], projection='3d') + if _3d: + ax1.set_title('Error Surface') + ax1.set_ylabel('Model order') + ax1.set_xlabel('Number of partitions') + ax1.set_zlabel('RMSE') + X, Y = np.meshgrid(partitions, orders) + surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True) + else: + ax1 = fig.add_axes([0, 1, 0.9, 0.9]) + ax1.set_title('Error Curve') + ax1.set_ylabel('Number of partitions') + ax1.set_xlabel('RMSE') + ax0.plot(errors,partitions) ret.append(best) ret.append(forecasted_best) @@ -634,7 +1135,7 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None): axes[2].set_title('Interval Forecasts by Order') for order in np.arange(1, 6): - fts = pfts.ProbabilisticFTS("") + fts = pwfts.ProbabilisticWeightedFTS("") fts.shortname = "n = " + str(order) fts.train(data, data_fs1, 
order=order) point_forecasts = fts.forecast(data) @@ -656,7 +1157,7 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None): for partitions in np.arange(5, 11): data_fs = Grid.GridPartitionerTrimf(data, partitions) - fts = pfts.ProbabilisticFTS("") + fts = pwfts.ProbabilisticWeightedFTS("") fts.shortname = "q = " + str(partitions) fts.train(data, data_fs, 1) point_forecasts = fts.forecast(data) @@ -683,4 +1184,5 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None): plt.tight_layout() - Util.showAndSaveImage(fig, file, save) \ No newline at end of file + Util.showAndSaveImage(fig, file, save) + diff --git a/benchmarks/naive.py b/benchmarks/naive.py index 7e6674e..7563094 100644 --- a/benchmarks/naive.py +++ b/benchmarks/naive.py @@ -6,9 +6,11 @@ from pyFTS import fts class Naive(fts.FTS): def __init__(self, name): - super(Naive, self).__init__(1, "Naïve " + name) + super(Naive, self).__init__(1, "Naive " + name) self.name = "Naïve Model" self.detail = "Naïve Model" + self.benchmark_only = True + self.isHighOrder = False def forecast(self, data): return [k for k in data] diff --git a/fts.py b/fts.py index a48fb04..696e4e4 100644 --- a/fts.py +++ b/fts.py @@ -26,6 +26,8 @@ class FTS(object): self.original_max = 0 self.original_min = 0 self.partitioner = None + self.auto_update = False + self.benchmark_only = False def fuzzy(self, data): best = {"fuzzyset": "", "membership": 0.0} diff --git a/partitioners/Grid.py b/partitioners/Grid.py index 176afeb..1effa7d 100644 --- a/partitioners/Grid.py +++ b/partitioners/Grid.py @@ -7,7 +7,7 @@ from pyFTS.partitioners import partitioner class GridPartitioner(partitioner.Partitioner): - def __init__(self, data,npart,func = Membership.trimf, transformation=None): + def __init__(self, data, npart, func = Membership.trimf, transformation=None): super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation) def build(self, data): diff --git a/pfts.py b/pfts.py deleted file mode 100644 index a0c6052..0000000 --- a/pfts.py +++ /dev/null @@ -1,487 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf8 -*- - -import numpy as np -import pandas as pd -import math -from operator import itemgetter -from pyFTS.common import FuzzySet, SortedCollection -from pyFTS import hofts, ifts, tree - - -class ProbabilisticFLRG(hofts.HighOrderFLRG): - def __init__(self, order): - super(ProbabilisticFLRG, self).__init__(order) - self.RHS = {} - self.frequencyCount = 0.0 - - def appendRHS(self, c): - self.frequencyCount += 1.0 - if c.name in self.RHS: - self.RHS[c.name] += 1.0 - else: - self.RHS[c.name] = 1.0 - - def getProbability(self, c): - return self.RHS[c] / self.frequencyCount - - def __str__(self): - tmp2 = "" - for c in sorted(self.RHS): - if len(tmp2) > 0: - tmp2 = tmp2 + ", " - tmp2 = tmp2 + "(" + str(round(self.RHS[c] / self.frequencyCount, 3)) + ")" + c - return self.strLHS() + " -> " + tmp2 - - -class ProbabilisticFTS(ifts.IntervalFTS): - def __init__(self, name): - super(ProbabilisticFTS, self).__init__("PFTS") - self.shortname = "PFTS " + name - self.name = "Probabilistic FTS" - self.detail = "Silva, P.; Guimarães, F.; Sadaei, H." 
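
The ProbabilisticFLRG removed here survives in pwfts.py as ProbabilisticWeightedFLRG with the same rule bookkeeping: each rule counts how often every right-hand-side set follows its left-hand-side pattern, and the probability of a consequent is its relative frequency within the rule (the weighted variant accumulates membership values instead of unit counts). A stripped-down standalone sketch, with plain strings standing in for fuzzy sets:

class SimplePFLRG:
    """Toy version of ProbabilisticFLRG: relative-frequency consequents."""
    def __init__(self, lhs):
        self.LHS = lhs              # antecedent pattern, e.g. ("A1", "A2")
        self.RHS = {}               # consequent name -> observed count
        self.frequencyCount = 0.0

    def appendRHS(self, name):
        self.frequencyCount += 1.0
        self.RHS[name] = self.RHS.get(name, 0.0) + 1.0

    def getProbability(self, name):
        return self.RHS[name] / self.frequencyCount   # P(RHS | LHS)

flrg = SimplePFLRG(("A1", "A2"))
for rhs in ["A2", "A3", "A2"]:
    flrg.appendRHS(rhs)
print(flrg.getProbability("A2"))    # 2/3
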
- self.flrgs = {} - self.globalFrequency = 0 - self.hasPointForecasting = True - self.hasIntervalForecasting = True - self.hasDistributionForecasting = True - self.isHighOrder = True - - def generateFLRG(self, flrs): - flrgs = {} - l = len(flrs) - for k in np.arange(self.order, l+1): - if self.dump: print("FLR: " + str(k)) - flrg = ProbabilisticFLRG(self.order) - - for kk in np.arange(k - self.order, k): - flrg.appendLHS(flrs[kk].LHS) - if self.dump: print("LHS: " + str(flrs[kk])) - - if flrg.strLHS() in flrgs: - flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS) - else: - flrgs[flrg.strLHS()] = flrg; - flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS) - if self.dump: print("RHS: " + str(flrs[k-1])) - - self.globalFrequency += 1 - return (flrgs) - - def addNewPFLGR(self,flrg): - if flrg.strLHS() not in self.flrgs: - tmp = ProbabilisticFLRG(self.order) - for fs in flrg.LHS: tmp.appendLHS(fs) - tmp.appendRHS(flrg.LHS[-1]) - self.flrgs[tmp.strLHS()] = tmp; - self.globalFrequency += 1 - - def getProbability(self, flrg): - if flrg.strLHS() in self.flrgs: - return self.flrgs[flrg.strLHS()].frequencyCount / self.globalFrequency - else: - self.addNewPFLGR(flrg) - return self.getProbability(flrg) - - def getMidpoints(self, flrg): - if flrg.strLHS() in self.flrgs: - tmp = self.flrgs[flrg.strLHS()] - ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].centroid for s in tmp.RHS])) - else: - pi = 1 / len(flrg.LHS) - ret = sum(np.array([pi * s.centroid for s in flrg.LHS])) - return ret - - def getUpper(self, flrg): - if flrg.strLHS() in self.flrgs: - tmp = self.flrgs[flrg.strLHS()] - ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].upper for s in tmp.RHS])) - else: - pi = 1 / len(flrg.LHS) - ret = sum(np.array([pi * s.upper for s in flrg.LHS])) - return ret - - def getLower(self, flrg): - if flrg.strLHS() in self.flrgs: - tmp = self.flrgs[flrg.strLHS()] - ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].lower for s in tmp.RHS])) - else: - pi = 1 / len(flrg.LHS) - ret = sum(np.array([pi * s.lower for s in flrg.LHS])) - return ret - - def forecast(self, data): - - ndata = np.array(self.doTransformations(data)) - - l = len(ndata) - - ret = [] - - for k in np.arange(self.order - 1, l): - - # print(k) - - affected_flrgs = [] - affected_flrgs_memberships = [] - norms = [] - - mp = [] - - # Find the sets which membership > 0 for each lag - count = 0 - lags = {} - if self.order > 1: - subset = ndata[k - (self.order - 1): k + 1] - - for instance in subset: - mb = FuzzySet.fuzzyInstance(instance, self.sets) - tmp = np.argwhere(mb) - idx = np.ravel(tmp) # flatten the array - - if idx.size == 0: # the element is out of the bounds of the Universe of Discourse - if instance <= self.sets[0].lower: - idx = [0] - elif instance >= self.sets[-1].upper: - idx = [len(self.sets) - 1] - else: - raise Exception(instance) - - lags[count] = idx - count = count + 1 - - # Build the tree with all possible paths - - root = tree.FLRGTreeNode(None) - - self.buildTree(root, lags, 0) - - # Trace the possible paths and build the PFLRG's - - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) - flrg = hofts.HighOrderFLRG(self.order) - for kk in path: flrg.appendLHS(self.sets[kk]) - - assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS]) - - ## - affected_flrgs.append(flrg) - - # Find the general membership of FLRG - affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS))) - - else: - - mv = FuzzySet.fuzzyInstance(ndata[k], 
self.sets) # get all membership values - tmp = np.argwhere(mv) # get the indices of values > 0 - idx = np.ravel(tmp) # flatten the array - - if idx.size == 0: # the element is out of the bounds of the Universe of Discourse - if ndata[k] <= self.sets[0].lower: - idx = [0] - elif ndata[k] >= self.sets[-1].upper: - idx = [len(self.sets) - 1] - else: - raise Exception(ndata[k]) - - for kk in idx: - flrg = hofts.HighOrderFLRG(self.order) - flrg.appendLHS(self.sets[kk]) - affected_flrgs.append(flrg) - affected_flrgs_memberships.append(mv[kk]) - - count = 0 - for flrg in affected_flrgs: - # achar o os bounds de cada FLRG, ponderados pela probabilidade e pertinência - norm = self.getProbability(flrg) * affected_flrgs_memberships[count] - if norm == 0: - norm = self.getProbability(flrg) # * 0.001 - mp.append(norm * self.getMidpoints(flrg)) - norms.append(norm) - count = count + 1 - - # gerar o intervalo - norm = sum(norms) - if norm == 0: - ret.append(0) - else: - ret.append(sum(mp) / norm) - - ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]]) - - return ret - - def forecastInterval(self, data): - - ndata = np.array(self.doTransformations(data)) - - l = len(ndata) - - ret = [] - - for k in np.arange(self.order - 1, l): - - # print(k) - - affected_flrgs = [] - affected_flrgs_memberships = [] - norms = [] - - up = [] - lo = [] - - # Find the sets which membership > 0 for each lag - count = 0 - lags = {} - if self.order > 1: - subset = ndata[k - (self.order - 1): k + 1] - - for instance in subset: - mb = FuzzySet.fuzzyInstance(instance, self.sets) - tmp = np.argwhere(mb) - idx = np.ravel(tmp) # flatten the array - - if idx.size == 0: # the element is out of the bounds of the Universe of Discourse - if instance <= self.sets[0].lower: - idx = [0] - elif instance >= self.sets[-1].upper: - idx = [len(self.sets) - 1] - else: - raise Exception(instance) - - lags[count] = idx - count = count + 1 - - # Build the tree with all possible paths - - root = tree.FLRGTreeNode(None) - - self.buildTree(root, lags, 0) - - # Trace the possible paths and build the PFLRG's - - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) - flrg = hofts.HighOrderFLRG(self.order) - for kk in path: flrg.appendLHS(self.sets[kk]) - - assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS]) - - ## - affected_flrgs.append(flrg) - - # Find the general membership of FLRG - affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS))) - - else: - - mv = FuzzySet.fuzzyInstance(ndata[k], self.sets) # get all membership values - tmp = np.argwhere(mv) # get the indices of values > 0 - idx = np.ravel(tmp) # flatten the array - - if idx.size == 0: # the element is out of the bounds of the Universe of Discourse - if ndata[k] <= self.sets[0].lower: - idx = [0] - elif ndata[k] >= self.sets[-1].upper: - idx = [len(self.sets) - 1] - else: - raise Exception(ndata[k]) - - for kk in idx: - flrg = hofts.HighOrderFLRG(self.order) - flrg.appendLHS(self.sets[kk]) - affected_flrgs.append(flrg) - affected_flrgs_memberships.append(mv[kk]) - - count = 0 - for flrg in affected_flrgs: - # achar o os bounds de cada FLRG, ponderados pela probabilidade e pertinência - norm = self.getProbability(flrg) * affected_flrgs_memberships[count] - if norm == 0: - norm = self.getProbability(flrg) # * 0.001 - up.append(norm * self.getUpper(flrg)) - lo.append(norm * self.getLower(flrg)) - norms.append(norm) - count = count + 1 - - # gerar o intervalo - norm = sum(norms) - if 
norm == 0: - ret.append([0, 0]) - else: - lo_ = self.doInverseTransformations(sum(lo) / norm, params=[data[k - (self.order - 1): k + 1]]) - up_ = self.doInverseTransformations(sum(up) / norm, params=[data[k - (self.order - 1): k + 1]]) - ret.append([lo_, up_]) - - return ret - - def forecastAhead(self, data, steps): - ret = [data[k] for k in np.arange(len(data) - self.order, len(data))] - - for k in np.arange(self.order - 1, steps): - - if ret[-1] <= self.sets[0].lower or ret[-1] >= self.sets[-1].upper: - ret.append(ret[-1]) - else: - mp = self.forecast([ret[x] for x in np.arange(k - self.order, k)]) - - ret.append(mp) - - return ret - - def forecastAheadInterval(self, data, steps): - ret = [[data[k], data[k]] for k in np.arange(len(data) - self.order, len(data))] - - for k in np.arange(self.order, steps+self.order): - - if ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[-1].upper: - ret.append(ret[-1]) - else: - lower = self.forecastInterval([ret[x][0] for x in np.arange(k - self.order, k)]) - upper = self.forecastInterval([ret[x][1] for x in np.arange(k - self.order, k)]) - - ret.append([np.min(lower), np.max(upper)]) - - return ret - - def getGridClean(self, resolution): - grid = {} - - if len(self.transformations) == 0: - _min = self.sets[0].lower - _max = self.sets[-1].upper - else: - _min = self.original_min - _max = self.original_max - - for sbin in np.arange(_min,_max, resolution): - grid[sbin] = 0 - - return grid - - def gridCount(self, grid, resolution, index, interval): - #print(interval) - for k in index.inside(interval[0],interval[1]): - #print(k) - grid[k] += 1 - return grid - - def gridCountPoint(self, grid, resolution, index, point): - k = index.find_ge(point) - # print(k) - grid[k] += 1 - return grid - - def buildTreeWithoutOrder(self, node, lags, level): - - if level not in lags: - return - - for s in lags[level]: - node.appendChild(tree.FLRGTreeNode(s)) - - for child in node.getChildren(): - self.buildTreeWithoutOrder(child, lags, level + 1) - - def forecastAheadDistribution(self, data, steps, resolution, parameters=None): - - ret = [] - - intervals = self.forecastAheadInterval(data, steps) - - grid = self.getGridClean(resolution) - - index = SortedCollection.SortedCollection(iterable=grid.keys()) - - if parameters is None: - - grids = [] - for k in np.arange(0, steps): - grids.append(self.getGridClean(resolution)) - - for k in np.arange(self.order, steps + self.order): - - lags = {} - - cc = 0 - - for i in intervals[k - self.order : k]: - - quantiles = [] - - for qt in np.arange(0, 50, 2): - quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100)) - quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100)) - quantiles.append(i[0] + ((i[1] - i[0]) / 2)) - - quantiles = list(set(quantiles)) - - quantiles.sort() - - lags[cc] = quantiles - - cc += 1 - - # Build the tree with all possible paths - - root = tree.FLRGTreeNode(None) - - self.buildTreeWithoutOrder(root, lags, 0) - - # Trace the possible paths - - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) - - qtle = self.forecastInterval(path) - - grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle)) - - for k in np.arange(0, steps): - tmp = np.array([grids[k][q] for q in sorted(grids[k])]) - ret.append(tmp / sum(tmp)) - - grid = self.getGridClean(resolution) - df = pd.DataFrame(ret, columns=sorted(grid)) - return df - else: - - print("novo") - - ret = [] - - for k in np.arange(self.order, steps + self.order): - - grid = self.getGridClean(resolution) - 
grid = self.gridCount(grid, resolution, index, intervals[k]) - - for qt in np.arange(0, 50, 1): - # print(qt) - qtle_lower = self.forecastInterval( - [intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in - np.arange(k - self.order, k)]) - grid = self.gridCount(grid, resolution, index, np.ravel(qtle_lower)) - qtle_upper = self.forecastInterval( - [intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in - np.arange(k - self.order, k)]) - grid = self.gridCount(grid, resolution, index, np.ravel(qtle_upper)) - qtle_mid = self.forecastInterval( - [intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)]) - grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid)) - - tmp = np.array([grid[k] for k in sorted(grid)]) - - ret.append(tmp / sum(tmp)) - - grid = self.getGridClean(resolution) - df = pd.DataFrame(ret, columns=sorted(grid)) - return df - - - def __str__(self): - tmp = self.name + ":\n" - for r in sorted(self.flrgs): - p = round(self.flrgs[r].frequencyCount / self.globalFrequency, 3) - tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n" - return tmp \ No newline at end of file diff --git a/pwfts.py b/pwfts.py index 805bb30..2aaeefa 100644 --- a/pwfts.py +++ b/pwfts.py @@ -29,7 +29,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG): else: self.RHS[c.name] = mv - def getProbability(self, c): + def get_probability(self, c): return self.RHS[c] / self.frequencyCount def __str__(self): @@ -42,7 +42,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG): class ProbabilisticWeightedFTS(ifts.IntervalFTS): - def __init__(self, name): + def __init__(self, name, update=True): super(ProbabilisticWeightedFTS, self).__init__("PWFTS") self.shortname = "PWFTS " + name self.name = "Probabilistic FTS" @@ -53,6 +53,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): self.hasIntervalForecasting = True self.hasDistributionForecasting = True self.isHighOrder = True + self.auto_update = update def train(self, data, sets, order=1,parameters=None): @@ -125,14 +126,30 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): if flrg.strLHS() in flrgs: flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS) else: - flrgs[flrg.strLHS()] = flrg; + flrgs[flrg.strLHS()] = flrg flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS) if self.dump: print("RHS: " + str(flrs[k-1])) self.globalFrequency += 1 return (flrgs) - def addNewPFLGR(self,flrg): + def update_model(self,data): + + fzzy = FuzzySet.fuzzySeries(data, self.sets) + + flrg = ProbabilisticWeightedFLRG(self.order) + + for k in np.arange(0, self.order): flrg.appendLHS(fzzy[k]) + + if flrg.strLHS() in self.flrgs: + self.flrgs[flrg.strLHS()].appendRHS(fzzy[self.order]) + else: + self.flrgs[flrg.strLHS()] = flrg + self.flrgs[flrg.strLHS()].appendRHS(fzzy[self.order]) + + self.globalFrequency += 1 + + def add_new_PWFLGR(self, flrg): if flrg.strLHS() not in self.flrgs: tmp = ProbabilisticWeightedFLRG(self.order) for fs in flrg.LHS: tmp.appendLHS(fs) @@ -140,17 +157,17 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): self.flrgs[tmp.strLHS()] = tmp; self.globalFrequency += 1 - def getProbability(self, flrg): + def get_probability(self, flrg): if flrg.strLHS() in self.flrgs: return self.flrgs[flrg.strLHS()].frequencyCount / self.globalFrequency else: - self.addNewPFLGR(flrg) - return self.getProbability(flrg) + self.add_new_PWFLGR(flrg) + return self.get_probability(flrg) def getMidpoints(self, flrg): if flrg.strLHS() in self.flrgs: tmp = self.flrgs[flrg.strLHS()] - ret = 
diff --git a/pwfts.py b/pwfts.py
index 805bb30..2aaeefa 100644
--- a/pwfts.py
+++ b/pwfts.py
@@ -29,7 +29,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
         else:
             self.RHS[c.name] = mv
 
-    def getProbability(self, c):
+    def get_probability(self, c):
         return self.RHS[c] / self.frequencyCount
 
     def __str__(self):
@@ -42,7 +42,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
 
 class ProbabilisticWeightedFTS(ifts.IntervalFTS):
-    def __init__(self, name):
+    def __init__(self, name, update=True):
         super(ProbabilisticWeightedFTS, self).__init__("PWFTS")
         self.shortname = "PWFTS " + name
         self.name = "Probabilistic FTS"
@@ -53,6 +53,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         self.hasIntervalForecasting = True
         self.hasDistributionForecasting = True
         self.isHighOrder = True
+        self.auto_update = update
 
     def train(self, data, sets, order=1,parameters=None):
 
@@ -125,14 +126,30 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             if flrg.strLHS() in flrgs:
                 flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS)
             else:
-                flrgs[flrg.strLHS()] = flrg;
+                flrgs[flrg.strLHS()] = flrg
                 flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS)
             if self.dump: print("RHS: " + str(flrs[k-1]))
         self.globalFrequency += 1
         return (flrgs)
 
-    def addNewPFLGR(self,flrg):
+    def update_model(self,data):
+
+        fzzy = FuzzySet.fuzzySeries(data, self.sets)
+
+        flrg = ProbabilisticWeightedFLRG(self.order)
+
+        for k in np.arange(0, self.order): flrg.appendLHS(fzzy[k])
+
+        if flrg.strLHS() in self.flrgs:
+            self.flrgs[flrg.strLHS()].appendRHS(fzzy[self.order])
+        else:
+            self.flrgs[flrg.strLHS()] = flrg
+            self.flrgs[flrg.strLHS()].appendRHS(fzzy[self.order])
+
+        self.globalFrequency += 1
+
+    def add_new_PWFLGR(self, flrg):
         if flrg.strLHS() not in self.flrgs:
             tmp = ProbabilisticWeightedFLRG(self.order)
             for fs in flrg.LHS: tmp.appendLHS(fs)
@@ -140,17 +157,17 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             self.flrgs[tmp.strLHS()] = tmp;
             self.globalFrequency += 1
 
-    def getProbability(self, flrg):
+    def get_probability(self, flrg):
         if flrg.strLHS() in self.flrgs:
             return self.flrgs[flrg.strLHS()].frequencyCount / self.globalFrequency
         else:
-            self.addNewPFLGR(flrg)
-            return self.getProbability(flrg)
+            self.add_new_PWFLGR(flrg)
+            return self.get_probability(flrg)
 
     def getMidpoints(self, flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[flrg.strLHS()]
-            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
+            ret = sum(np.array([tmp.get_probability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
         else:
             pi = 1 / len(flrg.LHS)
             ret = sum(np.array([pi * s.centroid for s in flrg.LHS]))
@@ -159,7 +176,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def getUpper(self, flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[flrg.strLHS()]
-            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].upper for s in tmp.RHS]))
+            ret = sum(np.array([tmp.get_probability(s) * self.setsDict[s].upper for s in tmp.RHS]))
         else:
             pi = 1 / len(flrg.LHS)
             ret = sum(np.array([pi * s.upper for s in flrg.LHS]))
@@ -168,7 +185,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def getLower(self, flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[flrg.strLHS()]
-            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].lower for s in tmp.RHS]))
+            ret = sum(np.array([tmp.get_probability(s) * self.setsDict[s].lower for s in tmp.RHS]))
         else:
             pi = 1 / len(flrg.LHS)
             ret = sum(np.array([pi * s.lower for s in flrg.LHS]))
@@ -187,6 +204,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             # print(k)
 
             affected_flrgs = []
+            affected_rhs = []
             affected_flrgs_memberships = []
             norms = []
@@ -258,9 +276,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             count = 0
             for flrg in affected_flrgs:
                 # find the bounds of each FLRG, weighted by probability and membership
-                norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
+                norm = self.get_probability(flrg) * affected_flrgs_memberships[count]
                 if norm == 0:
-                    norm = self.getProbability(flrg)  # * 0.001
+                    norm = self.get_probability(flrg)  # * 0.001
                 mp.append(norm * self.getMidpoints(flrg))
                 norms.append(norm)
                 count = count + 1
@@ -272,6 +290,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             else:
                 ret.append(sum(mp) / norm)
 
+            if self.auto_update and k > self.order+1: self.update_model(ndata[k - self.order - 1 : k])
+
         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
 
         return ret
@@ -361,9 +381,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             count = 0
             for flrg in affected_flrgs:
                 # find the bounds of each FLRG, weighted by probability and membership
-                norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
+                norm = self.get_probability(flrg) * affected_flrgs_memberships[count]
                 if norm == 0:
-                    norm = self.getProbability(flrg)  # * 0.001
+                    norm = self.get_probability(flrg)  # * 0.001
                 up.append(norm * self.getUpper(flrg))
                 lo.append(norm * self.getLower(flrg))
                 norms.append(norm)
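The new `auto_update` flag above enables online learning: during `forecast`, once enough points have been seen, `update_model` re-fuzzifies the last `order + 1` observations and either reinforces the matching probabilistic FLRG or creates it, so the rule frequencies keep tracking the incoming series. A rough sketch of that frequency update outside the library, with hypothetical names (the real logic lives in `ProbabilisticWeightedFTS.update_model` and `get_probability`):

    from collections import defaultdict

    order = 2
    lhs_count = defaultdict(int)                        # occurrences of each LHS pattern
    rhs_count = defaultdict(lambda: defaultdict(int))   # RHS occurrences per LHS pattern

    def update_model(fuzzified):
        # fuzzified holds `order` LHS labels followed by one RHS label
        lhs, rhs = tuple(fuzzified[:order]), fuzzified[order]
        lhs_count[lhs] += 1
        rhs_count[lhs][rhs] += 1

    def get_probability(lhs, rhs):
        # relative frequency of RHS given LHS, as in the weighted FLRG
        return rhs_count[lhs][rhs] / lhs_count[lhs]

    update_model(["A1", "A2", "A2"])
    update_model(["A1", "A2", "A3"])
    print(get_probability(("A1", "A2"), "A3"))  # 0.5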
diff --git a/tests/general.py b/tests/general.py
index d24425e..a049804 100644
--- a/tests/general.py
+++ b/tests/general.py
@@ -11,15 +11,15 @@ from mpl_toolkits.mplot3d import Axes3D
 import pandas as pd
 from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
 from pyFTS.common import FLR,FuzzySet,Membership,Transformations
-from pyFTS import fts,hofts,ifts,pwfts,tree, chen, pfts
+from pyFTS import fts,hofts,ifts,pwfts,tree, chen
 from pyFTS.benchmarks import benchmarks as bchmk
-from pyFTS.benchmarks import naive
+from pyFTS.benchmarks import naive, arima
 from pyFTS.benchmarks import Measures
 from numpy import random
 
 #print(FCM.FCMPartitionerTrimf.__module__)
 
-#gauss = random.normal(0,1.0,2000)
+#gauss = random.normal(0,1.0,5000)
 #gauss_teste = random.normal(0,1.0,400)
 
@@ -28,9 +28,12 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
 
 #taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
 #taiex = np.array(taiexpd["avg"][:5000])
 
-taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
-taiex_treino = np.array(taiex["avg"][2500:3900])
-taiex_teste = np.array(taiex["avg"][3901:4500])
+nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
+nasdaq = np.array(nasdaqpd["avg"][:5000])
+
+#taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+#taiex_treino = np.array(taiex["avg"][2500:3900])
+#taiex_teste = np.array(taiex["avg"][3901:4500])
 
 #print(len(taiex))
 
@@ -38,30 +41,38 @@ taiex_teste = np.array(taiex["avg"][3901:4500])
 #, , 
 
-diff = Transformations.Differential(1)
-
-#bchmk.sliding_window(taiex,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS],
-#                     partitioners=[Grid.GridPartitioner, FCM.FCMPartitioner, Entropy.EntropyPartitioner],
-#                     partitions=[10, 15, 20, 25, 30, 35, 40], dump=True, save=True, file="experiments/points.csv")
+#diff = Transformations.Differential(1)
 
-bchmk.allPointForecasters(taiex_treino, taiex_treino, 7, transformation=diff,
-                          models=[ naive.Naive, pfts.ProbabilisticFTS, pwfts.ProbabilisticWeightedFTS],
-                          statistics=True, residuals=False, series=False)
+bchmk.external_point_sliding_window([naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA],
+                                    [None, (1,0,0),(1,1,0),(2,0,0), (2,1,0), (1,1,1), (1,0,1)],
+                                    nasdaq,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
+                                    dump=True, save=True, file="experiments/arima_nasdaq.csv")
 
-data_train_fs = Grid.GridPartitioner(taiex_treino, 10, transformation=diff).sets
+#bchmk.point_sliding_window(taiex,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
+#                     partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
+#                     partitions= [45,55, 65, 75, 85, 95,105,115,125,135, 150], #np.arange(5,150,step=10), #
+#                     dump=True, save=True, file="experiments/taiex_point_new.csv")
 
-fts1 = pfts.ProbabilisticFTS("")
-fts1.appendTransformation(diff)
-fts1.train(taiex_treino, data_train_fs, order=1)
+#bchmk.allPointForecasters(taiex_treino, taiex_treino, 95, #transformation=diff,
+#                          models=[ naive.Naive, pfts.ProbabilisticFTS, pwfts.ProbabilisticWeightedFTS],
+#                          statistics=True, residuals=False, series=False)
 
-print(fts1.forecast([5000, 5000]))
+#data_train_fs = Grid.GridPartitioner(taiex_treino, 10, transformation=diff).sets
 
-fts2 = pwfts.ProbabilisticWeightedFTS("")
-fts2.appendTransformation(diff)
-fts2.train(taiex_treino, data_train_fs, order=1)
+#fts1 = pfts.ProbabilisticFTS("")
+#fts1.appendTransformation(diff)
+#fts1.train(taiex_treino, data_train_fs, order=1)
+
+#print(fts1.forecast([5000, 5000]))
+
+#fts2 = pwfts.ProbabilisticWeightedFTS("")
+#fts2.appendTransformation(diff)
+#fts2.train(taiex_treino, data_train_fs, order=1)
+
+#print(fts2.forecast([5000, 5000]))
 
-print(fts2.forecast([5000, 5000]))
 
 #tmp = Grid.GridPartitioner(taiex_treino,7,transformation=diff)
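tests/general.py now drives `external_point_sliding_window`, which walks a fixed-size window over the series, fits each candidate model on the leading `train` fraction of the window, and scores point forecasts on the remainder. A generic sketch of that evaluation protocol using a naive last-value model (hypothetical `sliding_window_rmse` helper; the pyFTS signature takes models and parameter lists as shown in the diff above):

    import numpy as np

    def sliding_window_rmse(data, window_size, train=0.8):
        # slide a fixed-size window over the series; in each window, "train"
        # on the first fraction and score one-step forecasts on the rest
        scores = []
        for start in range(0, len(data) - window_size + 1, window_size):
            window = data[start:start + window_size]
            cut = int(len(window) * train)
            last = window[cut - 1]                   # last observed training value
            errors = []
            for actual in window[cut:]:
                errors.append((actual - last) ** 2)  # naive forecast: previous value
                last = actual
            scores.append(np.sqrt(np.mean(errors)))
        return scores

    series = np.sin(np.arange(200) / 10.0)
    print(sliding_window_rmse(series, window_size=50))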