- Bugfixes and improvements to Ensemble FTS and distributed_benchmarks

Petrônio Cândido de Lima e Silva 2017-05-20 13:43:39 -03:00
parent f533dd249a
commit d1b18ef5c4
5 changed files with 101 additions and 42 deletions

View File

@@ -64,7 +64,7 @@ def get_interval_methods():
 def get_probabilistic_methods():
     """Return all FTS methods for probabilistic forecasting"""
-    return [quantreg.QuantileRegression, ensemble.EnsembleFTS, pwfts.ProbabilisticWeightedFTS]
+    return [arima.ARIMA, ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]

 def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):
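The probabilistic roster swaps quantreg and the plain EnsembleFTS for arima.ARIMA and ensemble.AllMethodEnsembleFTS. A minimal sketch of how a driver might consume this helper, assuming it lives in pyFTS.benchmarks.benchmarks (the loop body is purely illustrative):

from pyFTS.benchmarks import benchmarks

# List the probabilistic forecasters now under benchmark (illustrative use).
for method in benchmarks.get_probabilistic_methods():
    print(method.__module__, method.__name__)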

View File

@@ -417,19 +417,24 @@ def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, windo
     :return: a dictionary with the benchmark results
     """
     import time
-    from pyFTS import hofts, ifts, pwfts
+    import numpy as np
+    from pyFTS import hofts, ifts, pwfts, ensemble
     from pyFTS.partitioners import Grid, Entropy, FCM
     from pyFTS.benchmarks import Measures, arima, quantreg

-    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, quantreg.QuantileRegression]
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, ensemble.AllMethodEnsembleFTS]

     tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]

     tmp3 = [Measures.get_distribution_statistics]

-    pttr = str(partitioner.__module__).split('.')[-1]
-    _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
+    if mfts.benchmark_only:
+        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
+    else:
+        pttr = str(partitioner.__module__).split('.')[-1]
+        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)

     mfts.partitioner = partitioner

     if transformation is not None:
         mfts.appendTransformation(transformation)
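run_ahead now keys benchmark-only models (which carry no partitioner) by name, order, and alpha, while partitioned FTS models keep the partitioner-based key. Plugging illustrative values into the two formats above:

# Benchmark-only key: shortname + order + alpha (values illustrative).
shortname, order, alpha = "ARIMA", (1, 0, 0), 0.05
print(shortname + str(order if order is not None else "") + str(alpha))
# -> ARIMA(1, 0, 0)0.05

# Partitioned-model key: shortname, order, partitioner module, partition count.
shortname, order, pttr, partitions = "PWFTS", 2, "Grid", 30
print(shortname + " n = " + str(order) + " " + pttr + " q = " + str(partitions))
# -> PWFTS n = 2 Grid q = 30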
@@ -456,9 +461,10 @@ def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, windo
     return ret

-def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None, partitioners=[Grid.GridPartitioner],
+def ahead_sliding_window(data, windowsize, steps, resolution, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner],
                          partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
-                         save=False, file=None, sintetic=False,nodes=None, depends=None):
+                         benchmark_models=None, benchmark_models_parameters = None,
+                         save=False, file=None, synthetic=False, nodes=None):
     """
     Distributed sliding window benchmarks for FTS probabilistic forecasters
     :param data:
@@ -475,12 +481,21 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
     :param dump:
     :param save: save results
     :param file: file path to save the results
-    :param sintetic: if true only the average and standard deviation of the results
+    :param synthetic: if true only the average and standard deviation of the results
     :param nodes: list of cluster nodes to distribute tasks
     :param depends: list of module dependencies
     :return: DataFrame with the results
     """
-    cluster = dispy.JobCluster(run_point, nodes=nodes) # , depends=dependencies)
+    alphas = [0.05, 0.25]
+
+    if benchmark_models is None and models is None:
+        benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA]
+
+    if benchmark_models_parameters is None:
+        benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 0), (2, 0, 1), (2, 0, 2)]
+
+    cluster = dispy.JobCluster(run_ahead, nodes=nodes) # , depends=dependencies)

     http_server = dispy.httpd.DispyHTTPServer(cluster)
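Two things change here: when neither models nor benchmark_models is given, five default ARIMA specifications are benchmarked, and the dispy cluster now ships run_ahead instead of run_point, so each job performs one complete multi-step-ahead benchmark on a worker node. A generic, self-contained dispy sketch of that pattern (compute stands in for run_ahead; the node mask is illustrative):

import dispy

def compute(n):
    # stand-in for run_ahead: any picklable function can be distributed
    return n * n

cluster = dispy.JobCluster(compute, nodes=['192.168.0.*'])
jobs = [cluster.submit(i) for i in range(4)]
for job in jobs:
    print(job())  # job() blocks until the node returns the result
cluster.close()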
@@ -511,10 +526,20 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
         else:
             pool.append(mfts)

+    if benchmark_models is not None:
+        for count, model in enumerate(benchmark_models, start=0):
+            for a in alphas:
+                par = benchmark_models_parameters[count]
+                mfts = model(str(par if par is not None else ""), alpha=a, dist=True)
+                mfts.order = par
+                pool.append(mfts)

     experiments = 0
-    for ct, train, test in Util.sliding_window(data, windowsize, train):
+    for ct, train, test in Util.sliding_window(data, windowsize, train, inc=inc):
         experiments += 1

+        benchmarks_only = {}

         if dump: print('\nWindow: {0}\n'.format(ct))

         for partition in partitions:
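With the defaults above, the nested loop pairs each of the five ARIMA orders with both alphas, so ten benchmark-only models join the pool; Util.sliding_window also receives the new inc argument, which presumably advances consecutive windows by a fraction of windowsize instead of a full window. A sketch of the expansion, with illustrative key strings:

# Illustrative expansion of the loop above: five default ARIMA orders
# crossed with alphas = [0.05, 0.25] put ten benchmark-only models in the pool.
orders = [(1, 0, 0), (1, 0, 1), (2, 0, 0), (2, 0, 1), (2, 0, 2)]
alphas = [0.05, 0.25]
pool = [(str(par), a) for par in orders for a in alphas]
print(len(pool))   # 10
print(pool[0])     # ('(1, 0, 0)', 0.05)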
@@ -524,7 +549,11 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
             data_train_fs = partitioner(train, partition, transformation=transformation)

             for id, m in enumerate(pool,start=0):
-                job = cluster.submit(m, data_train_fs, train, test, ct, transformation)
+                if m.benchmark_only and m.shortname in benchmarks_only:
+                    continue
+                else:
+                    benchmarks_only[m.shortname] = m
+                job = cluster.submit(m, data_train_fs, train, test, steps, resolution, ct, transformation)
                 job.id = id # associate an ID to identify jobs (if needed later)
                 jobs.append(job)
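Benchmark-only models ignore the partitioner, so re-submitting them for every partition scheme would repeat identical work; the per-window benchmarks_only dict guarantees a single submission each. A self-contained sketch of that filter (model names illustrative):

class Model:
    def __init__(self, shortname, benchmark_only):
        self.shortname, self.benchmark_only = shortname, benchmark_only

pool = [Model("ARIMA(1, 0, 0)", True), Model("PWFTS", False)]
submitted, benchmarks_only = [], {}
for partition in (10, 20, 30):         # partition schemes within one window
    for m in pool:
        if m.benchmark_only and m.shortname in benchmarks_only:
            continue                   # partition-independent model: run once
        else:
            benchmarks_only[m.shortname] = m
        submitted.append((m.shortname, partition))

print(submitted)  # ARIMA submitted once, PWFTS once per partition scheme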
@@ -559,4 +588,4 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
     http_server.shutdown() # this waits until browser gets all updates
     cluster.close()

-    return benchmarks.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
+    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic)

View File

@@ -6,7 +6,7 @@ import pandas as pd
 import math
 from operator import itemgetter
 from pyFTS.common import FLR, FuzzySet, SortedCollection
-from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
+from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu, sfts
 from pyFTS.benchmarks import arima, quantreg
 from pyFTS.common import Transformations
 import scipy.stats as st
@@ -127,24 +127,36 @@ class EnsembleFTS(fts.FTS):
         if 'method' in kwargs:
             self.interval_method = kwargs.get('method','quantile')

+        if 'alpha' in kwargs:
+            self.alpha = kwargs.get('alpha', self.alpha)

         ret = []

-        samples = [[k,k] for k in data[-self.order:]]
+        samples = [[k] for k in data[-self.order:]]

-        for k in np.arange(self.order, steps+self.order):
+        for k in np.arange(self.order, steps + self.order):
             forecasts = []
-            sample = samples[k - self.order : k]
-            lo_sample = [i[0] for i in sample]
-            up_sample = [i[1] for i in sample]
-            forecasts.extend(self.get_models_forecasts(lo_sample) )
-            forecasts.extend(self.get_models_forecasts(up_sample))
+            lags = {}
+            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
+
+            # Build the tree with all possible paths
+            root = tree.FLRGTreeNode(None)
+            tree.buildTreeWithoutOrder(root, lags, 0)
+
+            for p in root.paths():
+                path = list(reversed(list(filter(None.__ne__, p))))
+
+                forecasts.extend(self.get_models_forecasts(path))
+
+            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.2)))

             interval = self.get_interval(forecasts)

             if len(interval) == 1:
                 interval = interval[0]

             ret.append(interval)
-            samples.append(interval)

         return ret
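Instead of propagating a single [lo, hi] pair per step, the method now keeps a list of sampled values for each lag and, via tree.buildTreeWithoutOrder, feeds every combination of one value per lag to the member models. A standalone sketch of that expansion, with itertools.product standing in for the pyFTS tree helpers:

from itertools import product

# One list of sampled values per lag (illustrative numbers); each path in the
# Cartesian product corresponds to one tree path fed to get_models_forecasts.
samples = [[10.0], [9.5, 10.5], [9.0, 10.0, 11.0]]
for path in product(*samples):
    print(list(path))  # 1 * 2 * 3 = 6 paths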
@@ -183,7 +195,7 @@ class EnsembleFTS(fts.FTS):
                 forecasts.extend(self.get_models_forecasts(path))

-            samples.append(sampler(forecasts, [0.05, 0.25, 0.5, 0.75, 0.95 ]))
+            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))

             grid = self.gridCountPoint(grid, resolution, index, forecasts)
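Both ahead methods now reduce the pooled forecasts with sampler over an evenly spaced quantile grid (the nine deciles here, five points in the interval method) rather than a fixed five-point list. A plausible sketch of sampler's role, assuming it is a percentile reducer:

import numpy as np

def sampler_sketch(data, quantiles):
    # Assumed behaviour: collapse a pool of forecasts to the given quantiles.
    return [np.percentile(data, q * 100) for q in quantiles]

print(sampler_sketch(list(range(1, 11)), np.arange(0.1, 1, 0.1)))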
@@ -197,7 +209,7 @@ class EnsembleFTS(fts.FTS):
 class AllMethodEnsembleFTS(EnsembleFTS):
-    def __init__(self, **kwargs):
+    def __init__(self, name, **kwargs):
         super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS", **kwargs)
         self.min_order = 3
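The subclass now declares a positional name parameter, although it still passes the fixed label "Ensemble FTS" to the superclass, so the argument is effectively ignored; note that the test change further down still instantiates it without one. A call under the new signature, with keyword values taken from that test:

from pyFTS import ensemble

# name is now required positionally even though the superclass label is fixed.
e = ensemble.AllMethodEnsembleFTS("", alpha=0.25, point_method="median",
                                  interval_method="quantile")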
@@ -210,7 +222,7 @@ class AllMethodEnsembleFTS(EnsembleFTS):
         self.original_min = min(data)

         fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
-                      sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
+                      sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS, sfts.SeasonalFTS]

         ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
@@ -227,3 +239,5 @@ class AllMethodEnsembleFTS(EnsembleFTS):
                 self.set_transformations(model)
                 model.train(data, sets, order=o)
                 self.appendModel(model)
+
+

View File

@@ -25,7 +25,7 @@ passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
 passengers = np.array(passengers["Passengers"])

-e = ensemble.AllMethodEnsembleFTS()
+e = ensemble.AllMethodEnsembleFTS(alpha=0.25, point_method="median", interval_method='quantile')

 fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS,
               ismailefendi.ImprovedWeightedFTS]
@@ -99,24 +99,28 @@ print(_normal)
 """

 #"""
-#_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
-#print(_extremum)
+_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
+print(_extremum)

-#_quantile = e.forecastAheadInterval(passengers[:50], 40, method="quantile", alpha=0.25)
-#print(_quantile)
+_quantile = e.forecastAheadInterval(passengers[:50], 10, method="quantile", alpha=0.05)
+print(_quantile)
+
+_quantile = e.forecastAheadInterval(passengers[:50], 10, method="quantile", alpha=0.25)
+print(_quantile)

-#_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
-#print(_normal)
+_normal = e.forecastAheadInterval(passengers[:50], 10, method="normal", alpha=0.05)
+print(_normal)
+
+_normal = e.forecastAheadInterval(passengers[:50], 10, method="normal", alpha=0.25)
+print(_normal)

 #"""

 #dist = e.forecastAheadDistribution(passengers, 20)
 #print(dist)

-bchmk.plot_compared_intervals_ahead(passengers[:120],[e], ['blue','red'],
-                                    distributions=[True,False], save=True, file="pictures/distribution_ahead_arma",
-                                    time_from=60, time_to=10, tam=[12,5])
+#bchmk.plot_compared_intervals_ahead(passengers[:120],[e], ['blue','red'],
+#                                    distributions=[True,False], save=True, file="pictures/distribution_ahead_arma",
+#                                    time_from=60, time_to=10, tam=[12,5])
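The test now exercises all three interval methods at two alphas. For orientation, a standalone sketch of what each method plausibly computes from the pooled member forecasts (assumed semantics, not the pyFTS source):

import numpy as np
import scipy.stats as st

def interval_sketch(forecasts, method="quantile", alpha=0.05):
    if method == "extremum":      # widest possible band
        return [min(forecasts), max(forecasts)]
    if method == "quantile":      # empirical alpha / 1-alpha quantiles
        return [np.percentile(forecasts, alpha * 100),
                np.percentile(forecasts, (1 - alpha) * 100)]
    # "normal": Gaussian band around the ensemble mean
    z = st.norm.ppf(1 - alpha / 2)
    return [np.mean(forecasts) - z * np.std(forecasts),
            np.mean(forecasts) + z * np.std(forecasts)]

print(interval_sketch([95.0, 100.0, 103.0, 110.0], method="extremum"))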

View File

@@ -28,8 +28,8 @@ diff = Transformations.Differential(1)
 DATASETS
 """

-passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
-passengers = np.array(passengers["Passengers"])
+#passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
+#passengers = np.array(passengers["Passengers"])

 #sunspots = pd.read_csv("DataSets/sunspots.csv", sep=",")
 #sunspots = np.array(sunspots["SUNACTIVITY"])
@@ -37,8 +37,8 @@ passengers = np.array(passengers["Passengers"])
 #gauss = random.normal(0,1.0,5000)
 #gauss_teste = random.normal(0,1.0,400)

-#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
-#taiex = np.array(taiexpd["avg"][:5000])
+taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+taiex = np.array(taiexpd["avg"][:5000])

 #nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
 #nasdaq = np.array(nasdaqpd["avg"][0:5000])
@@ -59,8 +59,8 @@ passengers = np.array(passengers["Passengers"])
 #print(lag)
 #print(a)

-from pyFTS.benchmarks import benchmarks as bchmk
-#from pyFTS.benchmarks import distributed_benchmarks as bchmk
+#from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import distributed_benchmarks as bchmk
 #from pyFTS.benchmarks import parallel_benchmarks as bchmk
 from pyFTS.benchmarks import Util
 from pyFTS.benchmarks import arima, quantreg, Measures
@@ -68,7 +68,7 @@ from pyFTS.benchmarks import arima, quantreg, Measures
 #Util.cast_dataframe_to_synthetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11)
 #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)

-#"""
+"""
 arima100 = arima.ARIMA("", alpha=0.25)
 #tmp.appendTransformation(diff)
 arima100.train(passengers, None, order=(1,0,0))
@@ -137,6 +137,18 @@ bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.Weigh
 #"""

+bchmk.ahead_sliding_window(taiex, 2000, steps=10, resolution=100, train=0.8, inc=0.1,
+                           partitioners=[Grid.GridPartitioner],
+                           partitions= np.arange(10,200,step=10),
+                           dump=True, save=True, file="experiments/taiex_ahead_analytic.csv",
+                           nodes=['192.168.0.105', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
+
+bchmk.ahead_sliding_window(taiex, 2000, steps=10, resolution=100, train=0.8, inc=0.1,
+                           partitioners=[Grid.GridPartitioner],
+                           partitions= np.arange(3,20,step=2), transformation=diff,
+                           dump=True, save=True, file="experiments/taiex_ahead_analytic_diff.csv",
+                           nodes=['192.168.0.105', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])

 """
 from pyFTS.partitioners import Grid
 from pyFTS import pwfts