- Several bugfixes in benchmarks methods and optimizations

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-08 14:49:45 -03:00
parent ff67356d64
commit 692503704e
6 changed files with 93 additions and 121 deletions

View File

@ -10,7 +10,7 @@ class ARIMA(fts.FTS):
""" """
Façade for statsmodels.tsa.arima_model Façade for statsmodels.tsa.arima_model
""" """
def __init__(self, order, **kwargs): def __init__(self, name, **kwargs):
super(ARIMA, self).__init__(1, "ARIMA") super(ARIMA, self).__init__(1, "ARIMA")
self.name = "ARIMA" self.name = "ARIMA"
self.detail = "Auto Regressive Integrated Moving Average" self.detail = "Auto Regressive Integrated Moving Average"
@ -24,34 +24,41 @@ class ARIMA(fts.FTS):
self.benchmark_only = True self.benchmark_only = True
self.min_order = 1 self.min_order = 1
def train(self, data, sets, order=1, parameters=None): def train(self, data, sets, order=(2,1,1), parameters=None):
if parameters is not None: self.p = order[0]
self.p = parameters[0] self.d = order[1]
self.d = parameters[1] self.q = order[2]
self.q = parameters[2] self.order = max([self.p, self.d, self.q])
self.order = max([self.p, self.d, self.q]) self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")"
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")"
old_fit = self.model_fit old_fit = self.model_fit
self.model = stats_arima(data, order=(self.p, self.d, self.q)) self.model = stats_arima(data, order=(self.p, self.d, self.q))
try: self.model_fit = self.model.fit(disp=0)
self.model_fit = self.model.fit(disp=0)
except:
try:
self.model = stats_arima(data, order=(self.p, self.d, self.q))
self.model_fit = self.model.fit(disp=1)
except:
self.model_fit = old_fit
self.trained_data = data #.tolist() def ar(self, data):
return data.dot(self.model_fit.arparams)
def ma(self, data):
return data.dot(self.model_fit.maparams)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
if self.model_fit is None: if self.model_fit is None:
return np.nan return np.nan
ndata = np.array(self.doTransformations(data))
l = len(ndata)
ret = [] ret = []
for t in data:
output = self.model_fit.forecast() ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l)])
ret.append( output[0] )
self.trained_data = np.append(self.trained_data, t) #.append(t) residuals = np.array([ar[k - self.p] - ndata[k] for k in np.arange(self.p, l)])
self.train(self.trained_data,None,order=self.order, parameters=(self.p, self.d, self.q))
ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(ar) + 1)])
ret = ar + ma
ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
return ret return ret

View File

@ -60,74 +60,10 @@ def get_probabilistic_methods():
return [quantreg.QuantileRegression, ensemble.EnsembleFTS, pwfts.ProbabilisticWeightedFTS] return [quantreg.QuantileRegression, ensemble.EnsembleFTS, pwfts.ProbabilisticWeightedFTS]
def external_point_sliding_window(models, parameters, data, windowsize,train=0.8, dump=False,
save=False, file=None, sintetic=True):
"""
Sliding window benchmarks for non FTS point forecasters
:param models: non FTS point forecasters
:param parameters: parameters for each model
:param data: data set
:param windowsize: size of sliding window
:param train: percentual of sliding window data used to train the models
:param dump:
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
"""
objs = {}
lcolors = {}
rmse = {}
smape = {}
u = {}
times = {}
experiments = 0
for ct, train, test in Util.sliding_window(data, windowsize, train):
experiments += 1
for count, method in enumerate(models, start=0):
model = method("")
_start = time.time()
model.train(train, None, parameters=parameters[count])
_end = time.time()
_key = model.shortname
if dump: print(ct, _key)
if _key not in objs:
objs[_key] = model
lcolors[_key] = colors[count % ncol]
rmse[_key] = []
smape[_key] = []
u[_key] = []
times[_key] = []
_tdiff = _end - _start
try:
_start = time.time()
_rmse, _smape, _u = Measures.get_point_statistics(test, model, None)
_end = time.time()
rmse[_key].append(_rmse)
smape[_key].append(_smape)
u[_key].append(_u)
_tdiff += _end - _start
times[_key].append(_tdiff)
if dump: print(_rmse, _smape, _u, _tdiff)
except:
rmse[_key].append(np.nan)
smape[_key].append(np.nan)
u[_key].append(np.nan)
times[_key].append(np.nan)
return Util.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner], def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3,transformation=None,indexer=None,dump=False, partitions=[10], max_order=3,transformation=None,indexer=None,dump=False,
save=False, file=None, sintetic=True): benchmark_models=None, benchmark_models_parameters = None,
save=False, file=None, sintetic=True):
""" """
Sliding window benchmarks for FTS point forecasters Sliding window benchmarks for FTS point forecasters
:param data: :param data:
@ -153,7 +89,6 @@ def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[G
if models is None: if models is None:
models = get_point_methods() models = get_point_methods()
objs = {} objs = {}
lcolors = {} lcolors = {}
rmse = {} rmse = {}

View File

@ -13,7 +13,7 @@ import dispy
import dispy.httpd import dispy.httpd
import numpy as np import numpy as np
from pyFTS.benchmarks import benchmarks, Util as bUtil from pyFTS.benchmarks import benchmarks, Util as bUtil, naive, quantreg, arima
from pyFTS.common import Util from pyFTS.common import Util
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
@ -31,22 +31,28 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
:return: a dictionary with the benchmark results :return: a dictionary with the benchmark results
""" """
import time import time
from pyFTS import yu,chen,hofts,ifts,pwfts,ismailefendi,sadaei from pyFTS import yu,chen,hofts,ifts,pwfts,ismailefendi,sadaei, song, cheng, hwang
from pyFTS.partitioners import Grid, Entropy, FCM from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures from pyFTS.benchmarks import Measures, naive, arima, quantreg
tmp = [yu.WeightedFTS, chen.ConventionalFTS, hofts.HighOrderFTS, ifts.IntervalFTS, tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
pwfts.ProbabilisticWeightedFTS, ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS] cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
pwfts.ProbabilisticWeightedFTS]
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner] tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
tmp3 = [Measures.get_point_statistics] tmp3 = [Measures.get_point_statistics]
pttr = str(partitioner.__module__).split('.')[-1] if mfts.benchmark_only:
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions) _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
mfts.partitioner = partitioner else:
if transformation is not None: pttr = str(partitioner.__module__).split('.')[-1]
mfts.appendTransformation(transformation) _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
mfts.partitioner = partitioner
if transformation is not None:
mfts.appendTransformation(transformation)
_start = time.time() _start = time.time()
mfts.train(train_data, partitioner.sets, order=mfts.order) mfts.train(train_data, partitioner.sets, order=mfts.order)
@ -65,6 +71,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner], def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False, partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
benchmark_models=None, benchmark_models_parameters = None,
save=False, file=None, sintetic=False,nodes=None, depends=None): save=False, file=None, sintetic=False,nodes=None, depends=None):
""" """
Distributed sliding window benchmarks for FTS point forecasters Distributed sliding window benchmarks for FTS point forecasters
@ -78,6 +85,8 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
:param transformation: data transformation :param transformation: data transformation
:param indexer: seasonal indexer :param indexer: seasonal indexer
:param dump: :param dump:
:param benchmark_models: Non FTS models to benchmark
:param benchmark_models_parameters: Non FTS models parameters
:param save: save results :param save: save results
:param file: file path to save the results :param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results :param sintetic: if true only the average and standard deviation of the results
@ -86,6 +95,13 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
:return: DataFrame with the results :return: DataFrame with the results
""" """
if benchmark_models is None:
benchmark_models = [naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
quantreg.QuantileRegression, quantreg.QuantileRegression]
if benchmark_models_parameters is None:
benchmark_models_parameters = [None, (1, 0, 1), (1, 1, 1), (2, 1, 1), (2, 1, 2), 1, 2]
cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies) cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
http_server = dispy.httpd.DispyHTTPServer(cluster) http_server = dispy.httpd.DispyHTTPServer(cluster)
@ -116,11 +132,20 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
pool.append(mfts) pool.append(mfts)
else: else:
pool.append(mfts) pool.append(mfts)
mfts.order = 1
pool.append(mfts)
for count, model in enumerate(benchmark_models, start=0):
mfts = model("")
mfts.order = benchmark_models_parameters[count]
pool.append(mfts)
experiments = 0 experiments = 0
for ct, train, test in Util.sliding_window(data, windowsize, train): for ct, train, test in Util.sliding_window(data, windowsize, train):
experiments += 1 experiments += 1
benchmarks_only = {}
if dump: print('\nWindow: {0}\n'.format(ct)) if dump: print('\nWindow: {0}\n'.format(ct))
for partition in partitions: for partition in partitions:
@ -129,9 +154,13 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
data_train_fs = partitioner(train, partition, transformation=transformation) data_train_fs = partitioner(train, partition, transformation=transformation)
for id, m in enumerate(pool,start=0): for _id, m in enumerate(pool,start=0):
if m.benchmark_only and m.shortname in benchmarks_only:
continue
else:
benchmarks_only[m.shortname] = m
job = cluster.submit(m, data_train_fs, train, test, ct, transformation) job = cluster.submit(m, data_train_fs, train, test, ct, transformation)
job.id = id # associate an ID to identify jobs (if needed later) job.id = _id # associate an ID to identify jobs (if needed later)
jobs.append(job) jobs.append(job)
for job in jobs: for job in jobs:

View File

@ -6,7 +6,7 @@ from pyFTS import fts
class Naive(fts.FTS): class Naive(fts.FTS):
"""Naïve Forecasting method""" """Naïve Forecasting method"""
def __init__(self, order, name, **kwargs): def __init__(self, name, **kwargs):
super(Naive, self).__init__(1, "Naive " + name) super(Naive, self).__init__(1, "Naive " + name)
self.name = "Naïve Model" self.name = "Naïve Model"
self.detail = "Naïve Model" self.detail = "Naïve Model"

View File

@ -9,7 +9,7 @@ from pyFTS import fts
class QuantileRegression(fts.FTS): class QuantileRegression(fts.FTS):
"""Façade for statsmodels.regression.quantile_regression""" """Façade for statsmodels.regression.quantile_regression"""
def __init__(self, order, **kwargs): def __init__(self, name, **kwargs):
super(QuantileRegression, self).__init__(1, "QR") super(QuantileRegression, self).__init__(1, "QR")
self.name = "QR" self.name = "QR"
self.detail = "Quantile Regression" self.detail = "Quantile Regression"
@ -27,19 +27,21 @@ class QuantileRegression(fts.FTS):
def train(self, data, sets, order=1, parameters=None): def train(self, data, sets, order=1, parameters=None):
self.order = order self.order = order
if parameters is not None: self.alpha = parameters
self.alpha = parameters
tmp = np.array(self.doTransformations(data)) tmp = np.array(self.doTransformations(data))
lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep') lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
uqt = QuantReg(ndata, lagdata).fit(1 - self.alpha)
mqt = QuantReg(ndata, lagdata).fit(0.5)
lqt = QuantReg(ndata, lagdata).fit(self.alpha)
self.upper_qt = [uqt.params[k] for k in uqt.params.keys()] mqt = QuantReg(ndata, lagdata).fit(0.5)
self.mean_qt = [mqt.params[k] for k in mqt.params.keys()] if self.alpha is not None:
self.lower_qt = [lqt.params[k] for k in lqt.params.keys()] uqt = QuantReg(ndata, lagdata).fit(1 - self.alpha)
lqt = QuantReg(ndata, lagdata).fit(self.alpha)
self.mean_qt = [k for k in mqt.params]
if self.alpha is not None:
self.upper_qt = [uqt.params[k] for k in uqt.params.keys()]
self.lower_qt = [lqt.params[k] for k in lqt.params.keys()]
def linearmodel(self,data,params): def linearmodel(self,data,params):
return params[0] + sum([ data[k] * params[k+1] for k in np.arange(0, self.order) ]) return params[0] + sum([ data[k] * params[k+1] for k in np.arange(0, self.order) ])

View File

@ -38,9 +38,8 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
#gauss = random.normal(0,1.0,5000) #gauss = random.normal(0,1.0,5000)
#gauss_teste = random.normal(0,1.0,400) #gauss_teste = random.normal(0,1.0,400)
taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") taiex = np.array(taiexpd["avg"][:5000])
#taiex = np.array(taiexpd["avg"][:5000])
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",") #nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq = np.array(nasdaqpd["avg"][0:5000]) #nasdaq = np.array(nasdaqpd["avg"][0:5000])
@ -62,7 +61,7 @@ from pyFTS.benchmarks import Util
#Util.cast_dataframe_to_sintetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11) #Util.cast_dataframe_to_sintetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11)
Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11) #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
#tmp = arima.ARIMA("") #tmp = arima.ARIMA("")
#tmp.train(taiex[:1600],None,parameters=(2,0,1)) #tmp.train(taiex[:1600],None,parameters=(2,0,1))
@ -72,11 +71,11 @@ Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/ta
#bchmk.teste(taiex,['192.168.0.109', '192.168.0.101']) #bchmk.teste(taiex,['192.168.0.109', '192.168.0.101'])
#bchmk.point_sliding_window(gauss,2000,train=0.8, #models=[yu.WeightedFTS], # # bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
# partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
# partitions= np.arange(3,10,step=1), #transformation=diff, partitions= np.arange(10,200,step=10), #transformation=diff,
# dump=True, save=True, file="experiments/gauss_point_distributed.csv", dump=True, save=True, file="experiments/taiex_point_analytic.csv",
# nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts]) nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts])
#bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101']) #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])