From d804e15211b1ab1ffe0a113ab64e0acf488f2f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Thu, 13 Apr 2017 12:36:22 -0300 Subject: [PATCH] =?UTF-8?q?=20-=20Refactoring=20of=20ARIMA=20fa=C3=A7ade?= =?UTF-8?q?=20for=20statsmodels=20=20-=20QuantReg=20fa=C3=A7ade=20for=20st?= =?UTF-8?q?atsmodels=20=20-=20EnsembleFTS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/arima.py | 50 ++++++++++++++++------- benchmarks/quantreg.py | 24 +++++++++++ ensemble.py | 93 ++++++++++++++++++++++++++++++++++++++++++ tests/general.py | 42 +++++++++++++++---- 4 files changed, 187 insertions(+), 22 deletions(-) create mode 100644 benchmarks/quantreg.py create mode 100644 ensemble.py diff --git a/benchmarks/arima.py b/benchmarks/arima.py index 358649b..8719a38 100644 --- a/benchmarks/arima.py +++ b/benchmarks/arima.py @@ -3,6 +3,7 @@ import numpy as np from statsmodels.tsa.arima_model import ARIMA as stats_arima +from statsmodels.tsa.arima_model import ARMA from pyFTS import fts @@ -31,24 +32,43 @@ class ARIMA(fts.FTS): old_fit = self.model_fit self.model = stats_arima(data, order=(self.p, self.d, self.q)) - try: - self.model_fit = self.model.fit(disp=0) - except: - try: - self.model = stats_arima(data, order=(self.p, self.d, self.q)) - self.model_fit = self.model.fit(disp=1) - except: - self.model_fit = old_fit + #try: + self.model_fit = self.model.fit(disp=0) + #except: + # try: + # self.model = stats_arima(data, order=(self.p, self.d, self.q)) + # self.model_fit = self.model.fit(disp=1) + # except: + # self.model_fit = old_fit - self.trained_data = data #.tolist() + #self.trained_data = data #.tolist() + + def ar(self, data): + return data.dot(self.model_fit.arparams) + + def ma(self, data): + return data.dot(self.model_fit.maparams) def forecast(self, data): if self.model_fit is None: return np.nan + + order = self.p + + ndata = np.array(self.doTransformations(data)) + + l = len(ndata) + ret = [] - for t in data: - output = self.model_fit.forecast() - ret.append( output[0] ) - self.trained_data = np.append(self.trained_data, t) #.append(t) - self.train(self.trained_data,None,order=self.order, parameters=(self.p, self.d, self.q)) - return ret \ No newline at end of file + + ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l)]) + + residuals = np.array([ar[k - self.p] - ndata[k] for k in np.arange(self.p, l)]) + + ma = np.array([self.ma(residuals[k - self.q : k]) for k in np.arange(self.q, len(ar)+1)]) + + ret = ar + ma + + ret = self.doInverseTransformations(ret, params=[data[order - 1:]]) + + return ret diff --git a/benchmarks/quantreg.py b/benchmarks/quantreg.py new file mode 100644 index 0000000..8c0903e --- /dev/null +++ b/benchmarks/quantreg.py @@ -0,0 +1,24 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +import numpy as np +from statsmodels.regression.quantile_regression import QuantReg +from pyFTS import fts + + +class QuantileRegression(fts.FTS): + def __init__(self, name): + super(QuantileRegression, self).__init__(1, "QR") + self.name = "QR" + self.detail = "Quantile Regression" + self.isHighOrder = True + self.hasIntervalForecasting = True + self.benchmark_only = True + self.minOrder = 1 + self.alpha = 0.5 + + def train(self, data, sets, order=1, parameters=None): + pass + + def forecast(self, data): + pass diff --git a/ensemble.py b/ensemble.py new file mode 100644 index 0000000..c0d0fc7 --- /dev/null +++ b/ensemble.py @@ -0,0 +1,93 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +import numpy as np +import pandas as pd +import math +from operator import itemgetter +from pyFTS.common import FLR, FuzzySet, SortedCollection +from pyFTS import fts + +class EnsembleFTS(fts.FTS): + def __init__(self, name, update=True): + super(EnsembleFTS, self).__init__("Ensemble FTS") + self.shortname = "Ensemble FTS " + name + self.name = "Ensemble FTS" + self.flrgs = {} + self.hasPointForecasting = True + self.hasIntervalForecasting = True + self.hasDistributionForecasting = True + self.isHighOrder = True + self.models = [] + self.parameters = [] + + def train(self, data, sets, order=1,parameters=None): + + pass + + def forecast(self, data): + + ndata = np.array(self.doTransformations(data)) + + l = len(ndata) + + ret = [] + + for k in np.arange(self.order - 1, l): + pass + + ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]]) + + return ret + + def forecastInterval(self, data): + + ndata = np.array(self.doTransformations(data)) + + l = len(ndata) + + ret = [] + + for k in np.arange(self.order - 1, l): + pass + + return ret + + def forecastAhead(self, data, steps): + pass + + def forecastAheadInterval(self, data, steps): + pass + + + def getGridClean(self, resolution): + grid = {} + + if len(self.transformations) == 0: + _min = self.sets[0].lower + _max = self.sets[-1].upper + else: + _min = self.original_min + _max = self.original_max + + for sbin in np.arange(_min,_max, resolution): + grid[sbin] = 0 + + return grid + + def gridCount(self, grid, resolution, index, interval): + #print(interval) + for k in index.inside(interval[0],interval[1]): + #print(k) + grid[k] += 1 + return grid + + def gridCountPoint(self, grid, resolution, index, point): + k = index.find_ge(point) + # print(k) + grid[k] += 1 + return grid + + def forecastAheadDistribution(self, data, steps, resolution, parameters=2): + pass + diff --git a/tests/general.py b/tests/general.py index 32bf43f..7557062 100644 --- a/tests/general.py +++ b/tests/general.py @@ -28,18 +28,46 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") taiex = np.array(taiexpd["avg"][:5000]) -from pyFTS.benchmarks import distributed_benchmarks as bchmk +from statsmodels.tsa.arima_model import ARIMA as stats_arima + +model = stats_arima(taiex[:1600], (2,0,1)).fit(disp=0) + +ar = np.array(taiex[1598:1600]).dot( model.arparams ) + +#print(ar) + +res = ar - taiex[1600] + +#print(res) + +ma = np.array([res]).dot(model.maparams) + +#print(ma) + +print(ar + ma) +print(taiex[1598:1601]) +print(taiex[1600]) + + +#from pyFTS.benchmarks import distributed_benchmarks as bchmk #from pyFTS.benchmarks import parallel_benchmarks as bchmk #from pyFTS.benchmarks import benchmarks as bchmk -from pyFTS import yu +from pyFTS.benchmarks import arima + + +tmp = arima.ARIMA("") +tmp.train(taiex[:1600],None,parameters=(2,0,1)) +teste = tmp.forecast(taiex[1598:1601]) + +print(teste) #bchmk.teste(taiex,['192.168.0.109', '192.168.0.101']) -bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # # - partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], - partitions= np.arange(10,200,step=5), #transformation=diff, - dump=False, save=True, file="experiments/nasdaq_point_distributed.csv", - nodes=['192.168.0.109', '192.168.0.101']) #, depends=[hofts, ifts]) +#bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # # +# partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], +# partitions= np.arange(10,200,step=5), #transformation=diff, +# dump=False, save=True, file="experiments/nasdaq_point_distributed.csv", +# nodes=['192.168.0.109', '192.168.0.101']) #, depends=[hofts, ifts]) #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])