From 0d8d6c9240b1364b4ee365b70c28037e8eec27ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Sun, 14 May 2017 08:54:41 -0300 Subject: [PATCH] - Bugfix on interval forecast of arima --- benchmarks/arima.py | 24 +++++++++++++---------- benchmarks/distributed_benchmarks.py | 2 +- benchmarks/quantreg.py | 5 +++-- tests/general.py | 29 +++++++++++++++------------- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/benchmarks/arima.py b/benchmarks/arima.py index 7e7162a..3f07502 100644 --- a/benchmarks/arima.py +++ b/benchmarks/arima.py @@ -26,7 +26,8 @@ class ARIMA(fts.FTS): self.q = 0 self.benchmark_only = True self.min_order = 1 - self.alpha = (1 - kwargs.get("alpha", 0.90))/2 + self.alpha = kwargs.get("alpha", 0.05) + self.shortname += str(self.alpha) def train(self, data, sets, order, parameters=None): self.p = order[0] @@ -35,6 +36,8 @@ class ARIMA(fts.FTS): self.order = self.p + self.q self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha) + data = self.doTransformations(data, updateUoD=True) + old_fit = self.model_fit try: self.model = stats_arima(data, order=(self.p, self.d, self.q)) @@ -85,25 +88,28 @@ class ARIMA(fts.FTS): sigma = np.sqrt(self.model_fit.sigma2) - ndata = np.array(self.doTransformations(data)) + #ndata = np.array(self.doTransformations(data)) - l = len(ndata) + l = len(data) ret = [] for k in np.arange(self.order, l+1): tmp = [] - sample = [ndata[i] for i in np.arange(k - self.order, k)] + sample = [data[i] for i in np.arange(k - self.order, k)] - mean = self.forecast(sample)[0] + mean = self.forecast(sample) + + if isinstance(mean,(list, np.ndarray)): + mean = mean[0] tmp.append(mean + st.norm.ppf(self.alpha) * sigma) tmp.append(mean + st.norm.ppf(1 - self.alpha) * sigma) ret.append(tmp) - ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True) + #ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True) return ret @@ -113,8 +119,6 @@ class ARIMA(fts.FTS): smoothing = kwargs.get("smoothing",0.2) - alpha = (1 - kwargs.get("alpha", 0.95))/2 - sigma = np.sqrt(self.model_fit.sigma2) ndata = np.array(self.doTransformations(data)) @@ -130,8 +134,8 @@ class ARIMA(fts.FTS): hsigma = (1 + k*smoothing)*sigma - tmp.append(means[k] + st.norm.ppf(alpha) * hsigma) - tmp.append(means[k] + st.norm.ppf(1 - alpha) * hsigma) + tmp.append(means[k] + st.norm.ppf(self.alpha) * hsigma) + tmp.append(means[k] + st.norm.ppf(1 - self.alpha) * hsigma) ret.append(tmp) diff --git a/benchmarks/distributed_benchmarks.py b/benchmarks/distributed_benchmarks.py index d9c9b3e..68d6997 100644 --- a/benchmarks/distributed_benchmarks.py +++ b/benchmarks/distributed_benchmarks.py @@ -282,7 +282,7 @@ def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, :return: DataFrame with the results """ - alphas = [0.5, 0.25] + alphas = [0.05, 0.25] if benchmark_models is None and models is None: benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, diff --git a/benchmarks/quantreg.py b/benchmarks/quantreg.py index 57d6090..418b87a 100644 --- a/benchmarks/quantreg.py +++ b/benchmarks/quantreg.py @@ -10,7 +10,7 @@ from pyFTS import fts class QuantileRegression(fts.FTS): """Façade for statsmodels.regression.quantile_regression""" def __init__(self, name, **kwargs): - super(QuantileRegression, self).__init__(1, "QR"+name) + super(QuantileRegression, self).__init__(1, "") self.name = "QR" self.detail = "Quantile Regression" self.is_high_order = True @@ -23,11 +23,12 @@ class QuantileRegression(fts.FTS): self.upper_qt = None self.mean_qt = None self.lower_qt = None + self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")" def train(self, data, sets, order=1, parameters=None): self.order = order - tmp = np.array(self.doTransformations(data)) + tmp = np.array(self.doTransformations(data, updateUoD=True)) lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep') diff --git a/tests/general.py b/tests/general.py index ed7abac..645832c 100644 --- a/tests/general.py +++ b/tests/general.py @@ -22,6 +22,8 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") #enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") #enrollments = np.array(enrollments["Enrollments"]) +diff = Transformations.Differential(1) + """ DATASETS """ @@ -60,25 +62,26 @@ from pyFTS.benchmarks import arima, quantreg, Measures #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11) -#tmp = arima.ARIMA("") -#tmp.train(taiex[:1600], None, order=(2,0,2)) -#teste = tmp.forecastInterval(taiex[1600:1605]) +tmp = arima.ARIMA("", alpha=0.25) +#tmp.appendTransformation(diff) +tmp.train(nasdaq[:1600], None, order=(2,0,2)) +teste = tmp.forecastInterval(nasdaq[1600:1604]) """ tmp = quantreg.QuantileRegression("", alpha=0.25) tmp.train(taiex[:1600], None, order=1) teste = tmp.forecastInterval(taiex[1600:1605]) - -print(taiex[1600:1605]) +""" +print(nasdaq[1600:1605]) print(teste) -kk = Measures.get_interval_statistics(taiex[1600:1605], tmp) +kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp) print(kk) -""" + #bchmk.teste(taiex,['192.168.0.109', '192.168.0.101']) -diff = Transformations.Differential(1) + """ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4,#models=[yu.WeightedFTS], # # @@ -95,23 +98,23 @@ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.Weighted dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) """ -#""" +""" -bchmk.interval_sliding_window(nasdaq, 2000, train=0.8, inc=0.1,#models=[yu.WeightedFTS], # # +bchmk.interval_sliding_window(taiex, 2000, train=0.8, inc=0.1,#models=[yu.WeightedFTS], # # partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], partitions= np.arange(10,200,step=10), #transformation=diff, - dump=True, save=True, file="experiments/nasdaq_interval_analytic.csv", + dump=True, save=True, file="experiments/taiex_interval_analytic.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) bchmk.interval_sliding_window(nasdaq, 2000, train=0.8, inc=0.1, #models=[yu.WeightedFTS], # # partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], - partitions= np.arange(3,20,step=2), #transformation=diff, + partitions= np.arange(3,20,step=2), transformation=diff, dump=True, save=True, file="experiments/nasdaq_interval_analytic_diff.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) -#""" +""" """ from pyFTS.partitioners import Grid