- Bugfix on interval forecast of arima

Petrônio Cândido de Lima e Silva 2017-05-14 08:54:41 -03:00
parent a0f49ea151
commit 0d8d6c9240
4 changed files with 34 additions and 26 deletions

View File

@@ -26,7 +26,8 @@ class ARIMA(fts.FTS):
         self.q = 0
         self.benchmark_only = True
         self.min_order = 1
-        self.alpha = (1 - kwargs.get("alpha", 0.90))/2
+        self.alpha = kwargs.get("alpha", 0.05)
+        self.shortname += str(self.alpha)

     def train(self, data, sets, order, parameters=None):
         self.p = order[0]
@@ -35,6 +36,8 @@ class ARIMA(fts.FTS):
         self.order = self.p + self.q
         self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)

+        data = self.doTransformations(data, updateUoD=True)
+
         old_fit = self.model_fit
         try:
             self.model = stats_arima(data, order=(self.p, self.d, self.q))
@@ -85,25 +88,28 @@ class ARIMA(fts.FTS):
         sigma = np.sqrt(self.model_fit.sigma2)

-        ndata = np.array(self.doTransformations(data))
+        #ndata = np.array(self.doTransformations(data))

-        l = len(ndata)
+        l = len(data)

         ret = []

         for k in np.arange(self.order, l+1):
             tmp = []

-            sample = [ndata[i] for i in np.arange(k - self.order, k)]
+            sample = [data[i] for i in np.arange(k - self.order, k)]

-            mean = self.forecast(sample)[0]
+            mean = self.forecast(sample)
+
+            if isinstance(mean,(list, np.ndarray)):
+                mean = mean[0]

             tmp.append(mean + st.norm.ppf(self.alpha) * sigma)
             tmp.append(mean + st.norm.ppf(1 - self.alpha) * sigma)

             ret.append(tmp)

-        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)
+        #ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)

         return ret
@@ -113,8 +119,6 @@ class ARIMA(fts.FTS):
         smoothing = kwargs.get("smoothing",0.2)

-        alpha = (1 - kwargs.get("alpha", 0.95))/2
-
         sigma = np.sqrt(self.model_fit.sigma2)

         ndata = np.array(self.doTransformations(data))
@@ -130,8 +134,8 @@ class ARIMA(fts.FTS):
             hsigma = (1 + k*smoothing)*sigma

-            tmp.append(means[k] + st.norm.ppf(alpha) * hsigma)
-            tmp.append(means[k] + st.norm.ppf(1 - alpha) * hsigma)
+            tmp.append(means[k] + st.norm.ppf(self.alpha) * hsigma)
+            tmp.append(means[k] + st.norm.ppf(1 - self.alpha) * hsigma)

             ret.append(tmp)
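The arithmetic behind the fixed interval can be sanity-checked outside the model. The snippet below is only an illustrative sketch, not part of this commit: it assumes numpy and scipy and uses made-up values for the point forecast and for model_fit.sigma2, but it reproduces the st.norm.ppf(self.alpha) / st.norm.ppf(1 - self.alpha) pair now used in forecastInterval, where alpha is interpreted as a tail probability (so alpha=0.05 yields a 90% interval).

# Illustrative only: hypothetical mean/sigma values, not taken from the commit.
import numpy as np
from scipy import stats as st

alpha = 0.05             # tail probability, as now stored directly in self.alpha
mean = 5000.0            # hypothetical one-step-ahead point forecast
sigma = np.sqrt(250.0)   # hypothetical np.sqrt(model_fit.sigma2)

lower = mean + st.norm.ppf(alpha) * sigma        # ppf(0.05) is about -1.645
upper = mean + st.norm.ppf(1 - alpha) * sigma    # ppf(0.95) is about +1.645
print([lower, upper])                            # symmetric 90% interval around the mean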

View File

@@ -282,7 +282,7 @@ def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None,
     :return: DataFrame with the results
     """
-    alphas = [0.5, 0.25]
+    alphas = [0.05, 0.25]

     if benchmark_models is None and models is None:
         benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
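Under this convention the old default of 0.5 is degenerate: a tail probability of 0.5 places both quantiles at the median, so the forecast interval collapses to a single point, which is presumably why it was replaced by 0.05. A quick check (illustrative only, assuming scipy):

from scipy import stats as st

print(st.norm.ppf(0.5), st.norm.ppf(1 - 0.5))    # 0.0 0.0 -> zero-width interval
print(st.norm.ppf(0.05), st.norm.ppf(1 - 0.05))  # about -1.645 and +1.645 -> 90% interval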

View File

@@ -10,7 +10,7 @@ from pyFTS import fts
 class QuantileRegression(fts.FTS):
     """Façade for statsmodels.regression.quantile_regression"""
     def __init__(self, name, **kwargs):
-        super(QuantileRegression, self).__init__(1, "QR"+name)
+        super(QuantileRegression, self).__init__(1, "")
         self.name = "QR"
         self.detail = "Quantile Regression"
         self.is_high_order = True
@@ -23,11 +23,12 @@ class QuantileRegression(fts.FTS):
         self.upper_qt = None
         self.mean_qt = None
         self.lower_qt = None
+        self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"

     def train(self, data, sets, order=1, parameters=None):
         self.order = order

-        tmp = np.array(self.doTransformations(data))
+        tmp = np.array(self.doTransformations(data, updateUoD=True))

         lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')

View File

@@ -22,6 +22,8 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
 #enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
 #enrollments = np.array(enrollments["Enrollments"])

+diff = Transformations.Differential(1)
+
 """
 DATASETS
 """
@@ -60,25 +62,26 @@ from pyFTS.benchmarks import arima, quantreg, Measures
 #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)

-#tmp = arima.ARIMA("")
-#tmp.train(taiex[:1600], None, order=(2,0,2))
-#teste = tmp.forecastInterval(taiex[1600:1605])
+tmp = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+tmp.train(nasdaq[:1600], None, order=(2,0,2))
+teste = tmp.forecastInterval(nasdaq[1600:1604])

 """
 tmp = quantreg.QuantileRegression("", alpha=0.25)
 tmp.train(taiex[:1600], None, order=1)
 teste = tmp.forecastInterval(taiex[1600:1605])
+"""

-print(taiex[1600:1605])
+print(nasdaq[1600:1605])
 print(teste)

-kk = Measures.get_interval_statistics(taiex[1600:1605], tmp)
+kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)

 print(kk)
-"""

 #bchmk.teste(taiex,['192.168.0.109', '192.168.0.101'])

-diff = Transformations.Differential(1)

 """
 bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4,#models=[yu.WeightedFTS], # #
@@ -95,23 +98,23 @@ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.WeightedFTS], # #
                      dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv",
                      nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
 """

-#"""
+"""
-bchmk.interval_sliding_window(nasdaq, 2000, train=0.8, inc=0.1,#models=[yu.WeightedFTS], # #
+bchmk.interval_sliding_window(taiex, 2000, train=0.8, inc=0.1,#models=[yu.WeightedFTS], # #
                      partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
                      partitions= np.arange(10,200,step=10), #transformation=diff,
-                     dump=True, save=True, file="experiments/nasdaq_interval_analytic.csv",
+                     dump=True, save=True, file="experiments/taiex_interval_analytic.csv",
                      nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])

 bchmk.interval_sliding_window(nasdaq, 2000, train=0.8, inc=0.1, #models=[yu.WeightedFTS], # #
                      partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
-                     partitions= np.arange(3,20,step=2), #transformation=diff,
+                     partitions= np.arange(3,20,step=2), transformation=diff,
                      dump=True, save=True, file="experiments/nasdaq_interval_analytic_diff.csv",
                      nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
-#"""
+"""
 """
 from pyFTS.partitioners import Grid