2017-03-03 15:53:55 +04:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf8 -*-
|
|
|
|
|
|
|
|
import numpy as np
|
2017-05-15 21:06:26 +04:00
|
|
|
import pandas as pd
|
2017-03-03 15:53:55 +04:00
|
|
|
from statsmodels.tsa.arima_model import ARIMA as stats_arima
|
2017-05-14 04:03:49 +04:00
|
|
|
import scipy.stats as st
|
2017-03-03 15:53:55 +04:00
|
|
|
from pyFTS import fts
|
2017-05-15 21:06:26 +04:00
|
|
|
from pyFTS.common import SortedCollection
|
2017-03-03 15:53:55 +04:00
|
|
|
|
|
|
|
|
|
|
|
class ARIMA(fts.FTS):
    """
    Façade for statsmodels.tsa.arima_model

    Wraps a statsmodels ARIMA(p, d, q) estimator behind the pyFTS ``fts.FTS``
    benchmark interface, exposing point, interval and probabilistic forecasts.
    """
    def __init__(self, name, **kwargs):
        super(ARIMA, self).__init__(1, "ARIMA" + name)
        # Descriptive metadata for benchmark reports.
        self.name = "ARIMA"
        self.detail = "Auto Regressive Integrated Moving Average"
        # Capability flags consumed by the benchmark framework.
        self.is_high_order = True
        self.has_point_forecasting = True
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.benchmark_only = True
        # Fitted-model state; populated by train().
        self.model = None
        self.model_fit = None
        self.trained_data = None
        # Default ARIMA(p, d, q) hyperparameters; overwritten by train().
        self.p = 1
        self.d = 0
        self.q = 0
        self.min_order = 1
        # Significance level for interval forecasts (two one-sided quantiles).
        self.alpha = kwargs.get("alpha", 0.05)
        self.shortname = self.shortname + str(self.alpha)
2017-03-03 15:53:55 +04:00
|
|
|
|
2017-05-09 00:50:35 +04:00
|
|
|
def train(self, data, sets, order, parameters=None):
|
2017-05-08 21:49:45 +04:00
|
|
|
self.p = order[0]
|
|
|
|
self.d = order[1]
|
|
|
|
self.q = order[2]
|
2017-05-09 00:50:35 +04:00
|
|
|
self.order = self.p + self.q
|
2017-05-14 08:19:49 +04:00
|
|
|
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
|
2017-03-03 15:53:55 +04:00
|
|
|
|
2017-05-14 15:54:41 +04:00
|
|
|
data = self.doTransformations(data, updateUoD=True)
|
|
|
|
|
2017-03-03 15:53:55 +04:00
|
|
|
old_fit = self.model_fit
|
2017-05-09 17:27:47 +04:00
|
|
|
try:
|
|
|
|
self.model = stats_arima(data, order=(self.p, self.d, self.q))
|
|
|
|
self.model_fit = self.model.fit(disp=0)
|
2017-05-10 02:03:53 +04:00
|
|
|
except Exception as ex:
|
|
|
|
print(ex)
|
2017-05-09 17:27:47 +04:00
|
|
|
self.model_fit = None
|
2017-04-15 02:57:39 +04:00
|
|
|
|
2017-05-08 21:49:45 +04:00
|
|
|
def ar(self, data):
|
|
|
|
return data.dot(self.model_fit.arparams)
|
|
|
|
|
|
|
|
def ma(self, data):
|
|
|
|
return data.dot(self.model_fit.maparams)
|
2017-04-15 02:57:39 +04:00
|
|
|
|
|
|
|
    def forecast(self, data, **kwargs):
        """
        One-step-ahead point forecasts computed manually from the fitted
        AR and MA coefficients over every available lag window of *data*.

        Returns ``np.nan`` when no model has been successfully fitted,
        otherwise the inverse-transformed forecast sequence.
        """
        if self.model_fit is None:
            return np.nan

        ndata = np.array(self.doTransformations(data))

        l = len(ndata)

        ret = []

        if self.d == 0:
            # AR component: dot each p-lag window with the fitted AR params.
            ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags
        else:
            # NOTE(review): ndata[k] is out of bounds when k == l (arange goes
            # to l inclusive) — this branch appears to raise IndexError for
            # d != 0; confirm intended indexing before relying on it.
            ar = np.array([ndata[k] + self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)])

        if self.q > 0:
            # Residuals of the AR-only fit; ar[k - p] aligns the AR forecast
            # made from the window ending just before ndata[k].
            residuals = np.array([ndata[k] - ar[k - self.p] for k in np.arange(self.p, l)])

            # MA component over q-length residual windows; +1 again yields the
            # one-step-ahead term from the final residual window.
            ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals)+1)])

            # Drop the first q AR terms so both arrays align elementwise
            # (len(ar) - q == len(ma) by construction).
            ret = ar[self.q:] + ma

        else:
            ret = ar

        # Undo the training-time transformations; params supplies the original
        # values needed by differencing-style inverse transforms.
        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])

        return ret
|
|
|
|
|
|
|
|
def forecastInterval(self, data, **kwargs):
|
|
|
|
|
|
|
|
if self.model_fit is None:
|
|
|
|
return np.nan
|
|
|
|
|
|
|
|
sigma = np.sqrt(self.model_fit.sigma2)
|
|
|
|
|
2017-05-14 15:54:41 +04:00
|
|
|
#ndata = np.array(self.doTransformations(data))
|
2017-05-14 04:03:49 +04:00
|
|
|
|
2017-05-14 15:54:41 +04:00
|
|
|
l = len(data)
|
2017-05-14 04:03:49 +04:00
|
|
|
|
|
|
|
ret = []
|
|
|
|
|
|
|
|
for k in np.arange(self.order, l+1):
|
|
|
|
tmp = []
|
|
|
|
|
2017-05-14 15:54:41 +04:00
|
|
|
sample = [data[i] for i in np.arange(k - self.order, k)]
|
2017-05-14 04:03:49 +04:00
|
|
|
|
2017-05-14 15:54:41 +04:00
|
|
|
mean = self.forecast(sample)
|
|
|
|
|
|
|
|
if isinstance(mean,(list, np.ndarray)):
|
|
|
|
mean = mean[0]
|
2017-05-14 04:03:49 +04:00
|
|
|
|
|
|
|
tmp.append(mean + st.norm.ppf(self.alpha) * sigma)
|
|
|
|
tmp.append(mean + st.norm.ppf(1 - self.alpha) * sigma)
|
|
|
|
|
|
|
|
ret.append(tmp)
|
|
|
|
|
2017-05-15 21:06:26 +04:00
|
|
|
#ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], point_to_interval=True)
|
2017-05-14 04:03:49 +04:00
|
|
|
|
|
|
|
return ret
|
|
|
|
|
|
|
|
def forecastAheadInterval(self, data, steps, **kwargs):
|
|
|
|
if self.model_fit is None:
|
|
|
|
return np.nan
|
|
|
|
|
2017-05-15 21:06:26 +04:00
|
|
|
smoothing = kwargs.get("smoothing",0.5)
|
2017-05-14 04:03:49 +04:00
|
|
|
|
|
|
|
sigma = np.sqrt(self.model_fit.sigma2)
|
|
|
|
|
|
|
|
ndata = np.array(self.doTransformations(data))
|
|
|
|
|
|
|
|
l = len(ndata)
|
|
|
|
|
2017-05-15 21:06:26 +04:00
|
|
|
nmeans = self.forecastAhead(ndata, steps, **kwargs)
|
2017-05-14 04:03:49 +04:00
|
|
|
|
|
|
|
ret = []
|
|
|
|
|
|
|
|
for k in np.arange(0, steps):
|
|
|
|
tmp = []
|
|
|
|
|
|
|
|
hsigma = (1 + k*smoothing)*sigma
|
|
|
|
|
2017-05-15 21:06:26 +04:00
|
|
|
tmp.append(nmeans[k] + st.norm.ppf(self.alpha) * hsigma)
|
|
|
|
tmp.append(nmeans[k] + st.norm.ppf(1 - self.alpha) * hsigma)
|
2017-05-14 04:03:49 +04:00
|
|
|
|
|
|
|
ret.append(tmp)
|
|
|
|
|
2017-05-15 21:06:26 +04:00
|
|
|
ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0,steps)]], interval=True)
|
|
|
|
|
|
|
|
return ret
|
|
|
|
|
2017-05-17 17:45:10 +04:00
|
|
|
def empty_grid(self, resolution):
|
|
|
|
return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
|
|
|
|
|
2017-05-15 21:06:26 +04:00
|
|
|
def forecastAheadDistribution(self, data, steps, **kwargs):
|
|
|
|
smoothing = kwargs.get("smoothing", 0.5)
|
|
|
|
|
|
|
|
sigma = np.sqrt(self.model_fit.sigma2)
|
|
|
|
|
|
|
|
ndata = np.array(self.doTransformations(data))
|
|
|
|
|
|
|
|
l = len(ndata)
|
|
|
|
|
|
|
|
percentile_size = (self.original_max - self.original_min)/100
|
|
|
|
|
|
|
|
resolution = kwargs.get('resolution', percentile_size)
|
|
|
|
|
2017-05-17 17:45:10 +04:00
|
|
|
grid = self.empty_grid(resolution)
|
2017-05-15 21:06:26 +04:00
|
|
|
|
|
|
|
index = SortedCollection.SortedCollection(iterable=grid.keys())
|
|
|
|
|
|
|
|
ret = []
|
|
|
|
|
|
|
|
nmeans = self.forecastAhead(ndata, steps, **kwargs)
|
|
|
|
|
|
|
|
for k in np.arange(0, steps):
|
2017-05-17 17:45:10 +04:00
|
|
|
grid = self.empty_grid(resolution)
|
2017-05-15 21:06:26 +04:00
|
|
|
for alpha in np.arange(0.05, 0.5, 0.05):
|
|
|
|
tmp = []
|
|
|
|
|
|
|
|
hsigma = (1 + k * smoothing) * sigma
|
|
|
|
|
|
|
|
tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma)
|
|
|
|
tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma)
|
|
|
|
|
|
|
|
grid = self.gridCount(grid, resolution, index, tmp)
|
|
|
|
|
|
|
|
tmp = np.array([grid[i] for i in sorted(grid)])
|
|
|
|
|
|
|
|
ret.append(tmp / sum(tmp))
|
2017-05-14 04:03:49 +04:00
|
|
|
|
2017-05-17 17:45:10 +04:00
|
|
|
grid = self.empty_grid(resolution)
|
2017-05-15 21:06:26 +04:00
|
|
|
df = pd.DataFrame(ret, columns=sorted(grid))
|
|
|
|
return df
|