#!/usr/bin/python
# -*- coding: utf8 -*-

import numpy as np
import pandas as pd
from statsmodels.regression.quantile_regression import QuantReg
from statsmodels.tsa.tsatools import lagmat

from pyFTS import fts
from pyFTS.common import SortedCollection


class QuantileRegression(fts.FTS):
    """Façade for statsmodels.regression.quantile_regression"""

    def __init__(self, name, **kwargs):
        super(QuantileRegression, self).__init__(1, "")
        self.name = "QR"
        self.detail = "Quantile Regression"
        self.is_high_order = True
        self.has_point_forecasting = True
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.benchmark_only = True
        self.minOrder = 1
        self.alpha = kwargs.get("alpha", 0.05)  # significance level for the forecast intervals
        self.dist = kwargs.get("dist", False)   # if True, train() also fits the quantile pairs used for distribution forecasts
        self.upper_qt = None
        self.mean_qt = None
        self.lower_qt = None
        self.dist_qt = None
        self.shortname = "QAR(" + str(self.order) + "," + str(self.alpha) + ")"

    def train(self, data, sets, order=1, parameters=None):
        """Fit quantile autoregressions of the given order for the median and,
        when self.alpha is set, for the self.alpha and 1 - self.alpha quantiles."""
        self.order = order

        tmp = np.array(self.doTransformations(data, updateUoD=True))

        # design matrix with the lagged regressors and the corresponding dependent values
        lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')

        mqt = QuantReg(ndata, lagdata).fit(0.5)
        if self.alpha is not None:
            uqt = QuantReg(ndata, lagdata).fit(1 - self.alpha)
            lqt = QuantReg(ndata, lagdata).fit(self.alpha)

        self.mean_qt = [k for k in mqt.params]
        if self.alpha is not None:
            self.upper_qt = [k for k in uqt.params]
            self.lower_qt = [k for k in lqt.params]

        if self.dist:
            # fit a ladder of symmetric quantile pairs for distribution forecasting
            self.dist_qt = []
            for alpha in np.arange(0.05, 0.5, 0.05):
                lqt = QuantReg(ndata, lagdata).fit(alpha)
                uqt = QuantReg(ndata, lagdata).fit(1 - alpha)
                lo_qt = [k for k in lqt.params]
                up_qt = [k for k in uqt.params]
                self.dist_qt.append([lo_qt, up_qt])

        self.original_min = min(data)
        self.original_max = max(data)

        self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha)

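    # Note (added for clarity): the fitted coefficient vectors (mean_qt, lower_qt,
    # upper_qt and the pairs in dist_qt) each hold one weight per lagged regressor
    # produced by lagmat, with no intercept term, and linearmodel() below combines
    # a lag sample with one of these vectors.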
    def linearmodel(self, data, params):
        """Evaluate the fitted linear model (without intercept) on a lag sample."""
        #return params[0] + sum([ data[k] * params[k+1] for k in np.arange(0, self.order) ])
        return sum([data[k] * params[k] for k in np.arange(0, self.order)])

    def point_to_interval(self, data, lo_params, up_params):
        """Map a sample of point values to a [lower, upper] forecast interval."""
        lo = self.linearmodel(data, lo_params)
        up = self.linearmodel(data, up_params)
        return [lo, up]

    def interval_to_interval(self, data, lo_params, up_params):
        """Map a sample of [lower, upper] intervals to a new forecast interval."""
        lo = self.linearmodel([k[0] for k in data], lo_params)
        up = self.linearmodel([k[1] for k in data], up_params)
        return [lo, up]

    def forecast(self, data, **kwargs):
        """One step ahead point forecasts using the median quantile regression."""
        ndata = np.array(self.doTransformations(data))
        l = len(ndata)

        ret = []

        for k in np.arange(self.order, l + 1):  # +1 to forecast one step ahead given all available lags
            sample = ndata[k - self.order: k]
            ret.append(self.linearmodel(sample, self.mean_qt))

        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])

        return ret

    def forecastInterval(self, data, **kwargs):
        """One step ahead interval forecasts using the lower and upper quantile regressions."""
        ndata = np.array(self.doTransformations(data))
        l = len(ndata)

        ret = []

        for k in np.arange(self.order, l):
            sample = ndata[k - self.order: k]
            ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt))

        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)

        return ret

    def forecastAheadInterval(self, data, steps, **kwargs):
        """Multi step ahead interval forecasts; each interval is scaled by a
        horizon dependent smoothing factor."""
        ndata = np.array(self.doTransformations(data))

        smoothing = kwargs.get("smoothing", 0.9)

        l = len(ndata)

        ret = []

        nmeans = self.forecastAhead(ndata, steps, **kwargs)

        # prepend the last known values so the first windows have enough lags
        for k in np.arange(0, self.order):
            nmeans.insert(k, ndata[-(k + 1)])

        for k in np.arange(self.order, steps + self.order):
            intl = self.point_to_interval(nmeans[k - self.order: k], self.lower_qt, self.upper_qt)

            ret.append([intl[0] * (1 + k * smoothing), intl[1] * (1 + k * smoothing)])

        ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)

        return ret[-steps:]

    def forecastAheadDistribution(self, data, steps, **kwargs):
        """Multi step ahead probability distributions, built by accumulating the
        intervals of each fitted quantile pair on a discretized grid.
        Requires the model to have been trained with dist=True."""
        ndata = np.array(self.doTransformations(data))

        percentile_size = (self.original_max - self.original_min) / 100

        resolution = kwargs.get('resolution', percentile_size)

        grid = self.get_empty_grid(self.original_min, self.original_max, resolution)

        index = SortedCollection.SortedCollection(iterable=grid.keys())

        ret = []
        tmps = []

        grids = {}
        for k in np.arange(self.order, steps + self.order):
            grids[k] = self.get_empty_grid(self.original_min, self.original_max, resolution)

        for qt in self.dist_qt:
            # start from degenerate intervals at the last known values
            intervals = [[k, k] for k in ndata[-self.order:]]
            for k in np.arange(self.order, steps + self.order):
                intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1])
                intervals.append(intl)
                grids[k] = self.gridCount(grids[k], resolution, index, intl)

        # normalize the counts of each horizon into a probability distribution
        for k in np.arange(self.order, steps + self.order):
            tmp = np.array([grids[k][i] for i in sorted(grids[k])])
            ret.append(tmp / sum(tmp))

        grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
        df = pd.DataFrame(ret, columns=sorted(grid))
        return df
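
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module).
# It assumes the pyFTS fts.FTS base class leaves the data unchanged when no
# transformation is attached; the synthetic series, the variable names and the
# order/alpha values below are arbitrary examples.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    np.random.seed(0)
    # synthetic autocorrelated series, purely for demonstration
    y = np.cumsum(np.random.normal(0, 1, 300)) + 50

    model = QuantileRegression("QR_example", alpha=0.05)
    model.train(y[:250], None, order=1)

    point_forecasts = model.forecast(y[250:260])
    interval_forecasts = model.forecastInterval(y[250:260])

    print(model.shortname)
    print(point_forecasts)
    print(interval_forecasts)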