- Implementation of interval and distribution m-step-ahead forecasts for arima and quantreg

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-15 14:06:26 -03:00
parent 0d8d6c9240
commit b3e1a70602
11 changed files with 206 additions and 124 deletions

View File

@ -272,7 +272,7 @@ def get_point_statistics(data, model, indexer=None):
def get_interval_statistics(original, model):
"""Condensate all measures for interval forecasters"""
"""Condensate all measures for point_to_interval forecasters"""
ret = list()
forecasts = model.forecastInterval(original)
ret.append(round(sharpness(forecasts), 2))
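Sharpness here is a width-based score: the mean width of the forecast intervals, where smaller is sharper. A minimal standalone sketch of that measure (the function body is an assumption about the metric's usual definition, not a copy of pyFTS's Measures code):

    import numpy as np

    def sharpness(forecasts):
        # Mean width of the [lower, upper] forecast intervals; lower is sharper
        return np.mean([up - lo for lo, up in forecasts])

    intervals = [[9.5, 10.5], [9.8, 11.0], [10.1, 11.3]]
    print(round(sharpness(intervals), 2))  # 1.13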

View File

@ -2,9 +2,11 @@
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_model import ARIMA as stats_arima
import scipy.stats as st
from pyFTS import fts
from pyFTS.common import SortedCollection
class ARIMA(fts.FTS):
@ -109,7 +111,7 @@ class ARIMA(fts.FTS):
ret.append(tmp)
#ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)
#ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], point_to_interval=True)
return ret
@ -117,7 +119,7 @@ class ARIMA(fts.FTS):
if self.model_fit is None:
return np.nan
smoothing = kwargs.get("smoothing",0.2)
smoothing = kwargs.get("smoothing",0.5)
sigma = np.sqrt(self.model_fit.sigma2)
@ -125,7 +127,7 @@ class ARIMA(fts.FTS):
l = len(ndata)
means = self.forecastAhead(data,steps,kwargs)
nmeans = self.forecastAhead(ndata, steps, **kwargs)
ret = []
@ -134,11 +136,52 @@ class ARIMA(fts.FTS):
hsigma = (1 + k*smoothing)*sigma
tmp.append(means[k] + st.norm.ppf(self.alpha) * hsigma)
tmp.append(means[k] + st.norm.ppf(1 - self.alpha) * hsigma)
tmp.append(nmeans[k] + st.norm.ppf(self.alpha) * hsigma)
tmp.append(nmeans[k] + st.norm.ppf(1 - self.alpha) * hsigma)
ret.append(tmp)
ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0,steps)]], interval=True)
return ret
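The interval logic above is a Gaussian widening heuristic: the k-step-ahead interval is centered on the recursive point forecast, with the residual standard deviation inflated to (1 + k * smoothing) * sigma before applying the normal quantiles. A self-contained sketch of the same arithmetic, assuming the point forecasts and sigma are already available:

    import numpy as np
    import scipy.stats as st

    def heuristic_ahead_intervals(means, sigma, alpha=0.05, smoothing=0.5):
        ret = []
        for k, mean in enumerate(means):
            hsigma = (1 + k * smoothing) * sigma      # widen with the horizon
            lo = mean + st.norm.ppf(alpha) * hsigma   # ppf(alpha) is negative
            up = mean + st.norm.ppf(1 - alpha) * hsigma
            ret.append([lo, up])
        return ret

    print(heuristic_ahead_intervals([10.0, 10.2, 10.4], sigma=0.5))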
def forecastAheadDistribution(self, data, steps, **kwargs):
smoothing = kwargs.get("smoothing", 0.5)
sigma = np.sqrt(self.model_fit.sigma2)
ndata = np.array(self.doTransformations(data))
l = len(ndata)
percentile_size = (self.original_max - self.original_min)/100
resolution = kwargs.get('resolution', percentile_size)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
index = SortedCollection.SortedCollection(iterable=grid.keys())
ret = []
nmeans = self.forecastAhead(ndata, steps, **kwargs)
for k in np.arange(0, steps):
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
for alpha in np.arange(0.05, 0.5, 0.05):
tmp = []
hsigma = (1 + k * smoothing) * sigma
tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma)
tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma)
grid = self.gridCount(grid, resolution, index, tmp)
tmp = np.array([grid[i] for i in sorted(grid)])
ret.append(tmp / sum(tmp))
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
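forecastAheadDistribution turns a ladder of quantile intervals into a histogram: start from an empty grid over [original_min, original_max] at the chosen resolution, add one count to every bin covered by each alpha-interval, then normalize the counts. A sketch of the counting idea, using a plain dict scan where pyFTS uses the SortedCollection index:

    import numpy as np
    import scipy.stats as st

    def empty_grid(_min, _max, resolution):
        return {b: 0 for b in np.arange(_min, _max, resolution)}

    def grid_count(grid, interval):
        lo, up = interval
        for b in grid:                 # count every bin whose edge lies in [lo, up]
            if lo <= b <= up:
                grid[b] += 1
        return grid

    mean, hsigma = 10.0, 1.0
    grid = empty_grid(5.0, 15.0, 0.5)
    for alpha in np.arange(0.05, 0.5, 0.05):
        grid_count(grid, [mean + st.norm.ppf(alpha) * hsigma,
                          mean + st.norm.ppf(1 - alpha) * hsigma])

    total = sum(grid.values())
    histogram = {b: c / total for b, c in sorted(grid.items())}  # discrete distribution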

View File

@ -47,12 +47,12 @@ def get_point_methods():
def get_benchmark_interval_methods():
"""Return all non FTS methods for interval forecasting"""
"""Return all non FTS methods for point_to_interval forecasting"""
return [quantreg.QuantileRegression]
def get_interval_methods():
"""Return all FTS methods for interval forecasting"""
"""Return all FTS methods for point_to_interval forecasting"""
return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]

View File

@ -260,7 +260,7 @@ def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None,
benchmark_models=None, benchmark_models_parameters = None,
save=False, file=None, sintetic=False,nodes=None, depends=None):
"""
Distributed sliding window benchmarks for FTS interval forecasters
Distributed sliding window benchmarks for FTS point_to_interval forecasters
:param data:
:param windowsize: size of sliding window
:param train: percentage of the sliding window data used to train the models

View File

@ -187,7 +187,7 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
save=False, file=None, sintetic=False):
"""
Parallel sliding window benchmarks for FTS interval forecasters
Parallel sliding window benchmarks for FTS point_to_interval forecasters
:param data:
:param windowsize: size of sliding window
:param train: percentage of the sliding window data used to train the models

View File

@ -2,9 +2,11 @@
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
from statsmodels.regression.quantile_regression import QuantReg
from statsmodels.tsa.tsatools import lagmat
from pyFTS import fts
from pyFTS.common import SortedCollection
class QuantileRegression(fts.FTS):
@ -20,9 +22,11 @@ class QuantileRegression(fts.FTS):
self.benchmark_only = True
self.minOrder = 1
self.alpha = kwargs.get("alpha", 0.05)
self.dist = kwargs.get("dist", False)
self.upper_qt = None
self.mean_qt = None
self.lower_qt = None
self.dist_qt = None
self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"
def train(self, data, sets, order=1, parameters=None):
@ -42,12 +46,34 @@ class QuantileRegression(fts.FTS):
self.upper_qt = [k for k in uqt.params]
self.lower_qt = [k for k in lqt.params]
if self.dist:
self.dist_qt = []
for alpha in np.arange(0.05,0.5,0.05):
lqt = QuantReg(ndata, lagdata).fit(alpha)
uqt = QuantReg(ndata, lagdata).fit(1 - alpha)
lo_qt = [k for k in lqt.params]
up_qt = [k for k in uqt.params]
self.dist_qt.append([lo_qt, up_qt])
self.original_min = min(data)
self.original_max = max(data)
self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha)
def linearmodel(self,data,params):
#return params[0] + sum([ data[k] * params[k+1] for k in np.arange(0, self.order) ])
return sum([data[k] * params[k] for k in np.arange(0, self.order)])
def point_to_interval(self, data, lo_params, up_params):
lo = self.linearmodel(data, lo_params)
up = self.linearmodel(data, up_params)
return [lo, up]
def interval_to_interval(self, data, lo_params, up_params):
lo = self.linearmodel([k[0] for k in data], lo_params)
up = self.linearmodel([k[1] for k in data], up_params)
return [lo, up]
def forecast(self, data, **kwargs):
ndata = np.array(self.doTransformations(data))
l = len(ndata)
@ -73,10 +99,57 @@ class QuantileRegression(fts.FTS):
for k in np.arange(self.order , l):
sample = ndata[k - self.order: k]
up = self.linearmodel(sample, self.upper_qt)
down = self.linearmodel(sample, self.lower_qt)
ret.append([down, up])
ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt))
ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)
return ret
def forecastAheadInterval(self, data, steps, **kwargs):
ndata = np.array(self.doTransformations(data))
l = len(ndata)
ret = [[k, k] for k in ndata[-self.order:]]
for k in np.arange(self.order, steps + self.order):
intl = self.interval_to_interval([ret[x] for x in np.arange(k - self.order, k)], self.lower_qt, self.upper_qt)
ret.append(intl)
ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)
return ret[-steps:]
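forecastAheadInterval seeds the recursion with degenerate intervals [k, k] from the last `order` observations, then repeatedly maps the lower endpoints through the lower-quantile model and the upper endpoints through the upper-quantile one. A standalone sketch of that loop (no intercept, matching linearmodel above; note the propagation only preserves ordering when the coefficients are nonnegative):

    import numpy as np

    def linearmodel(data, params):
        return sum(x * p for x, p in zip(data, params))

    def interval_to_interval(intervals, lo_params, up_params):
        lo = linearmodel([i[0] for i in intervals], lo_params)
        up = linearmodel([i[1] for i in intervals], up_params)
        return [lo, up]

    def forecast_ahead_interval(history, lo_params, up_params, order, steps):
        ret = [[k, k] for k in history[-order:]]   # degenerate seed intervals
        for k in range(order, steps + order):
            ret.append(interval_to_interval(ret[k - order:k], lo_params, up_params))
        return ret[-steps:]

    print(forecast_ahead_interval([9.0, 10.0], lo_params=[0.45, 0.5],
                                  up_params=[0.5, 0.55], order=2, steps=3))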
def forecastAheadDistribution(self, data, steps, **kwargs):
ndata = np.array(self.doTransformations(data))
percentile_size = (self.original_max - self.original_min) / 100
resolution = kwargs.get('resolution', percentile_size)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
index = SortedCollection.SortedCollection(iterable=grid.keys())
ret = []
tmps = []
grids = {}
for k in np.arange(self.order, steps + self.order):
grids[k] = self.get_empty_grid(self.original_min, self.original_max, resolution)
for qt in self.dist_qt:
intervals = [[k, k] for k in ndata[-self.order:]]
for k in np.arange(self.order, steps + self.order):
intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1])
intervals.append(intl)
grids[k] = self.gridCount(grids[k], resolution, index, intl)
for k in np.arange(self.order, steps + self.order):
tmp = np.array([grids[k][i] for i in sorted(grids[k])])
ret.append(tmp / sum(tmp))
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
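The dist_qt ladder used above comes from refitting the quantile regression at each alpha level during train(). A sketch of that fitting step with statsmodels on synthetic data (only the variable names mirror the class; the series and order are illustrative):

    import numpy as np
    from statsmodels.regression.quantile_regression import QuantReg
    from statsmodels.tsa.tsatools import lagmat

    order = 2
    data = 50 + np.cumsum(np.random.normal(0, 1, 300))   # synthetic series
    lagdata, ndata = lagmat(data, maxlag=order, trim="both", original="sep")

    dist_qt = []   # one [lower, upper] coefficient pair per alpha
    for alpha in np.arange(0.05, 0.5, 0.05):
        lqt = QuantReg(ndata, lagdata).fit(q=alpha)       # lower-quantile model
        uqt = QuantReg(ndata, lagdata).fit(q=1 - alpha)   # upper-quantile model
        dist_qt.append([list(lqt.params), list(uqt.params)])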

View File

@ -49,7 +49,7 @@ class Differential(Transformation):
def inverse(self,data, param, **kwargs):
interval = kwargs.get("interval",False)
interval = kwargs.get("point_to_interval",False)
if isinstance(data, (np.ndarray, np.generic)):
data = data.tolist()
@ -62,7 +62,7 @@ class Differential(Transformation):
if not interval:
inc = [data[t] + param[t] for t in np.arange(0, n)]
else:
inc = [[data[t][0] + param[t], data[t][0] + param[t]] for t in np.arange(0, n)]
inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
if n == 1:
return inc[0]
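The one-character fix above matters: before it, both endpoints of the inverse-transformed interval were rebuilt from data[t][0], so every interval collapsed to zero width. A minimal sketch of the corrected inverse for a first difference (a hypothetical helper, not the Transformation class itself):

    def inverse_diff_interval(diffs, last_values):
        # Undo a first difference on [lo, up] interval forecasts by adding the
        # last known original value back to BOTH endpoints independently
        return [[lo + p, up + p] for (lo, up), p in zip(diffs, last_values)]

    print(inverse_diff_interval([[-0.5, 0.8]], [100.0]))   # [[99.5, 100.8]]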

View File

@ -134,7 +134,7 @@ class EnsembleFTS(fts.FTS):
return grid
def gridCount(self, grid, resolution, index, interval):
#print(interval)
#print(point_to_interval)
for k in index.inside(interval[0],interval[1]):
#print(k)
grid[k] += 1

fts.py
View File

@ -88,11 +88,19 @@ class FTS(object):
:param kwargs:
:return:
"""
ndata = [k for k in self.doTransformations(data[- self.order:])]
ret = []
for k in np.arange(0,steps):
tmp = self.forecast(data[-self.order:],kwargs)
tmp = self.forecast(ndata[-self.order:], **kwargs)
if isinstance(tmp,(list, np.ndarray)):
tmp = tmp[0]
ret.append(tmp)
data.append(tmp)
ndata.append(tmp)
ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]])
return ret
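The fixed forecastAhead transforms the input once, forwards **kwargs correctly (the old call passed kwargs as a positional argument), and feeds each one-step forecast back into the working series. The recursion in isolation, with transformations omitted and forecast_one standing in for any one-step model:

    import numpy as np

    def forecast_ahead(forecast_one, history, order, steps):
        ndata = list(history[-order:])
        ret = []
        for _ in range(steps):
            tmp = forecast_one(ndata[-order:])
            if isinstance(tmp, (list, np.ndarray)):
                tmp = tmp[0]
            ret.append(tmp)
            ndata.append(tmp)        # forecast becomes a pseudo-observation
        return ret

    # Toy one-step model: mean of the last `order` values
    print(forecast_ahead(lambda w: float(np.mean(w)), [1.0, 2.0, 3.0], order=2, steps=4))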
@ -164,7 +172,7 @@ class FTS(object):
params = [None for k in self.transformations]
for c, t in enumerate(reversed(self.transformations), start=0):
ndata = t.inverse(ndata, params[c])
ndata = t.inverse(ndata, params[c], **kwargs)
return ndata
@ -180,66 +188,39 @@ class FTS(object):
def len_total(self):
return sum([len(k) for k in self.flrgs])
def buildTreeWithoutOrder(self, node, lags, level):
    if level not in lags:
        return
    for s in lags[level]:
        node.appendChild(tree.FLRGTreeNode(s))
    for child in node.getChildren():
        self.buildTreeWithoutOrder(child, lags, level + 1)

def inputoutputmapping(self, bins=100):
    dim_uod = tuple([bins for k in range(0, self.order)])
    dim_fs = tuple([len(self.sets) for k in range(0, self.order)])
    simulation_uod = np.zeros(shape=dim_uod, dtype=float)
    simulation_fs = np.zeros(shape=dim_fs, dtype=float)
    percentiles = np.linspace(self.sets[0].lower, self.sets[-1].upper, bins).tolist()
    pdf_uod = {}
    for k in percentiles:
        pdf_uod[k] = 0
    pdf_fs = {}
    for k in self.sets:
        pdf_fs[k.name] = 0
    lags = {}
    for o in np.arange(0, self.order):
        lags[o] = percentiles
    # Build the tree with all possible paths
    root = tree.FLRGTreeNode(None)
    self.buildTreeWithoutOrder(root, lags, 0)
    # Trace the possible paths
    for p in root.paths():
        path = list(reversed(list(filter(None.__ne__, p))))
        index_uod = tuple([percentiles.index(k) for k in path])
        index_fs = tuple([FuzzySet.getMaxMembershipFuzzySetIndex(k, self.sets) for k in path])
        forecast = self.forecast(path)[0]
        simulation_uod[index_uod] = forecast
        simulation_fs[index_fs] = forecast
    return [simulation_fs, simulation_uod]

def get_empty_grid(self, _min, _max, resolution):
    grid = {}
    for sbin in np.arange(_min, _max, resolution):
        grid[sbin] = 0
    return grid

def getGridClean(self, resolution):
    if len(self.transformations) == 0:
        _min = self.sets[0].lower
        _max = self.sets[-1].upper
    else:
        _min = self.original_min
        _max = self.original_max
    return self.get_empty_grid(_min, _max, resolution)

def gridCount(self, grid, resolution, index, interval):
    #print(point_to_interval)
    for k in index.inside(interval[0], interval[1]):
        #print(k)
        grid[k] += 1
    return grid

def gridCountPoint(self, grid, resolution, index, point):
    k = index.find_ge(point)
    # print(k)
    grid[k] += 1
    return grid
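gridCount and gridCountPoint rely on two lookups against the sorted bin keys: inside(lo, up) and find_ge(x). A bisect-based sketch of those operations, standing in for pyFTS.common.SortedCollection:

    import bisect

    class SortedIndex:
        def __init__(self, keys):
            self.keys = sorted(keys)
        def inside(self, lo, up):
            # All keys k with lo <= k <= up
            i = bisect.bisect_left(self.keys, lo)
            j = bisect.bisect_right(self.keys, up)
            return self.keys[i:j]
        def find_ge(self, x):
            # First key >= x (assumes x <= max(keys))
            return self.keys[bisect.bisect_left(self.keys, x)]

    idx = SortedIndex([0.0, 0.5, 1.0, 1.5, 2.0])
    print(idx.inside(0.4, 1.6))   # [0.5, 1.0, 1.5]
    print(idx.find_ge(0.7))       # 1.0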

View File

@ -193,6 +193,18 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = sum(np.array([pi * s.lower for s in flrg.LHS]))
return ret
def buildTreeWithoutOrder(self, node, lags, level):
if level not in lags:
return
for s in lags[level]:
node.appendChild(tree.FLRGTreeNode(s))
for child in node.getChildren():
self.buildTreeWithoutOrder(child, lags, level + 1)
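buildTreeWithoutOrder, moved here from fts.py, enumerates every root-to-leaf path over the per-level lag candidates. Since the levels are independent, the paths are exactly the Cartesian product of the levels, which a small sketch makes concrete:

    from itertools import product

    lags = {0: ['A', 'B'], 1: ['C', 'D']}   # level -> candidate lag values
    paths = list(product(*(lags[level] for level in sorted(lags))))
    print(paths)   # [('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D')]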
def forecast(self, data, **kwargs):
ndata = np.array(self.doTransformations(data))
@ -440,34 +452,6 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
return ret
def getGridClean(self, resolution):
grid = {}
if len(self.transformations) == 0:
_min = self.sets[0].lower
_max = self.sets[-1].upper
else:
_min = self.original_min
_max = self.original_max
for sbin in np.arange(_min,_max, resolution):
grid[sbin] = 0
return grid
def gridCount(self, grid, resolution, index, interval):
#print(interval)
for k in index.inside(interval[0],interval[1]):
#print(k)
grid[k] += 1
return grid
def gridCountPoint(self, grid, resolution, index, point):
k = index.find_ge(point)
# print(k)
grid[k] += 1
return grid
def forecastAheadDistribution(self, data, steps, **kwargs):
ret = []

View File

@ -48,6 +48,7 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
#best = np.array(bestpd["Anomaly"])
#del(bestpd)
#print(lag)
#print(a)
@ -61,26 +62,25 @@ from pyFTS.benchmarks import arima, quantreg, Measures
#Util.cast_dataframe_to_synthetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11)
#Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
#"""
tmp = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
tmp.train(nasdaq[:1600], None, order=(2,0,2))
teste = tmp.forecastInterval(nasdaq[1600:1604])
tmp.train(nasdaq[:1600], None, order=(1,0,1))
teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=100)
#tmp = quantreg.QuantileRegression("", dist=True)
#tmp.appendTransformation(diff)
#tmp.train(nasdaq[:1600], None, order=1)
#teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=50)
"""
tmp = quantreg.QuantileRegression("", alpha=0.25)
tmp.train(taiex[:1600], None, order=1)
teste = tmp.forecastInterval(taiex[1600:1605])
"""
print(nasdaq[1600:1605])
print(teste)
kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)
print(kk)
#bchmk.teste(taiex,['192.168.0.109', '192.168.0.101'])
#kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)
#print(kk)
#"""
"""
@ -97,24 +97,25 @@ bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.Weighted
partitions= np.arange(3,20,step=2), #transformation=diff,
dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
"""
"""
bchmk.interval_sliding_window(taiex, 2000, train=0.8, inc=0.1,#models=[yu.WeightedFTS], # #
"""
bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
partitions= np.arange(10,200,step=10), #transformation=diff,
dump=True, save=True, file="experiments/taiex_interval_analytic.csv",
partitions= np.arange(10,200,step=10),
dump=True, save=True, file="experiments/best"
"_interval_analytic.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
bchmk.interval_sliding_window(nasdaq, 2000, train=0.8, inc=0.1, #models=[yu.WeightedFTS], # #
bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8, #models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
partitions= np.arange(3,20,step=2), transformation=diff,
dump=True, save=True, file="experiments/nasdaq_interval_analytic_diff.csv",
dump=True, save=True, file="experiments/best_interval_analytic_diff.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
"""
#"""
"""
from pyFTS.partitioners import Grid