- Improvements to interval and distribution m-step-ahead forecasts for arima and quantreg

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-17 10:45:10 -03:00
parent b3e1a70602
commit 67358fa000
6 changed files with 113 additions and 44 deletions

View File

@@ -20,6 +20,7 @@ class ARIMA(fts.FTS):
self.is_high_order = True
self.has_point_forecasting = True
self.has_interval_forecasting = True
self.has_probability_forecasting = True
self.model = None
self.model_fit = None
self.trained_data = None
@@ -44,7 +45,6 @@ class ARIMA(fts.FTS):
try:
self.model = stats_arima(data, order=(self.p, self.d, self.q))
self.model_fit = self.model.fit(disp=0)
print(np.sqrt(self.model_fit.sigma2))
except Exception as ex:
print(ex)
self.model_fit = None
@@ -145,6 +145,9 @@ class ARIMA(fts.FTS):
return ret
def empty_grid(self, resolution):
return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
def forecastAheadDistribution(self, data, steps, **kwargs):
smoothing = kwargs.get("smoothing", 0.5)
@@ -158,7 +161,7 @@ class ARIMA(fts.FTS):
resolution = kwargs.get('resolution', percentile_size)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
grid = self.empty_grid(resolution)
index = SortedCollection.SortedCollection(iterable=grid.keys())
@@ -167,7 +170,7 @@ class ARIMA(fts.FTS):
nmeans = self.forecastAhead(ndata, steps, **kwargs)
for k in np.arange(0, steps):
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
grid = self.empty_grid(resolution)
for alpha in np.arange(0.05, 0.5, 0.05):
tmp = []
@@ -182,6 +185,6 @@ class ARIMA(fts.FTS):
ret.append(tmp / sum(tmp))
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
grid = self.empty_grid(resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
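The new empty_grid() helper above replaces the inline get_empty_grid(self.original_min, self.original_max, resolution) calls with a grid spanning [-2*original_max, 2*original_max], so m-step-ahead distributions that drift outside the training range still fall on valid bins. A minimal standalone sketch of that idea, assuming the grid is a dict of zero-initialized bins (make_grid and the numbers below are illustrative, not pyFTS code):

# Illustrative sketch only: why the distribution grid is widened for ahead forecasts.
import numpy as np

def make_grid(_min, _max, resolution):
    # dict of bin -> count, zero-initialized, matching how the grid is consumed above
    return {round(k, 2): 0 for k in np.arange(_min, _max, resolution)}

train_max = 100.0
narrow = make_grid(0.0, train_max, 10.0)                  # old behaviour: training range only
wide = make_grid(-(train_max * 2), train_max * 2, 10.0)   # new empty_grid(): room for drifting forecasts

print(len(narrow), len(wide))  # the widened grid covers values that m-step forecasts may reach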

View File

@@ -16,13 +16,19 @@ import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from probabilistic import ProbabilityDistribution
from pyFTS.probabilistic import ProbabilityDistribution
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
from pyFTS.benchmarks import Util as bUtil
from pyFTS.common import Transformations, Util
# from sklearn.cross_validation import KFold
from pyFTS.partitioners import Grid
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green',
'cyan', 'blue', 'darkblue', 'purple', 'darkviolet']
@@ -472,6 +478,21 @@ def plot_distribution(dist):
def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
points=True, intervals=True, linewidth=1.5):
"""
Plot the point and/or interval forecasts of several one-step-ahead models against the original series
:param original: Original time series data (list)
:param models: List of fitted models to compare
:param colors: List of colors, one per model
:param typeonlegend: Add the forecast type (point / interval) to the legend labels
:param save: Save the picture to a file
:param file: Filename used to save the picture
:param tam: Size of the picture
:param points: True to plot the point forecasts, False otherwise
:param intervals: True to plot the interval forecasts, False otherwise
:param linewidth: Line width of the plotted forecasts
:return:
"""
fig = plt.figure(figsize=tam)
ax = fig.add_subplot(111)
@@ -504,8 +525,12 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
upper.insert(0, None)
lbl = fts.shortname
if typeonlegend: lbl += " (Interval)"
ax.plot(lower, color=colors[count], label=lbl, ls="--",linewidth=linewidth)
ax.plot(upper, color=colors[count], ls="--",linewidth=linewidth)
if not points and intervals:
ls = "-"
else:
ls = "--"
ax.plot(lower, color=colors[count], label=lbl, ls=ls,linewidth=linewidth)
ax.plot(upper, color=colors[count], ls=ls,linewidth=linewidth)
handles0, labels0 = ax.get_legend_handles_labels()
lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
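A usage sketch of the interval-only mode enabled by the ls switch above, following the call exercised in the test script at the end of this commit (the DataSets path and the ARIMA setup are taken from that script and assume its working directory; linewidth is the new optional argument):

import numpy as np
import pandas as pd
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import arima

# Data loading as in the test script below
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"])

tmp = arima.ARIMA("", alpha=0.25)
tmp.train(enrollments, None, order=(1, 0, 0))

# points=False with intervals=True now draws the interval bounds with a solid line style
bchmk.plot_compared_series(enrollments, [tmp], ['blue', 'red'],
                           points=False, intervals=True, linewidth=2.0)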
@@ -685,7 +710,24 @@ def print_distribution_statistics(original, models, steps, resolution):
def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to,
interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
cmap='Blues',option=2):
cmap='Blues',option=2, linewidth=1.5):
"""
Plot the m-step-ahead forecasts of several models, by interval and/or distribution
:param original: Original time series data (list)
:param models: List of fitted models to compare
:param colors: List of colors, one per model
:param distributions: List of booleans, one per model, True to plot that model's forecast distribution
:param time_from: Index of the data point where the ahead forecasting starts
:param time_to: Number of steps ahead to forecast
:param interpol: Fill the space between consecutive distribution plots
:param save: Save the picture to a file
:param file: Filename used to save the picture
:param tam: Size of the picture
:param resolution: Grid resolution used to plot the forecast distributions
:param cmap: Color map used on the distribution plot
:param option: Distribution type to be passed to the models
:param linewidth: Line width of the plotted forecasts
:return:
"""
fig = plt.figure(figsize=tam)
ax = fig.add_subplot(111)
@@ -739,7 +781,6 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
cb.set_label('Density')
if fts.has_interval_forecasting:
forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
lower = [kk[0] for kk in forecasts]
@@ -749,8 +790,8 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
for k in np.arange(0, time_from - fts.order):
lower.insert(0, None)
upper.insert(0, None)
ax.plot(lower, color=colors[count], label=fts.shortname)
ax.plot(upper, color=colors[count])
ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
ax.plot(upper, color=colors[count], linewidth=linewidth*1.5)
else:
forecasts = fts.forecast(original)
@@ -760,10 +801,12 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
forecasts.insert(0, None)
ax.plot(forecasts, color=colors[count], label=fts.shortname)
ax.plot(original, color='black', label="Original")
ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
handles0, labels0 = ax.get_legend_handles_labels()
ax.legend(handles0, labels0, loc=2)
# ax.set_title(fts.name)
if True in distributions:
lgd = ax.legend(handles0, labels0, loc=2)
else:
lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
_mi = min(mi)
if _mi < 0:
_mi *= 1.1
@@ -780,9 +823,7 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
ax.set_xlabel('T')
ax.set_xlim([0, len(original)])
#plt.colorbar()
Util.showAndSaveImage(fig, file, save)
Util.showAndSaveImage(fig, file, save, lgd=lgd)
def plotCompared(original, forecasts, labels, title):
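A hedged sketch of plot_compared_intervals_ahead with the new linewidth argument and legend handling; the model setup mirrors the test script in this commit, while the distributions, time_from and time_to values are illustrative assumptions:

import numpy as np
import pandas as pd
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import arima

enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"])

model = arima.ARIMA("", alpha=0.25)
model.train(enrollments, None, order=(1, 0, 0))

# distributions takes one flag per model; with no distribution plotted
# the legend is anchored outside the axes (the bbox_to_anchor branch above)
bchmk.plot_compared_intervals_ahead(enrollments, [model], ['blue'], distributions=[False],
                                    time_from=15, time_to=5, linewidth=2.0)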

View File

@@ -19,6 +19,7 @@ class QuantileRegression(fts.FTS):
self.has_point_forecasting = True
self.has_interval_forecasting = True
self.has_probability_forecasting = True
self.benchmark_only = True
self.minOrder = 1
self.alpha = kwargs.get("alpha", 0.05)
@@ -108,14 +109,21 @@ class QuantileRegression(fts.FTS):
def forecastAheadInterval(self, data, steps, **kwargs):
ndata = np.array(self.doTransformations(data))
smoothing = kwargs.get("smoothing", 0.9)
l = len(ndata)
ret = [[k, k] for k in ndata[-self.order:]]
ret = []
for k in np.arange(self.order, steps + self.order):
intl = self.interval_to_interval([ret[x] for x in np.arange(k - self.order, k)], self.lower_qt, self.upper_qt)
nmeans = self.forecastAhead(ndata, steps, **kwargs)
ret.append(intl)
for k in np.arange(0, self.order):
nmeans.insert(k,ndata[-(k+1)])
for k in np.arange(self.order, steps+self.order):
intl = self.point_to_interval(nmeans[k - self.order: k], self.lower_qt, self.upper_qt)
ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])
ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)
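The rewritten forecastAheadInterval above first obtains point forecasts from forecastAhead, prepends the last `order` observations, maps each lagged window to an interval through the fitted lower/upper quantile models, and then inflates both bounds by (1 + k*smoothing) as the horizon k grows. A small numeric sketch of that widening with made-up bounds (it illustrates only the smoothing factor, not the quantile regression itself):

# Illustrative only: how the (1 + k*smoothing) factor inflates the interval with the horizon k.
import numpy as np

smoothing = 0.9                        # default used above
base_lower, base_upper = 95.0, 105.0   # pretend point_to_interval() output

for k in np.arange(1, 6):
    lower = base_lower * (1 + k * smoothing)
    upper = base_upper * (1 + k * smoothing)
    print(k, round(lower, 2), round(upper, 2))   # both bounds, and their gap, grow with k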

View File

@@ -4,6 +4,7 @@ from bisect import bisect_left, bisect_right
# Original Source Code: https://code.activestate.com/recipes/577197-sortedcollection/
# Author: RAYMOND HETTINGER
class SortedCollection(object):
'''Sequence sorted by a key function.
@@ -204,8 +205,8 @@ class SortedCollection(object):
raise ValueError('No item found between keys : %r,%r' % (ge,le))
def inside(self, ge, le):
g = bisect_right(self._keys, ge)
l = bisect_left(self._keys, le)
l = bisect_right(self._keys, le)
g = bisect_left(self._keys, ge)
if g != len(self) and l != len(self) and g != l:
return self._items[g : l]
elif g != len(self) and l != len(self) and g == l:
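The corrected inside(ge, le) above uses bisect_left on the lower key and bisect_right on the upper key, so both endpoints are included when slicing the sorted keys. A standalone illustration on a plain sorted list (keys and values below are made up):

from bisect import bisect_left, bisect_right

keys = [10, 20, 30, 40, 50]

def inside(keys, ge, le):
    g = bisect_left(keys, ge)    # first index with key >= ge
    l = bisect_right(keys, le)   # one past the last index with key <= le
    return keys[g:l]

print(inside(keys, 20, 40))   # [20, 30, 40] -- both endpoints included
print(inside(keys, 15, 35))   # [20, 30]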

View File

@@ -6,7 +6,9 @@ import pandas as pd
import math
from operator import itemgetter
from pyFTS.common import FLR, FuzzySet, SortedCollection
from pyFTS import fts
from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
from pyFTS.benchmarks import arima, quantreg
from pyFTS.common import Transformations
class EnsembleFTS(fts.FTS):
@@ -24,10 +26,14 @@ class EnsembleFTS(fts.FTS):
def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None):
self.models = []
methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS,
hofts.HighOrderFTS, arima.ARIMA, quantreg.QuantileRegression]
for count, model in enumerate(models, start=0):
mfts = model("")
transformations = [None, Transformations.Differential(1)]
for count, method in enumerate(methods, start=0):
mfts = method("")
if mfts.benchmark_only:
if transformation is not None:
mfts.appendTransformation(transformation)
@@ -37,7 +43,7 @@ class EnsembleFTS(fts.FTS):
for partition in partitions:
for partitioner in partitioners:
data_train_fs = partitioner(data, partition, transformation=transformation)
mfts = model("")
mfts = method("")
mfts.partitioner = data_train_fs
if not mfts.is_high_order:
@@ -50,7 +56,7 @@ class EnsembleFTS(fts.FTS):
else:
for order in np.arange(1, max_order + 1):
if order >= mfts.min_order:
mfts = model("")
mfts = method("")
mfts.partitioner = data_train_fs
if transformation is not None:
@@ -60,6 +66,7 @@ class EnsembleFTS(fts.FTS):
self.models.append(mfts)
def train(self, data, sets, order=1,parameters=None):
#if self.models is None:
pass
def forecast(self, data, **kwargs):
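build() now ignores the models argument and enumerates the fixed methods list over every (partitioner, partition, order) combination, one candidate model per combination (benchmark-only methods and non-high-order models are handled separately above). A pure-Python sketch of that enumeration with placeholder names instead of the pyFTS classes, ignoring those special cases:

# Structural sketch only; MethodA/MethodB and the partition values are placeholders.
import itertools

methods = ['MethodA', 'MethodB']          # stands in for song, chen, ..., arima, quantreg
partitioners = ['GridPartitioner']
partitions = [10, 20]
max_order = 3

pool = []
for method, partitioner, partition in itertools.product(methods, partitioners, partitions):
    for order in range(1, max_order + 1):
        pool.append((method, partitioner, partition, order))

print(len(pool))   # one candidate per combination, as the nested loops above fill self.models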

View File

@@ -19,8 +19,8 @@ from numpy import random
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
#enrollments = np.array(enrollments["Enrollments"])
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"])
diff = Transformations.Differential(1)
@@ -28,14 +28,20 @@ diff = Transformations.Differential(1)
DATASETS
"""
passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
passengers = np.array(passengers["Passengers"])
#sunspots = pd.read_csv("DataSets/sunspots.csv", sep=",")
#sunspots = np.array(sunspots["SUNACTIVITY"])
#gauss = random.normal(0,1.0,5000)
#gauss_teste = random.normal(0,1.0,400)
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex = np.array(taiexpd["avg"][:5000])
nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
nasdaq = np.array(nasdaqpd["avg"][0:5000])
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq = np.array(nasdaqpd["avg"][0:5000])
#sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",")
#sp500 = np.array(sp500pd["Avg"][11000:])
@@ -43,7 +49,7 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
#sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
#sondapd = sondapd.dropna(axis=0, how='any')
#sonda = np.array(sondapd["ws_10m"])
#sonda = np.array(sondapd["glo_avg"])
#del(sondapd)
#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
@@ -53,8 +59,8 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
#print(lag)
#print(a)
#from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import distributed_benchmarks as bchmk
from pyFTS.benchmarks import benchmarks as bchmk
#from pyFTS.benchmarks import distributed_benchmarks as bchmk
#from pyFTS.benchmarks import parallel_benchmarks as bchmk
from pyFTS.benchmarks import Util
from pyFTS.benchmarks import arima, quantreg, Measures
@@ -65,17 +71,20 @@ from pyFTS.benchmarks import arima, quantreg, Measures
#"""
tmp = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
tmp.train(nasdaq[:1600], None, order=(1,0,1))
teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=100)
tmp.train(enrollments, None, order=(1,0,0))
teste = tmp.forecastInterval(enrollments)
#tmp = quantreg.QuantileRegression("", dist=True)
#tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True)
#tmp.appendTransformation(diff)
#tmp.train(nasdaq[:1600], None, order=1)
#tmp.train(sunspots[:150], None, order=1)
#teste = tmp.forecastAheadInterval(sunspots[150:155], 5)
#teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=50)
print(nasdaq[1600:1605])
print(teste)
bchmk.plot_compared_series(enrollments,[tmp], ['blue','red'], points=False, intervals=True)
#print(sunspots[150:155])
#print(teste)
#kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)
@@ -109,10 +118,10 @@ bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.Weighte
"_interval_analytic.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8, #models=[yu.WeightedFTS], # #
bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
partitions= np.arange(3,20,step=2), transformation=diff,
dump=True, save=True, file="experiments/best_interval_analytic_diff.csv",
dump=True, save=True, file="experiments/sp500_analytic_diff.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
#"""