- Improvementos of interval and distribution m-step ahead forecasts for arima and quantreg

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-17 10:45:10 -03:00
parent b3e1a70602
commit 67358fa000
6 changed files with 113 additions and 44 deletions

View File

@ -20,6 +20,7 @@ class ARIMA(fts.FTS):
self.is_high_order = True self.is_high_order = True
self.has_point_forecasting = True self.has_point_forecasting = True
self.has_interval_forecasting = True self.has_interval_forecasting = True
self.has_probability_forecasting = True
self.model = None self.model = None
self.model_fit = None self.model_fit = None
self.trained_data = None self.trained_data = None
@ -44,7 +45,6 @@ class ARIMA(fts.FTS):
try: try:
self.model = stats_arima(data, order=(self.p, self.d, self.q)) self.model = stats_arima(data, order=(self.p, self.d, self.q))
self.model_fit = self.model.fit(disp=0) self.model_fit = self.model.fit(disp=0)
print(np.sqrt(self.model_fit.sigma2))
except Exception as ex: except Exception as ex:
print(ex) print(ex)
self.model_fit = None self.model_fit = None
@ -145,6 +145,9 @@ class ARIMA(fts.FTS):
return ret return ret
def empty_grid(self, resolution):
return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
def forecastAheadDistribution(self, data, steps, **kwargs): def forecastAheadDistribution(self, data, steps, **kwargs):
smoothing = kwargs.get("smoothing", 0.5) smoothing = kwargs.get("smoothing", 0.5)
@ -158,7 +161,7 @@ class ARIMA(fts.FTS):
resolution = kwargs.get('resolution', percentile_size) resolution = kwargs.get('resolution', percentile_size)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution) grid = self.empty_grid(resolution)
index = SortedCollection.SortedCollection(iterable=grid.keys()) index = SortedCollection.SortedCollection(iterable=grid.keys())
@ -167,7 +170,7 @@ class ARIMA(fts.FTS):
nmeans = self.forecastAhead(ndata, steps, **kwargs) nmeans = self.forecastAhead(ndata, steps, **kwargs)
for k in np.arange(0, steps): for k in np.arange(0, steps):
grid = self.get_empty_grid(self.original_min, self.original_max, resolution) grid = self.empty_grid(resolution)
for alpha in np.arange(0.05, 0.5, 0.05): for alpha in np.arange(0.05, 0.5, 0.05):
tmp = [] tmp = []
@ -182,6 +185,6 @@ class ARIMA(fts.FTS):
ret.append(tmp / sum(tmp)) ret.append(tmp / sum(tmp))
grid = self.get_empty_grid(self.original_min, self.original_max, resolution) grid = self.empty_grid(resolution)
df = pd.DataFrame(ret, columns=sorted(grid)) df = pd.DataFrame(ret, columns=sorted(grid))
return df return df

View File

@ -16,13 +16,19 @@ import numpy as np
import pandas as pd import pandas as pd
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
from probabilistic import ProbabilityDistribution from pyFTS.probabilistic import ProbabilityDistribution
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
from pyFTS.benchmarks import Util as bUtil from pyFTS.benchmarks import Util as bUtil
from pyFTS.common import Transformations, Util from pyFTS.common import Transformations, Util
# from sklearn.cross_validation import KFold # from sklearn.cross_validation import KFold
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green', colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green',
'cyan', 'blue', 'darkblue', 'purple', 'darkviolet'] 'cyan', 'blue', 'darkblue', 'purple', 'darkviolet']
@ -472,6 +478,21 @@ def plot_distribution(dist):
def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5], def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
points=True, intervals=True, linewidth=1.5): points=True, intervals=True, linewidth=1.5):
"""
Plot the forecasts of several one step ahead models, by point or by interval
:param original: Original time series data (list)
:param models: List of models to compare
:param colors: List of models colors
:param typeonlegend: Add the type of forecast (point / interval) on legend
:param save: Save the picture on file
:param file: Filename to save the picture
:param tam: Size of the picture
:param points: True to plot the point forecasts, False otherwise
:param intervals: True to plot the interval forecasts, False otherwise
:param linewidth:
:return:
"""
fig = plt.figure(figsize=tam) fig = plt.figure(figsize=tam)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
@ -504,8 +525,12 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
upper.insert(0, None) upper.insert(0, None)
lbl = fts.shortname lbl = fts.shortname
if typeonlegend: lbl += " (Interval)" if typeonlegend: lbl += " (Interval)"
ax.plot(lower, color=colors[count], label=lbl, ls="--",linewidth=linewidth) if not points and intervals:
ax.plot(upper, color=colors[count], ls="--",linewidth=linewidth) ls = "-"
else:
ls = "--"
ax.plot(lower, color=colors[count], label=lbl, ls=ls,linewidth=linewidth)
ax.plot(upper, color=colors[count], ls=ls,linewidth=linewidth)
handles0, labels0 = ax.get_legend_handles_labels() handles0, labels0 = ax.get_legend_handles_labels()
lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1)) lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
@ -685,7 +710,24 @@ def print_distribution_statistics(original, models, steps, resolution):
def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to,
interpol=False, save=False, file=None, tam=[20, 5], resolution=None, interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
cmap='Blues',option=2): cmap='Blues',option=2, linewidth=1.5):
"""
Plot the forecasts of several one step ahead models, by point or by interval
:param original: Original time series data (list)
:param models: List of models to compare
:param colors: List of models colors
:param distributions: True to plot a distribution
:param time_from: index of data poit to start the ahead forecasting
:param time_to: number of steps ahead to forecast
:param interpol: Fill space between distribution plots
:param save: Save the picture on file
:param file: Filename to save the picture
:param tam: Size of the picture
:param resolution:
:param cmap: Color map to be used on distribution plot
:param option: Distribution type to be passed for models
:return:
"""
fig = plt.figure(figsize=tam) fig = plt.figure(figsize=tam)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
@ -739,7 +781,6 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
cb.set_label('Density') cb.set_label('Density')
if fts.has_interval_forecasting: if fts.has_interval_forecasting:
forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to) forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
lower = [kk[0] for kk in forecasts] lower = [kk[0] for kk in forecasts]
@ -749,8 +790,8 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
for k in np.arange(0, time_from - fts.order): for k in np.arange(0, time_from - fts.order):
lower.insert(0, None) lower.insert(0, None)
upper.insert(0, None) upper.insert(0, None)
ax.plot(lower, color=colors[count], label=fts.shortname) ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
ax.plot(upper, color=colors[count]) ax.plot(upper, color=colors[count], linewidth=linewidth*1.5)
else: else:
forecasts = fts.forecast(original) forecasts = fts.forecast(original)
@ -760,10 +801,12 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
forecasts.insert(0, None) forecasts.insert(0, None)
ax.plot(forecasts, color=colors[count], label=fts.shortname) ax.plot(forecasts, color=colors[count], label=fts.shortname)
ax.plot(original, color='black', label="Original") ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
handles0, labels0 = ax.get_legend_handles_labels() handles0, labels0 = ax.get_legend_handles_labels()
ax.legend(handles0, labels0, loc=2) if True in distributions:
# ax.set_title(fts.name) lgd = ax.legend(handles0, labels0, loc=2)
else:
lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
_mi = min(mi) _mi = min(mi)
if _mi < 0: if _mi < 0:
_mi *= 1.1 _mi *= 1.1
@ -780,9 +823,7 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
ax.set_xlabel('T') ax.set_xlabel('T')
ax.set_xlim([0, len(original)]) ax.set_xlim([0, len(original)])
#plt.colorbar() Util.showAndSaveImage(fig, file, save, lgd=lgd)
Util.showAndSaveImage(fig, file, save)
def plotCompared(original, forecasts, labels, title): def plotCompared(original, forecasts, labels, title):

View File

@ -19,6 +19,7 @@ class QuantileRegression(fts.FTS):
self.has_point_forecasting = True self.has_point_forecasting = True
self.has_interval_forecasting = True self.has_interval_forecasting = True
self.has_probability_forecasting = True self.has_probability_forecasting = True
self.has_probability_forecasting = True
self.benchmark_only = True self.benchmark_only = True
self.minOrder = 1 self.minOrder = 1
self.alpha = kwargs.get("alpha", 0.05) self.alpha = kwargs.get("alpha", 0.05)
@ -108,14 +109,21 @@ class QuantileRegression(fts.FTS):
def forecastAheadInterval(self, data, steps, **kwargs): def forecastAheadInterval(self, data, steps, **kwargs):
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
smoothing = kwargs.get("smoothing", 0.9)
l = len(ndata) l = len(ndata)
ret = [[k, k] for k in ndata[-self.order:]] ret = []
nmeans = self.forecastAhead(ndata, steps, **kwargs)
for k in np.arange(0, self.order):
nmeans.insert(k,ndata[-(k+1)])
for k in np.arange(self.order, steps+self.order): for k in np.arange(self.order, steps+self.order):
intl = self.interval_to_interval([ret[x] for x in np.arange(k - self.order, k)], self.lower_qt, self.upper_qt) intl = self.point_to_interval(nmeans[k - self.order: k], self.lower_qt, self.upper_qt)
ret.append(intl) ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])
ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True) ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)

View File

@ -4,6 +4,7 @@ from bisect import bisect_left, bisect_right
# Original Source Code: https://code.activestate.com/recipes/577197-sortedcollection/ # Original Source Code: https://code.activestate.com/recipes/577197-sortedcollection/
# Author: RAYMOND HETTINGER # Author: RAYMOND HETTINGER
class SortedCollection(object): class SortedCollection(object):
'''Sequence sorted by a key function. '''Sequence sorted by a key function.
@ -204,8 +205,8 @@ class SortedCollection(object):
raise ValueError('No item found between keys : %r,%r' % (ge,le)) raise ValueError('No item found between keys : %r,%r' % (ge,le))
def inside(self, ge, le): def inside(self, ge, le):
g = bisect_right(self._keys, ge) l = bisect_right(self._keys, le)
l = bisect_left(self._keys, le) g = bisect_left(self._keys, ge)
if g != len(self) and l != len(self) and g != l: if g != len(self) and l != len(self) and g != l:
return self._items[g : l] return self._items[g : l]
elif g != len(self) and l != len(self) and g == l: elif g != len(self) and l != len(self) and g == l:

View File

@ -6,7 +6,9 @@ import pandas as pd
import math import math
from operator import itemgetter from operator import itemgetter
from pyFTS.common import FLR, FuzzySet, SortedCollection from pyFTS.common import FLR, FuzzySet, SortedCollection
from pyFTS import fts from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
from pyFTS.benchmarks import arima, quantreg
from pyFTS.common import Transformations
class EnsembleFTS(fts.FTS): class EnsembleFTS(fts.FTS):
@ -24,10 +26,14 @@ class EnsembleFTS(fts.FTS):
def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None): def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None):
self.models = [] methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS,
hofts.HighOrderFTS, arima.ARIMA, quantreg.QuantileRegression]
for count, model in enumerate(models, start=0): transformations = [None, Transformations.Differential(1)]
mfts = model("")
for count, method in enumerate(methods, start=0):
mfts = method("")
if mfts.benchmark_only: if mfts.benchmark_only:
if transformation is not None: if transformation is not None:
mfts.appendTransformation(transformation) mfts.appendTransformation(transformation)
@ -37,7 +43,7 @@ class EnsembleFTS(fts.FTS):
for partition in partitions: for partition in partitions:
for partitioner in partitioners: for partitioner in partitioners:
data_train_fs = partitioner(data, partition, transformation=transformation) data_train_fs = partitioner(data, partition, transformation=transformation)
mfts = model("") mfts = method("")
mfts.partitioner = data_train_fs mfts.partitioner = data_train_fs
if not mfts.is_high_order: if not mfts.is_high_order:
@ -50,7 +56,7 @@ class EnsembleFTS(fts.FTS):
else: else:
for order in np.arange(1, max_order + 1): for order in np.arange(1, max_order + 1):
if order >= mfts.min_order: if order >= mfts.min_order:
mfts = model("") mfts = method("")
mfts.partitioner = data_train_fs mfts.partitioner = data_train_fs
if transformation is not None: if transformation is not None:
@ -60,6 +66,7 @@ class EnsembleFTS(fts.FTS):
self.models.append(mfts) self.models.append(mfts)
def train(self, data, sets, order=1,parameters=None): def train(self, data, sets, order=1,parameters=None):
#if self.models is None:
pass pass
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):

View File

@ -19,8 +19,8 @@ from numpy import random
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
#enrollments = np.array(enrollments["Enrollments"]) enrollments = np.array(enrollments["Enrollments"])
diff = Transformations.Differential(1) diff = Transformations.Differential(1)
@ -28,14 +28,20 @@ diff = Transformations.Differential(1)
DATASETS DATASETS
""" """
passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
passengers = np.array(passengers["Passengers"])
#sunspots = pd.read_csv("DataSets/sunspots.csv", sep=",")
#sunspots = np.array(sunspots["SUNACTIVITY"])
#gauss = random.normal(0,1.0,5000) #gauss = random.normal(0,1.0,5000)
#gauss_teste = random.normal(0,1.0,400) #gauss_teste = random.normal(0,1.0,400)
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") #taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex = np.array(taiexpd["avg"][:5000]) #taiex = np.array(taiexpd["avg"][:5000])
nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",") #nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
nasdaq = np.array(nasdaqpd["avg"][0:5000]) #nasdaq = np.array(nasdaqpd["avg"][0:5000])
#sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",") #sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",")
#sp500 = np.array(sp500pd["Avg"][11000:]) #sp500 = np.array(sp500pd["Avg"][11000:])
@ -43,7 +49,7 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
#sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";") #sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
#sondapd = sondapd.dropna(axis=0, how='any') #sondapd = sondapd.dropna(axis=0, how='any')
#sonda = np.array(sondapd["ws_10m"]) #sonda = np.array(sondapd["glo_avg"])
#del(sondapd) #del(sondapd)
#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";") #bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
@ -53,8 +59,8 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
#print(lag) #print(lag)
#print(a) #print(a)
#from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import distributed_benchmarks as bchmk #from pyFTS.benchmarks import distributed_benchmarks as bchmk
#from pyFTS.benchmarks import parallel_benchmarks as bchmk #from pyFTS.benchmarks import parallel_benchmarks as bchmk
from pyFTS.benchmarks import Util from pyFTS.benchmarks import Util
from pyFTS.benchmarks import arima, quantreg, Measures from pyFTS.benchmarks import arima, quantreg, Measures
@ -65,17 +71,20 @@ from pyFTS.benchmarks import arima, quantreg, Measures
#""" #"""
tmp = arima.ARIMA("", alpha=0.25) tmp = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff) #tmp.appendTransformation(diff)
tmp.train(nasdaq[:1600], None, order=(1,0,1)) tmp.train(enrollments, None, order=(1,0,0))
teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=100) teste = tmp.forecastInterval(enrollments)
#tmp = quantreg.QuantileRegression("", dist=True) #tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True)
#tmp.appendTransformation(diff) #tmp.appendTransformation(diff)
#tmp.train(nasdaq[:1600], None, order=1) #tmp.train(sunspots[:150], None, order=1)
#teste = tmp.forecastAheadInterval(sunspots[150:155], 5)
#teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=50) #teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=50)
print(nasdaq[1600:1605]) bchmk.plot_compared_series(enrollments,[tmp], ['blue','red'], points=False, intervals=True)
print(teste)
#print(sunspots[150:155])
#print(teste)
#kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp) #kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)
@ -109,10 +118,10 @@ bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.Weighte
"_interval_analytic.csv", "_interval_analytic.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8, #models=[yu.WeightedFTS], # # bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
partitions= np.arange(3,20,step=2), transformation=diff, partitions= np.arange(3,20,step=2), transformation=diff,
dump=True, save=True, file="experiments/best_interval_analytic_diff.csv", dump=True, save=True, file="experiments/sp500_analytic_diff.csv",
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
#""" #"""