- Improvementos of interval and distribution m-step ahead forecasts for arima and quantreg
This commit is contained in:
parent
b3e1a70602
commit
67358fa000
@ -20,6 +20,7 @@ class ARIMA(fts.FTS):
|
||||
self.is_high_order = True
|
||||
self.has_point_forecasting = True
|
||||
self.has_interval_forecasting = True
|
||||
self.has_probability_forecasting = True
|
||||
self.model = None
|
||||
self.model_fit = None
|
||||
self.trained_data = None
|
||||
@ -44,7 +45,6 @@ class ARIMA(fts.FTS):
|
||||
try:
|
||||
self.model = stats_arima(data, order=(self.p, self.d, self.q))
|
||||
self.model_fit = self.model.fit(disp=0)
|
||||
print(np.sqrt(self.model_fit.sigma2))
|
||||
except Exception as ex:
|
||||
print(ex)
|
||||
self.model_fit = None
|
||||
@ -145,6 +145,9 @@ class ARIMA(fts.FTS):
|
||||
|
||||
return ret
|
||||
|
||||
def empty_grid(self, resolution):
|
||||
return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
|
||||
|
||||
def forecastAheadDistribution(self, data, steps, **kwargs):
|
||||
smoothing = kwargs.get("smoothing", 0.5)
|
||||
|
||||
@ -158,7 +161,7 @@ class ARIMA(fts.FTS):
|
||||
|
||||
resolution = kwargs.get('resolution', percentile_size)
|
||||
|
||||
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
|
||||
grid = self.empty_grid(resolution)
|
||||
|
||||
index = SortedCollection.SortedCollection(iterable=grid.keys())
|
||||
|
||||
@ -167,7 +170,7 @@ class ARIMA(fts.FTS):
|
||||
nmeans = self.forecastAhead(ndata, steps, **kwargs)
|
||||
|
||||
for k in np.arange(0, steps):
|
||||
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
|
||||
grid = self.empty_grid(resolution)
|
||||
for alpha in np.arange(0.05, 0.5, 0.05):
|
||||
tmp = []
|
||||
|
||||
@ -182,6 +185,6 @@ class ARIMA(fts.FTS):
|
||||
|
||||
ret.append(tmp / sum(tmp))
|
||||
|
||||
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
|
||||
grid = self.empty_grid(resolution)
|
||||
df = pd.DataFrame(ret, columns=sorted(grid))
|
||||
return df
|
@ -16,13 +16,19 @@ import numpy as np
|
||||
import pandas as pd
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
|
||||
from probabilistic import ProbabilityDistribution
|
||||
from pyFTS.probabilistic import ProbabilityDistribution
|
||||
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
|
||||
from pyFTS.benchmarks import Util as bUtil
|
||||
from pyFTS.common import Transformations, Util
|
||||
# from sklearn.cross_validation import KFold
|
||||
from pyFTS.partitioners import Grid
|
||||
from matplotlib import rc
|
||||
|
||||
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
|
||||
## for Palatino and other serif fonts use:
|
||||
#rc('font',**{'family':'serif','serif':['Palatino']})
|
||||
rc('text', usetex=True)
|
||||
|
||||
colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green',
|
||||
'cyan', 'blue', 'darkblue', 'purple', 'darkviolet']
|
||||
@ -472,6 +478,21 @@ def plot_distribution(dist):
|
||||
|
||||
def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
|
||||
points=True, intervals=True, linewidth=1.5):
|
||||
"""
|
||||
Plot the forecasts of several one step ahead models, by point or by interval
|
||||
:param original: Original time series data (list)
|
||||
:param models: List of models to compare
|
||||
:param colors: List of models colors
|
||||
:param typeonlegend: Add the type of forecast (point / interval) on legend
|
||||
:param save: Save the picture on file
|
||||
:param file: Filename to save the picture
|
||||
:param tam: Size of the picture
|
||||
:param points: True to plot the point forecasts, False otherwise
|
||||
:param intervals: True to plot the interval forecasts, False otherwise
|
||||
:param linewidth:
|
||||
:return:
|
||||
"""
|
||||
|
||||
fig = plt.figure(figsize=tam)
|
||||
ax = fig.add_subplot(111)
|
||||
|
||||
@ -504,8 +525,12 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
|
||||
upper.insert(0, None)
|
||||
lbl = fts.shortname
|
||||
if typeonlegend: lbl += " (Interval)"
|
||||
ax.plot(lower, color=colors[count], label=lbl, ls="--",linewidth=linewidth)
|
||||
ax.plot(upper, color=colors[count], ls="--",linewidth=linewidth)
|
||||
if not points and intervals:
|
||||
ls = "-"
|
||||
else:
|
||||
ls = "--"
|
||||
ax.plot(lower, color=colors[count], label=lbl, ls=ls,linewidth=linewidth)
|
||||
ax.plot(upper, color=colors[count], ls=ls,linewidth=linewidth)
|
||||
|
||||
handles0, labels0 = ax.get_legend_handles_labels()
|
||||
lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
|
||||
@ -685,7 +710,24 @@ def print_distribution_statistics(original, models, steps, resolution):
|
||||
|
||||
def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to,
|
||||
interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
|
||||
cmap='Blues',option=2):
|
||||
cmap='Blues',option=2, linewidth=1.5):
|
||||
"""
|
||||
Plot the forecasts of several one step ahead models, by point or by interval
|
||||
:param original: Original time series data (list)
|
||||
:param models: List of models to compare
|
||||
:param colors: List of models colors
|
||||
:param distributions: True to plot a distribution
|
||||
:param time_from: index of data poit to start the ahead forecasting
|
||||
:param time_to: number of steps ahead to forecast
|
||||
:param interpol: Fill space between distribution plots
|
||||
:param save: Save the picture on file
|
||||
:param file: Filename to save the picture
|
||||
:param tam: Size of the picture
|
||||
:param resolution:
|
||||
:param cmap: Color map to be used on distribution plot
|
||||
:param option: Distribution type to be passed for models
|
||||
:return:
|
||||
"""
|
||||
fig = plt.figure(figsize=tam)
|
||||
ax = fig.add_subplot(111)
|
||||
|
||||
@ -739,7 +781,6 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
|
||||
|
||||
cb.set_label('Density')
|
||||
|
||||
|
||||
if fts.has_interval_forecasting:
|
||||
forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
|
||||
lower = [kk[0] for kk in forecasts]
|
||||
@ -749,8 +790,8 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
|
||||
for k in np.arange(0, time_from - fts.order):
|
||||
lower.insert(0, None)
|
||||
upper.insert(0, None)
|
||||
ax.plot(lower, color=colors[count], label=fts.shortname)
|
||||
ax.plot(upper, color=colors[count])
|
||||
ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
|
||||
ax.plot(upper, color=colors[count], linewidth=linewidth*1.5)
|
||||
|
||||
else:
|
||||
forecasts = fts.forecast(original)
|
||||
@ -760,10 +801,12 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
|
||||
forecasts.insert(0, None)
|
||||
ax.plot(forecasts, color=colors[count], label=fts.shortname)
|
||||
|
||||
ax.plot(original, color='black', label="Original")
|
||||
ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
|
||||
handles0, labels0 = ax.get_legend_handles_labels()
|
||||
ax.legend(handles0, labels0, loc=2)
|
||||
# ax.set_title(fts.name)
|
||||
if True in distributions:
|
||||
lgd = ax.legend(handles0, labels0, loc=2)
|
||||
else:
|
||||
lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
|
||||
_mi = min(mi)
|
||||
if _mi < 0:
|
||||
_mi *= 1.1
|
||||
@ -780,9 +823,7 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
|
||||
ax.set_xlabel('T')
|
||||
ax.set_xlim([0, len(original)])
|
||||
|
||||
#plt.colorbar()
|
||||
|
||||
Util.showAndSaveImage(fig, file, save)
|
||||
Util.showAndSaveImage(fig, file, save, lgd=lgd)
|
||||
|
||||
|
||||
def plotCompared(original, forecasts, labels, title):
|
||||
|
@ -19,6 +19,7 @@ class QuantileRegression(fts.FTS):
|
||||
self.has_point_forecasting = True
|
||||
self.has_interval_forecasting = True
|
||||
self.has_probability_forecasting = True
|
||||
self.has_probability_forecasting = True
|
||||
self.benchmark_only = True
|
||||
self.minOrder = 1
|
||||
self.alpha = kwargs.get("alpha", 0.05)
|
||||
@ -108,14 +109,21 @@ class QuantileRegression(fts.FTS):
|
||||
def forecastAheadInterval(self, data, steps, **kwargs):
|
||||
ndata = np.array(self.doTransformations(data))
|
||||
|
||||
smoothing = kwargs.get("smoothing", 0.9)
|
||||
|
||||
l = len(ndata)
|
||||
|
||||
ret = [[k, k] for k in ndata[-self.order:]]
|
||||
ret = []
|
||||
|
||||
nmeans = self.forecastAhead(ndata, steps, **kwargs)
|
||||
|
||||
for k in np.arange(0, self.order):
|
||||
nmeans.insert(k,ndata[-(k+1)])
|
||||
|
||||
for k in np.arange(self.order, steps+self.order):
|
||||
intl = self.interval_to_interval([ret[x] for x in np.arange(k - self.order, k)], self.lower_qt, self.upper_qt)
|
||||
intl = self.point_to_interval(nmeans[k - self.order: k], self.lower_qt, self.upper_qt)
|
||||
|
||||
ret.append(intl)
|
||||
ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])
|
||||
|
||||
ret = self.doInverseTransformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)
|
||||
|
||||
|
@ -4,6 +4,7 @@ from bisect import bisect_left, bisect_right
|
||||
# Original Source Code: https://code.activestate.com/recipes/577197-sortedcollection/
|
||||
# Author: RAYMOND HETTINGER
|
||||
|
||||
|
||||
class SortedCollection(object):
|
||||
'''Sequence sorted by a key function.
|
||||
|
||||
@ -204,8 +205,8 @@ class SortedCollection(object):
|
||||
raise ValueError('No item found between keys : %r,%r' % (ge,le))
|
||||
|
||||
def inside(self, ge, le):
|
||||
g = bisect_right(self._keys, ge)
|
||||
l = bisect_left(self._keys, le)
|
||||
l = bisect_right(self._keys, le)
|
||||
g = bisect_left(self._keys, ge)
|
||||
if g != len(self) and l != len(self) and g != l:
|
||||
return self._items[g : l]
|
||||
elif g != len(self) and l != len(self) and g == l:
|
||||
|
19
ensemble.py
19
ensemble.py
@ -6,7 +6,9 @@ import pandas as pd
|
||||
import math
|
||||
from operator import itemgetter
|
||||
from pyFTS.common import FLR, FuzzySet, SortedCollection
|
||||
from pyFTS import fts
|
||||
from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
|
||||
from pyFTS.benchmarks import arima, quantreg
|
||||
from pyFTS.common import Transformations
|
||||
|
||||
|
||||
class EnsembleFTS(fts.FTS):
|
||||
@ -24,10 +26,14 @@ class EnsembleFTS(fts.FTS):
|
||||
|
||||
def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None):
|
||||
|
||||
self.models = []
|
||||
methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
|
||||
ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS,
|
||||
hofts.HighOrderFTS, arima.ARIMA, quantreg.QuantileRegression]
|
||||
|
||||
for count, model in enumerate(models, start=0):
|
||||
mfts = model("")
|
||||
transformations = [None, Transformations.Differential(1)]
|
||||
|
||||
for count, method in enumerate(methods, start=0):
|
||||
mfts = method("")
|
||||
if mfts.benchmark_only:
|
||||
if transformation is not None:
|
||||
mfts.appendTransformation(transformation)
|
||||
@ -37,7 +43,7 @@ class EnsembleFTS(fts.FTS):
|
||||
for partition in partitions:
|
||||
for partitioner in partitioners:
|
||||
data_train_fs = partitioner(data, partition, transformation=transformation)
|
||||
mfts = model("")
|
||||
mfts = method("")
|
||||
|
||||
mfts.partitioner = data_train_fs
|
||||
if not mfts.is_high_order:
|
||||
@ -50,7 +56,7 @@ class EnsembleFTS(fts.FTS):
|
||||
else:
|
||||
for order in np.arange(1, max_order + 1):
|
||||
if order >= mfts.min_order:
|
||||
mfts = model("")
|
||||
mfts = method("")
|
||||
mfts.partitioner = data_train_fs
|
||||
|
||||
if transformation is not None:
|
||||
@ -60,6 +66,7 @@ class EnsembleFTS(fts.FTS):
|
||||
self.models.append(mfts)
|
||||
|
||||
def train(self, data, sets, order=1,parameters=None):
|
||||
#if self.models is None:
|
||||
pass
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
@ -19,8 +19,8 @@ from numpy import random
|
||||
|
||||
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
|
||||
|
||||
#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
|
||||
#enrollments = np.array(enrollments["Enrollments"])
|
||||
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
|
||||
enrollments = np.array(enrollments["Enrollments"])
|
||||
|
||||
diff = Transformations.Differential(1)
|
||||
|
||||
@ -28,14 +28,20 @@ diff = Transformations.Differential(1)
|
||||
DATASETS
|
||||
"""
|
||||
|
||||
passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
|
||||
passengers = np.array(passengers["Passengers"])
|
||||
|
||||
#sunspots = pd.read_csv("DataSets/sunspots.csv", sep=",")
|
||||
#sunspots = np.array(sunspots["SUNACTIVITY"])
|
||||
|
||||
#gauss = random.normal(0,1.0,5000)
|
||||
#gauss_teste = random.normal(0,1.0,400)
|
||||
|
||||
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
|
||||
#taiex = np.array(taiexpd["avg"][:5000])
|
||||
|
||||
nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
|
||||
nasdaq = np.array(nasdaqpd["avg"][0:5000])
|
||||
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
|
||||
#nasdaq = np.array(nasdaqpd["avg"][0:5000])
|
||||
|
||||
#sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",")
|
||||
#sp500 = np.array(sp500pd["Avg"][11000:])
|
||||
@ -43,7 +49,7 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
|
||||
|
||||
#sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
|
||||
#sondapd = sondapd.dropna(axis=0, how='any')
|
||||
#sonda = np.array(sondapd["ws_10m"])
|
||||
#sonda = np.array(sondapd["glo_avg"])
|
||||
#del(sondapd)
|
||||
|
||||
#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
|
||||
@ -53,8 +59,8 @@ nasdaq = np.array(nasdaqpd["avg"][0:5000])
|
||||
#print(lag)
|
||||
#print(a)
|
||||
|
||||
#from pyFTS.benchmarks import benchmarks as bchmk
|
||||
from pyFTS.benchmarks import distributed_benchmarks as bchmk
|
||||
from pyFTS.benchmarks import benchmarks as bchmk
|
||||
#from pyFTS.benchmarks import distributed_benchmarks as bchmk
|
||||
#from pyFTS.benchmarks import parallel_benchmarks as bchmk
|
||||
from pyFTS.benchmarks import Util
|
||||
from pyFTS.benchmarks import arima, quantreg, Measures
|
||||
@ -65,17 +71,20 @@ from pyFTS.benchmarks import arima, quantreg, Measures
|
||||
#"""
|
||||
tmp = arima.ARIMA("", alpha=0.25)
|
||||
#tmp.appendTransformation(diff)
|
||||
tmp.train(nasdaq[:1600], None, order=(1,0,1))
|
||||
teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=100)
|
||||
tmp.train(enrollments, None, order=(1,0,0))
|
||||
teste = tmp.forecastInterval(enrollments)
|
||||
|
||||
|
||||
#tmp = quantreg.QuantileRegression("", dist=True)
|
||||
#tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True)
|
||||
#tmp.appendTransformation(diff)
|
||||
#tmp.train(nasdaq[:1600], None, order=1)
|
||||
#tmp.train(sunspots[:150], None, order=1)
|
||||
#teste = tmp.forecastAheadInterval(sunspots[150:155], 5)
|
||||
#teste = tmp.forecastAheadDistribution(nasdaq[1600:1604], steps=5, resolution=50)
|
||||
|
||||
print(nasdaq[1600:1605])
|
||||
print(teste)
|
||||
bchmk.plot_compared_series(enrollments,[tmp], ['blue','red'], points=False, intervals=True)
|
||||
|
||||
#print(sunspots[150:155])
|
||||
#print(teste)
|
||||
|
||||
#kk = Measures.get_interval_statistics(nasdaq[1600:1605], tmp)
|
||||
|
||||
@ -109,10 +118,10 @@ bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8,#models=[yu.Weighte
|
||||
"_interval_analytic.csv",
|
||||
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
|
||||
|
||||
bchmk.interval_sliding_window(best, 5000, train=0.8, inc=0.8, #models=[yu.WeightedFTS], # #
|
||||
bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.WeightedFTS], # #
|
||||
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
|
||||
partitions= np.arange(3,20,step=2), transformation=diff,
|
||||
dump=True, save=True, file="experiments/best_interval_analytic_diff.csv",
|
||||
dump=True, save=True, file="experiments/sp500_analytic_diff.csv",
|
||||
nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
|
||||
|
||||
#"""
|
||||
|
Loading…
Reference in New Issue
Block a user