Bugfixes and improvements in benchmarks, arima and quantreg
parent 46b719464f
commit 9b2f286bab
@@ -60,27 +60,48 @@ class ARIMA(fts.FTS):
             print(ex)
             self.model_fit = None
 
-    def forecast(self, ndata, **kwargs):
+    def inference(self, steps):
         t_z = self.model.transform_z()
         mu, Y = self.model._model(self.model.latent_variables.get_z_values())
         date_index = self.model.shift_dates(steps)
         sim_vector = self.model._sim_prediction(mu, Y, steps, t_z, 1000)
 
-        return ret
+        return sim_vector
+
+    def forecast(self, ndata, **kwargs):
+        raise NotImplementedError()
 
     def forecast_interval(self, data, **kwargs):
-        return ret
+        raise NotImplementedError()
+
+    def forecast_ahead_interval(self, ndata, steps, **kwargs):
+        sim_vector = self.inference(steps)
+
+        if 'alpha' in kwargs:
+            alpha = kwargs.get('alpha')
+        else:
+            alpha = self.alpha
+
+        ret = []
+
+        for ct, sample in enumerate(sim_vector):
+            i = [np.percentile(sample, qt) for qt in [alpha, 1-alpha]]
+            ret.append(i)
+
+        return ret
 
     def forecast_distribution(self, data, **kwargs):
-        return ret
+        raise NotImplementedError()
+
+    def forecast_ahead_distribution(self, data, steps, **kwargs):
+        sim_vector = self.inference(steps)
+
+        ret = []
+
+        for ct, sample in enumerate(sim_vector):
+            pd = ProbabilityDistribution.ProbabilityDistribution(type='histogram', data=sample, nbins=500)
+            ret.append(pd)
+
+        return ret
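A minimal usage sketch for the new multi-step methods (hedged: train and test are hypothetical splits of a series, and the model must already be fitted so that self.model exposes the simulation backend used by inference()):

    model = ARIMA(order=(2, 0, 0))
    model.fit(train)
    # one [lower, upper] pair per step, at the alpha given here (or self.alpha)
    intervals = model.forecast_ahead_interval(test, steps=5, alpha=.25)
    # one histogram ProbabilityDistribution per step
    distributions = model.forecast_ahead_distribution(test, steps=5)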
@@ -225,12 +225,12 @@ def pinball_mean(tau, targets, forecasts):
 
 def winkler_score(tau, target, forecast):
     '''R. L. Winkler, A Decision-Theoretic Approach to Interval Estimation, J. Am. Stat. Assoc. 67 (337) (1972) 187–191. doi:10.2307/2284720.'''
     delta = forecast[1] - forecast[0]
-    if forecast[0] < target and target < forecast[1]:
+    if forecast[0] <= target <= forecast[1]:
         return delta
     elif forecast[0] > target:
-        return delta + 2 * (forecast[0] - target) / tau
+        return delta + (2 * (forecast[0] - target)) / tau
    elif forecast[1] < target:
-        return delta + 2 * (target - forecast[1]) / tau
+        return delta + (2 * (target - forecast[1])) / tau
 
 
 def winkler_mean(tau, targets, forecasts):
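A worked example of the score (values chosen for illustration): for a 90% interval (tau = 0.10) of [10, 20], a covered target of 15 scores just the interval width, 10; a missed target of 25 scores 10 + 2 * (25 - 20) / 0.10 = 110.

    print(winkler_score(0.10, 15, [10, 20]))   # 10  (inside: width only)
    print(winkler_score(0.10, 25, [10, 20]))   # 110 (outside: width plus penalty)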
@@ -248,44 +248,52 @@ def winkler_mean(tau, targets, forecasts):
 
 
 def brier_score(targets, densities):
-    '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3.'''
+    '''
+    Brier Score for probabilistic forecasts.
+    Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3.
+
+    :param targets: a list with the target values
+    :param densities: a list with pyFTS.probabilistic.ProbabilityDistribution objects
+    :return: float
+    '''
+
+    if not isinstance(densities, list):
+        densities = [densities]
+        targets = [targets]
 
     ret = []
 
     for ct, d in enumerate(densities):
         try:
-            v = d.bin_index.find_ge(targets[ct])
+            v = d.bin_index.find_le(targets[ct])
 
-            score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
-            score += (d.distribution[v] - 1) ** 2
+            score = sum([d.density(k) ** 2 for k in d.bins if k != v])
+            score += (d.density(v) - 1) ** 2
             ret.append(score)
         except ValueError as ex:
-            ret.append(sum([d.distribution[k] ** 2 for k in d.bins]))
+            ret.append(sum([d.density(k) ** 2 for k in d.bins]))
     return sum(ret) / len(ret)
 
 
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
         tmp = []
         prev = 0
         for col in density.columns:
             prev += density[col][row] if not np.isnan(density[col][row]) else 0
             tmp.append(prev)
         ret.append(tmp)
     df = pd.DataFrame(ret, columns=density.columns)
     return df
 
 
+def logarithm_score(targets, densities):
+    '''
+    Logarithm Score for probabilistic forecasts.
+    Good IJ (1952). “Rational Decisions.” Journal of the Royal Statistical Society B, 14(1), 107–114. URL https://www.jstor.org/stable/2984087.
+
+    :param targets: a list with the target values
+    :param densities: a list with pyFTS.probabilistic.ProbabilityDistribution objects
+    :return: float
+    '''
+    _ls = float(0.0)
+    if isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
+        densities = [densities]
+        targets = [targets]
+
+    n = len(densities)
+    for ct, df in enumerate(densities):
+        _ls += -np.log(df.density(targets[ct]))
+
+    return _ls / n
 
 
 def heavyside(bin, target):
     return 1 if bin >= target else 0
 
 
 def heavyside_cdf(bins, targets):
     ret = []
     for t in targets:
         result = [1 if b >= t else 0 for b in bins]
         ret.append(result)
     df = pd.DataFrame(ret, columns=bins)
     return df
 
 
 def crps(targets, densities):
@@ -299,13 +307,13 @@ def crps(targets, densities):
     _crps = float(0.0)
     if isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
         densities = [densities]
         targets = [targets]
 
     l = len(densities[0].bins)
     n = len(densities)
     for ct, df in enumerate(densities):
         _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
 
-    return _crps / float(l * n)
+    return _crps / n
 
 
 def get_point_statistics(data, model, **kwargs):
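A minimal sketch of the three probabilistic scores on a single histogram distribution, relying on the single-distribution guards above (assuming the functions are imported from pyFTS.benchmarks.Measures; the sample data is made up):

    import numpy as np
    from pyFTS.probabilistic import ProbabilityDistribution

    sample = np.random.normal(10, 2, 1000)
    dist = ProbabilityDistribution.ProbabilityDistribution(type='histogram', data=sample, nbins=100)

    # lower is better for all three scores
    print(brier_score(10.0, dist))
    print(logarithm_score(10.0, dist))
    print(crps(10.0, dist))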
@@ -416,6 +424,41 @@ def get_interval_statistics(data, model, **kwargs):
     return ret
 
 
+def get_interval_ahead_statistics(data, intervals, **kwargs):
+    """
+    Condense all interval forecasting measures for multi-step-ahead forecasts
+
+    :param data: test data
+    :param intervals: predicted intervals for each datum
+    :param kwargs:
+    :return: a dict, indexed by lag, with the sharpness, coverage, .05 pinball mean,
+    .25 pinball mean, .75 pinball mean, .95 pinball mean and Winkler scores.
+    """
+
+    l = len(intervals)
+
+    if len(data) != l:
+        raise Exception("Data and intervals have different lengths!")
+
+    lags = {}
+
+    for lag in range(l):
+        ret = {}
+        datum = data[lag]
+        interval = intervals[lag]
+        ret['sharpness'] = round(interval[1] - interval[0], 2)
+        ret['coverage'] = 1 if interval[0] <= datum <= interval[1] else 0
+        ret['pinball05'] = round(pinball(0.05, datum, interval[0]), 2)
+        ret['pinball25'] = round(pinball(0.25, datum, interval[0]), 2)
+        ret['pinball75'] = round(pinball(0.75, datum, interval[1]), 2)
+        ret['pinball95'] = round(pinball(0.95, datum, interval[1]), 2)
+        ret['winkler05'] = round(winkler_score(0.05, datum, interval), 2)
+        ret['winkler25'] = round(winkler_score(0.25, datum, interval), 2)
+        lags[lag] = ret
+
+    return lags
 
 
 def get_distribution_statistics(data, model, **kwargs):
     """
     Get CRPS statistic and time for a forecasting model
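A hedged sketch of the per-lag structure this returns (key names come from the code above; the numbers are illustrative, with coverage 1 making the Winkler scores equal the sharpness):

    stats = get_interval_ahead_statistics(test[1:6], intervals)
    # stats[0] -> {'sharpness': 120.5, 'coverage': 1, 'pinball05': 2.1, 'pinball25': 9.8,
    #              'pinball75': 10.2, 'pinball95': 1.9, 'winkler05': 120.5, 'winkler25': 120.5}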
@@ -452,3 +495,32 @@ def get_distribution_statistics(data, model, **kwargs):
     ret.append(round(_e1 - _s1, 3))
     ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
     return ret
+
+
+def get_distribution_ahead_statistics(data, distributions):
+    """
+    Get the CRPS, Brier and logarithm scores for multi-step-ahead probabilistic forecasts
+
+    :param data: test data
+    :param distributions: predicted distributions for each datum
+    :return: a dict, indexed by lag, with the CRPS, Brier and logarithm scores
+    """
+
+    l = len(distributions)
+
+    if len(data) != l:
+        raise Exception("Data and distributions have different lengths!")
+
+    lags = {}
+
+    for lag in range(l):
+        ret = {}
+        datum = data[lag]
+        dist = distributions[lag]
+        ret['crps'] = round(crps(datum, dist), 3)
+        ret['brier'] = round(brier_score(datum, dist), 3)
+        ret['log'] = round(logarithm_score(datum, dist), 3)
+        lags[lag] = ret
+
+    return lags
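The scalar datum/dist pairs passed here work because of the single-distribution guards added to brier_score(), logarithm_score() and crps() above. A sketch of the result shape (illustrative numbers):

    stats = get_distribution_ahead_statistics(test[1:6], distributions)
    # stats[0] -> {'crps': 14.2, 'brier': 0.87, 'log': 6.31}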
@@ -13,7 +13,7 @@ class QuantileRegression(fts.FTS):
     """Façade for statsmodels.regression.quantile_regression"""
     def __init__(self, **kwargs):
         super(QuantileRegression, self).__init__(**kwargs)
-        self.name = "QR"
+        self.name = "QAR"
         self.detail = "Quantile Regression"
         self.is_high_order = True
         self.has_point_forecasting = True
@@ -105,7 +105,7 @@ class QuantileRegression(fts.FTS):
         nmeans = self.forecast_ahead(ndata, steps, **kwargs)
 
         for k in np.arange(0, self.order):
-            nmeans.insert(k, ndata[-(k+1)])
+            nmeans.insert(k, ndata[k])
 
         for k in np.arange(self.order, steps+self.order):
             intl = self.point_to_interval(nmeans[k - self.order: k], self.lower_qt, self.upper_qt)
@@ -136,18 +136,29 @@ class QuantileRegression(fts.FTS):
         return ret
 
     def forecast_ahead_distribution(self, ndata, steps, **kwargs):
+        smoothing = kwargs.get("smoothing", 0.9)
+
         l = len(ndata)
 
         ret = []
 
         nmeans = self.forecast_ahead(ndata, steps, **kwargs)
 
+        for k in np.arange(0, self.order):
+            nmeans.insert(k, ndata[k])
+
         for k in np.arange(self.order, steps + self.order):
             dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
                                                                    uod=[self.original_min, self.original_max])
-            intervals = [[k, k] for k in ndata[-self.order:]]
+            intervals = [[nmeans[self.order], nmeans[self.order]]]
             for qt in self.dist_qt:
-                intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1])
-                intervals.append(intl)
+                intl1 = self.point_to_interval(nmeans[k - self.order: k], qt[0], qt[1])
+                intl2 = [intl1[0] * (1 + k * smoothing), intl1[1] * (1 + k * smoothing)]
+                intervals.append(intl2)
             dist.append_interval(intervals)
 
             ret.append(dist)
 
         return ret
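The new smoothing keyword widens each quantile interval multiplicatively with the step index k, so the distributions spread out along the horizon. A small numeric illustration (made-up values, smoothing = 0.9):

    # base interval from point_to_interval: intl1 = [100, 110]
    # k = 1 -> intl2 = [100 * (1 + 1 * 0.9), 110 * (1 + 1 * 0.9)] = [190.0, 209.0]
    # k = 2 -> intl2 = [100 * (1 + 2 * 0.9), 110 * (1 + 2 * 0.9)] = [280.0, 308.0]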
@@ -162,17 +162,23 @@ def plot_distribution(ax, cmap, probabilitydist, fig, time_from, reference_data=
     cb.set_label('Density')
 
 
-def plot_distribution2(probabilitydist, data, **kwargs): #ax, cmap, probabilitydist, time_from, data=None):
+def plot_distribution2(probabilitydist, data, **kwargs):
     '''
     Plot distributions over the time (x-axis)
-    :param probabilitydist:
-    :param data:
-    :param kwargs:
-    :return:
+
+    :param probabilitydist: the forecasted probability distributions to plot
+    :param data: the original test sample
+    :keyword start_at: the time index (inside of data) at which to start plotting the probability distributions
+    :keyword ax: a matplotlib axis. If no value is provided, a new axis is created.
+    :keyword cmap: a matplotlib colormap name; the default value is 'Blues'
+    :keyword quantiles: the list of quantile intervals to plot, e.g. [.05, .25, .75, .95]
+    :keyword median: a boolean value indicating whether the median will be plotted.
     '''
     import matplotlib.colorbar as cbar
     import matplotlib.cm as cm
 
+    order = kwargs.get('order', 1)
+
     ax = kwargs.get('ax', None)
     if ax is None:
         fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
@@ -182,7 +188,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
     cmap = kwargs.get('cmap', 'Blues')
     cmap = plt.get_cmap(cmap)
 
-    start_at = kwargs.get('start_at', 0)
+    start_at = kwargs.get('start_at', 0) + order - 1
 
     x = [k + start_at for k in range(l + 1)]
@@ -207,12 +213,13 @@ def plot_distribution2(probabilitydist, data, **kwargs):
         ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
                         facecolor=scalarMap.to_rgba(ct / lq))
 
-    y = [data[start_at]]
-    for pd in probabilitydist:
-        qts = pd.quantile(.5)
-        y.append(qts[0])
+    if kwargs.get('median', True):
+        y = [data[start_at]]
+        for pd in probabilitydist:
+            qts = pd.quantile(.5)
+            y.append(qts[0])
 
-    ax.plot(x, y, color='red')
+        ax.plot(x, y, color='red', label='Median')
 
     cax, _ = cbar.make_axes(ax)
     cb = cbar.ColorbarBase(cax, cmap=cmap, norm=normal)
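A usage sketch of the updated signature, based on the commented-out call in the test script at the end of this diff (forecasts is assumed to be a list of ProbabilityDistribution objects aligned with data):

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
    ax.plot(data[:28], label='Original', color='black')
    Util.plot_distribution2(forecasts, data[:28], start_at=20, order=3, ax=ax,
                            cmap="Blues", median=True)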
@@ -232,7 +232,7 @@ class FTS(object):
 
         ret = []
         for k in np.arange(0, steps):
-            tmp = self.forecast(data[-self.max_lag:], **kwargs)
+            tmp = self.forecast(data[k:self.max_lag+k], **kwargs)
 
             if isinstance(tmp,(list, np.ndarray)):
                 tmp = tmp[-1]
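With this fix the forecast window slides forward with each step instead of re-reading the same last max_lag points. An illustrative progression for max_lag = 3:

    # k = 0 -> data[0:3]
    # k = 1 -> data[1:4]
    # k = 2 -> data[2:5]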
@@ -16,10 +16,13 @@ import scipy.stats as st
 from itertools import product
 
 
-def sampler(data, quantiles):
+def sampler(data, quantiles, bounds=False):
     ret = []
     for qt in quantiles:
         ret.append(np.nanpercentile(data, q=qt * 100))
+    if bounds:
+        ret.insert(0, min(data))
+        ret.append(max(data))
     return ret
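A quick illustration of the new bounds flag (made-up data):

    sampler([1, 2, 3, 4, 5], [.25, .5, .75])               # [2.0, 3.0, 4.0]
    sampler([1, 2, 3, 4, 5], [.25, .5, .75], bounds=True)  # [1, 2.0, 3.0, 4.0, 5]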
@@ -190,25 +193,25 @@ class EnsembleFTS(fts.FTS):
 
         ret = []
 
-        samples = [[k] for k in data[-self.order:]]
+        start = kwargs.get('start', self.order)
+
+        uod = self.get_UoD()
+
+        sample = [[k] for k in data[start - self.order: start]]
 
         for k in np.arange(self.order, steps + self.order):
             forecasts = []
 
-            lags = {}
-            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
-
-            # Build the tree with all possible paths
-            root = tree.FLRGTreeNode(None)
-
-            tree.build_tree_without_order(root, lags, 0)
-
-            for p in root.paths():
-                path = list(reversed(list(filter(None.__ne__, p))))
+            lags = []
+            for i in np.arange(0, self.order):
+                lags.append(sample[i - self.order])
+
+            # Trace the possible paths
+            for path in product(*lags):
                 forecasts.extend(self.get_models_forecasts(path))
 
-            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.2)))
+            sample.append(sampler(forecasts, np.arange(.1, 1, 0.1), bounds=True))
 
             interval = self.get_interval(forecasts)
 
             if len(interval) == 1:
@@ -248,7 +251,7 @@ class EnsembleFTS(fts.FTS):
         if 'method' in kwargs:
             self.point_method = kwargs.get('method','mean')
 
-        smooth = kwargs.get("smooth", "KDE")
+        smooth = kwargs.get("smooth", "histogram")
         alpha = kwargs.get("alpha", None)
 
         ret = []
@@ -270,7 +273,7 @@ class EnsembleFTS(fts.FTS):
             for path in product(*lags):
                 forecasts.extend(self.get_models_forecasts(path))
 
-            sample.append(forecasts)
+            sample.append(sampler(forecasts, np.arange(.1, 1, 0.1), bounds=True))
 
             if alpha is None:
                 forecasts = np.ravel(forecasts).tolist()
@@ -17,6 +17,8 @@ class ProbabilityDistribution(object):
 
         self.data = []
 
+        data = kwargs.get("data", None)
+
         self.type = type
         """
         If type is histogram, the PDF is discrete

@@ -28,12 +30,10 @@ class ProbabilityDistribution(object):
         self.labels = kwargs.get("bins_labels", None)
         """Bins labels on a discrete PDF"""
 
-        data = kwargs.get("data", None)
-
         if self.type == "KDE":
             self.kde = kde.KernelSmoothing(h=kwargs.get("h", 0.5), kernel=kwargs.get("kernel", "epanechnikov"))
 
-        if self.data is not None:
+        if data is not None and self.uod is None:
             _min = np.nanmin(data)
             _min = _min * .7 if _min > 0 else _min * 1.3
             _max = np.nanmax(data)
@@ -5,6 +5,8 @@ import os
 import numpy as np
 
+import pandas as pd
+import matplotlib.pyplot as plt
 from pyFTS.common import Util
 from pyFTS.partitioners import Grid
 from pyFTS.common import Transformations
 from pyFTS.models import chen, hofts

@@ -17,27 +19,56 @@ from pyFTS.models import hofts
 from pyFTS.data import TAIEX
 
 data = TAIEX.get_data()
 
+train = data[:800]
+test = data[800:1000]
+
+'''
 model = ensemble.EnsembleFTS()
 
 for k in [15, 25, 35]:
     for order in [1, 2, 3]:
-        fs = Grid.GridPartitioner(data=data, npart=k)
+        fs = Grid.GridPartitioner(data=train, npart=k)
         tmp = hofts.WeightedHighOrderFTS(partitioner=fs, order=order)
 
-        tmp.fit(data)
+        tmp.fit(train)
 
         model.append_model(tmp)
+'''
+
+#fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
+
+#ax.plot(data[:28], label='Original', color='black')
+
+from pyFTS.benchmarks import arima, quantreg, BSTS
+
+#model = arima.ARIMA(order=(2,0,0))
+#model = quantreg.QuantileRegression(order=1, dist=True)
+model = BSTS.ARIMA(order=(2,0,0))
+model.fit(train)
+
+horizon = 5
+
+intervals = model.predict(test[:10], type='interval', alpha=.25, steps_ahead=horizon)
+
+distributions = model.predict(test[:10], type='distribution', smooth='histogram', steps_ahead=horizon, num_bins=100)
+
+#Util.plot_distribution2(forecasts, data[:28], start_at=20, order=3, ax=ax, cmap="Blues")
 
 print('end')
 
+from pyFTS.benchmarks import Measures
+
+start = model.order+1
+end = start + horizon
+
+print(Measures.get_interval_ahead_statistics(test[start:end], intervals))
+print(Measures.get_distribution_ahead_statistics(test[start:end], distributions))
+
-forecasts = model.predict(data, type='distribution', smooth='histogram', steps_ahead=5)
 
 from pyFTS.benchmarks import benchmarks as bchmk
 
 #f, ax = plt.subplots(1, 1, figsize=[20, 5])
 
 #ax.plot(data)
 #bchmk.plot_interval(ax, forecasts, 3, "")
-print(forecasts)
+#print(forecasts)
 
 '''
 mu_local = 5