- Bugfixes and improvements on Ensemble FTS

- Improvement on plotting probabilistic forecasts
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-18 14:20:12 -03:00
parent aa2b5de18f
commit f533dd249a
3 changed files with 126 additions and 100 deletions

View File

@ -708,9 +708,9 @@ def print_distribution_statistics(original, models, steps, resolution):
print(ret) print(ret)
def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True,
interpol=False, save=False, file=None, tam=[20, 5], resolution=None, save=False, file=None, tam=[20, 5], resolution=None,
cmap='Blues',option=2, linewidth=1.5): cmap='Blues', linewidth=1.5):
""" """
Plot the forecasts of several one step ahead models, by point or by interval Plot the forecasts of several one step ahead models, by point or by interval
:param original: Original time series data (list) :param original: Original time series data (list)
@ -743,45 +743,12 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
for count, fts in enumerate(models, start=0): for count, fts in enumerate(models, start=0):
if fts.has_probability_forecasting and distributions[count]: if fts.has_probability_forecasting and distributions[count]:
density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], time_to, density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], time_to,
resolution=resolution, method=option) resolution=resolution)
Y = [] #plot_density_scatter(ax, cmap, density, fig, resolution, time_from, time_to)
X = [] plot_density_rectange(ax, cm, density, fig, resolution, time_from, time_to)
C = []
S = []
y = density.columns
t = len(y)
ss = time_to ** 2 if fts.has_interval_forecasting and intervals:
for k in density.index:
#alpha = [scalarMap.to_rgba(density[col][k]) for col in density.columns]
col = [density[col][k]*5 for col in density.columns]
x = [time_from + k for x in np.arange(0, t)]
s = [ss for x in np.arange(0, t)]
ic = resolution/10
for cc in np.arange(0, resolution, ic):
Y.append(y + cc)
X.append(x)
C.append(col)
S.append(s)
Y = np.hstack(Y)
X = np.hstack(X)
C = np.hstack(C)
S = np.hstack(S)
s = ax.scatter(X, Y, c=C, marker='s',s=S, linewidths=0, edgecolors=None, cmap=cmap)
s.set_clim([0, 1])
cb = fig.colorbar(s)
cb.set_label('Density')
if fts.has_interval_forecasting:
forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to) forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
lower = [kk[0] for kk in forecasts] lower = [kk[0] for kk in forecasts]
upper = [kk[1] for kk in forecasts] upper = [kk[1] for kk in forecasts]
@ -793,14 +760,6 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth) ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
ax.plot(upper, color=colors[count], linewidth=linewidth*1.5) ax.plot(upper, color=colors[count], linewidth=linewidth*1.5)
else:
forecasts = fts.forecast(original)
mi.append(min(forecasts))
ma.append(max(forecasts))
for k in np.arange(0, time_from):
forecasts.insert(0, None)
ax.plot(forecasts, color=colors[count], label=fts.shortname)
ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5) ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
handles0, labels0 = ax.get_legend_handles_labels() handles0, labels0 = ax.get_legend_handles_labels()
if True in distributions: if True in distributions:
@ -826,6 +785,27 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
Util.showAndSaveImage(fig, file, save, lgd=lgd) Util.showAndSaveImage(fig, file, save, lgd=lgd)
def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to):
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from matplotlib.colorbar import ColorbarPatch
patches = []
colors = []
for x in density.index:
for y in density.columns:
s = Rectangle((time_from + x, y), 1, resolution, fill=True, lw = 0)
patches.append(s)
colors.append(density[y][x]*5)
pc = PatchCollection(patches=patches, match_original=True)
pc.set_clim([0, 1])
pc.set_cmap(cmap)
pc.set_array(np.array(colors))
ax.add_collection(pc)
cb = fig.colorbar(pc, ax=ax)
cb.set_label('Density')
def plotCompared(original, forecasts, labels, title): def plotCompared(original, forecasts, labels, title):
fig = plt.figure(figsize=[13, 6]) fig = plt.figure(figsize=[13, 6])
ax = fig.add_subplot(111) ax = fig.add_subplot(111)

View File

@ -12,7 +12,6 @@ from pyFTS.common import Transformations
import scipy.stats as st import scipy.stats as st
from pyFTS import tree from pyFTS import tree
def sampler(data, quantiles): def sampler(data, quantiles):
ret = [] ret = []
for qt in quantiles: for qt in quantiles:
@ -33,12 +32,14 @@ class EnsembleFTS(fts.FTS):
self.models = [] self.models = []
self.parameters = [] self.parameters = []
self.alpha = kwargs.get("alpha", 0.05) self.alpha = kwargs.get("alpha", 0.05)
self.max_order = 1 self.order = 1
self.point_method = kwargs.get('point_method', 'mean')
self.interval_method = kwargs.get('interval_method', 'quantile')
def appendModel(self, model): def appendModel(self, model):
self.models.append(model) self.models.append(model)
if model.order > self.max_order: if model.order > self.order:
self.max_order = model.order self.order = model.order
def train(self, data, sets, order=1,parameters=None): def train(self, data, sets, order=1,parameters=None):
self.original_max = max(data) self.original_max = max(data)
@ -49,31 +50,33 @@ class EnsembleFTS(fts.FTS):
for model in self.models: for model in self.models:
sample = data[-model.order:] sample = data[-model.order:]
forecast = model.forecast(sample) forecast = model.forecast(sample)
if isinstance(forecast, (list,np.ndarray)): if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
forecast = int(forecast[-1]) forecast = int(forecast[-1])
elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0:
forecast = np.nan
tmp.append(forecast) tmp.append(forecast)
return tmp return tmp
def get_point(self,method, forecasts, **kwargs): def get_point(self,forecasts, **kwargs):
if method == 'mean': if self.point_method == 'mean':
ret = np.nanmean(forecasts) ret = np.nanmean(forecasts)
elif method == 'median': elif self.point_method == 'median':
ret = np.nanpercentile(forecasts, 50) ret = np.nanpercentile(forecasts, 50)
elif method == 'quantile': elif self.point_method == 'quantile':
alpha = kwargs.get("alpha",0.05) alpha = kwargs.get("alpha",0.05)
ret = np.percentile(forecasts, alpha*100) ret = np.percentile(forecasts, alpha*100)
return ret return ret
def get_interval(self, method, forecasts): def get_interval(self, forecasts):
ret = [] ret = []
if method == 'extremum': if self.interval_method == 'extremum':
ret.append([min(forecasts), max(forecasts)]) ret.append([min(forecasts), max(forecasts)])
elif method == 'quantile': elif self.interval_method == 'quantile':
qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100) qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100)
qt_up = np.nanpercentile(forecasts, q=(1-self.alpha) * 100) qt_up = np.nanpercentile(forecasts, q=(1-self.alpha) * 100)
ret.append([qt_lo, qt_up]) ret.append([qt_lo, qt_up])
elif method == 'normal': elif self.interval_method == 'normal':
mu = np.nanmean(forecasts) mu = np.nanmean(forecasts)
sigma = np.sqrt(np.nanvar(forecasts)) sigma = np.sqrt(np.nanvar(forecasts))
ret.append(mu + st.norm.ppf(self.alpha) * sigma) ret.append(mu + st.norm.ppf(self.alpha) * sigma)
@ -83,61 +86,59 @@ class EnsembleFTS(fts.FTS):
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
method = kwargs.get('method','mean') if "method" in kwargs:
self.point_method = kwargs.get('method','mean')
ndata = np.array(self.doTransformations(data)) l = len(data)
l = len(ndata)
ret = [] ret = []
for k in np.arange(self.max_order, l+1): for k in np.arange(self.order, l+1):
sample = ndata[k - self.max_order : k ] sample = data[k - self.order : k]
tmp = self.get_models_forecasts(sample) tmp = self.get_models_forecasts(sample)
point = self.get_point(method, tmp) point = self.get_point(tmp)
ret.append(point) ret.append(point)
ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
return ret return ret
def forecastInterval(self, data, **kwargs): def forecastInterval(self, data, **kwargs):
method = kwargs.get('method', 'extremum') if "method" in kwargs:
self.interval_method = kwargs.get('method','quantile')
if 'alpha' in kwargs: if 'alpha' in kwargs:
self.alpha = kwargs.get('alpha',0.05) self.alpha = kwargs.get('alpha',0.05)
ndata = np.array(self.doTransformations(data)) l = len(data)
l = len(ndata)
ret = [] ret = []
for k in np.arange(self.max_order, l+1): for k in np.arange(self.order, l+1):
sample = ndata[k - self.max_order : k ] sample = data[k - self.order : k]
tmp = self.get_models_forecasts(sample) tmp = self.get_models_forecasts(sample)
interval = self.get_interval(method, tmp) interval = self.get_interval(tmp)
if len(interval) == 1:
interval = interval[-1]
ret.append(interval) ret.append(interval)
return ret return ret
def forecastAheadInterval(self, data, steps, **kwargs): def forecastAheadInterval(self, data, steps, **kwargs):
method = kwargs.get('method', 'extremum') if 'method' in kwargs:
self.interval_method = kwargs.get('method','quantile')
ret = [] ret = []
samples = [[k,k] for k in data[-self.max_order:]] samples = [[k,k] for k in data[-self.order:]]
for k in np.arange(self.max_order, steps): for k in np.arange(self.order, steps+self.order):
forecasts = [] forecasts = []
sample = samples[k - self.max_order : k] sample = samples[k - self.order : k]
lo_sample = [i[0] for i in sample] lo_sample = [i[0] for i in sample]
up_sample = [i[1] for i in sample] up_sample = [i[1] for i in sample]
forecasts.extend(self.get_models_forecasts(lo_sample) ) forecasts.extend(self.get_models_forecasts(lo_sample) )
forecasts.extend(self.get_models_forecasts(up_sample)) forecasts.extend(self.get_models_forecasts(up_sample))
interval = self.get_interval(method, forecasts) interval = self.get_interval(forecasts)
if len(interval) == 1: if len(interval) == 1:
interval = interval[0] interval = interval[0]
@ -151,7 +152,8 @@ class EnsembleFTS(fts.FTS):
return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution) return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
def forecastAheadDistribution(self, data, steps, **kwargs): def forecastAheadDistribution(self, data, steps, **kwargs):
method = kwargs.get('method', 'extremum') if 'method' in kwargs:
self.point_method = kwargs.get('method','mean')
percentile_size = (self.original_max - self.original_min) / 100 percentile_size = (self.original_max - self.original_min) / 100
@ -163,12 +165,12 @@ class EnsembleFTS(fts.FTS):
ret = [] ret = []
samples = [[k] for k in data[-self.max_order:]] samples = [[k] for k in data[-self.order:]]
for k in np.arange(self.max_order, steps + self.max_order): for k in np.arange(self.order, steps + self.order):
forecasts = [] forecasts = []
lags = {} lags = {}
for i in np.arange(0, self.max_order): lags[i] = samples[k - self.max_order + i] for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
# Build the tree with all possible paths # Build the tree with all possible paths
@ -189,5 +191,39 @@ class EnsembleFTS(fts.FTS):
ret.append(tmp / sum(tmp)) ret.append(tmp / sum(tmp))
return ret grid = self.empty_grid(resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
class AllMethodEnsembleFTS(EnsembleFTS):
def __init__(self, **kwargs):
super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS", **kwargs)
self.min_order = 3
def set_transformations(self, model):
for t in self.transformations:
model.appendTransformation(t)
def train(self, data, sets, order=1, parameters=None):
self.original_max = max(data)
self.original_min = min(data)
fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
for method in fo_methods:
model = method("")
self.set_transformations(model)
model.train(data, sets)
self.appendModel(model)
for method in ho_methods:
for o in np.arange(1, order+1):
model = method("")
if model.min_order >= o:
self.set_transformations(model)
model.train(data, sets, order=o)
self.appendModel(model)

View File

@ -25,7 +25,7 @@ passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
passengers = np.array(passengers["Passengers"]) passengers = np.array(passengers["Passengers"])
e = ensemble.EnsembleFTS("") e = ensemble.AllMethodEnsembleFTS()
fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS,
ismailefendi.ImprovedWeightedFTS] ismailefendi.ImprovedWeightedFTS]
@ -34,6 +34,12 @@ ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
fs = Grid.GridPartitioner(passengers, 10, transformation=diff) fs = Grid.GridPartitioner(passengers, 10, transformation=diff)
e.appendTransformation(diff)
e.train(passengers, fs.sets, order=3)
"""
for method in fo_methods: for method in fo_methods:
model = method("") model = method("")
model.appendTransformation(diff) model.appendTransformation(diff)
@ -73,7 +79,7 @@ e.appendModel(arima201)
e.train(passengers, None) e.train(passengers, None)
"""
_mean = e.forecast(passengers, method="mean") _mean = e.forecast(passengers, method="mean")
print(_mean) print(_mean)
@ -92,21 +98,25 @@ _normal = e.forecastInterval(passengers, method="normal", alpha=0.25)
print(_normal) print(_normal)
""" """
""" #"""
_extremum = e.forecastAheadInterval(passengers, 10, method="extremum") #_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
print(_extremum) #print(_extremum)
_quantile = e.forecastAheadInterval(passengers, 10, method="quantile", alpha=0.25) #_quantile = e.forecastAheadInterval(passengers[:50], 40, method="quantile", alpha=0.25)
print(_quantile) #print(_quantile)
_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25) #_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
print(_normal) #print(_normal)
""" #"""
dist = e.forecastAheadDistribution(passengers, 20) #dist = e.forecastAheadDistribution(passengers, 20)
print(dist) #print(dist)
bchmk.plot_compared_intervals_ahead(passengers[:120],[e], ['blue','red'],
distributions=[True,False], save=True, file="pictures/distribution_ahead_arma",
time_from=60, time_to=10, tam=[12,5])