diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index 098b22b..ec98c62 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -708,9 +708,9 @@ def print_distribution_statistics(original, models, steps, resolution):
     print(ret)


-def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to,
-                                  interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
-                                  cmap='Blues',option=2, linewidth=1.5):
+def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True,
+                                  save=False, file=None, tam=[20, 5], resolution=None,
+                                  cmap='Blues', linewidth=1.5):
     """
     Plot the forecasts of several one step ahead models, by point or by interval
     :param original: Original time series data (list)
@@ -743,45 +743,12 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
     for count, fts in enumerate(models, start=0):
         if fts.has_probability_forecasting and distributions[count]:
             density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], time_to,
-                                                    resolution=resolution, method=option)
+                                                    resolution=resolution)

-            Y = []
-            X = []
-            C = []
-            S = []
-            y = density.columns
-            t = len(y)
+            #plot_density_scatter(ax, cmap, density, fig, resolution, time_from, time_to)
+            plot_density_rectange(ax, cm, density, fig, resolution, time_from, time_to)

-            ss = time_to ** 2
-
-            for k in density.index:
-                #alpha = [scalarMap.to_rgba(density[col][k]) for col in density.columns]
-                col = [density[col][k]*5 for col in density.columns]
-
-                x = [time_from + k for x in np.arange(0, t)]
-
-                s = [ss for x in np.arange(0, t)]
-
-                ic = resolution/10
-
-                for cc in np.arange(0, resolution, ic):
-                    Y.append(y + cc)
-                    X.append(x)
-                    C.append(col)
-                    S.append(s)
-
-            Y = np.hstack(Y)
-            X = np.hstack(X)
-            C = np.hstack(C)
-            S = np.hstack(S)
-
-            s = ax.scatter(X, Y, c=C, marker='s',s=S, linewidths=0, edgecolors=None, cmap=cmap)
-            s.set_clim([0, 1])
-            cb = fig.colorbar(s)
-
-            cb.set_label('Density')
-
-        if fts.has_interval_forecasting:
+        if fts.has_interval_forecasting and intervals:
             forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
             lower = [kk[0] for kk in forecasts]
             upper = [kk[1] for kk in forecasts]
@@ -793,14 +760,6 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_
             ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
             ax.plot(upper, color=colors[count], linewidth=linewidth*1.5)
-        else:
-            forecasts = fts.forecast(original)
-            mi.append(min(forecasts))
-            ma.append(max(forecasts))
-            for k in np.arange(0, time_from):
-                forecasts.insert(0, None)
-            ax.plot(forecasts, color=colors[count], label=fts.shortname)
-
     ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)

     handles0, labels0 = ax.get_legend_handles_labels()
     if True in distributions:
@@ -826,6 +785,27 @@ def plot_compared_intervals_ahead(original, models, colors, distributions, time_

     Util.showAndSaveImage(fig, file, save, lgd=lgd)


+def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to):
+    from matplotlib.patches import Rectangle
+    from matplotlib.collections import PatchCollection
+    from matplotlib.colorbar import ColorbarPatch
+    patches = []
+    colors = []
+    for x in density.index:
+        for y in density.columns:
+            s = Rectangle((time_from + x, y), 1, resolution, fill=True, lw = 0)
+            patches.append(s)
+            colors.append(density[y][x]*5)
+    pc = PatchCollection(patches=patches, match_original=True)
+    pc.set_clim([0, 1])
+    pc.set_cmap(cmap)
+    pc.set_array(np.array(colors))
+    ax.add_collection(pc)
+    cb = fig.colorbar(pc, ax=ax)
+    cb.set_label('Density')
+
+
 def plotCompared(original, forecasts, labels, title):
     fig = plt.figure(figsize=[13, 6])
     ax = fig.add_subplot(111)
diff --git a/ensemble.py b/ensemble.py
index 670ee89..c8ddfbf 100644
--- a/ensemble.py
+++ b/ensemble.py
@@ -12,7 +12,6 @@
 from pyFTS.common import Transformations
 import scipy.stats as st
 from pyFTS import tree
-
 def sampler(data, quantiles):
     ret = []
     for qt in quantiles:
@@ -33,12 +32,14 @@ class EnsembleFTS(fts.FTS):
         self.models = []
         self.parameters = []
         self.alpha = kwargs.get("alpha", 0.05)
-        self.max_order = 1
+        self.order = 1
+        self.point_method = kwargs.get('point_method', 'mean')
+        self.interval_method = kwargs.get('interval_method', 'quantile')

     def appendModel(self, model):
         self.models.append(model)
-        if model.order > self.max_order:
-            self.max_order = model.order
+        if model.order > self.order:
+            self.order = model.order

     def train(self, data, sets, order=1,parameters=None):
         self.original_max = max(data)
@@ -49,31 +50,33 @@ class EnsembleFTS(fts.FTS):
         for model in self.models:
             sample = data[-model.order:]
             forecast = model.forecast(sample)
-            if isinstance(forecast, (list,np.ndarray)):
+            if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
                 forecast = int(forecast[-1])
+            elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0:
+                forecast = np.nan
             tmp.append(forecast)

         return tmp

-    def get_point(self,method, forecasts, **kwargs):
-        if method == 'mean':
+    def get_point(self,forecasts, **kwargs):
+        if self.point_method == 'mean':
             ret = np.nanmean(forecasts)
-        elif method == 'median':
+        elif self.point_method == 'median':
             ret = np.nanpercentile(forecasts, 50)
-        elif method == 'quantile':
+        elif self.point_method == 'quantile':
             alpha = kwargs.get("alpha",0.05)
             ret = np.percentile(forecasts, alpha*100)

         return ret

-    def get_interval(self, method, forecasts):
+    def get_interval(self, forecasts):
         ret = []
-        if method == 'extremum':
+        if self.interval_method == 'extremum':
             ret.append([min(forecasts), max(forecasts)])
-        elif method == 'quantile':
+        elif self.interval_method == 'quantile':
             qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100)
             qt_up = np.nanpercentile(forecasts, q=(1-self.alpha) * 100)
             ret.append([qt_lo, qt_up])
-        elif method == 'normal':
+        elif self.interval_method == 'normal':
             mu = np.nanmean(forecasts)
             sigma = np.sqrt(np.nanvar(forecasts))
             ret.append(mu + st.norm.ppf(self.alpha) * sigma)
@@ -83,61 +86,59 @@ class EnsembleFTS(fts.FTS):

     def forecast(self, data, **kwargs):

-        method = kwargs.get('method','mean')
+        if "method" in kwargs:
+            self.point_method = kwargs.get('method','mean')

-        ndata = np.array(self.doTransformations(data))
-
-        l = len(ndata)
+        l = len(data)

         ret = []

-        for k in np.arange(self.max_order, l+1):
-            sample = ndata[k - self.max_order : k ]
+        for k in np.arange(self.order, l+1):
+            sample = data[k - self.order : k]
             tmp = self.get_models_forecasts(sample)
-            point = self.get_point(method, tmp)
+            point = self.get_point(tmp)
             ret.append(point)

-        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
-
         return ret

     def forecastInterval(self, data, **kwargs):

-        method = kwargs.get('method', 'extremum')
+        if "method" in kwargs:
+            self.interval_method = kwargs.get('method','quantile')

         if 'alpha' in kwargs:
             self.alpha = kwargs.get('alpha',0.05)

-        ndata = np.array(self.doTransformations(data))
-
-        l = len(ndata)
+        l = len(data)

         ret = []

-        for k in np.arange(self.max_order, l+1):
-            sample = ndata[k - self.max_order : k ]
+        for k in np.arange(self.order, l+1):
+            sample = data[k - self.order : k]
             tmp = self.get_models_forecasts(sample)
-            interval = self.get_interval(method, tmp)
+            interval = self.get_interval(tmp)
+            if len(interval) == 1:
+                interval = interval[-1]
             ret.append(interval)

         return ret

     def forecastAheadInterval(self, data, steps, **kwargs):

-        method = kwargs.get('method', 'extremum')
+        if 'method' in kwargs:
+            self.interval_method = kwargs.get('method','quantile')

         ret = []

-        samples = [[k,k] for k in data[-self.max_order:]]
+        samples = [[k,k] for k in data[-self.order:]]

-        for k in np.arange(self.max_order, steps):
+        for k in np.arange(self.order, steps+self.order):
             forecasts = []

-            sample = samples[k - self.max_order : k]
+            sample = samples[k - self.order : k]

             lo_sample = [i[0] for i in sample]
             up_sample = [i[1] for i in sample]

             forecasts.extend(self.get_models_forecasts(lo_sample) )
             forecasts.extend(self.get_models_forecasts(up_sample))

-            interval = self.get_interval(method, forecasts)
+            interval = self.get_interval(forecasts)

             if len(interval) == 1:
                 interval = interval[0]
@@ -151,7 +152,8 @@ class EnsembleFTS(fts.FTS):
         return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)

     def forecastAheadDistribution(self, data, steps, **kwargs):
-        method = kwargs.get('method', 'extremum')
+        if 'method' in kwargs:
+            self.point_method = kwargs.get('method','mean')

         percentile_size = (self.original_max - self.original_min) / 100
@@ -163,12 +165,12 @@ class EnsembleFTS(fts.FTS):

         ret = []

-        samples = [[k] for k in data[-self.max_order:]]
+        samples = [[k] for k in data[-self.order:]]

-        for k in np.arange(self.max_order, steps + self.max_order):
+        for k in np.arange(self.order, steps + self.order):
             forecasts = []
             lags = {}
-            for i in np.arange(0, self.max_order): lags[i] = samples[k - self.max_order + i]
+            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]

             # Build the tree with all possible paths
@@ -189,5 +191,39 @@ class EnsembleFTS(fts.FTS):

             ret.append(tmp / sum(tmp))

-        return ret
+        grid = self.empty_grid(resolution)
+        df = pd.DataFrame(ret, columns=sorted(grid))
+        return df
+
+
+class AllMethodEnsembleFTS(EnsembleFTS):
+    def __init__(self, **kwargs):
+        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS", **kwargs)
+        self.min_order = 3
+
+    def set_transformations(self, model):
+        for t in self.transformations:
+            model.appendTransformation(t)
+
+    def train(self, data, sets, order=1, parameters=None):
+        self.original_max = max(data)
+        self.original_min = min(data)
+
+        fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
+                      sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
+
+        ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
+
+        for method in fo_methods:
+            model = method("")
+            self.set_transformations(model)
+            model.train(data, sets)
+            self.appendModel(model)
+
+        for method in ho_methods:
+            for o in np.arange(1, order+1):
+                model = method("")
+                if o >= model.min_order:
+                    self.set_transformations(model)
+                    model.train(data, sets, order=o)
+                    self.appendModel(model)
diff --git a/tests/ensemble.py b/tests/ensemble.py
index c6e6294..a397454 100644
--- a/tests/ensemble.py
+++ b/tests/ensemble.py
@@ -25,7 +25,7 @@
 passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
 passengers = np.array(passengers["Passengers"])

-e = ensemble.EnsembleFTS("")
+e = ensemble.AllMethodEnsembleFTS()

 fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
               sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
@@ -34,6 +34,12 @@ ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]

 fs = Grid.GridPartitioner(passengers, 10, transformation=diff)

+e.appendTransformation(diff)
+
+e.train(passengers, fs.sets, order=3)
+
+"""
+
 for method in fo_methods:
     model = method("")
     model.appendTransformation(diff)
@@ -73,7 +79,7 @@
 e.appendModel(arima201)

 e.train(passengers, None)

-"""
+
 _mean = e.forecast(passengers, method="mean")
 print(_mean)
@@ -92,21 +98,25 @@
 _normal = e.forecastInterval(passengers, method="normal", alpha=0.25)
 print(_normal)
 """

-"""
-_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
-print(_extremum)
+#"""
+#_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
+#print(_extremum)

-_quantile = e.forecastAheadInterval(passengers, 10, method="quantile", alpha=0.25)
-print(_quantile)
+#_quantile = e.forecastAheadInterval(passengers[:50], 40, method="quantile", alpha=0.25)
+#print(_quantile)

-_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
-print(_normal)
-"""
+#_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
+#print(_normal)
+#"""

-dist = e.forecastAheadDistribution(passengers, 20)
+#dist = e.forecastAheadDistribution(passengers, 20)

-print(dist)
+#print(dist)
+
+bchmk.plot_compared_intervals_ahead(passengers[:120],[e], ['blue','red'],
+                                    distributions=[True,False], save=True, file="pictures/distribution_ahead_arma",
+                                    time_from=60, time_to=10, tam=[12,5])
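
Usage note (not part of the patch): after this change the ensemble's point and interval strategies are configured on the object through the point_method and interval_method constructor kwargs instead of being passed to every call; a method= kwarg, when given, simply overwrites the stored strategy. Below is a minimal sketch of how the refactored API is expected to be driven, modeled on tests/ensemble.py above. The import paths and Transformations.Differential(1) are assumptions about the surrounding pyFTS code base; the class, method, and parameter names come from the diff itself.

import numpy as np
import pandas as pd

from pyFTS.partitioners import Grid        # assumed import path
from pyFTS.common import Transformations   # assumed import path
from pyFTS import ensemble                 # assumed; tests/ensemble.py refers to it simply as 'ensemble'

# Same series used in tests/ensemble.py
passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
passengers = np.array(passengers["Passengers"])

diff = Transformations.Differential(1)     # assumed differential transformation
fs = Grid.GridPartitioner(passengers, 10, transformation=diff)

# Strategies are constructor options now, not per-call 'method' kwargs
e = ensemble.AllMethodEnsembleFTS(point_method='median', interval_method='quantile')
e.appendTransformation(diff)
e.train(passengers, fs.sets, order=3)      # trains one model per first-order method and per order for high-order methods

point = e.forecast(passengers)                          # uses self.point_method
interval = e.forecastInterval(passengers, alpha=0.25)   # uses self.interval_method
density = e.forecastAheadDistribution(passengers, 20, resolution=100)  # returns a pandas DataFrame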