2017-04-13 19:36:22 +04:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf8 -*-
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import math
|
|
|
|
from operator import itemgetter
|
|
|
|
from pyFTS.common import FLR, FuzzySet, SortedCollection
|
2017-05-20 20:43:39 +04:00
|
|
|
from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu, sfts
|
2017-05-17 17:45:10 +04:00
|
|
|
from pyFTS.benchmarks import arima, quantreg
|
|
|
|
from pyFTS.common import Transformations
|
2017-05-17 23:58:51 +04:00
|
|
|
import scipy.stats as st
|
|
|
|
from pyFTS import tree
|
2017-07-02 02:42:45 +04:00
|
|
|
from pyFTS.models import msfts
|
2017-07-04 01:39:10 +04:00
|
|
|
from pyFTS.probabilistic import ProbabilityDistribution, kde
|
2017-05-17 23:58:51 +04:00
|
|
|
|
|
|
|
def sampler(data, quantiles):
    """Return the values of *data* located at the given quantiles.

    :param data: sequence of numeric values (NaN entries are ignored)
    :param quantiles: sequence of quantiles in the [0, 1] interval
    :return: list with one percentile value per requested quantile
    """
    return [np.nanpercentile(data, q=quantile * 100) for quantile in quantiles]
|
2017-04-13 19:36:22 +04:00
|
|
|
|
2017-04-15 03:48:53 +04:00
|
|
|
|
2017-04-13 19:36:22 +04:00
|
|
|
class EnsembleFTS(fts.FTS):
    """Ensemble FTS: combines the forecasts of several component FTS models
    into point, interval and probabilistic forecasts.

    kwargs accepted by the constructor:
      alpha: significance level for interval forecasts (default 0.05)
      point_method: 'mean', 'median' or 'quantile' (default 'mean')
      interval_method: 'extremum', 'quantile' or 'normal' (default 'quantile')
    """

    def __init__(self, name, **kwargs):
        super(EnsembleFTS, self).__init__(1, "Ensemble FTS")
        self.shortname = "Ensemble FTS " + name
        self.name = "Ensemble FTS"
        self.flrgs = {}
        self.has_point_forecasting = True
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.is_high_order = True
        # Component models of the ensemble (registered via appendModel)
        self.models = []
        self.parameters = []
        # Significance level used by the interval forecasting methods
        self.alpha = kwargs.get("alpha", 0.05)
        # Effective order: grows to the largest order among appended models
        self.order = 1
        self.point_method = kwargs.get('point_method', 'mean')
        self.interval_method = kwargs.get('interval_method', 'quantile')

    def appendModel(self, model):
        """Register a trained FTS model as an ensemble component.

        The ensemble order is raised to the largest model order seen so far,
        so forecast windows are long enough for every component.
        """
        self.models.append(model)
        if model.order > self.order:
            self.order = model.order

    def train(self, data, sets, order=1, parameters=None):
        """Record the data range; component models are trained externally
        and registered through appendModel()."""
        self.original_max = max(data)
        self.original_min = min(data)

    def get_models_forecasts(self, data):
        """Ask every component model for a one-step-ahead forecast of the
        tail of *data* and return the list of individual forecasts.

        Models that return an empty sequence contribute np.nan, so the
        NaN-aware aggregations below can skip them.
        """
        tmp = []
        for model in self.models:
            # Each model only sees as many lags as its own order requires.
            sample = data[-model.order:]
            forecast = model.forecast(sample)
            if isinstance(forecast, (list, np.ndarray)) and len(forecast) > 0:
                # FIX: keep the forecast value as-is; the previous
                # int(forecast[-1]) silently truncated the fractional part
                # of real-valued forecasts.
                forecast = forecast[-1]
            elif isinstance(forecast, (list, np.ndarray)) and len(forecast) == 0:
                forecast = np.nan
            tmp.append(forecast)
        return tmp

    def get_point(self, forecasts, **kwargs):
        """Aggregate the individual forecasts into a single point forecast
        according to self.point_method.

        :param forecasts: list of individual model forecasts (may contain NaN)
        :raises ValueError: for an unknown point_method (previously this
            crashed with UnboundLocalError on the unbound result variable)
        """
        if self.point_method == 'mean':
            ret = np.nanmean(forecasts)
        elif self.point_method == 'median':
            ret = np.nanpercentile(forecasts, 50)
        elif self.point_method == 'quantile':
            alpha = kwargs.get("alpha", 0.05)
            # FIX: NaN-aware percentile, consistent with the other branches
            # (individual forecasts may be np.nan).
            ret = np.nanpercentile(forecasts, alpha * 100)
        else:
            raise ValueError("Unknown point forecasting method: " + str(self.point_method))
        return ret

    def get_interval(self, forecasts):
        """Aggregate the individual forecasts into a prediction interval
        according to self.interval_method.

        Returns either [[lo, up]] ('extremum'/'quantile') or [lo, up]
        ('normal'); callers normalize both shapes to [lo, up].

        :raises ValueError: for an unknown interval_method (previously an
            empty list was returned silently)
        """
        ret = []
        if self.interval_method == 'extremum':
            ret.append([min(forecasts), max(forecasts)])
        elif self.interval_method == 'quantile':
            qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100)
            qt_up = np.nanpercentile(forecasts, q=(1 - self.alpha) * 100)
            ret.append([qt_lo, qt_up])
        elif self.interval_method == 'normal':
            # Gaussian approximation of the forecast distribution.
            mu = np.nanmean(forecasts)
            sigma = np.sqrt(np.nanvar(forecasts))
            ret.append(mu + st.norm.ppf(self.alpha) * sigma)
            ret.append(mu + st.norm.ppf(1 - self.alpha) * sigma)
        else:
            raise ValueError("Unknown interval forecasting method: " + str(self.interval_method))
        return ret

    def forecast(self, data, **kwargs):
        """Point forecast over a sliding window of self.order lags.

        kwargs: method -- overrides self.point_method for this call onwards.
        :return: list with one point forecast per window position
        """
        if "method" in kwargs:
            self.point_method = kwargs.get('method', 'mean')

        l = len(data)
        ret = []

        for k in np.arange(self.order, l + 1):
            sample = data[k - self.order: k]
            tmp = self.get_models_forecasts(sample)
            point = self.get_point(tmp)
            ret.append(point)

        return ret

    def forecastInterval(self, data, **kwargs):
        """Interval forecast over a sliding window of self.order lags.

        kwargs: method -- overrides self.interval_method;
                alpha  -- overrides self.alpha.
        :return: list of [lo, up] intervals, one per window position
        """
        if "method" in kwargs:
            self.interval_method = kwargs.get('method', 'quantile')

        if 'alpha' in kwargs:
            self.alpha = kwargs.get('alpha', 0.05)

        l = len(data)
        ret = []

        for k in np.arange(self.order, l + 1):
            sample = data[k - self.order: k]
            tmp = self.get_models_forecasts(sample)
            interval = self.get_interval(tmp)
            # Normalize the [[lo, up]] shape to [lo, up].
            if len(interval) == 1:
                interval = interval[-1]
            ret.append(interval)

        return ret

    def forecastAheadInterval(self, data, steps, **kwargs):
        """Multi-step-ahead interval forecast.

        Propagates uncertainty by sampling quantiles of each step's ensemble
        forecasts and expanding every lag combination with a tree of paths.

        :param steps: number of steps ahead to forecast
        :return: list of [lo, up] intervals, one per step
        """
        if 'method' in kwargs:
            self.interval_method = kwargs.get('method', 'quantile')

        if 'alpha' in kwargs:
            self.alpha = kwargs.get('alpha', self.alpha)

        ret = []

        # Seed the simulation with the last self.order observed values.
        samples = [[k] for k in data[-self.order:]]

        for k in np.arange(self.order, steps + self.order):
            forecasts = []
            lags = {}
            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]

            # Build the tree with all possible paths over the sampled lags.
            root = tree.FLRGTreeNode(None)

            tree.buildTreeWithoutOrder(root, lags, 0)

            for p in root.paths():
                path = list(reversed(list(filter(None.__ne__, p))))

                forecasts.extend(self.get_models_forecasts(path))

            # Keep a quantile summary of this step's forecasts as the next lag.
            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.2)))

            interval = self.get_interval(forecasts)

            # Normalize the [[lo, up]] shape to [lo, up].
            if len(interval) == 1:
                interval = interval[0]

            ret.append(interval)

        return ret

    def empty_grid(self, resolution):
        """Return an empty probability grid spanning twice the data maximum
        in both directions, with the given bin resolution."""
        return self.get_empty_grid(-(self.original_max * 2), self.original_max * 2, resolution)

    def forecastAheadDistribution(self, data, steps, **kwargs):
        """Multi-step-ahead probability distribution forecast.

        Counts the ensemble forecasts of each step on a value grid and
        normalizes the counts to probabilities.

        kwargs: method     -- overrides self.point_method;
                resolution -- grid bin width (default: 1% of the data range).
        :return: pandas DataFrame, one row per step, columns = grid bins
        """
        if 'method' in kwargs:
            self.point_method = kwargs.get('method', 'mean')

        percentile_size = (self.original_max - self.original_min) / 100

        resolution = kwargs.get('resolution', percentile_size)

        grid = self.empty_grid(resolution)

        index = SortedCollection.SortedCollection(iterable=grid.keys())

        ret = []

        # Seed the simulation with the last self.order observed values.
        samples = [[k] for k in data[-self.order:]]

        for k in np.arange(self.order, steps + self.order):
            forecasts = []
            lags = {}
            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]

            # Build the tree with all possible paths over the sampled lags.
            root = tree.FLRGTreeNode(None)

            tree.buildTreeWithoutOrder(root, lags, 0)

            for p in root.paths():
                path = list(reversed(list(filter(None.__ne__, p))))

                forecasts.extend(self.get_models_forecasts(path))

            # Finer quantile sampling here than in forecastAheadInterval.
            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))

            grid = self.gridCountPoint(grid, resolution, index, forecasts)

            tmp = np.array([grid[i] for i in sorted(grid)])

            ret.append(tmp / sum(tmp))

        grid = self.empty_grid(resolution)
        df = pd.DataFrame(ret, columns=sorted(grid))
        return df
|
|
|
|
|
|
|
|
|
|
|
|
class AllMethodEnsembleFTS(EnsembleFTS):
    """Ensemble that instantiates and trains every available FTS method
    (first order and high order) and registers each one as a component."""

    def __init__(self, name, **kwargs):
        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS", **kwargs)
        self.min_order = 3

    def set_transformations(self, model):
        """Copy every transformation of the ensemble onto *model*."""
        for transformation in self.transformations:
            model.appendTransformation(transformation)

    def train(self, data, sets, order=1, parameters=None):
        """Train one instance of each first-order method and, for each
        high-order method, candidate instances per order, appending all of
        them as ensemble components."""
        self.original_min = min(data)
        self.original_max = max(data)

        first_order_methods = [song.ConventionalFTS, chen.ConventionalFTS,
                               yu.WeightedFTS, cheng.TrendWeightedFTS,
                               sadaei.ExponentialyWeightedFTS,
                               ismailefendi.ImprovedWeightedFTS,
                               sfts.SeasonalFTS]

        high_order_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]

        for method_cls in first_order_methods:
            model = method_cls("")
            self.set_transformations(model)
            model.train(data, sets)
            self.appendModel(model)

        for method_cls in high_order_methods:
            for model_order in np.arange(1, order + 1):
                model = method_cls("")
                # NOTE(review): this accepts only orders <= model.min_order;
                # if min_order is the minimum supported order, the comparison
                # looks inverted -- confirm the intended semantics upstream.
                if model.min_order >= model_order:
                    self.set_transformations(model)
                    model.train(data, sets, order=model_order)
                    self.appendModel(model)
|
2017-05-20 20:43:39 +04:00
|
|
|
|
|
|
|
|
2017-07-02 02:42:45 +04:00
|
|
|
|
|
|
|
|