Ensemble FTS - first complete version
parent 67358fa000
commit aa2b5de18f
@@ -19,7 +19,6 @@ class QuantileRegression(fts.FTS):
         self.has_point_forecasting = True
         self.has_interval_forecasting = True
         self.has_probability_forecasting = True
-        self.has_probability_forecasting = True
         self.benchmark_only = True
         self.minOrder = 1
         self.alpha = kwargs.get("alpha", 0.05)
219 ensemble.py
@@ -9,11 +9,20 @@ from pyFTS.common import FLR, FuzzySet, SortedCollection
 from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
 from pyFTS.benchmarks import arima, quantreg
 from pyFTS.common import Transformations
+import scipy.stats as st
+from pyFTS import tree
+
+
+def sampler(data, quantiles):
+    ret = []
+    for qt in quantiles:
+        ret.append(np.nanpercentile(data, q=qt * 100))
+    return ret
+
+
 class EnsembleFTS(fts.FTS):
     def __init__(self, name, **kwargs):
-        super(EnsembleFTS, self).__init__("Ensemble FTS")
+        super(EnsembleFTS, self).__init__(1, "Ensemble FTS")
         self.shortname = "Ensemble FTS " + name
         self.name = "Ensemble FTS"
         self.flrgs = {}
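The new module-level sampler helper reduces a pool of member forecasts to a few empirical quantiles. A quick illustrative sketch with made-up numbers (assuming the commit's ensemble module is importable as pyFTS.ensemble):

    import numpy as np
    from pyFTS import ensemble

    forecasts = [10.0, 12.0, 11.0, 30.0, 13.0]   # hypothetical pooled forecasts
    # five empirical quantiles of the pool, via np.nanpercentile
    print(ensemble.sampler(forecasts, [0.05, 0.25, 0.5, 0.75, 0.95]))
    # -> [10.2, 11.0, 12.0, 13.0, 26.6] with numpy's default linear interpolation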
@@ -23,51 +32,54 @@ class EnsembleFTS(fts.FTS):
         self.is_high_order = True
         self.models = []
         self.parameters = []
+        self.alpha = kwargs.get("alpha", 0.05)
+        self.max_order = 1
+
-    def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None):
-        methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
-                   ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS,
-                   hofts.HighOrderFTS, arima.ARIMA, quantreg.QuantileRegression]
-
-        transformations = [None, Transformations.Differential(1)]
-
-        for count, method in enumerate(methods, start=0):
-            mfts = method("")
-            if mfts.benchmark_only:
-                if transformation is not None:
-                    mfts.appendTransformation(transformation)
-                mfts.train(data,None, order=1, parameters=None)
-                self.models.append(mfts)
-            else:
-                for partition in partitions:
-                    for partitioner in partitioners:
-                        data_train_fs = partitioner(data, partition, transformation=transformation)
-                        mfts = method("")
-                        mfts.partitioner = data_train_fs
-                        if not mfts.is_high_order:
-                            if transformation is not None:
-                                mfts.appendTransformation(transformation)
-                            mfts.train(data, data_train_fs.sets)
-                            self.models.append(mfts)
-                        else:
-                            for order in np.arange(1, max_order + 1):
-                                if order >= mfts.min_order:
-                                    mfts = method("")
-                                    mfts.partitioner = data_train_fs
-                                    if transformation is not None:
-                                        mfts.appendTransformation(transformation)
-                                    mfts.train(data, data_train_fs.sets, order=order)
-                                    self.models.append(mfts)
+    def appendModel(self, model):
+        self.models.append(model)
+        if model.order > self.max_order:
+            self.max_order = model.order
 
     def train(self, data, sets, order=1,parameters=None):
-        #if self.models is None:
-        pass
+        self.original_max = max(data)
+        self.original_min = min(data)
+
+    def get_models_forecasts(self,data):
+        tmp = []
+        for model in self.models:
+            sample = data[-model.order:]
+            forecast = model.forecast(sample)
+            if isinstance(forecast, (list,np.ndarray)):
+                forecast = int(forecast[-1])
+            tmp.append(forecast)
+        return tmp
+
+    def get_point(self,method, forecasts, **kwargs):
+        if method == 'mean':
+            ret = np.nanmean(forecasts)
+        elif method == 'median':
+            ret = np.nanpercentile(forecasts, 50)
+        elif method == 'quantile':
+            alpha = kwargs.get("alpha",0.05)
+            ret = np.percentile(forecasts, alpha*100)
+
+        return ret
+
+    def get_interval(self, method, forecasts):
+        ret = []
+        if method == 'extremum':
+            ret.append([min(forecasts), max(forecasts)])
+        elif method == 'quantile':
+            qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100)
+            qt_up = np.nanpercentile(forecasts, q=(1-self.alpha) * 100)
+            ret.append([qt_lo, qt_up])
+        elif method == 'normal':
+            mu = np.nanmean(forecasts)
+            sigma = np.sqrt(np.nanvar(forecasts))
+            ret.append(mu + st.norm.ppf(self.alpha) * sigma)
+            ret.append(mu + st.norm.ppf(1 - self.alpha) * sigma)
+
+        return ret
 
     def forecast(self, data, **kwargs):
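For the 'normal' interval method above, the bounds are Gaussian quantiles around the mean of the pooled member forecasts. A minimal standalone sketch with hypothetical numbers:

    import numpy as np
    import scipy.stats as st

    forecasts = [100., 102., 98., 101., 99.]        # hypothetical member forecasts
    mu = np.nanmean(forecasts)
    sigma = np.sqrt(np.nanvar(forecasts))
    alpha = 0.05
    lo = mu + st.norm.ppf(alpha) * sigma             # mu - 1.645 * sigma
    up = mu + st.norm.ppf(1 - alpha) * sigma         # mu + 1.645 * sigma
    print([lo, up])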
@@ -76,83 +88,106 @@ class EnsembleFTS(fts.FTS):
         ndata = np.array(self.doTransformations(data))
         l = len(ndata)
         ret = []
-        for k in np.arange(0, l+1):
-            tmp = []
-            for model in self.models:
-                if k >= model.minOrder - 1:
-                    sample = ndata[k - model.order : k]
-                    tmp.append( model.forecast(sample) )
-            if method == 'mean':
-                ret.append( np.nanmean(tmp))
-            elif method == 'median':
-                ret.append(np.percentile(tmp,50))
+        for k in np.arange(self.max_order, l+1):
+            sample = ndata[k - self.max_order : k ]
+            tmp = self.get_models_forecasts(sample)
+            point = self.get_point(method, tmp)
+            ret.append(point)
 
         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
         return ret
 
     def forecastInterval(self, data, **kwargs):
         method = kwargs.get('method', 'extremum')
+        if 'alpha' in kwargs:
+            self.alpha = kwargs.get('alpha',0.05)
 
         ndata = np.array(self.doTransformations(data))
         l = len(ndata)
         ret = []
-        for k in np.arange(0, l):
-            tmp = []
-            for model in self.models:
-                if k >= model.minOrder - 1:
-                    sample = ndata[k - model.order : k]
-                    tmp.append( model.forecast(sample) )
-            if method == 'extremum':
-                ret.append( [ min(tmp), max(tmp) ] )
-            elif method == 'quantile':
-                q = kwargs.get('q', [.05, .95])
-                ret.append(np.percentile(tmp,q=q*100))
+        for k in np.arange(self.max_order, l+1):
+            sample = ndata[k - self.max_order : k ]
+            tmp = self.get_models_forecasts(sample)
+            interval = self.get_interval(method, tmp)
+            ret.append(interval)
 
         return ret
 
-    def forecastAhead(self, data, steps, **kwargs):
-        pass
-
     def forecastAheadInterval(self, data, steps, **kwargs):
-        pass
-
-    def getGridClean(self, resolution):
-        grid = {}
-        if len(self.transformations) == 0:
-            _min = self.sets[0].lower
-            _max = self.sets[-1].upper
-        else:
-            _min = self.original_min
-            _max = self.original_max
-        for sbin in np.arange(_min,_max, resolution):
-            grid[sbin] = 0
-        return grid
-
-    def gridCount(self, grid, resolution, index, interval):
-        #print(point_to_interval)
-        for k in index.inside(interval[0],interval[1]):
-            #print(k)
-            grid[k] += 1
-        return grid
-
-    def gridCountPoint(self, grid, resolution, index, point):
-        k = index.find_ge(point)
-        # print(k)
-        grid[k] += 1
-        return grid
+        method = kwargs.get('method', 'extremum')
+
+        ret = []
+
+        samples = [[k,k] for k in data[-self.max_order:]]
+
+        for k in np.arange(self.max_order, steps):
+            forecasts = []
+            sample = samples[k - self.max_order : k]
+            lo_sample = [i[0] for i in sample]
+            up_sample = [i[1] for i in sample]
+            forecasts.extend(self.get_models_forecasts(lo_sample) )
+            forecasts.extend(self.get_models_forecasts(up_sample))
+            interval = self.get_interval(method, forecasts)
+
+            if len(interval) == 1:
+                interval = interval[0]
+
+            ret.append(interval)
+            samples.append(interval)
+
+        return ret
+
+    def empty_grid(self, resolution):
+        return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
 
     def forecastAheadDistribution(self, data, steps, **kwargs):
-        pass
+        method = kwargs.get('method', 'extremum')
+
+        percentile_size = (self.original_max - self.original_min) / 100
+
+        resolution = kwargs.get('resolution', percentile_size)
+
+        grid = self.empty_grid(resolution)
+
+        index = SortedCollection.SortedCollection(iterable=grid.keys())
+
+        ret = []
+
+        samples = [[k] for k in data[-self.max_order:]]
+
+        for k in np.arange(self.max_order, steps + self.max_order):
+            forecasts = []
+            lags = {}
+            for i in np.arange(0, self.max_order): lags[i] = samples[k - self.max_order + i]
+
+            # Build the tree with all possible paths
+            root = tree.FLRGTreeNode(None)
+            tree.buildTreeWithoutOrder(root, lags, 0)
+
+            for p in root.paths():
+                path = list(reversed(list(filter(None.__ne__, p))))
+                forecasts.extend(self.get_models_forecasts(path))
+
+            samples.append(sampler(forecasts, [0.05, 0.25, 0.5, 0.75, 0.95 ]))
+
+            grid = self.gridCountPoint(grid, resolution, index, forecasts)
+
+            tmp = np.array([grid[i] for i in sorted(grid)])
+
+            ret.append(tmp / sum(tmp))
+
+        return ret
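The rewritten point and interval loops share one pattern: slide a window of the ensemble's max_order over the transformed series, pool one forecast per member with get_models_forecasts, and reduce the pool with get_point or get_interval. A hedged usage sketch, assuming a trained EnsembleFTS instance e and no length-changing transformation:

    # hypothetical trained ensemble `e`; member setup as in tests/ensemble.py below
    points = e.forecast(passengers, method="median")
    intervals = e.forecastInterval(passengers, method="quantile", alpha=0.25)
    # one output per window: roughly len(passengers) - e.max_order + 1 entries,
    # where entry i aggregates member forecasts made from the i-th window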
9 fts.py
@@ -210,13 +210,15 @@ class FTS(object):
     def gridCount(self, grid, resolution, index, interval):
         #print(point_to_interval)
         for k in index.inside(interval[0],interval[1]):
-            #print(k)
             grid[k] += 1
         return grid
 
     def gridCountPoint(self, grid, resolution, index, point):
-        k = index.find_ge(point)
-        # print(k)
-        grid[k] += 1
+        if not isinstance(point, (list, np.ndarray)):
+            point = [point]
+
+        for p in point:
+            k = index.find_ge(p)
+            grid[k] += 1
         return grid
@@ -225,4 +227,3 @@ class FTS(object):
 
 
 
-
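The gridCountPoint change lets one call bin either a single forecast or a whole batch: a scalar is wrapped in a list, then every value is counted at the first grid key >= it via find_ge. A minimal standalone sketch, assuming SortedCollection behaves as used in the diff:

    import numpy as np
    from pyFTS.common import SortedCollection

    grid = {k: 0 for k in np.arange(0.0, 10.0, 1.0)}
    index = SortedCollection.SortedCollection(iterable=grid.keys())

    # scalar and batch updates now go through the same path
    for point in (3.2, [1.1, 7.9, 7.95]):
        if not isinstance(point, (list, np.ndarray)):
            point = [point]
        for p in point:
            grid[index.find_ge(p)] += 1   # count at the first grid key >= p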
112 tests/ensemble.py (new file)
@@ -0,0 +1,112 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import os
+import numpy as np
+import pandas as pd
+import matplotlib as plt
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+import pandas as pd
+from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
+from pyFTS.common import FLR,FuzzySet,Membership,Transformations
+from pyFTS import fts,hofts,ifts,pwfts,tree, chen, ensemble, song, yu, cheng, ismailefendi, sadaei, hwang
+from pyFTS.benchmarks import naive, arima
+from numpy import random
+from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import arima, quantreg, Measures
+
+os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
+
+diff = Transformations.Differential(1)
+
+passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
+passengers = np.array(passengers["Passengers"])
+
+
+e = ensemble.EnsembleFTS("")
+
+fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS,
+              ismailefendi.ImprovedWeightedFTS]
+
+ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
+
+fs = Grid.GridPartitioner(passengers, 10, transformation=diff)
+
+for method in fo_methods:
+    model = method("")
+    model.appendTransformation(diff)
+    model.train(passengers, fs.sets)
+    e.appendModel(model)
+
+
+for method in ho_methods:
+    for order in [1,2,3]:
+        model = method("")
+        model.appendTransformation(diff)
+        model.train(passengers, fs.sets, order=order)
+        e.appendModel(model)
+
+
+arima100 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima100.train(passengers, None, order=(1,0,0))
+
+arima101 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima101.train(passengers, None, order=(1,0,1))
+
+arima200 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima200.train(passengers, None, order=(2,0,0))
+
+arima201 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima201.train(passengers, None, order=(2,0,1))
+
+
+e.appendModel(arima100)
+e.appendModel(arima101)
+e.appendModel(arima200)
+e.appendModel(arima201)
+
+e.train(passengers, None)
+
+"""
+_mean = e.forecast(passengers, method="mean")
+print(_mean)
+
+_median = e.forecast(passengers, method="median")
+print(_median)
+"""
+"""
+_extremum = e.forecastInterval(passengers, method="extremum")
+print(_extremum)
+
+_quantile = e.forecastInterval(passengers, method="quantile", alpha=0.25)
+print(_quantile)
+
+
+_normal = e.forecastInterval(passengers, method="normal", alpha=0.25)
+print(_normal)
+"""
+
+"""
+_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
+print(_extremum)
+
+_quantile = e.forecastAheadInterval(passengers, 10, method="quantile", alpha=0.25)
+print(_quantile)
+
+
+_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
+print(_normal)
+"""
+
+dist = e.forecastAheadDistribution(passengers, 20)
+
+print(dist)
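Each entry of dist is a histogram over the value grid normalized by its own sum (see ret.append(tmp / sum(tmp)) in ensemble.py above), so a quick sanity check on the output of this test might look like:

    # every forecast-ahead distribution row should sum to ~1.0
    for step, row in enumerate(dist):
        assert abs(sum(row) - 1.0) < 1e-6, step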
@@ -69,10 +69,21 @@ from pyFTS.benchmarks import arima, quantreg, Measures
 #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
 #"""
-tmp = arima.ARIMA("", alpha=0.25)
+arima100 = arima.ARIMA("", alpha=0.25)
 #tmp.appendTransformation(diff)
-tmp.train(enrollments, None, order=(1,0,0))
-teste = tmp.forecastInterval(enrollments)
+arima100.train(passengers, None, order=(1,0,0))
+
+arima101 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima101.train(passengers, None, order=(1,0,1))
+
+arima200 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima200.train(passengers, None, order=(2,0,0))
+
+arima201 = arima.ARIMA("", alpha=0.25)
+#tmp.appendTransformation(diff)
+arima201.train(passengers, None, order=(2,0,1))
+
 
 #tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True)
12 tree.py
@@ -51,3 +51,15 @@ def flat(dados):
             yield k
     else:
         yield inst
+
+
+def buildTreeWithoutOrder(node, lags, level):
+
+    if level not in lags:
+        return
+
+    for s in lags[level]:
+        node.appendChild(FLRGTreeNode(s))
+
+    for child in node.getChildren():
+        buildTreeWithoutOrder(child, lags, level + 1)
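buildTreeWithoutOrder expands every combination of candidate values across the lags, so each root-to-leaf path is one possible lag configuration fed to get_models_forecasts. A small sketch of the same idea without the FLRGTreeNode API, using itertools as an equivalent stand-in (hypothetical values):

    from itertools import product

    # two lags, each with a few candidate values (hypothetical)
    lags = {0: [10, 11], 1: [12, 13, 14]}

    # each tree path corresponds to one element of the cartesian product
    for path in product(*(lags[level] for level in sorted(lags))):
        print(path)   # (10, 12), (10, 13), (10, 14), (11, 12), ...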