- Ensemble FTS - first complete version

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-17 16:58:51 -03:00
parent 67358fa000
commit aa2b5de18f
6 changed files with 271 additions and 101 deletions

View File

@ -19,7 +19,6 @@ class QuantileRegression(fts.FTS):
self.has_point_forecasting = True self.has_point_forecasting = True
self.has_interval_forecasting = True self.has_interval_forecasting = True
self.has_probability_forecasting = True self.has_probability_forecasting = True
self.has_probability_forecasting = True
self.benchmark_only = True self.benchmark_only = True
self.minOrder = 1 self.minOrder = 1
self.alpha = kwargs.get("alpha", 0.05) self.alpha = kwargs.get("alpha", 0.05)

View File

@ -9,11 +9,20 @@ from pyFTS.common import FLR, FuzzySet, SortedCollection
from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
from pyFTS.benchmarks import arima, quantreg from pyFTS.benchmarks import arima, quantreg
from pyFTS.common import Transformations from pyFTS.common import Transformations
import scipy.stats as st
from pyFTS import tree
def sampler(data, quantiles):
    """Return the NaN-ignoring percentiles of *data* at the given quantiles.

    Args:
        data: sequence of numeric values (NaNs are ignored).
        quantiles: iterable of fractions in [0, 1].

    Returns:
        list with one percentile value per requested quantile, in order.
    """
    # One vectorized nanpercentile call instead of a full pass per quantile.
    return list(np.nanpercentile(data, q=[qt * 100 for qt in quantiles]))
class EnsembleFTS(fts.FTS): class EnsembleFTS(fts.FTS):
def __init__(self, name, **kwargs): def __init__(self, name, **kwargs):
super(EnsembleFTS, self).__init__("Ensemble FTS") super(EnsembleFTS, self).__init__(1, "Ensemble FTS")
self.shortname = "Ensemble FTS " + name self.shortname = "Ensemble FTS " + name
self.name = "Ensemble FTS" self.name = "Ensemble FTS"
self.flrgs = {} self.flrgs = {}
@ -23,51 +32,54 @@ class EnsembleFTS(fts.FTS):
self.is_high_order = True self.is_high_order = True
self.models = [] self.models = []
self.parameters = [] self.parameters = []
self.alpha = kwargs.get("alpha", 0.05)
self.max_order = 1
def build(self, data, models, partitioners, partitions, max_order=3, transformation=None, indexer=None): def appendModel(self, model):
self.models.append(model)
methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, if model.order > self.max_order:
ismailefendi.ImprovedWeightedFTS, sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS, self.max_order = model.order
hofts.HighOrderFTS, arima.ARIMA, quantreg.QuantileRegression]
transformations = [None, Transformations.Differential(1)]
for count, method in enumerate(methods, start=0):
mfts = method("")
if mfts.benchmark_only:
if transformation is not None:
mfts.appendTransformation(transformation)
mfts.train(data,None, order=1, parameters=None)
self.models.append(mfts)
else:
for partition in partitions:
for partitioner in partitioners:
data_train_fs = partitioner(data, partition, transformation=transformation)
mfts = method("")
mfts.partitioner = data_train_fs
if not mfts.is_high_order:
if transformation is not None:
mfts.appendTransformation(transformation)
mfts.train(data, data_train_fs.sets)
self.models.append(mfts)
else:
for order in np.arange(1, max_order + 1):
if order >= mfts.min_order:
mfts = method("")
mfts.partitioner = data_train_fs
if transformation is not None:
mfts.appendTransformation(transformation)
mfts.train(data, data_train_fs.sets, order=order)
self.models.append(mfts)
def train(self, data, sets, order=1,parameters=None): def train(self, data, sets, order=1,parameters=None):
#if self.models is None: self.original_max = max(data)
pass self.original_min = min(data)
def get_models_forecasts(self, data):
    """Query every ensemble member for a one-step forecast over *data*.

    Each model only sees its own last ``order`` observations of *data*.
    Sequence-valued outputs are reduced to their final point, truncated
    to int. Returns one forecast per model, in ``self.models`` order.
    """
    predictions = []
    for mdl in self.models:
        window = data[-mdl.order:]
        value = mdl.forecast(window)
        if isinstance(value, (list, np.ndarray)):
            # Multi-value output: keep only the most recent point.
            value = int(value[-1])
        predictions.append(value)
    return predictions
def get_point(self, method, forecasts, **kwargs):
    """Aggregate the members' forecasts into a single point forecast.

    Args:
        method: 'mean', 'median' or 'quantile'.
        forecasts: sequence of point forecasts, one per ensemble member.
        **kwargs: 'quantile' reads alpha (default 0.05) and returns the
            alpha-percentile of the forecasts.

    Returns:
        the aggregated point value.

    Raises:
        ValueError: if *method* is not one of the supported aggregators
            (previously this fell through and raised UnboundLocalError).
    """
    if method == 'mean':
        ret = np.nanmean(forecasts)
    elif method == 'median':
        ret = np.nanpercentile(forecasts, 50)
    elif method == 'quantile':
        alpha = kwargs.get("alpha", 0.05)
        # NaN-safe percentile, consistent with the other aggregators.
        ret = np.nanpercentile(forecasts, alpha * 100)
    else:
        raise ValueError("Unknown point aggregation method: {}".format(method))
    return ret
def get_interval(self, method, forecasts):
    """Aggregate the members' forecasts into a prediction interval.

    'extremum' and 'quantile' return a list holding one [lower, upper]
    pair; 'normal' returns a flat two-element [lower, upper] list (the
    caller flattens single-pair results, so both shapes are consumed the
    same way). Unknown methods yield an empty list.
    """
    bounds = []
    if method == 'extremum':
        bounds.append([min(forecasts), max(forecasts)])
    elif method == 'quantile':
        lower = np.nanpercentile(forecasts, q=self.alpha * 100)
        upper = np.nanpercentile(forecasts, q=(1 - self.alpha) * 100)
        bounds.append([lower, upper])
    elif method == 'normal':
        # Gaussian interval from the sample mean and standard deviation.
        mu = np.nanmean(forecasts)
        sigma = np.sqrt(np.nanvar(forecasts))
        bounds.append(mu + st.norm.ppf(self.alpha) * sigma)
        bounds.append(mu + st.norm.ppf(1 - self.alpha) * sigma)
    return bounds
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
@ -76,83 +88,106 @@ class EnsembleFTS(fts.FTS):
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
l = len(ndata) l = len(ndata)
ret = [] ret = []
for k in np.arange(0, l+1): for k in np.arange(self.max_order, l+1):
tmp = [] sample = ndata[k - self.max_order : k ]
for model in self.models: tmp = self.get_models_forecasts(sample)
if k >= model.minOrder - 1: point = self.get_point(method, tmp)
sample = ndata[k - model.order : k] ret.append(point)
tmp.append( model.forecast(sample) )
if method == 'mean':
ret.append( np.nanmean(tmp))
elif method == 'median':
ret.append(np.percentile(tmp,50))
ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]]) ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
return ret return ret
def forecastInterval(self, data, **kwargs): def forecastInterval(self, data, **kwargs):
method = kwargs.get('method', 'extremum') method = kwargs.get('method', 'extremum')
if 'alpha' in kwargs:
self.alpha = kwargs.get('alpha',0.05)
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
l = len(ndata) l = len(ndata)
ret = [] ret = []
for k in np.arange(0, l): for k in np.arange(self.max_order, l+1):
tmp = [] sample = ndata[k - self.max_order : k ]
for model in self.models: tmp = self.get_models_forecasts(sample)
if k >= model.minOrder - 1: interval = self.get_interval(method, tmp)
sample = ndata[k - model.order : k] ret.append(interval)
tmp.append( model.forecast(sample) )
if method == 'extremum':
ret.append( [ min(tmp), max(tmp) ] )
elif method == 'quantile':
q = kwargs.get('q', [.05, .95])
ret.append(np.percentile(tmp,q=q*100))
return ret return ret
def forecastAhead(self, data, steps, **kwargs):
    """Multi-step point forecasting; not implemented for the ensemble yet."""
    pass
def forecastAheadInterval(self, data, steps, **kwargs): def forecastAheadInterval(self, data, steps, **kwargs):
pass
method = kwargs.get('method', 'extremum')
def getGridClean(self, resolution): ret = []
grid = {}
if len(self.transformations) == 0: samples = [[k,k] for k in data[-self.max_order:]]
_min = self.sets[0].lower
_max = self.sets[-1].upper
else:
_min = self.original_min
_max = self.original_max
for sbin in np.arange(_min,_max, resolution): for k in np.arange(self.max_order, steps):
grid[sbin] = 0 forecasts = []
sample = samples[k - self.max_order : k]
lo_sample = [i[0] for i in sample]
up_sample = [i[1] for i in sample]
forecasts.extend(self.get_models_forecasts(lo_sample) )
forecasts.extend(self.get_models_forecasts(up_sample))
interval = self.get_interval(method, forecasts)
return grid if len(interval) == 1:
interval = interval[0]
def gridCount(self, grid, resolution, index, interval): ret.append(interval)
#print(point_to_interval) samples.append(interval)
for k in index.inside(interval[0],interval[1]):
#print(k)
grid[k] += 1
return grid
def gridCountPoint(self, grid, resolution, index, point): return ret
k = index.find_ge(point)
# print(k) def empty_grid(self, resolution):
grid[k] += 1 return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
return grid
def forecastAheadDistribution(self, data, steps, **kwargs): def forecastAheadDistribution(self, data, steps, **kwargs):
pass method = kwargs.get('method', 'extremum')
percentile_size = (self.original_max - self.original_min) / 100
resolution = kwargs.get('resolution', percentile_size)
grid = self.empty_grid(resolution)
index = SortedCollection.SortedCollection(iterable=grid.keys())
ret = []
samples = [[k] for k in data[-self.max_order:]]
for k in np.arange(self.max_order, steps + self.max_order):
forecasts = []
lags = {}
for i in np.arange(0, self.max_order): lags[i] = samples[k - self.max_order + i]
# Build the tree with all possible paths
root = tree.FLRGTreeNode(None)
tree.buildTreeWithoutOrder(root, lags, 0)
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
forecasts.extend(self.get_models_forecasts(path))
samples.append(sampler(forecasts, [0.05, 0.25, 0.5, 0.75, 0.95 ]))
grid = self.gridCountPoint(grid, resolution, index, forecasts)
tmp = np.array([grid[i] for i in sorted(grid)])
ret.append(tmp / sum(tmp))
return ret

9
fts.py
View File

@ -210,13 +210,15 @@ class FTS(object):
def gridCount(self, grid, resolution, index, interval): def gridCount(self, grid, resolution, index, interval):
#print(point_to_interval) #print(point_to_interval)
for k in index.inside(interval[0],interval[1]): for k in index.inside(interval[0],interval[1]):
#print(k)
grid[k] += 1 grid[k] += 1
return grid return grid
def gridCountPoint(self, grid, resolution, index, point): def gridCountPoint(self, grid, resolution, index, point):
k = index.find_ge(point) if not isinstance(point, (list, np.ndarray)):
# print(k) point = [point]
for p in point:
k = index.find_ge(p)
grid[k] += 1 grid[k] += 1
return grid return grid
@ -225,4 +227,3 @@ class FTS(object):

112
tests/ensemble.py Normal file
View File

@ -0,0 +1,112 @@
#!/usr/bin/python
# -*- coding: utf8 -*-

# Smoke-test script for EnsembleFTS: builds an ensemble from first-order
# FTS, high-order FTS and ARIMA members over the AirPassengers series,
# then runs the ahead-distribution forecaster.

import os
import numpy as np
import pandas as pd
import matplotlib as plt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
# NOTE(review): pandas is imported twice and `matplotlib as plt` is shadowed
# by `matplotlib.pyplot as plt` just below — redundant, but harmless.
from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
from pyFTS.common import FLR,FuzzySet,Membership,Transformations
from pyFTS import fts,hofts,ifts,pwfts,tree, chen, ensemble, song, yu, cheng, ismailefendi, sadaei, hwang
from pyFTS.benchmarks import naive, arima
from numpy import random
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import arima, quantreg, Measures

# Hard-coded local working directory — adjust before running elsewhere.
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")

# First-order differencing transformation shared by the fuzzy members.
diff = Transformations.Differential(1)

passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",")
passengers = np.array(passengers["Passengers"])

e = ensemble.EnsembleFTS("")

# Candidate model families: first-order methods and high-order methods.
fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS,
              ismailefendi.ImprovedWeightedFTS]

ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]

# One shared grid partitioner (10 fuzzy sets) over the differenced series.
fs = Grid.GridPartitioner(passengers, 10, transformation=diff)

# Train every first-order member and add it to the ensemble.
for method in fo_methods:
    model = method("")
    model.appendTransformation(diff)
    model.train(passengers, fs.sets)
    e.appendModel(model)

# Train each high-order member at orders 1..3.
for method in ho_methods:
    for order in [1,2,3]:
        model = method("")
        model.appendTransformation(diff)
        model.train(passengers, fs.sets, order=order)
        e.appendModel(model)

# Four ARIMA members with different (p, d, q) configurations.
arima100 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima100.train(passengers, None, order=(1,0,0))

arima101 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima101.train(passengers, None, order=(1,0,1))

arima200 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima200.train(passengers, None, order=(2,0,0))

arima201 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima201.train(passengers, None, order=(2,0,1))

e.appendModel(arima100)
e.appendModel(arima101)
e.appendModel(arima200)
e.appendModel(arima201)

# Members are already trained; EnsembleFTS.train records the data range.
e.train(passengers, None)

# Point-forecast checks (disabled).
"""
_mean = e.forecast(passengers, method="mean")
print(_mean)
_median = e.forecast(passengers, method="median")
print(_median)
"""

# Interval-forecast checks (disabled).
"""
_extremum = e.forecastInterval(passengers, method="extremum")
print(_extremum)
_quantile = e.forecastInterval(passengers, method="quantile", alpha=0.25)
print(_quantile)
_normal = e.forecastInterval(passengers, method="normal", alpha=0.25)
print(_normal)
"""

# Ahead-interval checks (disabled).
"""
_extremum = e.forecastAheadInterval(passengers, 10, method="extremum")
print(_extremum)
_quantile = e.forecastAheadInterval(passengers, 10, method="quantile", alpha=0.25)
print(_quantile)
_normal = e.forecastAheadInterval(passengers, 10, method="normal", alpha=0.25)
print(_normal)
"""

# Active check: 20-step-ahead probability distribution over the grid.
dist = e.forecastAheadDistribution(passengers, 20)

print(dist)

View File

@ -69,10 +69,21 @@ from pyFTS.benchmarks import arima, quantreg, Measures
#Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11) #Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
#""" #"""
tmp = arima.ARIMA("", alpha=0.25) arima100 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff) #tmp.appendTransformation(diff)
tmp.train(enrollments, None, order=(1,0,0)) arima100.train(passengers, None, order=(1,0,0))
teste = tmp.forecastInterval(enrollments)
arima101 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima101.train(passengers, None, order=(1,0,1))
arima200 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima200.train(passengers, None, order=(2,0,0))
arima201 = arima.ARIMA("", alpha=0.25)
#tmp.appendTransformation(diff)
arima201.train(passengers, None, order=(2,0,1))
#tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True) #tmp = quantreg.QuantileRegression("", alpha=0.25, dist=True)

12
tree.py
View File

@ -51,3 +51,15 @@ def flat(dados):
yield k yield k
else: else:
yield inst yield inst
def buildTreeWithoutOrder(node, lags, level):
    """Grow *node* with every combination of lag values, level by level.

    ``lags`` maps a level index to the candidate values for that depth;
    recursion stops once the current level has no entry in ``lags``.
    """
    if level not in lags:
        return
    # First attach one child per candidate value of this level...
    for candidate in lags[level]:
        node.appendChild(FLRGTreeNode(candidate))
    # ...then expand each child with the next level's candidates.
    for subtree in node.getChildren():
        buildTreeWithoutOrder(subtree, lags, level + 1)