- Refactoring: moving Ensemble methods to subpackage ensemble

- Parallel Seasonal Ensemble
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-07-03 18:39:10 -03:00
parent 9bfd931e45
commit 7e98b34b16
7 changed files with 118 additions and 64 deletions

0
ensemble/__init__.py Normal file
View File

View File

@ -12,6 +12,7 @@ from pyFTS.common import Transformations
import scipy.stats as st import scipy.stats as st
from pyFTS import tree from pyFTS import tree
from pyFTS.models import msfts from pyFTS.models import msfts
from pyFTS.probabilistic import ProbabilityDistribution, kde
def sampler(data, quantiles): def sampler(data, quantiles):
ret = [] ret = []
@ -242,28 +243,5 @@ class AllMethodEnsembleFTS(EnsembleFTS):
self.appendModel(model) self.appendModel(model)
class SeasonalEnsembleFTS(EnsembleFTS):
def __init__(self, name, **kwargs):
super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
self.min_order = 1
self.indexers = []
self.partitioners = []
self.is_multivariate = True
self.has_seasonality = True
self.has_probability_forecasting = True
def train(self, data, sets, order=1, parameters=None):
self.original_max = max(data)
self.original_min = min(data)
for ix in self.indexers:
for pt in self.partitioners:
model = msfts.MultiSeasonalFTS()
model.indexer = ix
model.appendTransformation(pt.transformation)
model.train(data,pt.sets,order=1)
self.appendModel(model)

77
ensemble/multiseasonal.py Normal file
View File

@ -0,0 +1,77 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
import math
from operator import itemgetter
from pyFTS.common import FLR, FuzzySet, SortedCollection
from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu, sfts
from pyFTS.benchmarks import arima, quantreg
from pyFTS.common import Transformations, Util as cUtil
import scipy.stats as st
from pyFTS.ensemble import ensemble
from pyFTS.models import msfts
from pyFTS.probabilistic import ProbabilityDistribution, kde
from copy import deepcopy
from joblib import Parallel, delayed
import multiprocessing
def train_individual_model(partitioner, train_data, indexer):
    """Train, persist and return one MultiSeasonalFTS model.

    The model key is built from the last component of the partitioner's
    module name, its ``partitions`` attribute and the indexer's ``name``.
    The trained model is written to ``models/<key>.pkl`` through
    ``cUtil.persist_obj`` and the key is printed once training is done.

    :param partitioner: object exposing ``partitions``, ``transformation``
        and ``sets``
    :param train_data: training data handed to ``model.train``
    :param indexer: seasonal indexer; its ``name`` becomes part of the key
    :return: the trained ``msfts.MultiSeasonalFTS`` instance
    """
    # Last dotted component of the module that defines the partitioner.
    partitioner_kind = str(partitioner.__module__).rpartition('.')[2]
    model_key = "".join(
        ["msfts_", partitioner_kind, str(partitioner.partitions), "_", indexer.name]
    )

    trained = msfts.MultiSeasonalFTS(model_key, indexer=indexer)
    trained.appendTransformation(partitioner.transformation)
    trained.train(train_data, partitioner.sets, order=1)

    target_path = "models/" + model_key + ".pkl"
    cUtil.persist_obj(trained, target_path)

    print(model_key)

    return trained
class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
    """Ensemble of MultiSeasonalFTS models trained in parallel.

    One model is trained for every (indexer, partitioner) combination taken
    from ``self.indexers`` and ``self.partitioners``; both lists must be
    filled by the caller before ``train`` is invoked.
    """

    def __init__(self, name, **kwargs):
        """Create an empty seasonal ensemble.

        :param name: accepted for interface compatibility; the ensemble is
            always registered under the fixed name "Seasonal Ensemble FTS"
        :param kwargs: forwarded unchanged to ``EnsembleFTS.__init__``
        """
        super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
        self.min_order = 1
        # Seasonal indexers and partitioners to combine; set by the caller.
        self.indexers = []
        self.partitioners = []
        self.is_multivariate = True
        self.has_seasonality = True
        self.has_probability_forecasting = True

    def train(self, data, sets, order=1, parameters=None):
        """Train one model per (indexer, partitioner) pair using joblib.

        :param data: training data; a deep copy is handed to every job
        :param sets: unused here, each partitioner supplies its own sets
        :param order: unused here, the individual models are trained with order 1
        :param parameters: unused
        """
        self.original_max = max(data)
        self.original_min = min(data)

        num_cores = multiprocessing.cpu_count()

        # Enumerate every combination explicitly. The previous implementation
        # stored them in a dict under a counter that was never incremented,
        # so each pair overwrote the one before and only the last was trained.
        jobs = [(ix, pt) for ix in self.indexers for pt in self.partitioners]

        # Each job gets its own deep copies of the partitioner, data and indexer.
        results = Parallel(n_jobs=num_cores)(
            delayed(train_individual_model)(deepcopy(pt), deepcopy(data), deepcopy(ix))
            for ix, pt in jobs
        )

        for tmp in results:
            self.appendModel(tmp)

    def forecastDistribution(self, data, **kwargs):
        """Return one KDE ProbabilityDistribution per element of ``data``.

        :param data: iterable of inputs, each passed to ``get_models_forecasts``
        :param kwargs: ``h`` is forwarded to the distribution as keyword
            argument ``h`` (default 10)
        :return: list of ProbabilityDistribution objects, one per input
        """
        ret = []

        h = kwargs.get("h", 10)

        for k in data:
            # NOTE(review): the individual forecasts are computed but never
            # added to the distribution - confirm the intended API for feeding
            # them into `dist` and wire it in.
            tmp = self.get_models_forecasts(k)

            # ProbabilityDistribution.__init__ accepts only `type` positionally;
            # `h` must be a keyword argument or the call raises TypeError.
            dist = ProbabilityDistribution.ProbabilityDistribution("KDE", h=h)

            ret.append(dist)

        return ret

View File

@ -7,7 +7,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
""" """
Multi-Seasonal Fuzzy Time Series Multi-Seasonal Fuzzy Time Series
""" """
def __init__(self, order, name, indexer, **kwargs): def __init__(self, name, indexer, **kwargs):
super(MultiSeasonalFTS, self).__init__("MSFTS") super(MultiSeasonalFTS, self).__init__("MSFTS")
self.name = "Multi Seasonal FTS" self.name = "Multi Seasonal FTS"
self.shortname = "MSFTS " + name self.shortname = "MSFTS " + name

View File

@ -6,8 +6,9 @@ class SeasonalIndexer(object):
""" """
Seasonal Indexer. Responsible to find the seasonal index of a data point inside its data set Seasonal Indexer. Responsible to find the seasonal index of a data point inside its data set
""" """
def __init__(self,num_seasons): def __init__(self,num_seasons, **kwargs):
self.num_seasons = num_seasons self.num_seasons = num_seasons
self.name = kwargs.get("name","")
def get_season_of_data(self,data): def get_season_of_data(self,data):
pass pass
@ -26,8 +27,8 @@ class SeasonalIndexer(object):
class LinearSeasonalIndexer(SeasonalIndexer): class LinearSeasonalIndexer(SeasonalIndexer):
def __init__(self,seasons,units,ignore=None): def __init__(self,seasons,units,ignore=None,**kwargs):
super(LinearSeasonalIndexer, self).__init__(len(seasons)) super(LinearSeasonalIndexer, self).__init__(len(seasons),kwargs)
self.seasons = seasons self.seasons = seasons
self.units = units self.units = units
self.ignore = ignore self.ignore = ignore
@ -76,8 +77,8 @@ class LinearSeasonalIndexer(SeasonalIndexer):
class DataFrameSeasonalIndexer(SeasonalIndexer): class DataFrameSeasonalIndexer(SeasonalIndexer):
def __init__(self,index_fields,index_seasons, data_fields): def __init__(self,index_fields,index_seasons, data_fields,**kwargs):
super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons)) super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons),kwargs)
self.fields = index_fields self.fields = index_fields
self.seasons = index_seasons self.seasons = index_seasons
self.data_fields = data_fields self.data_fields = data_fields
@ -131,8 +132,8 @@ class DateTime(Enum):
class DateTimeSeasonalIndexer(SeasonalIndexer): class DateTimeSeasonalIndexer(SeasonalIndexer):
def __init__(self,date_field, index_fields, index_seasons, data_fields): def __init__(self,date_field, index_fields, index_seasons, data_fields,**kwargs):
super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons)) super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), kwargs)
self.fields = index_fields self.fields = index_fields
self.seasons = index_seasons self.seasons = index_seasons
self.data_fields = data_fields self.data_fields = data_fields

View File

@ -2,6 +2,7 @@ import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet,SortedCollection from pyFTS.common import FuzzySet,SortedCollection
from pyFTS.probabilistic import kde
class ProbabilityDistribution(object): class ProbabilityDistribution(object):
@ -11,14 +12,15 @@ class ProbabilityDistribution(object):
If type is KDE the PDF is continuous If type is KDE the PDF is continuous
""" """
def __init__(self,type, **kwargs): def __init__(self,type, **kwargs):
self.uod = kwargs.get("uod", None)
if type is None: if type is None:
self.type = "KDE" self.type = "KDE"
self.kde = kde.KernelSmoothing(kwargs.get("h", 1), kwargs.get("method", "epanechnikov"))
else: else:
self.type = type self.type = type
self.description = kwargs.get("description", None) self.description = kwargs.get("description", None)
self.uod = kwargs.get("uod", None)
if self.type == "histogram": if self.type == "histogram":
self.nbins = kwargs.get("num_bins", None) self.nbins = kwargs.get("num_bins", None)
self.bins = kwargs.get("bins", None) self.bins = kwargs.get("bins", None)
@ -45,14 +47,15 @@ class ProbabilityDistribution(object):
self.data.extend(values) self.data.extend(values)
def density(self, values): def density(self, values):
if self.type == "histogram": ret = []
ret = [] for k in values:
for k in values: if self.type == "histogram":
v = self.index.find_ge(k) v = self.index.find_ge(k)
ret.append(self.distribution[v] / self.count) ret.append(self.distribution[v] / self.count)
return ret else:
else: v = self.kde.probability(k, self.data)
pass ret.append(v)
return ret
def cummulative(self, values): def cummulative(self, values):

View File

@ -76,38 +76,33 @@ sonda_teste = sonda[1051201:]
from pyFTS.models.seasonal import SeasonalIndexer from pyFTS.models.seasonal import SeasonalIndexer
ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg') indexers = []
cUtil.persist_obj(ix_m15, "models/sonda_ix_m15.pkl") for i in ["models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", "models/sonda_ix_Mhm15.pkl"]:
obj = cUtil.load_obj(i)
indexers.append( obj )
print(obj)
partitioners = []
transformations = ["", "_diff"]
for max_part in [10, 20, 30, 40, 50, 60]:
for t in transformations:
obj = cUtil.load_obj("models/sonda_fs_grid_" + str(max_part) + t + ".pkl")
partitioners.append( obj )
print(obj)
ix_Mh = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,SeasonalIndexer.DateTime.hour], from pyFTS import ensemble
[None, None],'glo_avg')
cUtil.persist_obj(ix_Mh, "models/sonda_ix_Mh.pkl") fts = ensemble.SeasonalEnsembleFTS("")
ix_Mhm15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month, fts.indexers = indexers
SeasonalIndexer.DateTime.hour, SeasonalIndexer.DateTime.minute], fts.partitioners = partitioners
[None, None,15],'glo_avg')
cUtil.persist_obj(ix_Mhm15, "models/sonda_ix_Mhm15.pkl") fts.train(sonda_treino, sets=None)
tmp = ix_Mh.get_data(sonda_treino)
for max_part in [10, 20, 30, 40, 50]:
fs1 = Grid.GridPartitionerTrimf(tmp,max_part)
cUtil.persist_obj(fs1,"models/sonda_fs_grid_" + str(max_part) + ".pkl")
fs2 = FCM.FCMPartitionerTrimf(tmp, max_part)
cUtil.persist_obj(fs2, "models/sonda_fs_fcm_" + str(max_part) + ".pkl")
fs3 = Entropy.EntropyPartitionerTrimf(tmp, max_part)
cUtil.persist_obj(fs3, "models/sonda_fs_entropy_" + str(max_part) + ".pkl")
cUtil.persist_obj(fts, "models/msfts_ensemble_sonda_grid.pkl")
from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
#from pyFTS.benchmarks import distributed_benchmarks as bchmk #from pyFTS.benchmarks import distributed_benchmarks as bchmk