- Refactoring: moving Ensemble methods to subpackage ensemble

- Parallel Seasonal Ensemble
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-07-03 18:39:10 -03:00
parent 9bfd931e45
commit 7e98b34b16
7 changed files with 118 additions and 64 deletions

0
ensemble/__init__.py Normal file
View File

View File

@ -12,6 +12,7 @@ from pyFTS.common import Transformations
import scipy.stats as st
from pyFTS import tree
from pyFTS.models import msfts
from pyFTS.probabilistic import ProbabilityDistribution, kde
def sampler(data, quantiles):
ret = []
@ -242,28 +243,5 @@ class AllMethodEnsembleFTS(EnsembleFTS):
self.appendModel(model)
class SeasonalEnsembleFTS(EnsembleFTS):
    """
    Ensemble that trains one MultiSeasonalFTS model for every
    (indexer, partitioner) combination, sequentially.
    """
    def __init__(self, name, **kwargs):
        """
        :param name: accepted for interface compatibility; the parent is always
                     initialised with the fixed name "Seasonal Ensemble FTS"
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
        # Capability flags
        self.has_probability_forecasting = True
        self.has_seasonality = True
        self.is_multivariate = True
        self.min_order = 1
        # Populated by the caller before train() is invoked
        self.partitioners = []
        self.indexers = []

    def train(self, data, sets, order=1, parameters=None):
        """
        Train one member model per (indexer, partitioner) pair and append it
        to the ensemble.

        :param data: training data; its max and min are stored on the instance
        :param sets: not used here, each partitioner supplies its own sets
        :param order: not used here, member models are trained with order=1
        :param parameters: not used here
        """
        self.original_max = max(data)
        self.original_min = min(data)

        for indexer in self.indexers:
            for partitioner in self.partitioners:
                member = msfts.MultiSeasonalFTS()
                member.indexer = indexer
                member.appendTransformation(partitioner.transformation)
                member.train(data, partitioner.sets, order=1)
                self.appendModel(member)

77
ensemble/multiseasonal.py Normal file
View File

@ -0,0 +1,77 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
import math
from operator import itemgetter
from pyFTS.common import FLR, FuzzySet, SortedCollection
from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu, sfts
from pyFTS.benchmarks import arima, quantreg
from pyFTS.common import Transformations, Util as cUtil
import scipy.stats as st
from pyFTS.ensemble import ensemble
from pyFTS.models import msfts
from pyFTS.probabilistic import ProbabilityDistribution, kde
from copy import deepcopy
from joblib import Parallel, delayed
import multiprocessing
def train_individual_model(partitioner, train_data, indexer):
    """
    Train, persist and return a single MultiSeasonalFTS model.

    The model key is built from the last component of the partitioner's
    module name, the partitioner's `partitions` value and the indexer's name.
    The trained model is persisted to "models/<key>.pkl" and the key is printed.

    :param partitioner: object providing `partitions`, `transformation` and `sets`
    :param train_data: data passed to the model's train()
    :param indexer: seasonal indexer providing `name`
    :return: the trained model
    """
    partitioner_kind = str(partitioner.__module__).split('.')[-1]
    model_key = "msfts_" + partitioner_kind + str(partitioner.partitions) + "_" + indexer.name

    fitted = msfts.MultiSeasonalFTS(model_key, indexer=indexer)
    fitted.appendTransformation(partitioner.transformation)
    fitted.train(train_data, partitioner.sets, order=1)

    target_path = "models/" + model_key + ".pkl"
    cUtil.persist_obj(fitted, target_path)

    print(model_key)

    return fitted
class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
    """
    Ensemble of MultiSeasonalFTS models, one per (indexer, partitioner)
    combination, trained in parallel with joblib.
    """
    def __init__(self, name, **kwargs):
        """
        :param name: accepted for interface compatibility; the parent is always
                     initialised with the fixed name "Seasonal Ensemble FTS"
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
        self.min_order = 1
        # Both lists are expected to be filled in by the caller before train()
        self.indexers = []
        self.partitioners = []
        self.is_multivariate = True
        self.has_seasonality = True
        self.has_probability_forecasting = True

    def train(self, data, sets, order=1, parameters=None):
        """
        Train one member model for every (indexer, partitioner) pair, in
        parallel, and append each trained model to the ensemble.

        :param data: training data; its max and min are stored on the instance
        :param sets: not used here, each partitioner supplies its own sets
        :param order: not used here, member models are trained with order=1
        :param parameters: not used here
        """
        self.original_max = max(data)
        self.original_min = min(data)

        num_cores = multiprocessing.cpu_count()

        # Enumerate every combination explicitly. The previous dict keyed by a
        # counter that was never incremented kept only the last pair, so a
        # single model was trained regardless of how many were configured.
        combinations = [(ix, pt) for ix in self.indexers for pt in self.partitioners]

        # Each job receives its own deep copies of the partitioner, data and indexer.
        results = Parallel(n_jobs=num_cores)(
            delayed(train_individual_model)(deepcopy(pt), deepcopy(data), deepcopy(ix))
            for ix, pt in combinations)

        for tmp in results:
            self.appendModel(tmp)

    def forecastDistribution(self, data, **kwargs):
        """
        Build one KDE probability distribution per item of `data` from the
        forecasts of the ensemble members.

        :param data: iterable of inputs, each passed to get_models_forecasts()
        :param kwargs: `h` is forwarded to the distribution (default 10)
        :return: list of ProbabilityDistribution objects, one per item of `data`
        """
        ret = []

        h = kwargs.get("h", 10)

        for k in data:
            tmp = self.get_models_forecasts(k)

            # ProbabilityDistribution.__init__ takes (type, **kwargs): `h` has to
            # be passed by keyword, a second positional argument raises TypeError.
            # NOTE(review): the constructor shown in this change only creates the
            # kernel smoother when type is None - confirm "KDE" is handled there.
            dist = ProbabilityDistribution.ProbabilityDistribution("KDE", h=h)

            # Feed the member forecasts into the distribution; previously they
            # were computed and then discarded, leaving every distribution empty.
            # NOTE(review): assumes ProbabilityDistribution.append(values) - confirm.
            dist.append(tmp)

            ret.append(dist)

        return ret

View File

@ -7,7 +7,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
"""
Multi-Seasonal Fuzzy Time Series
"""
def __init__(self, order, name, indexer, **kwargs):
def __init__(self, name, indexer, **kwargs):
super(MultiSeasonalFTS, self).__init__("MSFTS")
self.name = "Multi Seasonal FTS"
self.shortname = "MSFTS " + name

View File

@ -6,8 +6,9 @@ class SeasonalIndexer(object):
"""
Seasonal Indexer. Responsible to find the seasonal index of a data point inside its data set
"""
def __init__(self,num_seasons):
def __init__(self,num_seasons, **kwargs):
self.num_seasons = num_seasons
self.name = kwargs.get("name","")
def get_season_of_data(self,data):
pass
@ -26,8 +27,8 @@ class SeasonalIndexer(object):
class LinearSeasonalIndexer(SeasonalIndexer):
def __init__(self,seasons,units,ignore=None):
super(LinearSeasonalIndexer, self).__init__(len(seasons))
def __init__(self, seasons, units, ignore=None, **kwargs):
    """
    :param seasons: sequence of seasons; its length is passed to the parent as num_seasons
    :param units: stored on the instance as `units`
    :param ignore: stored on the instance as `ignore`
    :param kwargs: forwarded to SeasonalIndexer.__init__ (e.g. `name`)
    """
    # kwargs must be unpacked: the parent signature is (num_seasons, **kwargs),
    # so passing the dict positionally raises TypeError.
    super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs)
    self.seasons = seasons
    self.units = units
    self.ignore = ignore
@ -76,8 +77,8 @@ class LinearSeasonalIndexer(SeasonalIndexer):
class DataFrameSeasonalIndexer(SeasonalIndexer):
def __init__(self,index_fields,index_seasons, data_fields):
super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons))
def __init__(self, index_fields, index_seasons, data_fields, **kwargs):
    """
    :param index_fields: stored on the instance as `fields`
    :param index_seasons: stored as `seasons`; its length is passed to the parent as num_seasons
    :param data_fields: stored on the instance as `data_fields`
    :param kwargs: forwarded to SeasonalIndexer.__init__ (e.g. `name`)
    """
    # kwargs must be unpacked: the parent signature is (num_seasons, **kwargs),
    # so passing the dict positionally raises TypeError.
    super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
    self.fields = index_fields
    self.seasons = index_seasons
    self.data_fields = data_fields
@ -131,8 +132,8 @@ class DateTime(Enum):
class DateTimeSeasonalIndexer(SeasonalIndexer):
def __init__(self,date_field, index_fields, index_seasons, data_fields):
super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons))
def __init__(self, date_field, index_fields, index_seasons, data_fields, **kwargs):
    """
    :param date_field: date column/field identifier
    :param index_fields: stored on the instance as `fields`
    :param index_seasons: stored as `seasons`; its length is passed to the parent as num_seasons
    :param data_fields: stored on the instance as `data_fields`
    :param kwargs: forwarded to SeasonalIndexer.__init__ (e.g. `name`)
    """
    # kwargs must be unpacked: the parent signature is (num_seasons, **kwargs),
    # so passing the dict positionally raises TypeError.
    super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
    self.fields = index_fields
    self.seasons = index_seasons
    self.data_fields = data_fields

View File

@ -2,6 +2,7 @@ import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet,SortedCollection
from pyFTS.probabilistic import kde
class ProbabilityDistribution(object):
@ -11,14 +12,15 @@ class ProbabilityDistribution(object):
If type is KDE the PDF is continuous
"""
def __init__(self,type, **kwargs):
self.uod = kwargs.get("uod", None)
if type is None:
self.type = "KDE"
self.kde = kde.KernelSmoothing(kwargs.get("h", 1), kwargs.get("method", "epanechnikov"))
else:
self.type = type
self.description = kwargs.get("description", None)
self.uod = kwargs.get("uod", None)
if self.type == "histogram":
self.nbins = kwargs.get("num_bins", None)
self.bins = kwargs.get("bins", None)
@ -45,14 +47,15 @@ class ProbabilityDistribution(object):
self.data.extend(values)
def density(self, values):
if self.type == "histogram":
ret = []
for k in values:
ret = []
for k in values:
if self.type == "histogram":
v = self.index.find_ge(k)
ret.append(self.distribution[v] / self.count)
return ret
else:
pass
else:
v = self.kde.probability(k, self.data)
ret.append(v)
return ret
def cummulative(self, values):

View File

@ -76,38 +76,33 @@ sonda_teste = sonda[1051201:]
from pyFTS.models.seasonal import SeasonalIndexer
ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg')
indexers = []
cUtil.persist_obj(ix_m15, "models/sonda_ix_m15.pkl")
for i in ["models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", "models/sonda_ix_Mhm15.pkl"]:
obj = cUtil.load_obj(i)
indexers.append( obj )
print(obj)
partitioners = []
transformations = ["", "_diff"]
for max_part in [10, 20, 30, 40, 50, 60]:
for t in transformations:
obj = cUtil.load_obj("models/sonda_fs_grid_" + str(max_part) + t + ".pkl")
partitioners.append( obj )
print(obj)
ix_Mh = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,SeasonalIndexer.DateTime.hour],
[None, None],'glo_avg')
from pyFTS import ensemble
cUtil.persist_obj(ix_Mh, "models/sonda_ix_Mh.pkl")
fts = ensemble.SeasonalEnsembleFTS("")
ix_Mhm15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,
SeasonalIndexer.DateTime.hour, SeasonalIndexer.DateTime.minute],
[None, None,15],'glo_avg')
fts.indexers = indexers
fts.partitioners = partitioners
cUtil.persist_obj(ix_Mhm15, "models/sonda_ix_Mhm15.pkl")
tmp = ix_Mh.get_data(sonda_treino)
for max_part in [10, 20, 30, 40, 50]:
fs1 = Grid.GridPartitionerTrimf(tmp,max_part)
cUtil.persist_obj(fs1,"models/sonda_fs_grid_" + str(max_part) + ".pkl")
fs2 = FCM.FCMPartitionerTrimf(tmp, max_part)
cUtil.persist_obj(fs2, "models/sonda_fs_fcm_" + str(max_part) + ".pkl")
fs3 = Entropy.EntropyPartitionerTrimf(tmp, max_part)
cUtil.persist_obj(fs3, "models/sonda_fs_entropy_" + str(max_part) + ".pkl")
fts.train(sonda_treino, sets=None)
cUtil.persist_obj(fts, "models/msfts_ensemble_sonda_grid.pkl")
from pyFTS.benchmarks import benchmarks as bchmk
#from pyFTS.benchmarks import distributed_benchmarks as bchmk