- Optimizations and bugfixes on Multi Seasonal Ensemble

- Several Bugfixes
 - KDE on ProbabilityDistribution
Petrônio Cândido de Lima e Silva 2017-07-04 12:18:07 -03:00
parent 7e98b34b16
commit 9861189d50
7 changed files with 71 additions and 36 deletions

View File

@@ -57,6 +57,9 @@ class Differential(Transformation):
         if not isinstance(data, list):
             data = [data]
+        if not isinstance(param, list):
+            param = [param]
         n = len(data)
         if not interval:

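The added guard mirrors the existing handling of data: the inverse of the differencing transform needs one starting value per series, so a scalar param is wrapped in a list before iterating. A standalone sketch of the idea (not the pyFTS Differential code; the function names are illustrative):

```python
import numpy as np

def normalize_to_list(x):
    """Wrap a scalar argument so downstream code can iterate uniformly."""
    return x if isinstance(x, list) else [x]

def inverse_difference(diffs, start):
    """Undo a first-order difference given the last original value `start`."""
    return np.cumsum([start] + list(diffs))[1:]

print(normalize_to_list(3.5))                       # [3.5]
print(inverse_difference([1, -2, 4], start=10.0))   # [11.  9. 13.]
```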
View File

@@ -50,12 +50,15 @@ class EnsembleFTS(fts.FTS):
     def get_models_forecasts(self,data):
         tmp = []
         for model in self.models:
-            sample = data[-model.order:]
-            forecast = model.forecast(sample)
-            if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
-                forecast = int(forecast[-1])
-            elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0:
-                forecast = np.nan
+            if self.is_multivariate or self.has_seasonality:
+                forecast = model.forecast(data)
+            else:
+                sample = data[-model.order:]
+                forecast = model.forecast(sample)
+                if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
+                    forecast = int(forecast[-1])
+                elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0:
+                    forecast = np.nan
             tmp.append(forecast)
         return tmp

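The change above routes multivariate and seasonal models around the lag-window slicing: they receive the full (indexed) sample, while plain lag-based models still see only their last `order` observations. A simplified, standalone version of that dispatch (toy model class, not the pyFTS API):

```python
import numpy as np

class _OrderKModel:
    """Toy stand-in for a lag-based model that only uses its last `order` points."""
    def __init__(self, order):
        self.order = order
    def forecast(self, sample):
        return [np.mean(sample)]          # pretend forecast

def forecast_with_window(model, data, needs_full_window=False):
    """Seasonal/multivariate models see the whole sample; lag-based models
    get only their last `order` observations, as in the hunk above."""
    if needs_full_window:
        return model.forecast(data)
    sample = data[-model.order:]
    forecast = model.forecast(sample)
    if isinstance(forecast, (list, np.ndarray)) and len(forecast) > 0:
        forecast = forecast[-1]
    elif isinstance(forecast, (list, np.ndarray)) and len(forecast) == 0:
        forecast = np.nan
    return forecast

print(forecast_with_window(_OrderKModel(order=3), [1, 2, 3, 4, 5, 6]))  # mean of [4, 5, 6] -> 5.0
```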
View File

@@ -20,7 +20,10 @@ import multiprocessing
 def train_individual_model(partitioner, train_data, indexer):
     pttr = str(partitioner.__module__).split('.')[-1]
-    _key = "msfts_" + pttr + str(partitioner.partitions) + "_" + indexer.name
+    diff = "_diff" if partitioner.transformation is not None else ""
+    _key = "msfts_" + pttr + str(partitioner.partitions) + diff + "_" + indexer.name
+    print(_key)
     model = msfts.MultiSeasonalFTS(_key, indexer=indexer)
     model.appendTransformation(partitioner.transformation)
@@ -28,8 +31,6 @@ def train_individual_model(partitioner, train_data, indexer):
     cUtil.persist_obj(model, "models/"+_key+".pkl")
-    print(_key)
     return model
@@ -54,23 +55,28 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
         for ix in self.indexers:
             for pt in self.partitioners:
                 pool[count] = {'ix': ix, 'pt': pt}
+                count += 1
-        results = Parallel(n_jobs=num_cores)(delayed(train_individual_model)(deepcopy(pool[m]['pt']), deepcopy(data), deepcopy(pool[m]['ix'])) for m in pool.keys())
+        results = Parallel(n_jobs=num_cores)(
+            delayed(train_individual_model)(deepcopy(pool[m]['pt']), data, deepcopy(pool[m]['ix']))
+            for m in pool.keys())
         for tmp in results:
             self.appendModel(tmp)
+        cUtil.persist_obj(self, "models/"+self.name+".pkl")
     def forecastDistribution(self, data, **kwargs):
         ret = []
         h = kwargs.get("h",10)
-        for k in data:
-            tmp = self.get_models_forecasts(k)
-            dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h)
+        for k in data.index:
+            tmp = self.get_models_forecasts(data.ix[k])
+            dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max])
             ret.append(dist)

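Training fans the (partitioner, indexer) grid out with joblib, copying the configuration objects per task but now sharing the training data instead of deep-copying it for every worker. A minimal sketch of the same Parallel/delayed pattern (train_one and the pool contents are placeholders, not pyFTS code):

```python
from copy import deepcopy
from joblib import Parallel, delayed
import multiprocessing

def train_one(cfg, data):
    # placeholder for train_individual_model: fit one model for one
    # (partitioner, indexer) combination and return it
    return {"cfg": cfg, "n_obs": len(data)}

data = list(range(1000))
pool = {i: {"pt": f"grid_{p}", "ix": "daily"} for i, p in enumerate([10, 20, 30])}

num_cores = multiprocessing.cpu_count()
results = Parallel(n_jobs=num_cores)(
    delayed(train_one)(deepcopy(pool[m]), data)   # configs copied, data shared
    for m in pool.keys())
print(len(results))   # one trained model per pool entry
```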
View File

@@ -46,7 +46,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
         index = self.indexer.get_season_of_data(data)
         ndata = self.indexer.get_data(data)
-        for k in np.arange(1, len(data)):
+        for k in np.arange(0, len(index)):
             flrg = self.flrgs[str(index[k])]
@@ -54,7 +54,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
             ret.append(sum(mp) / len(mp))
-        ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]])
+        ret = self.doInverseTransformations(ret, params=[ndata])
         return ret

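The forecasting loop now iterates over the season index of each observation rather than raw data positions, so every point is matched to its seasonal rule group. A toy illustration of that lookup (the midpoint values are made up):

```python
import numpy as np

# One "rule group" per season; each forecast is the mean of that group's midpoints.
flrgs = {"0": [10.0, 12.0], "1": [20.0, 22.0], "2": [30.0]}   # season -> midpoints
index = [0, 1, 2, 1]                                          # season of each observation

ret = []
for k in np.arange(0, len(index)):        # iterate over seasons, not raw positions
    mp = flrgs[str(index[k])]
    ret.append(sum(mp) / len(mp))
print(ret)   # [11.0, 21.0, 30.0, 21.0]
```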
View File

@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from enum import Enum
@@ -27,8 +28,8 @@ class SeasonalIndexer(object):
 class LinearSeasonalIndexer(SeasonalIndexer):
-    def __init__(self,seasons,units,ignore=None,**kwargs):
-        super(LinearSeasonalIndexer, self).__init__(len(seasons),kwargs)
+    def __init__(self,seasons,units,ignore=None, **kwargs):
+        super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs)
         self.seasons = seasons
         self.units = units
         self.ignore = ignore
@@ -78,7 +79,7 @@ class LinearSeasonalIndexer(SeasonalIndexer):
 class DataFrameSeasonalIndexer(SeasonalIndexer):
     def __init__(self,index_fields,index_seasons, data_fields,**kwargs):
-        super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons),kwargs)
+        super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
         self.fields = index_fields
         self.seasons = index_seasons
         self.data_fields = data_fields
@@ -133,7 +134,7 @@ class DateTime(Enum):
 class DateTimeSeasonalIndexer(SeasonalIndexer):
     def __init__(self,date_field, index_fields, index_seasons, data_fields,**kwargs):
-        super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), kwargs)
+        super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
         self.fields = index_fields
         self.seasons = index_seasons
         self.data_fields = data_fields
@@ -163,14 +164,24 @@ class DateTimeSeasonalIndexer(SeasonalIndexer):
         return tmp // resolution
     def get_season_of_data(self, data):
-        # data = data.copy()
         ret = []
-        for ix in data.index:
-            date = data[self.date_field][ix]
+        if isinstance(data, pd.DataFrame):
+            for ix in data.index:
+                date = data[self.date_field][ix]
+                season = []
+                for c, f in enumerate(self.fields, start=0):
+                    season.append(self.strip_datepart(date, f, self.seasons[c]))
+                ret.append(season)
+        elif isinstance(data, pd.Series):
+            date = data[self.date_field]
             season = []
             for c, f in enumerate(self.fields, start=0):
-                season.append( self.strip_datepart(date, f, self.seasons[c]) )
+                season.append(self.strip_datepart(date, f, self.seasons[c]))
             ret.append(season)
         return ret
     def get_season_by_index(self, index):

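get_season_of_data now accepts either a whole DataFrame or a single Series row, which is what forecastDistribution passes when it walks the test set row by row. A standalone sketch of the same dispatch (the date and glo_avg field names are made up for the example):

```python
import pandas as pd

def season_of(data, date_field="date"):
    """Extract (month, hour) "seasons" for every row of a DataFrame,
    or for one row passed as a Series."""
    def parts(date):
        return [date.month, date.hour]
    if isinstance(data, pd.DataFrame):
        return [parts(data[date_field][ix]) for ix in data.index]
    elif isinstance(data, pd.Series):
        return [parts(data[date_field])]

df = pd.DataFrame({"date": pd.to_datetime(["2017-07-04 12:00", "2017-12-01 03:00"]),
                   "glo_avg": [700.0, 20.0]})
print(season_of(df))          # [[7, 12], [12, 3]]
print(season_of(df.iloc[0]))  # [[7, 12]]
```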
View File

@@ -16,24 +16,26 @@ class ProbabilityDistribution(object):
         if type is None:
             self.type = "KDE"
-            self.kde = kde.KernelSmoothing(kwargs.get("h", 1), kwargs.get("method", "epanechnikov"))
+            self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov"))
         else:
             self.type = type
         self.description = kwargs.get("description", None)
+        self.nbins = kwargs.get("num_bins", 100)
         if self.type == "histogram":
-            self.nbins = kwargs.get("num_bins", None)
             self.bins = kwargs.get("bins", None)
             self.labels = kwargs.get("bins_labels", None)
             if self.bins is None:
                 self.bins = np.linspace(self.uod[0], self.uod[1], self.nbins).tolist()
                 self.labels = [str(k) for k in self.bins]
             self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
             self.distribution = {}
             self.count = 0
             for k in self.bins: self.distribution[k] = 0
         self.data = kwargs.get("data",None)
@@ -45,6 +47,10 @@ class ProbabilityDistribution(object):
             self.count += 1
         else:
             self.data.extend(values)
+            self.distribution = {}
+            dens = self.density(self.bins)
+            for v,d in enumerate(dens):
+                self.distribution[v] = d
     def density(self, values):
         ret = []
@@ -111,7 +117,10 @@ class ProbabilityDistribution(object):
         fig = plt.figure(figsize=tam)
         axis = fig.add_subplot(111)
-        ys = [self.distribution[k]/self.count for k in self.bins]
+        if self.type == "histogram":
+            ys = [self.distribution[k]/self.count for k in self.bins]
+        else:
+            ys = [self.distribution[k] for k in self.bins]
         axis.plot(self.bins, ys,c=color, label=self.name)

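With type "KDE", densities come from kernel density estimation with an Epanechnikov kernel and bandwidth h (default now 10), evaluated over the distribution's bins. A self-contained sketch of that estimator (not the pyFTS kde module):

```python
import numpy as np

def epanechnikov(u):
    """Epanechnikov kernel: 0.75 * (1 - u^2) on |u| <= 1, zero elsewhere."""
    u = np.asarray(u, dtype=float)
    return np.where(np.abs(u) <= 1.0, 0.75 * (1.0 - u ** 2), 0.0)

def kde_density(x, sample, h=10.0):
    """Kernel density estimate f(x) = (1 / (n*h)) * sum_i K((x - x_i) / h)."""
    sample = np.asarray(sample, dtype=float)
    u = (np.asarray(x, dtype=float)[:, None] - sample[None, :]) / h
    return epanechnikov(u).sum(axis=1) / (len(sample) * h)

sample = np.random.normal(loc=500.0, scale=50.0, size=200)   # stand-in forecasts
bins = np.linspace(300.0, 700.0, 100)                        # universe of discourse
dens = kde_density(bins, sample, h=10.0)
print(float(np.trapz(dens, bins)))                           # integrates to ~1.0
```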
View File

@@ -74,6 +74,7 @@ sonda.index = np.arange(0,len(sonda.index))
 sonda_treino = sonda[:1051200]
 sonda_teste = sonda[1051201:]
+'''
 from pyFTS.models.seasonal import SeasonalIndexer
 indexers = []
@@ -93,16 +94,18 @@ for max_part in [10, 20, 30, 40, 50, 60]:
 print(obj)
-from pyFTS import ensemble
-fts = ensemble.SeasonalEnsembleFTS("")
+from pyFTS.ensemble import ensemble, multiseasonal
+fts = multiseasonal.SeasonalEnsembleFTS("")
 fts.indexers = indexers
 fts.partitioners = partitioners
 fts.train(sonda_treino, sets=None)
-cUtil.persist_obj(fts, "models/msfts_ensemble_sonda_grid.pkl")
+'''
+ftse = cUtil.load_obj("models/sonda_msfts_ensemble.pkl")
+tmp = ftse.forecastDistribution(sonda_teste)
 from pyFTS.benchmarks import benchmarks as bchmk
 #from pyFTS.benchmarks import distributed_benchmarks as bchmk
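The test script now reloads the persisted ensemble and queries forecast distributions on the test set instead of retraining. Assuming cUtil.persist_obj/load_obj are thin pickle wrappers (an assumption, not confirmed by this diff), the round trip looks like:

```python
import pickle

def persist_obj(obj, path):
    """Save a Python object to disk (what cUtil.persist_obj presumably wraps)."""
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def load_obj(path):
    """Load a previously persisted object."""
    with open(path, "rb") as f:
        return pickle.load(f)

persist_obj({"name": "sonda_msfts_ensemble"}, "sonda_msfts_ensemble.pkl")
model = load_obj("sonda_msfts_ensemble.pkl")
print(model["name"])
```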