- Optimizations and bugfixes on Multi Seasonal Ensemble
- Several Bugfixes - KDE on ProbabilityDistribution
This commit is contained in:
parent
7e98b34b16
commit
9861189d50
@ -57,6 +57,9 @@ class Differential(Transformation):
|
|||||||
if not isinstance(data, list):
|
if not isinstance(data, list):
|
||||||
data = [data]
|
data = [data]
|
||||||
|
|
||||||
|
if not isinstance(param, list):
|
||||||
|
param = [param]
|
||||||
|
|
||||||
n = len(data)
|
n = len(data)
|
||||||
|
|
||||||
if not interval:
|
if not interval:
|
||||||
|
@ -50,6 +50,9 @@ class EnsembleFTS(fts.FTS):
|
|||||||
def get_models_forecasts(self,data):
|
def get_models_forecasts(self,data):
|
||||||
tmp = []
|
tmp = []
|
||||||
for model in self.models:
|
for model in self.models:
|
||||||
|
if self.is_multivariate or self.has_seasonality:
|
||||||
|
forecast = model.forecast(data)
|
||||||
|
else:
|
||||||
sample = data[-model.order:]
|
sample = data[-model.order:]
|
||||||
forecast = model.forecast(sample)
|
forecast = model.forecast(sample)
|
||||||
if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
|
if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
|
||||||
|
@ -20,7 +20,10 @@ import multiprocessing
|
|||||||
|
|
||||||
def train_individual_model(partitioner, train_data, indexer):
|
def train_individual_model(partitioner, train_data, indexer):
|
||||||
pttr = str(partitioner.__module__).split('.')[-1]
|
pttr = str(partitioner.__module__).split('.')[-1]
|
||||||
_key = "msfts_" + pttr + str(partitioner.partitions) + "_" + indexer.name
|
diff = "_diff" if partitioner.transformation is not None else ""
|
||||||
|
_key = "msfts_" + pttr + str(partitioner.partitions) + diff + "_" + indexer.name
|
||||||
|
|
||||||
|
print(_key)
|
||||||
|
|
||||||
model = msfts.MultiSeasonalFTS(_key, indexer=indexer)
|
model = msfts.MultiSeasonalFTS(_key, indexer=indexer)
|
||||||
model.appendTransformation(partitioner.transformation)
|
model.appendTransformation(partitioner.transformation)
|
||||||
@ -28,8 +31,6 @@ def train_individual_model(partitioner, train_data, indexer):
|
|||||||
|
|
||||||
cUtil.persist_obj(model, "models/"+_key+".pkl")
|
cUtil.persist_obj(model, "models/"+_key+".pkl")
|
||||||
|
|
||||||
print(_key)
|
|
||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
@ -54,23 +55,28 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
|
|||||||
for ix in self.indexers:
|
for ix in self.indexers:
|
||||||
for pt in self.partitioners:
|
for pt in self.partitioners:
|
||||||
pool[count] = {'ix': ix, 'pt': pt}
|
pool[count] = {'ix': ix, 'pt': pt}
|
||||||
|
count += 1
|
||||||
|
|
||||||
results = Parallel(n_jobs=num_cores)(delayed(train_individual_model)(deepcopy(pool[m]['pt']), deepcopy(data), deepcopy(pool[m]['ix'])) for m in pool.keys())
|
results = Parallel(n_jobs=num_cores)(
|
||||||
|
delayed(train_individual_model)(deepcopy(pool[m]['pt']), data, deepcopy(pool[m]['ix']))
|
||||||
|
for m in pool.keys())
|
||||||
|
|
||||||
for tmp in results:
|
for tmp in results:
|
||||||
self.appendModel(tmp)
|
self.appendModel(tmp)
|
||||||
|
|
||||||
|
cUtil.persist_obj(self, "models/"+self.name+".pkl")
|
||||||
|
|
||||||
def forecastDistribution(self, data, **kwargs):
|
def forecastDistribution(self, data, **kwargs):
|
||||||
|
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
h = kwargs.get("h",10)
|
h = kwargs.get("h",10)
|
||||||
|
|
||||||
for k in data:
|
for k in data.index:
|
||||||
|
|
||||||
tmp = self.get_models_forecasts(k)
|
tmp = self.get_models_forecasts(data.ix[k])
|
||||||
|
|
||||||
dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h)
|
dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max])
|
||||||
|
|
||||||
ret.append(dist)
|
ret.append(dist)
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
|
|||||||
index = self.indexer.get_season_of_data(data)
|
index = self.indexer.get_season_of_data(data)
|
||||||
ndata = self.indexer.get_data(data)
|
ndata = self.indexer.get_data(data)
|
||||||
|
|
||||||
for k in np.arange(1, len(data)):
|
for k in np.arange(0, len(index)):
|
||||||
|
|
||||||
flrg = self.flrgs[str(index[k])]
|
flrg = self.flrgs[str(index[k])]
|
||||||
|
|
||||||
@ -54,7 +54,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
|
|||||||
|
|
||||||
ret.append(sum(mp) / len(mp))
|
ret.append(sum(mp) / len(mp))
|
||||||
|
|
||||||
ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]])
|
ret = self.doInverseTransformations(ret, params=[ndata])
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
@ -27,8 +28,8 @@ class SeasonalIndexer(object):
|
|||||||
|
|
||||||
|
|
||||||
class LinearSeasonalIndexer(SeasonalIndexer):
|
class LinearSeasonalIndexer(SeasonalIndexer):
|
||||||
def __init__(self,seasons,units,ignore=None,**kwargs):
|
def __init__(self,seasons,units,ignore=None, **kwargs):
|
||||||
super(LinearSeasonalIndexer, self).__init__(len(seasons),kwargs)
|
super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs)
|
||||||
self.seasons = seasons
|
self.seasons = seasons
|
||||||
self.units = units
|
self.units = units
|
||||||
self.ignore = ignore
|
self.ignore = ignore
|
||||||
@ -78,7 +79,7 @@ class LinearSeasonalIndexer(SeasonalIndexer):
|
|||||||
|
|
||||||
class DataFrameSeasonalIndexer(SeasonalIndexer):
|
class DataFrameSeasonalIndexer(SeasonalIndexer):
|
||||||
def __init__(self,index_fields,index_seasons, data_fields,**kwargs):
|
def __init__(self,index_fields,index_seasons, data_fields,**kwargs):
|
||||||
super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons),kwargs)
|
super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
|
||||||
self.fields = index_fields
|
self.fields = index_fields
|
||||||
self.seasons = index_seasons
|
self.seasons = index_seasons
|
||||||
self.data_fields = data_fields
|
self.data_fields = data_fields
|
||||||
@ -133,7 +134,7 @@ class DateTime(Enum):
|
|||||||
|
|
||||||
class DateTimeSeasonalIndexer(SeasonalIndexer):
|
class DateTimeSeasonalIndexer(SeasonalIndexer):
|
||||||
def __init__(self,date_field, index_fields, index_seasons, data_fields,**kwargs):
|
def __init__(self,date_field, index_fields, index_seasons, data_fields,**kwargs):
|
||||||
super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), kwargs)
|
super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
|
||||||
self.fields = index_fields
|
self.fields = index_fields
|
||||||
self.seasons = index_seasons
|
self.seasons = index_seasons
|
||||||
self.data_fields = data_fields
|
self.data_fields = data_fields
|
||||||
@ -163,14 +164,24 @@ class DateTimeSeasonalIndexer(SeasonalIndexer):
|
|||||||
return tmp // resolution
|
return tmp // resolution
|
||||||
|
|
||||||
def get_season_of_data(self, data):
|
def get_season_of_data(self, data):
|
||||||
# data = data.copy()
|
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
|
if isinstance(data, pd.DataFrame):
|
||||||
for ix in data.index:
|
for ix in data.index:
|
||||||
date = data[self.date_field][ix]
|
date = data[self.date_field][ix]
|
||||||
season = []
|
season = []
|
||||||
for c, f in enumerate(self.fields, start=0):
|
for c, f in enumerate(self.fields, start=0):
|
||||||
season.append( self.strip_datepart(date, f, self.seasons[c]) )
|
season.append(self.strip_datepart(date, f, self.seasons[c]))
|
||||||
ret.append(season)
|
ret.append(season)
|
||||||
|
|
||||||
|
elif isinstance(data, pd.Series):
|
||||||
|
date = data[self.date_field]
|
||||||
|
season = []
|
||||||
|
for c, f in enumerate(self.fields, start=0):
|
||||||
|
season.append(self.strip_datepart(date, f, self.seasons[c]))
|
||||||
|
ret.append(season)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def get_season_by_index(self, index):
|
def get_season_by_index(self, index):
|
||||||
|
@ -16,13 +16,15 @@ class ProbabilityDistribution(object):
|
|||||||
|
|
||||||
if type is None:
|
if type is None:
|
||||||
self.type = "KDE"
|
self.type = "KDE"
|
||||||
self.kde = kde.KernelSmoothing(kwargs.get("h", 1), kwargs.get("method", "epanechnikov"))
|
self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov"))
|
||||||
else:
|
else:
|
||||||
self.type = type
|
self.type = type
|
||||||
|
|
||||||
self.description = kwargs.get("description", None)
|
self.description = kwargs.get("description", None)
|
||||||
|
self.nbins = kwargs.get("num_bins", 100)
|
||||||
|
|
||||||
if self.type == "histogram":
|
if self.type == "histogram":
|
||||||
self.nbins = kwargs.get("num_bins", None)
|
|
||||||
self.bins = kwargs.get("bins", None)
|
self.bins = kwargs.get("bins", None)
|
||||||
self.labels = kwargs.get("bins_labels", None)
|
self.labels = kwargs.get("bins_labels", None)
|
||||||
|
|
||||||
@ -45,6 +47,10 @@ class ProbabilityDistribution(object):
|
|||||||
self.count += 1
|
self.count += 1
|
||||||
else:
|
else:
|
||||||
self.data.extend(values)
|
self.data.extend(values)
|
||||||
|
self.distribution = {}
|
||||||
|
dens = self.density(self.bins)
|
||||||
|
for v,d in enumerate(dens):
|
||||||
|
self.distribution[v] = d
|
||||||
|
|
||||||
def density(self, values):
|
def density(self, values):
|
||||||
ret = []
|
ret = []
|
||||||
@ -111,7 +117,10 @@ class ProbabilityDistribution(object):
|
|||||||
fig = plt.figure(figsize=tam)
|
fig = plt.figure(figsize=tam)
|
||||||
axis = fig.add_subplot(111)
|
axis = fig.add_subplot(111)
|
||||||
|
|
||||||
|
if self.type == "histogram":
|
||||||
ys = [self.distribution[k]/self.count for k in self.bins]
|
ys = [self.distribution[k]/self.count for k in self.bins]
|
||||||
|
else:
|
||||||
|
ys = [self.distribution[k] for k in self.bins]
|
||||||
|
|
||||||
axis.plot(self.bins, ys,c=color, label=self.name)
|
axis.plot(self.bins, ys,c=color, label=self.name)
|
||||||
|
|
||||||
|
@ -74,6 +74,7 @@ sonda.index = np.arange(0,len(sonda.index))
|
|||||||
sonda_treino = sonda[:1051200]
|
sonda_treino = sonda[:1051200]
|
||||||
sonda_teste = sonda[1051201:]
|
sonda_teste = sonda[1051201:]
|
||||||
|
|
||||||
|
'''
|
||||||
from pyFTS.models.seasonal import SeasonalIndexer
|
from pyFTS.models.seasonal import SeasonalIndexer
|
||||||
|
|
||||||
indexers = []
|
indexers = []
|
||||||
@ -93,16 +94,18 @@ for max_part in [10, 20, 30, 40, 50, 60]:
|
|||||||
print(obj)
|
print(obj)
|
||||||
|
|
||||||
|
|
||||||
from pyFTS import ensemble
|
from pyFTS.ensemble import ensemble, multiseasonal
|
||||||
|
|
||||||
fts = ensemble.SeasonalEnsembleFTS("")
|
fts = multiseasonal.SeasonalEnsembleFTS("")
|
||||||
|
|
||||||
fts.indexers = indexers
|
fts.indexers = indexers
|
||||||
fts.partitioners = partitioners
|
fts.partitioners = partitioners
|
||||||
|
|
||||||
fts.train(sonda_treino, sets=None)
|
fts.train(sonda_treino, sets=None)
|
||||||
|
'''
|
||||||
|
ftse = cUtil.load_obj("models/sonda_msfts_ensemble.pkl")
|
||||||
|
|
||||||
cUtil.persist_obj(fts, "models/msfts_ensemble_sonda_grid.pkl")
|
tmp = ftse.forecastDistribution(sonda_teste)
|
||||||
|
|
||||||
from pyFTS.benchmarks import benchmarks as bchmk
|
from pyFTS.benchmarks import benchmarks as bchmk
|
||||||
#from pyFTS.benchmarks import distributed_benchmarks as bchmk
|
#from pyFTS.benchmarks import distributed_benchmarks as bchmk
|
||||||
|
Loading…
Reference in New Issue
Block a user