- Optimizations and bugfixes on Multi Seasonal Ensemble

- Bugfixes on ProbabilityDistribution
- Added optional indexer support to the Partitioners
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-07-04 16:30:53 -03:00
parent 9861189d50
commit 962ef89bcf
9 changed files with 68 additions and 36 deletions

View File

@ -59,6 +59,9 @@ class EnsembleFTS(fts.FTS):
forecast = int(forecast[-1]) forecast = int(forecast[-1])
elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0: elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0:
forecast = np.nan forecast = np.nan
if isinstance(forecast, list):
tmp.extend(forecast)
else:
tmp.append(forecast) tmp.append(forecast)
return tmp return tmp

View File

@ -44,9 +44,13 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
self.has_seasonality = True self.has_seasonality = True
self.has_probability_forecasting = True self.has_probability_forecasting = True
def update_uod(self, data):
self.original_max = max(self.indexer.get_data(data))
self.original_min = min(self.indexer.get_data(data))
def train(self, data, sets, order=1, parameters=None): def train(self, data, sets, order=1, parameters=None):
self.original_max = max(data) self.original_max = max(self.indexer.get_data(data))
self.original_min = min(data) self.original_min = min(self.indexer.get_data(data))
num_cores = multiprocessing.cpu_count() num_cores = multiprocessing.cpu_count()
@ -76,7 +80,9 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
tmp = self.get_models_forecasts(data.ix[k]) tmp = self.get_models_forecasts(data.ix[k])
dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max]) tmp = np.ravel(tmp).tolist()
dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max], data=tmp)
ret.append(dist) ret.append(dist)

View File

@ -78,8 +78,8 @@ def c_means(k, dados, tam):
class CMeansPartitioner(partitioner.Partitioner): class CMeansPartitioner(partitioner.Partitioner):
def __init__(self, data, npart, func = Membership.trimf, transformation=None): def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None):
super(CMeansPartitioner, self).__init__("CMeans", data, npart, func=func, transformation=transformation) super(CMeansPartitioner, self).__init__("CMeans", data, npart, func=func, transformation=transformation, indexer=indexer)
def build(self, data): def build(self, data):
sets = [] sets = []

View File

@ -79,8 +79,8 @@ def bestSplit(data, npart):
class EntropyPartitioner(partitioner.Partitioner): class EntropyPartitioner(partitioner.Partitioner):
"""Huarng Entropy Partitioner""" """Huarng Entropy Partitioner"""
def __init__(self, data, npart, func = Membership.trimf, transformation=None): def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None):
super(EntropyPartitioner, self).__init__("Entropy", data, npart, func=func, transformation=transformation) super(EntropyPartitioner, self).__init__("Entropy", data, npart, func=func, transformation=transformation, indexer=indexer)
def build(self, data): def build(self, data):
sets = [] sets = []

View File

@ -104,8 +104,8 @@ class FCMPartitioner(partitioner.Partitioner):
""" """
""" """
def __init__(self, data,npart,func = Membership.trimf, transformation=None): def __init__(self, data,npart,func = Membership.trimf, transformation=None, indexer=None):
super(FCMPartitioner, self).__init__("FCM", data, npart, func=func, transformation=transformation) super(FCMPartitioner, self).__init__("FCM", data, npart, func=func, transformation=transformation, indexer=indexer)
def build(self,data): def build(self,data):
sets = [] sets = []

View File

@ -8,8 +8,8 @@ from pyFTS.partitioners import partitioner
class GridPartitioner(partitioner.Partitioner): class GridPartitioner(partitioner.Partitioner):
"""Even Length Grid Partitioner""" """Even Length Grid Partitioner"""
def __init__(self, data, npart, func = Membership.trimf, transformation=None): def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None):
super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation) super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation, indexer=indexer)
def build(self, data): def build(self, data):
sets = [] sets = []

View File

@ -12,8 +12,8 @@ from pyFTS.partitioners import partitioner
class HuarngPartitioner(partitioner.Partitioner): class HuarngPartitioner(partitioner.Partitioner):
"""Huarng Empirical Partitioner""" """Huarng Empirical Partitioner"""
def __init__(self, data,npart,func = Membership.trimf, transformation=None): def __init__(self, data,npart,func = Membership.trimf, transformation=None, indexer=None):
super(HuarngPartitioner, self).__init__("Huarng", data, npart, func=func, transformation=transformation) super(HuarngPartitioner, self).__init__("Huarng", data, npart, func=func, transformation=transformation, indexer=indexer)
def build(self, data): def build(self, data):
diff = Transformations.Differential(1) diff = Transformations.Differential(1)

View File

@ -11,25 +11,20 @@ class ProbabilityDistribution(object):
If type is histogram, the PDF is discrete If type is histogram, the PDF is discrete
If type is KDE the PDF is continuous If type is KDE the PDF is continuous
""" """
def __init__(self,type, **kwargs): def __init__(self,type = "KDE", **kwargs):
self.uod = kwargs.get("uod", None) self.uod = kwargs.get("uod", None)
if type is None:
self.type = "KDE"
self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov"))
else:
self.type = type self.type = type
if self.type == "KDE":
self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov"))
self.description = kwargs.get("description", None)
self.nbins = kwargs.get("num_bins", 100) self.nbins = kwargs.get("num_bins", 100)
if self.type == "histogram":
self.bins = kwargs.get("bins", None) self.bins = kwargs.get("bins", None)
self.labels = kwargs.get("bins_labels", None) self.labels = kwargs.get("bins_labels", None)
if self.bins is None: if self.bins is None:
self.bins = np.linspace(self.uod[0], self.uod[1], self.nbins).tolist() self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
self.labels = [str(k) for k in self.bins] self.labels = [str(k) for k in self.bins]
self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins)) self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
@ -37,7 +32,14 @@ class ProbabilityDistribution(object):
self.count = 0 self.count = 0
for k in self.bins: self.distribution[k] = 0 for k in self.bins: self.distribution[k] = 0
self.data = kwargs.get("data",None) self.data = []
data = kwargs.get("data",None)
if data is not None:
self.append(data)
self.name = kwargs.get("name", "")
def append(self, values): def append(self, values):
if self.type == "histogram": if self.type == "histogram":
@ -50,7 +52,7 @@ class ProbabilityDistribution(object):
self.distribution = {} self.distribution = {}
dens = self.density(self.bins) dens = self.density(self.bins)
for v,d in enumerate(dens): for v,d in enumerate(dens):
self.distribution[v] = d self.distribution[self.bins[v]] = d
def density(self, values): def density(self, values):
ret = [] ret = []

View File

@ -20,7 +20,7 @@ from pyFTS.models.seasonal import SeasonalIndexer
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
#diff = Transformations.Differential(1) diff = Transformations.Differential(1)
#ix = SeasonalIndexer.LinearSeasonalIndexer([12, 24], [720, 1],[False, False]) #ix = SeasonalIndexer.LinearSeasonalIndexer([12, 24], [720, 1],[False, False])
""" """
@ -72,22 +72,27 @@ sonda = sonda[:][527041:]
sonda.index = np.arange(0,len(sonda.index)) sonda.index = np.arange(0,len(sonda.index))
sonda_treino = sonda[:1051200] sonda_treino = sonda[:1051200]
sonda_teste = sonda[1051201:] sonda_teste = sonda[1051901:1051910]
ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg', name='m15')
fs1 = Grid.GridPartitioner(sonda_treino,50,transformation=diff, indexer=ix_m15)
''' '''
from pyFTS.models.seasonal import SeasonalIndexer from pyFTS.models.seasonal import SeasonalIndexer
indexers = [] indexers = []
for i in ["models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", "models/sonda_ix_Mhm15.pkl"]: for i in ["models/sonda_ix_Mhm15.pkl"]: #, "models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", ]:
obj = cUtil.load_obj(i) obj = cUtil.load_obj(i)
indexers.append( obj ) indexers.append( obj )
print(obj) print(obj)
partitioners = [] partitioners = []
transformations = ["", "_diff"] transformations = [""] #, "_diff"]
for max_part in [10, 20, 30, 40, 50, 60]: for max_part in [30, 40, 50, 60, 70, 80, 90]:
for t in transformations: for t in transformations:
obj = cUtil.load_obj("models/sonda_fs_grid_" + str(max_part) + t + ".pkl") obj = cUtil.load_obj("models/sonda_fs_grid_" + str(max_part) + t + ".pkl")
partitioners.append( obj ) partitioners.append( obj )
@ -96,17 +101,33 @@ for max_part in [10, 20, 30, 40, 50, 60]:
from pyFTS.ensemble import ensemble, multiseasonal from pyFTS.ensemble import ensemble, multiseasonal
fts = multiseasonal.SeasonalEnsembleFTS("") fts = multiseasonal.SeasonalEnsembleFTS("sonda_msfts_Mhm15")
fts.indexers = indexers fts.indexers = indexers
fts.partitioners = partitioners fts.partitioners = partitioners
fts.indexer = indexers[0]
fts.train(sonda_treino, sets=None) fts.train(sonda_treino, sets=None)
''' '''
ftse = cUtil.load_obj("models/sonda_msfts_ensemble.pkl") #'''
tmp = ftse.forecastDistribution(sonda_teste) #ix = cUtil.load_obj("models/sonda_ix_m15.pkl")
#ftse = cUtil.load_obj("models/msfts_Grid40_diff_Mhm15.pkl")
#ftse.indexer = ix
#ftse.update_uod(sonda_treino)
#tmp = ftse.forecastDistribution(sonda_teste,h=1)
#tmp = ftse.forecast(sonda_teste,h=1)
#tmp[5].plot()
#'''
'''
from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
#from pyFTS.benchmarks import distributed_benchmarks as bchmk #from pyFTS.benchmarks import distributed_benchmarks as bchmk
#from pyFTS.benchmarks import parallel_benchmarks as bchmk #from pyFTS.benchmarks import parallel_benchmarks as bchmk
@ -299,7 +320,7 @@ diff = Transformations.Differential(1)
fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff) fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff)
'''
tmp = sfts.SeasonalFTS("") tmp = sfts.SeasonalFTS("")
tmp.indexer = ix tmp.indexer = ix
tmp.appendTransformation(diff) tmp.appendTransformation(diff)