From 962ef89bcff020e77d5e2283412a5bca6cd51db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Tue, 4 Jul 2017 16:30:53 -0300 Subject: [PATCH] - Optimizations and bugfixes on Multi Seasonal Ensemble - Bugfixes on ProbabilityDistribution - Indexers on Partitioners --- ensemble/ensemble.py | 5 ++- ensemble/multiseasonal.py | 12 ++++++-- partitioners/CMeans.py | 4 +-- partitioners/Entropy.py | 4 +-- partitioners/FCM.py | 4 +-- partitioners/Grid.py | 4 +-- partitioners/Huarng.py | 4 +-- probabilistic/ProbabilityDistribution.py | 28 +++++++++-------- tests/general.py | 39 ++++++++++++++++++------ 9 files changed, 68 insertions(+), 36 deletions(-) diff --git a/ensemble/ensemble.py b/ensemble/ensemble.py index c02aa1e..5d3c494 100644 --- a/ensemble/ensemble.py +++ b/ensemble/ensemble.py @@ -59,7 +59,10 @@ class EnsembleFTS(fts.FTS): forecast = int(forecast[-1]) elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0: forecast = np.nan - tmp.append(forecast) + if isinstance(forecast, list): + tmp.extend(forecast) + else: + tmp.append(forecast) return tmp def get_point(self,forecasts, **kwargs): diff --git a/ensemble/multiseasonal.py b/ensemble/multiseasonal.py index e5f5f0f..816b90f 100644 --- a/ensemble/multiseasonal.py +++ b/ensemble/multiseasonal.py @@ -44,9 +44,13 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS): self.has_seasonality = True self.has_probability_forecasting = True + def update_uod(self, data): + self.original_max = max(self.indexer.get_data(data)) + self.original_min = min(self.indexer.get_data(data)) + def train(self, data, sets, order=1, parameters=None): - self.original_max = max(data) - self.original_min = min(data) + self.original_max = max(self.indexer.get_data(data)) + self.original_min = min(self.indexer.get_data(data)) num_cores = multiprocessing.cpu_count() @@ -76,7 +80,9 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS): tmp = self.get_models_forecasts(data.ix[k]) - dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max]) + tmp = np.ravel(tmp).tolist() + + dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max], data=tmp) ret.append(dist) diff --git a/partitioners/CMeans.py b/partitioners/CMeans.py index 72030c3..caba7a3 100644 --- a/partitioners/CMeans.py +++ b/partitioners/CMeans.py @@ -78,8 +78,8 @@ def c_means(k, dados, tam): class CMeansPartitioner(partitioner.Partitioner): - def __init__(self, data, npart, func = Membership.trimf, transformation=None): - super(CMeansPartitioner, self).__init__("CMeans", data, npart, func=func, transformation=transformation) + def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None): + super(CMeansPartitioner, self).__init__("CMeans", data, npart, func=func, transformation=transformation, indexer=indexer) def build(self, data): sets = [] diff --git a/partitioners/Entropy.py b/partitioners/Entropy.py index fd3048a..035a35c 100644 --- a/partitioners/Entropy.py +++ b/partitioners/Entropy.py @@ -79,8 +79,8 @@ def bestSplit(data, npart): class EntropyPartitioner(partitioner.Partitioner): """Huarng Entropy Partitioner""" - def __init__(self, data, npart, func = Membership.trimf, transformation=None): - super(EntropyPartitioner, self).__init__("Entropy", data, npart, func=func, transformation=transformation) + def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None): + super(EntropyPartitioner, self).__init__("Entropy", data, npart, func=func, transformation=transformation, indexer=indexer) def build(self, data): sets = [] diff --git a/partitioners/FCM.py b/partitioners/FCM.py index d4a09d8..26c0648 100644 --- a/partitioners/FCM.py +++ b/partitioners/FCM.py @@ -104,8 +104,8 @@ class FCMPartitioner(partitioner.Partitioner): """ """ - def __init__(self, data,npart,func = Membership.trimf, transformation=None): - super(FCMPartitioner, self).__init__("FCM", data, npart, func=func, transformation=transformation) + def __init__(self, data,npart,func = Membership.trimf, transformation=None, indexer=None): + super(FCMPartitioner, self).__init__("FCM", data, npart, func=func, transformation=transformation, indexer=indexer) def build(self,data): sets = [] diff --git a/partitioners/Grid.py b/partitioners/Grid.py index 47ff125..1776ae0 100644 --- a/partitioners/Grid.py +++ b/partitioners/Grid.py @@ -8,8 +8,8 @@ from pyFTS.partitioners import partitioner class GridPartitioner(partitioner.Partitioner): """Even Length Grid Partitioner""" - def __init__(self, data, npart, func = Membership.trimf, transformation=None): - super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation) + def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None): + super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation, indexer=indexer) def build(self, data): sets = [] diff --git a/partitioners/Huarng.py b/partitioners/Huarng.py index 3b91a8c..ba56dee 100644 --- a/partitioners/Huarng.py +++ b/partitioners/Huarng.py @@ -12,8 +12,8 @@ from pyFTS.partitioners import partitioner class HuarngPartitioner(partitioner.Partitioner): """Huarng Empirical Partitioner""" - def __init__(self, data,npart,func = Membership.trimf, transformation=None): - super(HuarngPartitioner, self).__init__("Huarng", data, npart, func=func, transformation=transformation) + def __init__(self, data,npart,func = Membership.trimf, transformation=None, indexer=None): + super(HuarngPartitioner, self).__init__("Huarng", data, npart, func=func, transformation=transformation, indexer=indexer) def build(self, data): diff = Transformations.Differential(1) diff --git a/probabilistic/ProbabilityDistribution.py b/probabilistic/ProbabilityDistribution.py index 2dbeb42..5087bc0 100644 --- a/probabilistic/ProbabilityDistribution.py +++ b/probabilistic/ProbabilityDistribution.py @@ -11,25 +11,20 @@ class ProbabilityDistribution(object): If type is histogram, the PDF is discrete If type is KDE the PDF is continuous """ - def __init__(self,type, **kwargs): + def __init__(self,type = "KDE", **kwargs): self.uod = kwargs.get("uod", None) - if type is None: - self.type = "KDE" + self.type = type + if self.type == "KDE": self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov")) - else: - self.type = type - self.description = kwargs.get("description", None) self.nbins = kwargs.get("num_bins", 100) - if self.type == "histogram": - - self.bins = kwargs.get("bins", None) - self.labels = kwargs.get("bins_labels", None) + self.bins = kwargs.get("bins", None) + self.labels = kwargs.get("bins_labels", None) if self.bins is None: - self.bins = np.linspace(self.uod[0], self.uod[1], self.nbins).tolist() + self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist() self.labels = [str(k) for k in self.bins] self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins)) @@ -37,7 +32,14 @@ class ProbabilityDistribution(object): self.count = 0 for k in self.bins: self.distribution[k] = 0 - self.data = kwargs.get("data",None) + self.data = [] + + data = kwargs.get("data",None) + + if data is not None: + self.append(data) + + self.name = kwargs.get("name", "") def append(self, values): if self.type == "histogram": @@ -50,7 +52,7 @@ class ProbabilityDistribution(object): self.distribution = {} dens = self.density(self.bins) for v,d in enumerate(dens): - self.distribution[v] = d + self.distribution[self.bins[v]] = d def density(self, values): ret = [] diff --git a/tests/general.py b/tests/general.py index d94fe1e..f162c04 100644 --- a/tests/general.py +++ b/tests/general.py @@ -20,7 +20,7 @@ from pyFTS.models.seasonal import SeasonalIndexer os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") -#diff = Transformations.Differential(1) +diff = Transformations.Differential(1) #ix = SeasonalIndexer.LinearSeasonalIndexer([12, 24], [720, 1],[False, False]) """ @@ -72,22 +72,27 @@ sonda = sonda[:][527041:] sonda.index = np.arange(0,len(sonda.index)) sonda_treino = sonda[:1051200] -sonda_teste = sonda[1051201:] +sonda_teste = sonda[1051901:1051910] + +ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg', name='m15') + +fs1 = Grid.GridPartitioner(sonda_treino,50,transformation=diff, indexer=ix_m15) + ''' from pyFTS.models.seasonal import SeasonalIndexer indexers = [] -for i in ["models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", "models/sonda_ix_Mhm15.pkl"]: +for i in ["models/sonda_ix_Mhm15.pkl"]: #, "models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", ]: obj = cUtil.load_obj(i) indexers.append( obj ) print(obj) partitioners = [] -transformations = ["", "_diff"] -for max_part in [10, 20, 30, 40, 50, 60]: +transformations = [""] #, "_diff"] +for max_part in [30, 40, 50, 60, 70, 80, 90]: for t in transformations: obj = cUtil.load_obj("models/sonda_fs_grid_" + str(max_part) + t + ".pkl") partitioners.append( obj ) @@ -96,17 +101,33 @@ for max_part in [10, 20, 30, 40, 50, 60]: from pyFTS.ensemble import ensemble, multiseasonal -fts = multiseasonal.SeasonalEnsembleFTS("") +fts = multiseasonal.SeasonalEnsembleFTS("sonda_msfts_Mhm15") fts.indexers = indexers fts.partitioners = partitioners +fts.indexer = indexers[0] + fts.train(sonda_treino, sets=None) ''' -ftse = cUtil.load_obj("models/sonda_msfts_ensemble.pkl") +#''' -tmp = ftse.forecastDistribution(sonda_teste) +#ix = cUtil.load_obj("models/sonda_ix_m15.pkl") +#ftse = cUtil.load_obj("models/msfts_Grid40_diff_Mhm15.pkl") + +#ftse.indexer = ix + +#ftse.update_uod(sonda_treino) + +#tmp = ftse.forecastDistribution(sonda_teste,h=1) + +#tmp = ftse.forecast(sonda_teste,h=1) + +#tmp[5].plot() +#''' + +''' from pyFTS.benchmarks import benchmarks as bchmk #from pyFTS.benchmarks import distributed_benchmarks as bchmk #from pyFTS.benchmarks import parallel_benchmarks as bchmk @@ -299,7 +320,7 @@ diff = Transformations.Differential(1) fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff) - +''' tmp = sfts.SeasonalFTS("") tmp.indexer = ix tmp.appendTransformation(diff)