diff --git a/common/Transformations.py b/common/Transformations.py index 5bb9804..3a96a6f 100644 --- a/common/Transformations.py +++ b/common/Transformations.py @@ -57,6 +57,9 @@ class Differential(Transformation): if not isinstance(data, list): data = [data] + if not isinstance(param, list): + param = [param] + n = len(data) if not interval: diff --git a/ensemble/ensemble.py b/ensemble/ensemble.py index 9e6e191..c02aa1e 100644 --- a/ensemble/ensemble.py +++ b/ensemble/ensemble.py @@ -50,12 +50,15 @@ class EnsembleFTS(fts.FTS): def get_models_forecasts(self,data): tmp = [] for model in self.models: - sample = data[-model.order:] - forecast = model.forecast(sample) - if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0: - forecast = int(forecast[-1]) - elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0: - forecast = np.nan + if self.is_multivariate or self.has_seasonality: + forecast = model.forecast(data) + else: + sample = data[-model.order:] + forecast = model.forecast(sample) + if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0: + forecast = int(forecast[-1]) + elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0: + forecast = np.nan tmp.append(forecast) return tmp diff --git a/ensemble/multiseasonal.py b/ensemble/multiseasonal.py index 17b58a9..e5f5f0f 100644 --- a/ensemble/multiseasonal.py +++ b/ensemble/multiseasonal.py @@ -20,7 +20,10 @@ import multiprocessing def train_individual_model(partitioner, train_data, indexer): pttr = str(partitioner.__module__).split('.')[-1] - _key = "msfts_" + pttr + str(partitioner.partitions) + "_" + indexer.name + diff = "_diff" if partitioner.transformation is not None else "" + _key = "msfts_" + pttr + str(partitioner.partitions) + diff + "_" + indexer.name + + print(_key) model = msfts.MultiSeasonalFTS(_key, indexer=indexer) model.appendTransformation(partitioner.transformation) @@ -28,8 +31,6 @@ def train_individual_model(partitioner, train_data, indexer): cUtil.persist_obj(model, "models/"+_key+".pkl") - print(_key) - return model @@ -54,23 +55,28 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS): for ix in self.indexers: for pt in self.partitioners: pool[count] = {'ix': ix, 'pt': pt} + count += 1 - results = Parallel(n_jobs=num_cores)(delayed(train_individual_model)(deepcopy(pool[m]['pt']), deepcopy(data), deepcopy(pool[m]['ix'])) for m in pool.keys()) + results = Parallel(n_jobs=num_cores)( + delayed(train_individual_model)(deepcopy(pool[m]['pt']), data, deepcopy(pool[m]['ix'])) + for m in pool.keys()) for tmp in results: self.appendModel(tmp) + cUtil.persist_obj(self, "models/"+self.name+".pkl") + def forecastDistribution(self, data, **kwargs): ret = [] h = kwargs.get("h",10) - for k in data: + for k in data.index: - tmp = self.get_models_forecasts(k) + tmp = self.get_models_forecasts(data.ix[k]) - dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h) + dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max]) ret.append(dist) diff --git a/models/msfts.py b/models/msfts.py index 06c48b4..06658fa 100644 --- a/models/msfts.py +++ b/models/msfts.py @@ -46,7 +46,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS): index = self.indexer.get_season_of_data(data) ndata = self.indexer.get_data(data) - for k in np.arange(1, len(data)): + for k in np.arange(0, len(index)): flrg = self.flrgs[str(index[k])] @@ -54,7 +54,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS): ret.append(sum(mp) / len(mp)) - ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]]) + ret = self.doInverseTransformations(ret, params=[ndata]) return ret diff --git a/models/seasonal/SeasonalIndexer.py b/models/seasonal/SeasonalIndexer.py index 0211d0f..2a8d304 100644 --- a/models/seasonal/SeasonalIndexer.py +++ b/models/seasonal/SeasonalIndexer.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from enum import Enum @@ -27,8 +28,8 @@ class SeasonalIndexer(object): class LinearSeasonalIndexer(SeasonalIndexer): - def __init__(self,seasons,units,ignore=None,**kwargs): - super(LinearSeasonalIndexer, self).__init__(len(seasons),kwargs) + def __init__(self,seasons,units,ignore=None, **kwargs): + super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs) self.seasons = seasons self.units = units self.ignore = ignore @@ -78,7 +79,7 @@ class LinearSeasonalIndexer(SeasonalIndexer): class DataFrameSeasonalIndexer(SeasonalIndexer): def __init__(self,index_fields,index_seasons, data_fields,**kwargs): - super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons),kwargs) + super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs) self.fields = index_fields self.seasons = index_seasons self.data_fields = data_fields @@ -133,7 +134,7 @@ class DateTime(Enum): class DateTimeSeasonalIndexer(SeasonalIndexer): def __init__(self,date_field, index_fields, index_seasons, data_fields,**kwargs): - super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), kwargs) + super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs) self.fields = index_fields self.seasons = index_seasons self.data_fields = data_fields @@ -163,14 +164,24 @@ class DateTimeSeasonalIndexer(SeasonalIndexer): return tmp // resolution def get_season_of_data(self, data): - # data = data.copy() + ret = [] - for ix in data.index: - date = data[self.date_field][ix] + + if isinstance(data, pd.DataFrame): + for ix in data.index: + date = data[self.date_field][ix] + season = [] + for c, f in enumerate(self.fields, start=0): + season.append(self.strip_datepart(date, f, self.seasons[c])) + ret.append(season) + + elif isinstance(data, pd.Series): + date = data[self.date_field] season = [] for c, f in enumerate(self.fields, start=0): - season.append( self.strip_datepart(date, f, self.seasons[c]) ) + season.append(self.strip_datepart(date, f, self.seasons[c])) ret.append(season) + return ret def get_season_by_index(self, index): diff --git a/probabilistic/ProbabilityDistribution.py b/probabilistic/ProbabilityDistribution.py index 115f37c..2dbeb42 100644 --- a/probabilistic/ProbabilityDistribution.py +++ b/probabilistic/ProbabilityDistribution.py @@ -16,24 +16,26 @@ class ProbabilityDistribution(object): if type is None: self.type = "KDE" - self.kde = kde.KernelSmoothing(kwargs.get("h", 1), kwargs.get("method", "epanechnikov")) + self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov")) else: self.type = type + self.description = kwargs.get("description", None) + self.nbins = kwargs.get("num_bins", 100) if self.type == "histogram": - self.nbins = kwargs.get("num_bins", None) + self.bins = kwargs.get("bins", None) self.labels = kwargs.get("bins_labels", None) - if self.bins is None: - self.bins = np.linspace(self.uod[0], self.uod[1], self.nbins).tolist() - self.labels = [str(k) for k in self.bins] + if self.bins is None: + self.bins = np.linspace(self.uod[0], self.uod[1], self.nbins).tolist() + self.labels = [str(k) for k in self.bins] - self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins)) - self.distribution = {} - self.count = 0 - for k in self.bins: self.distribution[k] = 0 + self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins)) + self.distribution = {} + self.count = 0 + for k in self.bins: self.distribution[k] = 0 self.data = kwargs.get("data",None) @@ -45,6 +47,10 @@ class ProbabilityDistribution(object): self.count += 1 else: self.data.extend(values) + self.distribution = {} + dens = self.density(self.bins) + for v,d in enumerate(dens): + self.distribution[v] = d def density(self, values): ret = [] @@ -111,7 +117,10 @@ class ProbabilityDistribution(object): fig = plt.figure(figsize=tam) axis = fig.add_subplot(111) - ys = [self.distribution[k]/self.count for k in self.bins] + if self.type == "histogram": + ys = [self.distribution[k]/self.count for k in self.bins] + else: + ys = [self.distribution[k] for k in self.bins] axis.plot(self.bins, ys,c=color, label=self.name) diff --git a/tests/general.py b/tests/general.py index 115bfc2..d94fe1e 100644 --- a/tests/general.py +++ b/tests/general.py @@ -74,6 +74,7 @@ sonda.index = np.arange(0,len(sonda.index)) sonda_treino = sonda[:1051200] sonda_teste = sonda[1051201:] +''' from pyFTS.models.seasonal import SeasonalIndexer indexers = [] @@ -93,16 +94,18 @@ for max_part in [10, 20, 30, 40, 50, 60]: print(obj) -from pyFTS import ensemble +from pyFTS.ensemble import ensemble, multiseasonal -fts = ensemble.SeasonalEnsembleFTS("") +fts = multiseasonal.SeasonalEnsembleFTS("") fts.indexers = indexers fts.partitioners = partitioners fts.train(sonda_treino, sets=None) +''' +ftse = cUtil.load_obj("models/sonda_msfts_ensemble.pkl") -cUtil.persist_obj(fts, "models/msfts_ensemble_sonda_grid.pkl") +tmp = ftse.forecastDistribution(sonda_teste) from pyFTS.benchmarks import benchmarks as bchmk #from pyFTS.benchmarks import distributed_benchmarks as bchmk