From 7e98b34b16d9c0e944e7b08850aec19bd74b6bb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?=
Date: Mon, 3 Jul 2017 18:39:10 -0300
Subject: [PATCH] - Refactoring: moving Ensemble methods to subpackage ensemble
 - Parallel Seasonal Ensemble

---
 ensemble/__init__.py                     |  0
 ensemble.py => ensemble/ensemble.py      | 24 +-------
 ensemble/multiseasonal.py                | 77 ++++++++++++++++++++++++
 models/msfts.py                          |  2 +-
 models/seasonal/SeasonalIndexer.py       | 15 ++---
 probabilistic/ProbabilityDistribution.py | 21 ++++---
 tests/general.py                         | 45 ++++++--------
 7 files changed, 119 insertions(+), 65 deletions(-)
 create mode 100644 ensemble/__init__.py
 rename ensemble.py => ensemble/ensemble.py (90%)
 create mode 100644 ensemble/multiseasonal.py

diff --git a/ensemble/__init__.py b/ensemble/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ensemble.py b/ensemble/ensemble.py
similarity index 90%
rename from ensemble.py
rename to ensemble/ensemble.py
index 589fc1e..9e6e191 100644
--- a/ensemble.py
+++ b/ensemble/ensemble.py
@@ -12,6 +12,7 @@ from pyFTS.common import Transformations
 import scipy.stats as st
 from pyFTS import tree
 from pyFTS.models import msfts
+from pyFTS.probabilistic import ProbabilityDistribution, kde
 
 def sampler(data, quantiles):
     ret = []
@@ -242,28 +243,5 @@ class AllMethodEnsembleFTS(EnsembleFTS):
             self.appendModel(model)
 
 
-class SeasonalEnsembleFTS(EnsembleFTS):
-    def __init__(self, name, **kwargs):
-        super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
-        self.min_order = 1
-        self.indexers = []
-        self.partitioners = []
-        self.is_multivariate = True
-        self.has_seasonality = True
-        self.has_probability_forecasting = True
-
-    def train(self, data, sets, order=1, parameters=None):
-        self.original_max = max(data)
-        self.original_min = min(data)
-
-        for ix in self.indexers:
-            for pt in self.partitioners:
-
-                model = msfts.MultiSeasonalFTS()
-                model.indexer = ix
-                model.appendTransformation(pt.transformation)
-                model.train(data,pt.sets,order=1)
-
-                self.appendModel(model)
 
 
diff --git a/ensemble/multiseasonal.py b/ensemble/multiseasonal.py
new file mode 100644
index 0000000..17b58a9
--- /dev/null
+++ b/ensemble/multiseasonal.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import numpy as np
+import pandas as pd
+import math
+from operator import itemgetter
+from pyFTS.common import FLR, FuzzySet, SortedCollection
+from pyFTS import fts, chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu, sfts
+from pyFTS.benchmarks import arima, quantreg
+from pyFTS.common import Transformations, Util as cUtil
+import scipy.stats as st
+from pyFTS.ensemble import ensemble
+from pyFTS.models import msfts
+from pyFTS.probabilistic import ProbabilityDistribution, kde
+from copy import deepcopy
+from joblib import Parallel, delayed
+import multiprocessing
+
+
+def train_individual_model(partitioner, train_data, indexer):
+    pttr = str(partitioner.__module__).split('.')[-1]
+    _key = "msfts_" + pttr + str(partitioner.partitions) + "_" + indexer.name
+
+    model = msfts.MultiSeasonalFTS(_key, indexer=indexer)
+    model.appendTransformation(partitioner.transformation)
+    model.train(train_data, partitioner.sets, order=1)
+
+    cUtil.persist_obj(model, "models/"+_key+".pkl")
+
+    print(_key)
+
+    return model
+
+
+class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
+    def __init__(self, name, **kwargs):
+        super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
+        self.min_order = 1
+        self.indexers = []
+        self.partitioners = []
+        self.is_multivariate = True
+        self.has_seasonality = True
+        self.has_probability_forecasting = True
+
+    def train(self, data, sets, order=1, parameters=None):
+        self.original_max = max(data)
+        self.original_min = min(data)
+
+        num_cores = multiprocessing.cpu_count()
+
+        # one training job per (indexer, partitioner) combination, executed in parallel
+        pool = []
+        for ix in self.indexers:
+            for pt in self.partitioners:
+                pool.append({'ix': ix, 'pt': pt})
+
+        results = Parallel(n_jobs=num_cores)(delayed(train_individual_model)(deepcopy(item['pt']), deepcopy(data), deepcopy(item['ix'])) for item in pool)
+
+        for tmp in results:
+            self.appendModel(tmp)
+
+    def forecastDistribution(self, data, **kwargs):
+
+        ret = []
+
+        h = kwargs.get("h",10)
+
+        for k in data:
+
+            tmp = self.get_models_forecasts(k)
+
+            # smooth the member models' point forecasts for this sample with a KDE
+            dist = ProbabilityDistribution.ProbabilityDistribution("KDE", h=h, data=tmp, uod=[self.original_min, self.original_max])
+
+            ret.append(dist)
+
+        return ret
\ No newline at end of file
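
A note on the parallel training introduced in ensemble/multiseasonal.py above: joblib's Parallel/delayed pair runs each call of train_individual_model in a separate worker process, so everything passed in (partitioner, training data, indexer) has to be picklable, and the deepcopy calls keep each worker's objects independent of the parent's. A minimal, self-contained sketch of the same fan-out pattern follows; train_one and the placeholder lists are illustrative stand-ins, not pyFTS API.

    import multiprocessing
    from copy import deepcopy
    from joblib import Parallel, delayed

    def train_one(partitioner, data, indexer):
        # stand-in for train_individual_model(): fit one model per combination
        return {'partitioner': partitioner, 'indexer': indexer, 'n': len(data)}

    indexers = ['ix_m15', 'ix_Mh']          # placeholders for SeasonalIndexer objects
    partitioners = ['grid_10', 'grid_20']   # placeholders for partitioner objects
    data = list(range(100))

    # one job per (indexer, partitioner) combination
    pool = [{'ix': ix, 'pt': pt} for ix in indexers for pt in partitioners]

    results = Parallel(n_jobs=multiprocessing.cpu_count())(
        delayed(train_one)(deepcopy(item['pt']), deepcopy(data), deepcopy(item['ix']))
        for item in pool)
    print(len(results))  # one trained model per combination
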
diff --git a/models/msfts.py b/models/msfts.py
index f23a1c8..06c48b4 100644
--- a/models/msfts.py
+++ b/models/msfts.py
@@ -7,7 +7,7 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
     """
     Multi-Seasonal Fuzzy Time Series
     """
-    def __init__(self, order, name, indexer, **kwargs):
+    def __init__(self, name, indexer, **kwargs):
         super(MultiSeasonalFTS, self).__init__("MSFTS")
         self.name = "Multi Seasonal FTS"
         self.shortname = "MSFTS " + name
diff --git a/models/seasonal/SeasonalIndexer.py b/models/seasonal/SeasonalIndexer.py
index 184226f..0211d0f 100644
--- a/models/seasonal/SeasonalIndexer.py
+++ b/models/seasonal/SeasonalIndexer.py
@@ -6,8 +6,9 @@ class SeasonalIndexer(object):
     """
     Seasonal Indexer. Responsible to find the seasonal index of a data point inside its data set
     """
-    def __init__(self,num_seasons):
+    def __init__(self,num_seasons, **kwargs):
         self.num_seasons = num_seasons
+        self.name = kwargs.get("name","")
 
     def get_season_of_data(self,data):
         pass
@@ -26,8 +27,8 @@ class SeasonalIndexer(object):
 
 
 class LinearSeasonalIndexer(SeasonalIndexer):
-    def __init__(self,seasons,units,ignore=None):
-        super(LinearSeasonalIndexer, self).__init__(len(seasons))
+    def __init__(self,seasons,units,ignore=None,**kwargs):
+        super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs)
         self.seasons = seasons
         self.units = units
         self.ignore = ignore
@@ -76,8 +77,8 @@ class LinearSeasonalIndexer(SeasonalIndexer):
 
 
 class DataFrameSeasonalIndexer(SeasonalIndexer):
-    def __init__(self,index_fields,index_seasons, data_fields):
-        super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons))
+    def __init__(self,index_fields,index_seasons, data_fields,**kwargs):
+        super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
         self.fields = index_fields
         self.seasons = index_seasons
         self.data_fields = data_fields
@@ -131,8 +132,8 @@ class DateTime(Enum):
 
 
 class DateTimeSeasonalIndexer(SeasonalIndexer):
-    def __init__(self,date_field, index_fields, index_seasons, data_fields):
-        super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons))
+    def __init__(self,date_field, index_fields, index_seasons, data_fields,**kwargs):
+        super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
         self.fields = index_fields
         self.seasons = index_seasons
         self.data_fields = data_fields
diff --git a/probabilistic/ProbabilityDistribution.py b/probabilistic/ProbabilityDistribution.py
index b238289..115f37c 100644
--- a/probabilistic/ProbabilityDistribution.py
+++ b/probabilistic/ProbabilityDistribution.py
@@ -2,6 +2,7 @@ import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from pyFTS.common import FuzzySet,SortedCollection
+from pyFTS.probabilistic import kde
 
 
 class ProbabilityDistribution(object):
@@ -11,14 +12,15 @@ class ProbabilityDistribution(object):
     If type is KDE the PDF is continuous
     """
     def __init__(self,type, **kwargs):
-        if type is None:
+        self.uod = kwargs.get("uod", None)
+        self.data = kwargs.get("data", [])  # samples used by the KDE branch of density()
+        if type is None or type == "KDE":
             self.type = "KDE"
+            self.kde = kde.KernelSmoothing(kwargs.get("h", 1), kwargs.get("method", "epanechnikov"))
         else:
             self.type = type
 
         self.description = kwargs.get("description", None)
-        self.uod = kwargs.get("uod", None)
-
         if self.type == "histogram":
             self.nbins = kwargs.get("num_bins", None)
             self.bins = kwargs.get("bins", None)
@@ -45,14 +47,15 @@ class ProbabilityDistribution(object):
             self.data.extend(values)
 
     def density(self, values):
-        if self.type == "histogram":
-            ret = []
-            for k in values:
+        ret = []
+        for k in values:
+            if self.type == "histogram":
                 v = self.index.find_ge(k)
                 ret.append(self.distribution[v] / self.count)
-            return ret
-        else:
-            pass
+            else:
+                v = self.kde.probability(k, self.data)
+                ret.append(v)
+        return ret
 
     def cummulative(self, values):
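
The continuous branch of density() above delegates to kde.KernelSmoothing, a module that is not part of this patch. As a rough sketch of the idea only — assuming the interface used above, KernelSmoothing(h, method) with a probability(x, data) method, and not the actual pyFTS implementation — an Epanechnikov kernel smoother can be written as:

    class KernelSmoothing:
        # stand-in kernel density estimator with the interface density() expects
        def __init__(self, h, method="epanechnikov"):
            self.h = h            # bandwidth
            self.method = method  # only the Epanechnikov kernel is sketched here

        def kernel(self, u):
            # Epanechnikov kernel: 0.75 * (1 - u^2) on [-1, 1], zero elsewhere
            return 0.75 * (1.0 - u ** 2) if abs(u) <= 1.0 else 0.0

        def probability(self, x, data):
            # classic KDE estimate: (1 / (n * h)) * sum of K((x - x_i) / h)
            n = len(data)
            return sum(self.kernel((x - xi) / self.h) for xi in data) / (n * self.h)

With a smoother of this shape in place, ProbabilityDistribution("KDE", h=1, data=[1, 2, 2, 3]).density([2]) evaluates the smoothed density at the point 2.
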
diff --git a/tests/general.py b/tests/general.py
index b5d3088..115bfc2 100644
--- a/tests/general.py
+++ b/tests/general.py
@@ -76,38 +76,33 @@ sonda_teste = sonda[1051201:]
 
 from pyFTS.models.seasonal import SeasonalIndexer
 
-ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg')
+indexers = []
 
-cUtil.persist_obj(ix_m15, "models/sonda_ix_m15.pkl")
+for i in ["models/sonda_ix_m15.pkl", "models/sonda_ix_Mh.pkl", "models/sonda_ix_Mhm15.pkl"]:
+    obj = cUtil.load_obj(i)
+    indexers.append( obj )
+    print(obj)
+
+partitioners = []
+
+transformations = ["", "_diff"]
+for max_part in [10, 20, 30, 40, 50, 60]:
+    for t in transformations:
+        obj = cUtil.load_obj("models/sonda_fs_grid_" + str(max_part) + t + ".pkl")
+        partitioners.append( obj )
+        print(obj)
 
-ix_Mh = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,SeasonalIndexer.DateTime.hour],
-                                                [None, None],'glo_avg')
+from pyFTS.ensemble import multiseasonal
 
-cUtil.persist_obj(ix_Mh, "models/sonda_ix_Mh.pkl")
+fts = multiseasonal.SeasonalEnsembleFTS("")
 
-ix_Mhm15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,
-                                                           SeasonalIndexer.DateTime.hour, SeasonalIndexer.DateTime.minute],
-                                                   [None, None,15],'glo_avg')
+fts.indexers = indexers
+fts.partitioners = partitioners
 
-cUtil.persist_obj(ix_Mhm15, "models/sonda_ix_Mhm15.pkl")
-
-
-tmp = ix_Mh.get_data(sonda_treino)
-for max_part in [10, 20, 30, 40, 50]:
-
-    fs1 = Grid.GridPartitionerTrimf(tmp,max_part)
-
-    cUtil.persist_obj(fs1,"models/sonda_fs_grid_" + str(max_part) + ".pkl")
-
-    fs2 = FCM.FCMPartitionerTrimf(tmp, max_part)
-
-    cUtil.persist_obj(fs2, "models/sonda_fs_fcm_" + str(max_part) + ".pkl")
-
-    fs3 = Entropy.EntropyPartitionerTrimf(tmp, max_part)
-
-    cUtil.persist_obj(fs3, "models/sonda_fs_entropy_" + str(max_part) + ".pkl")
+fts.train(sonda_treino, sets=None)
 
+cUtil.persist_obj(fts, "models/msfts_ensemble_sonda_grid.pkl")
 
 from pyFTS.benchmarks import benchmarks as bchmk
 #from pyFTS.benchmarks import distributed_benchmarks as bchmk
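
The test above now only loads the indexer and partitioner pickles from models/; the lines removed in this hunk are what originally generated them. For reference, a condensed version of that removed setup code is shown below. It assumes sonda_treino and the Grid partitioner are available as elsewhere in tests/general.py (the pyFTS.partitioners.Grid import path is an assumption here), and note that the loading loop above also expects "_diff" variants and 60-partition files that this code never produced, so those pickles have to come from somewhere else.

    from pyFTS.models.seasonal import SeasonalIndexer
    from pyFTS.common import Util as cUtil
    from pyFTS.partitioners import Grid   # assumed import path; adjust to the project layout

    ix_Mh = SeasonalIndexer.DateTimeSeasonalIndexer('data',
                [SeasonalIndexer.DateTime.month, SeasonalIndexer.DateTime.hour],
                [None, None], 'glo_avg')
    cUtil.persist_obj(ix_Mh, "models/sonda_ix_Mh.pkl")

    # grid partitioners over the seasonally indexed training data
    tmp = ix_Mh.get_data(sonda_treino)
    for max_part in [10, 20, 30, 40, 50]:
        fs = Grid.GridPartitionerTrimf(tmp, max_part)
        cUtil.persist_obj(fs, "models/sonda_fs_grid_" + str(max_part) + ".pkl")
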