From 011e0ee4ce659f247f80658d3d7f9101b09acce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Wed, 5 Jul 2017 15:35:22 -0300 Subject: [PATCH] - Bugfixes and improvements on ProbabilityDistribution and KDE, msfts - Scale Transformation --- benchmarks/Measures.py | 2 +- common/FLR.py | 6 ++-- common/Transformations.py | 35 +++++++++++++++++++++ ensemble/multiseasonal.py | 3 +- models/msfts.py | 8 ++--- probabilistic/ProbabilityDistribution.py | 10 ++++-- probabilistic/kde.py | 13 ++++++-- tests/general.py | 40 ++++++++++++++++++------ 8 files changed, 94 insertions(+), 23 deletions(-) diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py index 2796aca..21fd558 100644 --- a/benchmarks/Measures.py +++ b/benchmarks/Measures.py @@ -234,7 +234,7 @@ def get_point_statistics(data, model, indexer=None): """Condensate all measures for point forecasters""" if indexer is not None: - ndata = np.array(indexer.get_data(data[model.order:])) + ndata = np.array(indexer.get_data(data)) else: ndata = np.array(data[model.order:]) diff --git a/common/FLR.py b/common/FLR.py index 551828e..aeec3a3 100644 --- a/common/FLR.py +++ b/common/FLR.py @@ -62,7 +62,7 @@ def generateRecurrentFLRs(fuzzyData): return flrs -def generateIndexedFLRs(sets, indexer, data): +def generateIndexedFLRs(sets, indexer, data, transformation=None): """ Create a season-indexed ordered FLR set from a list of fuzzy sets with recurrence :param sets: fuzzy sets @@ -73,7 +73,9 @@ def generateIndexedFLRs(sets, indexer, data): flrs = [] index = indexer.get_season_of_data(data) ndata = indexer.get_data(data) - for k in np.arange(1,len(data)): + if transformation is not None: + ndata = transformation.apply(ndata) + for k in np.arange(1,len(ndata)): lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k-1],sets) rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k], sets) season = index[k] diff --git a/common/Transformations.py b/common/Transformations.py index 3a96a6f..62a24f3 
class Scale(Transformation):
    """
    Min-max scaling of data onto the interval [min, max].

    The data range (minimum and maximum, ignoring NaN) is measured on the
    first data passed to apply() and is reused by every later apply() and
    inverse() call on the same instance.
    """

    # NOTE: the parameter names `min` / `max` shadow the builtins; they are
    # kept because callers pass them by keyword (e.g. Scale(min=0, max=1)).
    def __init__(self, min=0, max=1):
        """
        :param min: lower bound of the transformed interval
        :param max: upper bound of the transformed interval
        """
        super(Scale, self).__init__([min, max])
        self.data_max = None
        self.data_min = None
        self.transf_max = max
        self.transf_min = min

    def _ranges(self):
        """
        Return the pair (data_range, transf_range) used by both directions.

        :raises ValueError: if no data range has been measured yet
        """
        if self.data_max is None or self.data_min is None:
            raise ValueError("Scale has no data range yet: call apply() first")
        data_range = self.data_max - self.data_min
        if data_range == 0:
            # Constant data: avoid dividing by zero. With a unit range every
            # value maps to transf_min and inverse() maps it back to data_min.
            data_range = 1
        return data_range, self.transf_max - self.transf_min

    def apply(self, data, param=None, **kwargs):
        """
        Scale data from [data_min, data_max] onto [transf_min, transf_max].

        :param data: a list of numbers, or any object supporting arithmetic
                     with scalars (a number, a numpy array, ...)
        :param param: unused, kept for interface compatibility
        :return: a list if data is a list, otherwise the result of the
                 arithmetic on data
        """
        if self.data_max is None:
            # The range is measured only once, on the first data seen.
            self.data_max = np.nanmax(data)
            self.data_min = np.nanmin(data)
        data_range, transf_range = self._ranges()
        if isinstance(data, list):
            return [((k - self.data_min) / data_range) * transf_range + self.transf_min
                    for k in data]
        return ((data - self.data_min) / data_range) * transf_range + self.transf_min

    def inverse(self, data, param=None, **kwargs):
        """
        Map scaled data from [transf_min, transf_max] back to the data range.

        :param data: a list of numbers, or any object supporting arithmetic
                     with scalars
        :param param: unused, kept for interface compatibility
        :return: a list if data is a list, otherwise the result of the
                 arithmetic on data
        :raises ValueError: if apply() has not been called yet
        """
        data_range, transf_range = self._ranges()
        if isinstance(data, list):
            return [((k - self.transf_min) / transf_range) * data_range + self.data_min
                    for k in data]
        return ((data - self.transf_min) / transf_range) * data_range + self.data_min
class KernelSmoothing(object):
    """
    Kernel density estimator over a sample rescaled to [0, 1].

    NOTE(review): self.transf is a single Scale instance; Scale measures its
    data range on the first data it is applied to and reuses it afterwards,
    so reusing one KernelSmoothing with a different sample keeps the old
    range -- confirm callers build one instance per sample.
    """

    def __init__(self, h, method="epanechnikov"):
        """
        :param h: bandwidth, expressed in the rescaled [0, 1] space
        :param method: kernel name: "epanechnikov", "gaussian" or "uniform"
        """
        self.h = h
        self.method = method
        self.transf = Transformations.Scale(min=0, max=1)

    def kernel(self, u):
        """
        Evaluate the selected kernel function at u.

        :param u: scaled distance (x - x_i) / h, a number or a numpy array
        :return: the kernel value; zero outside [-1, 1] for the compactly
                 supported kernels (epanechnikov, uniform)
        :raises ValueError: if self.method is not a known kernel name
        """
        if self.method == "epanechnikov":
            # 1 - u**2 is negative exactly when |u| > 1, so clamping at zero
            # restricts the kernel to its support.
            return 0.75 * np.maximum(0.0, 1 - u**2)
        elif self.method == "gaussian":
            return (1 / np.sqrt(2 * np.pi)) * np.exp(-0.5 * u**2)
        elif self.method == "uniform":
            return np.where(np.abs(u) <= 1, 0.5, 0.0)
        raise ValueError("Unknown kernel method: " + str(self.method))

    def probability(self, x, data):
        """
        Estimate the density at x from the sample data.

        Computes (1 / (n * h)) * sum(K((x - x_i) / h)) with both x and the
        sample rescaled by self.transf.

        :param x: point at which the density is evaluated
        :param data: sample of observations
        :return: the estimated density at x
        """
        l = len(data)

        # Scale the sample first so a fresh self.transf measures its range on
        # the sample rather than on the single query point.
        ndata = self.transf.apply(data)
        nx = self.transf.apply(x)
        # Parenthesised on purpose: "/ l*self.h" would multiply by h.
        return sum(self.kernel((nx - k) / self.h) for k in ndata) / (l * self.h)
SeasonalIndexer @@ -223,7 +244,7 @@ bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc dump=True, save=True, file="experiments/sondawind_ahead_analytic_diff.csv", nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) -""" + from pyFTS import pwfts from pyFTS.common import Transformations @@ -240,7 +261,7 @@ from pyFTS.partitioners import Grid #bchmk.plot_compared_intervals_ahead(best[1600:1700],[model], ['blue','red'], # distributions=[True], save=True, file="pictures/best_ahead_forecasts", # time_from=40, time_to=60, resolution=100) -''' + experiments = [ ["experiments/taiex_point_synthetic_diff.csv","experiments/taiex_point_analytic_diff.csv",16], ["experiments/nasdaq_point_synthetic_diff.csv","experiments/nasdaq_point_analytic_diff.csv", 11], @@ -320,7 +341,6 @@ diff = Transformations.Differential(1) fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff) -''' tmp = sfts.SeasonalFTS("") tmp.indexer = ix tmp.appendTransformation(diff)