From 8787d749ff147063d3c3800a5dde45ee32caf9a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?=
Date: Tue, 14 Feb 2017 23:01:44 -0200
Subject: [PATCH] Probability density function at FTS

---
Review notes (not part of the commit message):
 - Measures.entropy skips zero-probability bins; 0 * log(0) is nan in numpy.
 - Measures.pdf no longer prints the grid on every call.
 - FTS.generate_data builds pdf_uod from simulation_uod (was simulation_fs)
   and drops locals that were assigned but never read.
 - tests/pfts.py unpacks all four values returned by generate_data.
 - Blank context lines and indentation were restored from the hunk counts;
   the index lines still carry the original blob ids.

 benchmarks/Measures.py | 56 ++++++++++++++++++++++++++++++++
 common/FuzzySet.py     |  5 +++
 fts.py                 | 73 +++++++++++++++++++++++++++++++++++++++++-
 pfts.py                | 11 ------
 tests/pfts.py          | 30 ++++++++++++----
 5 files changed, 156 insertions(+), 19 deletions(-)

diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py
index 5d6f4e4..a15d5c7 100644
--- a/benchmarks/Measures.py
+++ b/benchmarks/Measures.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+from pyFTS.common import FuzzySet,SortedCollection
 
 
 # Autocorrelation function estimative
@@ -144,3 +145,58 @@ def crps(targets, densities):
         _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
 
     return _crps / float(l * n)
+
+
+def pdf(data, bins=100):
+    """Estimate the empirical distribution of data over a grid of points.
+
+    The grid is bins evenly spaced values from min(data) to max(data); each
+    sample is counted at the grid value that find_ge returns for it. Returns
+    a dict mapping every grid value to its relative frequency.
+    """
+    _mx = max(data)
+    _mn = min(data)
+    percentiles = np.linspace(_mn, _mx, bins).tolist()
+
+    index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)
+
+    _pdf = {k: 0 for k in percentiles}
+
+    for k in data:
+        v = index_percentiles.find_ge(k)
+        _pdf[v] += 1
+
+    norm = sum(list(_pdf.values()))
+    for k in _pdf: _pdf[k] /= norm
+
+    return _pdf
+
+
+def pdf_fuzzysets(data,sets):
+    """Distribute data over the fuzzy sets, weighted by membership.
+
+    Accumulates, for each set in sets, the membership values that
+    FuzzySet.fuzzyInstance reports for every sample, then normalises the
+    totals so they sum to one. Returns a dict keyed by fuzzy set name.
+    """
+    _pdf = {}
+    for k in sets: _pdf[k.name] = 0
+    for k in data:
+        memberships = FuzzySet.fuzzyInstance(k, sets)
+        for c, fs in enumerate(sets, start=0):
+            _pdf[fs.name] += memberships[c]
+
+    norm = sum(list(_pdf.values()))
+    for k in _pdf: _pdf[k] /= norm
+
+    return _pdf
+
+
+def entropy(pdf):
+    """Return the Shannon entropy (natural log) of the distribution pdf.
+
+    Zero-probability entries are skipped: 0 * log(0) evaluates to nan in
+    numpy and would otherwise turn the whole sum into nan.
+    """
+    h = -sum([pdf[k] * np.log(pdf[k]) for k in pdf if pdf[k] > 0])
+    return h
\ No newline at end of file
diff --git a/common/FuzzySet.py b/common/FuzzySet.py
index e4d6843..b749e83 100644
--- a/common/FuzzySet.py
+++ b/common/FuzzySet.py
@@ -32,6 +32,11 @@ def getMaxMembershipFuzzySet(inst, fuzzySets):
     mv = fuzzyInstance(inst, fuzzySets)
     return fuzzySets[np.argwhere(mv == max(mv))[0, 0]]
 
+def getMaxMembershipFuzzySetIndex(inst, fuzzySets):
+    """Return the index in fuzzySets of the first set with maximum membership for inst."""
+    mv = fuzzyInstance(inst, fuzzySets)
+    return np.argwhere(mv == max(mv))[0, 0]
+
 
 def fuzzySeries(data, fuzzySets):
     fts = []
diff --git a/fts.py b/fts.py
index fad4fb2..15108e2 100644
--- a/fts.py
+++ b/fts.py
@@ -1,5 +1,8 @@
 import numpy as np
-from pyFTS import *
+import pandas as pd
+from pyFTS import tree
+from pyFTS.common import FuzzySet, SortedCollection
+from pyFTS.benchmarks import Measures
 
 
 class FTS(object):
@@ -100,3 +103,71 @@ class FTS(object):
         for r in sorted(self.flrgs):
             tmp = tmp + str(self.flrgs[r]) + "\n"
         return tmp
+
+    def buildTreeWithoutOrder(self, node, lags, level):
+        """Recursively give node one child per value in lags[level].
+
+        Recursion stops at the first level that is not a key of lags.
+        """
+        if level not in lags:
+            return
+
+        for s in lags[level]:
+            node.appendChild(tree.FLRGTreeNode(s))
+
+        for child in node.getChildren():
+            self.buildTreeWithoutOrder(child, lags, level + 1)
+
+    def generate_data(self,bins=100):
+        """Simulate forecasts over a grid of the universe of discourse.
+
+        A tree with self.order levels is built from bins evenly spaced values
+        between self.sets[0].lower and self.sets[-1].upper, and each path of
+        the tree is forecast. Results are stored both per grid point and per
+        maximum-membership fuzzy set, and summarised as distributions.
+
+        Returns [pdf_fs, pdf_uod, simulation_fs, simulation_uod].
+        """
+        dim_uod = tuple([bins for k in range(0,self.order)])
+
+        dim_fs = tuple([ len(self.sets) for k in range(0, self.order)])
+
+        simulation_uod = np.zeros(shape=dim_uod, dtype=float)
+
+        simulation_fs = np.zeros(shape=dim_fs, dtype=float)
+
+        percentiles = np.linspace(self.sets[0].lower, self.sets[-1].upper, bins).tolist()
+
+        lags = {}
+
+        for o in np.arange(0, self.order):
+            lags[o] = percentiles
+
+        # Build the tree with all possible paths
+
+        root = tree.FLRGTreeNode(None)
+
+        self.buildTreeWithoutOrder(root, lags, 0)
+
+        # Trace the possible paths
+
+        for p in root.paths():
+            path = list(reversed(list(filter(None.__ne__, p))))
+
+            index_uod = tuple([percentiles.index(k) for k in path])
+
+            index_fs = tuple([ FuzzySet.getMaxMembershipFuzzySetIndex(k, self.sets) for k in path])
+
+            forecast = self.forecast(path)[0]
+
+            simulation_uod[index_uod] = forecast
+
+            simulation_fs[index_fs] = forecast
+
+        pdf_fs = Measures.pdf_fuzzysets(np.ravel(simulation_fs),self.sets)
+
+        # The universe-of-discourse density comes from the grid simulation,
+        # not from the per-fuzzy-set one.
+        pdf_uod = Measures.pdf(np.ravel(simulation_uod), bins=bins)
+
+        return [pdf_fs, pdf_uod, simulation_fs, simulation_uod ]
diff --git a/pfts.py b/pfts.py
index 0a91fbd..87fef27 100644
--- a/pfts.py
+++ b/pfts.py
@@ -378,17 +378,6 @@ class ProbabilisticFTS(ifts.IntervalFTS):
                 grid[k] += 1
         return grid
 
-    def buildTreeWithoutOrder(self, node, lags, level):
-
-        if level not in lags:
-            return
-
-        for s in lags[level]:
-            node.appendChild(tree.FLRGTreeNode(s))
-
-        for child in node.getChildren():
-            self.buildTreeWithoutOrder(child, lags, level + 1)
-
     def forecastAheadDistribution(self, data, steps, resolution, parameters=2):
 
         ret = []
diff --git a/tests/pfts.py b/tests/pfts.py
index 03c2f22..32a36d4 100644
--- a/tests/pfts.py
+++ b/tests/pfts.py
@@ -35,24 +35,40 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
 
 diff = Transformations.Differential(1)
 
-fs = Grid.GridPartitionerTrimf(gauss_treino,7)
+fs = Grid.GridPartitionerTrimf(gauss_treino,10)
 
 #tmp = chen.ConventionalFTS("")
 
 pfts1 = pfts.ProbabilisticFTS("1")
 #pfts1.appendTransformation(diff)
-pfts1.train(gauss_treino,fs,1)
-pfts2 = pfts.ProbabilisticFTS("n = 2")
+pfts1.train(gauss_treino,fs,2)
+#pfts2 = pfts.ProbabilisticFTS("n = 2")
 #pfts2.appendTransformation(diff)
-pfts2.train(gauss_treino,fs,2)
+#pfts2.train(gauss_treino,fs,2)
 
-pfts3 = pfts.ProbabilisticFTS("n = 3")
+#pfts3 = pfts.ProbabilisticFTS("n = 3")
 #pfts3.appendTransformation(diff)
-pfts3.train(gauss_treino,fs,3)
+#pfts3.train(gauss_treino,fs,3)
 
-densities1 = pfts1.forecastAheadDistribution(gauss_teste[:50],2,1.50, parameters=2)
+#densities1 = pfts1.forecastAheadDistribution(gauss_teste[:50],2,1.50, parameters=2)
 
 #print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
 
 
 
+pdf_fs, pdf_uod, sim_fs, sim_uod = pfts1.generate_data(bins=10)
+
+#print(pdf_fs)
+
+#print(sim_fs)
+
+print(sim_uod)
+
+print(np.ravel(sim_uod))
+
+#print(sim_uod)
+
+#print (Measures.pdf(gauss_treino,bins=10))
+
+
+