From 61b5d8900927da858b0492350360a5c441003cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Thu, 14 Sep 2017 11:37:50 -0300 Subject: [PATCH] PWFTS: improved probabilistic forecasting method --- pyFTS/hofts.py | 2 +- pyFTS/ifts.py | 2 +- pyFTS/partitioners/Grid.py | 1 + pyFTS/pwfts.py | 67 ++++++++++++++++++++++++++------------ pyFTS/tests/pwfts.py | 36 ++++++++++++++++---- 5 files changed, 79 insertions(+), 29 deletions(-) diff --git a/pyFTS/hofts.py b/pyFTS/hofts.py index 02ba613..4aedc84 100644 --- a/pyFTS/hofts.py +++ b/pyFTS/hofts.py @@ -49,7 +49,7 @@ class HighOrderFLRG(object): class HighOrderFTS(fts.FTS): """Conventional High Order Fuzzy Time Series""" def __init__(self, name, **kwargs): - super(HighOrderFTS, self).__init__(1, "HOFTS" + name, **kwargs) + super(HighOrderFTS, self).__init__(name="HOFTS" + name, **kwargs) self.name = "High Order FTS" self.shortname = "HOFTS" + name self.detail = "Chen" diff --git a/pyFTS/ifts.py b/pyFTS/ifts.py index c58dd15..6584776 100644 --- a/pyFTS/ifts.py +++ b/pyFTS/ifts.py @@ -9,7 +9,7 @@ from pyFTS import hofts, fts, tree class IntervalFTS(hofts.HighOrderFTS): """High Order Interval Fuzzy Time Series""" def __init__(self, name, **kwargs): - super(IntervalFTS, self).__init__(order=1, name="IFTS " + name, **kwargs) + super(IntervalFTS, self).__init__(name="IFTS " + name, **kwargs) self.shortname = "IFTS " + name self.name = "Interval FTS" self.detail = "Silva, P.; GuimarĂ£es, F.; Sadaei, H. (2016)" diff --git a/pyFTS/partitioners/Grid.py b/pyFTS/partitioners/Grid.py index 1776ae0..0e7b5c2 100644 --- a/pyFTS/partitioners/Grid.py +++ b/pyFTS/partitioners/Grid.py @@ -31,5 +31,6 @@ class GridPartitioner(partitioner.Partitioner): FuzzySet.FuzzySet(self.prefix + str(count), Membership.trapmf, [c - partlen, c - q, c + q, c + partlen], c)) count += 1 + self.min = self.min - partlen return sets diff --git a/pyFTS/pwfts.py b/pyFTS/pwfts.py index 3ef6246..068f740 100644 --- a/pyFTS/pwfts.py +++ b/pyFTS/pwfts.py @@ -35,17 +35,31 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG): def get_RHSprobability(self, c): return self.RHS[c] / self.frequency_count - def get_LHSprobability(self, x, norm, uod, nbins): + def lhs_probability(self, x, norm, uod, nbins): pk = self.frequency_count / norm - mv = [] - for set in self.LHS: - mv.append( set.membership(x) ) - min_mv = np.prod(mv) - tmp = pk * (min_mv / self.partition_function(uod, nbins=nbins)) + tmp = pk * (self.lhs_membership(x) / self.partition_function(uod, nbins=nbins)) return tmp + def rhs_conditional_probability(self, x, sets, uod, nbins): + total = 0.0 + for rhs in self.RHS: + set = sets[rhs] + wi = self.get_RHSprobability(rhs) + mv = set.membership(x) / set.partition_function(uod, nbins=nbins) + total += wi * mv + + return total + + def lhs_membership(self,x): + mv = [] + for set in self.LHS: + mv.append(set.membership(x)) + + min_mv = np.prod(mv) + return min_mv + def partition_function(self, uod, nbins=100): if self.Z is None: self.Z = 0.0 @@ -466,30 +480,41 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): def forecastDistribution(self, data, **kwargs): + smooth = kwargs.get("smooth", "histogram") + nbins = kwargs.get("num_bins", 100) + + ndata = np.array(self.doTransformations(data)) + + l = len(ndata) + ret = [] + uod = self.get_UoD() + _keys = sorted(self.flrgs.keys()) + _bins = np.linspace(uod[0], uod[1], nbins) - smooth = kwargs.get("smooth", "KDE") - alpha = kwargs.get("alpha", None) + for k in np.arange(self.order - 1, l): + sample = ndata[k - (self.order - 1): k + 1] + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) - for k in data.index: + for bin in _bins: + num = [] + den = [] + for s in _keys: + flrg = self.flrgs[s] + pk = flrg.lhs_probability(sample, self.global_frequency_count, uod, nbins) + wi = flrg.rhs_conditional_probability(bin, self.setsDict, uod, nbins) + num.append(wi * pk) + den.append(pk) + pf = sum(num) / sum(den) - tmp = self.get_models_forecasts(data.ix[k]) - - if alpha is None: - tmp = np.ravel(tmp).tolist() - else: - tmp = self.get_distribution_interquantile( np.ravel(tmp).tolist(), alpha) - - name = str(self.indexer.get_index(data.ix[k])) - - dist = ProbabilityDistribution.ProbabilityDistribution(smooth, - uod=[self.original_min, self.original_max], - data=tmp, name=name, **kwargs) + dist.set(bin, pf) ret.append(dist) return ret + + def forecastAhead(self, data, steps, **kwargs): ret = [data[k] for k in np.arange(len(data) - self.order, len(data))] diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py index 421c1ee..749e539 100644 --- a/pyFTS/tests/pwfts.py +++ b/pyFTS/tests/pwfts.py @@ -35,11 +35,11 @@ from pyFTS import pwfts from pyFTS import tree from pyFTS.benchmarks import benchmarks as bchmk -uod = [10162, 21271] +#uod = [10162, 21271] enrollments_fs1 = Grid.GridPartitioner(enrollments, 6) for s in enrollments_fs1.sets: - print(s.partition_function(uod, 100)) + print(s) #.partition_function(uod, 100)) pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1) pfts1_enrollments.train(enrollments, None, 1) @@ -51,10 +51,34 @@ print(pfts1_enrollments) norm = pfts1_enrollments.global_frequency_count uod = pfts1_enrollments.get_UoD() print(uod) -for k in sorted(pfts1_enrollments.flrgs.keys()): - flrg = pfts1_enrollments.flrgs[k] - #tmp = flrg.get_LHSprobability(15000, norm, uod, 100) - print(flrg.partition_function(uod,100)) +#for k in sorted(pfts1_enrollments.flrgs.keys()) +# flrg = pfts1_enrollments.flrgs[k] +# tmp = flrg.get_LHSprobability(15000, norm, uod, 100) +# print(tmp) #flrg.partition_function(uod,100)) + +#print("MARGINAL VERIFICATION") +#for s in sorted(pfts1_enrollments.flrgs.keys()): +# flrg = pfts1_enrollments.flrgs[s] + #print(flrg.get_LHSprobability(15000, norm, uod, 100)) +# print(sum([flrg.get_LHSprobability(k, norm, uod, 100) for k in np.linspace(uod[0],uod[1],100)])) + +print("P(T+1 | T") +sets = pfts1_enrollments.setsDict +t = 15000 +pf = 0.0 +for t1 in np.linspace(uod[0], uod[1], 100): + num = [] + den = [] + for s in sorted(pfts1_enrollments.flrgs.keys()): + flrg = pfts1_enrollments.flrgs[s] + pk = flrg.get_LHSprobability(t, norm, uod, 100) + wi = flrg.get_RHS_conditional_probability(t1, sets, uod, 100) + num.append(wi * pk) + den.append(pk) + pt1 = sum(num)/sum(den) + pf += pt1 + print(str(round(t1,0)) + ": " + str(round(pt1, 3))) #/sum(den)) +print(pf) ''' pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2")