PWFTS: improved probabilistic forecasting method

This commit is contained in:
Petrônio Cândido 2017-09-14 11:37:50 -03:00
parent 0764d249ea
commit 61b5d89009
5 changed files with 79 additions and 29 deletions

View File

@@ -49,7 +49,7 @@ class HighOrderFLRG(object):
class HighOrderFTS(fts.FTS): class HighOrderFTS(fts.FTS):
"""Conventional High Order Fuzzy Time Series""" """Conventional High Order Fuzzy Time Series"""
def __init__(self, name, **kwargs): def __init__(self, name, **kwargs):
super(HighOrderFTS, self).__init__(1, "HOFTS" + name, **kwargs) super(HighOrderFTS, self).__init__(name="HOFTS" + name, **kwargs)
self.name = "High Order FTS" self.name = "High Order FTS"
self.shortname = "HOFTS" + name self.shortname = "HOFTS" + name
self.detail = "Chen" self.detail = "Chen"

View File

@@ -9,7 +9,7 @@ from pyFTS import hofts, fts, tree
class IntervalFTS(hofts.HighOrderFTS): class IntervalFTS(hofts.HighOrderFTS):
"""High Order Interval Fuzzy Time Series""" """High Order Interval Fuzzy Time Series"""
def __init__(self, name, **kwargs): def __init__(self, name, **kwargs):
super(IntervalFTS, self).__init__(order=1, name="IFTS " + name, **kwargs) super(IntervalFTS, self).__init__(name="IFTS " + name, **kwargs)
self.shortname = "IFTS " + name self.shortname = "IFTS " + name
self.name = "Interval FTS" self.name = "Interval FTS"
self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)" self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"

View File

@@ -31,5 +31,6 @@ class GridPartitioner(partitioner.Partitioner):
FuzzySet.FuzzySet(self.prefix + str(count), Membership.trapmf, [c - partlen, c - q, c + q, c + partlen], c)) FuzzySet.FuzzySet(self.prefix + str(count), Membership.trapmf, [c - partlen, c - q, c + q, c + partlen], c))
count += 1 count += 1
self.min = self.min - partlen
return sets return sets

View File

@@ -35,17 +35,31 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
def get_RHSprobability(self, c): def get_RHSprobability(self, c):
return self.RHS[c] / self.frequency_count return self.RHS[c] / self.frequency_count
def get_LHSprobability(self, x, norm, uod, nbins): def lhs_probability(self, x, norm, uod, nbins):
pk = self.frequency_count / norm pk = self.frequency_count / norm
mv = []
for set in self.LHS:
mv.append( set.membership(x) )
min_mv = np.prod(mv) tmp = pk * (self.lhs_membership(x) / self.partition_function(uod, nbins=nbins))
tmp = pk * (min_mv / self.partition_function(uod, nbins=nbins))
return tmp return tmp
def rhs_conditional_probability(self, x, sets, uod, nbins):
total = 0.0
for rhs in self.RHS:
set = sets[rhs]
wi = self.get_RHSprobability(rhs)
mv = set.membership(x) / set.partition_function(uod, nbins=nbins)
total += wi * mv
return total
def lhs_membership(self,x):
mv = []
for set in self.LHS:
mv.append(set.membership(x))
min_mv = np.prod(mv)
return min_mv
def partition_function(self, uod, nbins=100): def partition_function(self, uod, nbins=100):
if self.Z is None: if self.Z is None:
self.Z = 0.0 self.Z = 0.0
@@ -466,30 +480,41 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
def forecastDistribution(self, data, **kwargs): def forecastDistribution(self, data, **kwargs):
smooth = kwargs.get("smooth", "histogram")
nbins = kwargs.get("num_bins", 100)
ndata = np.array(self.doTransformations(data))
l = len(ndata)
ret = [] ret = []
uod = self.get_UoD()
_keys = sorted(self.flrgs.keys())
_bins = np.linspace(uod[0], uod[1], nbins)
smooth = kwargs.get("smooth", "KDE") for k in np.arange(self.order - 1, l):
alpha = kwargs.get("alpha", None) sample = ndata[k - (self.order - 1): k + 1]
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
for k in data.index: for bin in _bins:
num = []
den = []
for s in _keys:
flrg = self.flrgs[s]
pk = flrg.lhs_probability(sample, self.global_frequency_count, uod, nbins)
wi = flrg.rhs_conditional_probability(bin, self.setsDict, uod, nbins)
num.append(wi * pk)
den.append(pk)
pf = sum(num) / sum(den)
tmp = self.get_models_forecasts(data.ix[k]) dist.set(bin, pf)
if alpha is None:
tmp = np.ravel(tmp).tolist()
else:
tmp = self.get_distribution_interquantile( np.ravel(tmp).tolist(), alpha)
name = str(self.indexer.get_index(data.ix[k]))
dist = ProbabilityDistribution.ProbabilityDistribution(smooth,
uod=[self.original_min, self.original_max],
data=tmp, name=name, **kwargs)
ret.append(dist) ret.append(dist)
return ret return ret
def forecastAhead(self, data, steps, **kwargs): def forecastAhead(self, data, steps, **kwargs):
ret = [data[k] for k in np.arange(len(data) - self.order, len(data))] ret = [data[k] for k in np.arange(len(data) - self.order, len(data))]

View File

@@ -35,11 +35,11 @@ from pyFTS import pwfts
from pyFTS import tree from pyFTS import tree
from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
uod = [10162, 21271] #uod = [10162, 21271]
enrollments_fs1 = Grid.GridPartitioner(enrollments, 6) enrollments_fs1 = Grid.GridPartitioner(enrollments, 6)
for s in enrollments_fs1.sets: for s in enrollments_fs1.sets:
print(s.partition_function(uod, 100)) print(s) #.partition_function(uod, 100))
pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1) pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1)
pfts1_enrollments.train(enrollments, None, 1) pfts1_enrollments.train(enrollments, None, 1)
@@ -51,10 +51,34 @@ print(pfts1_enrollments)
norm = pfts1_enrollments.global_frequency_count norm = pfts1_enrollments.global_frequency_count
uod = pfts1_enrollments.get_UoD() uod = pfts1_enrollments.get_UoD()
print(uod) print(uod)
for k in sorted(pfts1_enrollments.flrgs.keys()): #for k in sorted(pfts1_enrollments.flrgs.keys())
flrg = pfts1_enrollments.flrgs[k] # flrg = pfts1_enrollments.flrgs[k]
#tmp = flrg.get_LHSprobability(15000, norm, uod, 100) # tmp = flrg.get_LHSprobability(15000, norm, uod, 100)
print(flrg.partition_function(uod,100)) # print(tmp) #flrg.partition_function(uod,100))
#print("MARGINAL VERIFICATION")
#for s in sorted(pfts1_enrollments.flrgs.keys()):
# flrg = pfts1_enrollments.flrgs[s]
#print(flrg.get_LHSprobability(15000, norm, uod, 100))
# print(sum([flrg.get_LHSprobability(k, norm, uod, 100) for k in np.linspace(uod[0],uod[1],100)]))
print("P(T+1 | T")
sets = pfts1_enrollments.setsDict
t = 15000
pf = 0.0
for t1 in np.linspace(uod[0], uod[1], 100):
num = []
den = []
for s in sorted(pfts1_enrollments.flrgs.keys()):
flrg = pfts1_enrollments.flrgs[s]
pk = flrg.lhs_probability(t, norm, uod, 100)
wi = flrg.rhs_conditional_probability(t1, sets, uod, 100)
num.append(wi * pk)
den.append(pk)
pt1 = sum(num)/sum(den)
pf += pt1
print(str(round(t1,0)) + ": " + str(round(pt1, 3))) #/sum(den))
print(pf)
''' '''
pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2") pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2")