PWFTS new one-step-ahead probabilistic and quantile-interval forecasts

Petrônio Cândido 2017-09-20 14:31:45 -03:00
parent 61b5d89009
commit 1ec03d34de
4 changed files with 203 additions and 162 deletions
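Taken together, the changes below let a ProbabilisticWeightedFTS switch between the original extremum intervals and the new quantile intervals, backed by one-step-ahead quantiles on ProbabilityDistribution. A minimal usage sketch (the stand-in series and import paths are assumptions, not part of this commit):

    from pyFTS.partitioners import Grid
    from pyFTS import pwfts  # assumed module path for this era of the repo

    data = [5000, 5100, 5250, 5300, 5420, 5500, 5600, 5750]  # stand-in series

    fs = Grid.GridPartitioner(data, 6)
    model = pwfts.ProbabilisticWeightedFTS("pwfts", partitioner=fs)
    model.train(data, None, 1)

    # Previous behavior: probability/membership-weighted FLRG bounds
    print(model.forecastInterval(data, method='extremum'))

    # New behavior: [alpha, 1 - alpha] quantiles of the one-step distribution
    print(model.forecastInterval(data, method='quantile', alpha=0.05))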

View File

@@ -497,6 +497,20 @@ def print_interval_statistics(original, models):
     print(ret)

+def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1):
+    lower = [kk[0] for kk in intervals]
+    upper = [kk[1] for kk in intervals]
+    mi = min(lower) * 0.95
+    ma = max(upper) * 1.05
+    for k in np.arange(0, order):
+        lower.insert(0, None)
+        upper.insert(0, None)
+    if typeonlegend: label += " (Interval)"
+    axis.plot(lower, color=color, label=label, ls=ls, linewidth=linewidth)
+    axis.plot(upper, color=color, ls=ls, linewidth=linewidth)
+    return [mi, ma]
+
 def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
                          points=True, intervals=True, linewidth=1.5):
     """
@@ -526,34 +540,28 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
     for count, fts in enumerate(models, start=0):
         if fts.has_point_forecasting and points:
-            forecasted = fts.forecast(original)
-            if isinstance(forecasted, np.ndarray):
-                forecasted = forecasted.tolist()
-            mi.append(min(forecasted) * 0.95)
-            ma.append(max(forecasted) * 1.05)
+            forecasts = fts.forecast(original)
+            if isinstance(forecasts, np.ndarray):
+                forecasts = forecasts.tolist()
+            mi.append(min(forecasts) * 0.95)
+            ma.append(max(forecasts) * 1.05)
             for k in np.arange(0, fts.order):
-                forecasted.insert(0, None)
+                forecasts.insert(0, None)
             lbl = fts.shortname + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
             if typeonlegend: lbl += " (Point)"
-            ax.plot(forecasted, color=colors[count], label=lbl, ls="-", linewidth=linewidth)
+            ax.plot(forecasts, color=colors[count], label=lbl, ls="-", linewidth=linewidth)

         if fts.has_interval_forecasting and intervals:
-            forecasted = fts.forecastInterval(original)
-            lower = [kk[0] for kk in forecasted]
-            upper = [kk[1] for kk in forecasted]
-            mi.append(min(lower) * 0.95)
-            ma.append(max(upper) * 1.05)
-            for k in np.arange(0, fts.order):
-                lower.insert(0, None)
-                upper.insert(0, None)
+            forecasts = fts.forecastInterval(original)
             lbl = fts.shortname + " " + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
-            if typeonlegend: lbl += " (Interval)"
             if not points and intervals:
                 ls = "-"
             else:
                 ls = "--"
-            ax.plot(lower, color=colors[count], label=lbl, ls=ls, linewidth=linewidth)
-            ax.plot(upper, color=colors[count], ls=ls, linewidth=linewidth)
+            tmpmi, tmpma = plot_interval(ax, forecasts, fts.order, label=lbl, typeonlegend=typeonlegend,
+                                         color=colors[count], ls=ls, linewidth=linewidth)
+            mi.append(tmpmi)
+            ma.append(tmpma)

     handles0, labels0 = ax.get_legend_handles_labels()
     lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
@@ -825,8 +833,10 @@ def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to
     cb = fig.colorbar(pc, ax=ax)
     cb.set_label('Density')

 from pyFTS.common import Transformations

 def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from):
     from matplotlib.patches import Rectangle
     from matplotlib.collections import PatchCollection
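The new plot_interval helper factors the interval-drawing logic out of plot_compared_series so any axis can reuse it. A hedged sketch of driving it directly (synthetic intervals; the import path is an assumption):

    import matplotlib.pyplot as plt
    from pyFTS.benchmarks.benchmarks import plot_interval  # assumed path

    intervals = [[10.0, 12.0], [10.8, 13.1], [10.2, 12.6]]  # [lower, upper] per step

    fig, ax = plt.subplots(figsize=(8, 3))
    mi, ma = plot_interval(ax, intervals, order=1, label="PWFTS",
                           typeonlegend=True, color='blue', ls='--')
    ax.set_ylim(mi, ma)  # the helper returns y-limits padded by 5%
    plt.show()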

View File

@@ -26,11 +26,16 @@ class ProbabilityDistribution(object):
         if self.bins is None:
             self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
-            self.resolution = (self.uod[1] - self.uod[0])/self.nbins
         self.labels = [str(k) for k in self.bins]

+        if self.uod is not None:
+            self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
+
-        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        self.quantile_index = None
         self.distribution = {}
+        self.cdf = None
+        self.qtl = None
         self.count = 0
         for k in self.bins: self.distribution[k] = 0
@@ -44,13 +49,13 @@ class ProbabilityDistribution(object):
         self.name = kwargs.get("name", "")

     def set(self, value, density):
-        k = self.index.find_ge(np.round(value,3))
+        k = self.bin_index.find_ge(value)
         self.distribution[k] = density

     def append(self, values):
         if self.type == "histogram":
             for k in values:
-                v = self.index.find_ge(k)
+                v = self.bin_index.find_ge(k)
                 self.distribution[v] += 1
                 self.count += 1
         else:
@@ -63,7 +68,7 @@ class ProbabilityDistribution(object):
     def appendInterval(self, intervals):
         if self.type == "histogram":
             for interval in intervals:
-                for k in self.index.inside(interval[0], interval[1]):
+                for k in self.bin_index.inside(interval[0], interval[1]):
                     self.distribution[k] += 1
                     self.count += 1
@@ -77,13 +82,13 @@ class ProbabilityDistribution(object):
         for k in values:
             if self.type == "histogram":
-                v = self.index.find_ge(k)
+                v = self.bin_index.find_ge(k)
                 ret.append(self.distribution[v] / self.count)
             elif self.type == "KDE":
                 v = self.kde.probability(k, self.data)
                 ret.append(v)
             else:
-                v = self.index.find_ge(k)
+                v = self.bin_index.find_ge(k)
                 ret.append(self.distribution[v])

         if scalar:
@@ -91,28 +96,51 @@ class ProbabilityDistribution(object):
         return ret

-    def cdf(self, value):
-        ret = 0
-        for k in self.bins:
-            if k < value:
-                ret += self.density(k)
-            else:
-                return ret
-        return ret
+    def build_cdf_qtl(self):
+        ret = 0.0
+        self.cdf = {}
+        self.qtl = {}
+        for k in sorted(self.bins):
+            ret += self.density(k)
+            if k not in self.cdf:
+                self.cdf[k] = ret
+            if str(ret) not in self.qtl:
+                self.qtl[str(ret)] = []
+            self.qtl[str(ret)].append(k)
+
+        _keys = [float(k) for k in sorted(self.qtl.keys())]
+        self.quantile_index = SortedCollection.SortedCollection(iterable=_keys)

     def cummulative(self, values):
+        if self.cdf is None:
+            self.build_cdf_qtl()
+
         if isinstance(values, list):
             ret = []
-            for k in values:
-                ret.append(self.cdf(k))
+            for val in values:
+                k = self.bin_index.find_ge(val)
+                ret.append(self.cdf[k])
+            return ret
         else:
-            return self.cdf(values)
+            k = self.bin_index.find_ge(values)
+            return self.cdf[k]

-    def quantile(self, qt):
-        pass
+    def quantile(self, values):
+        if self.qtl is None:
+            self.build_cdf_qtl()
+
+        if isinstance(values, list):
+            ret = []
+            for val in values:
+                k = self.quantile_index.find_ge(val)
+                ret.append(self.qtl[str(k)][0])
+        else:
+            k = self.quantile_index.find_ge(values)
+            ret = self.qtl[str(k)][0]
+
+        return ret

     def entropy(self):
         h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
@@ -157,6 +185,7 @@ class ProbabilityDistribution(object):
         return _s / len(data)

     def plot(self,axis=None,color="black",tam=[10, 6], title = None):
         if axis is None:
             fig = plt.figure(figsize=tam)
             axis = fig.add_subplot(111)
@@ -177,12 +206,12 @@ class ProbabilityDistribution(object):
         axis.set_ylabel('Probability')

     def __str__(self):
-        head = '|'
-        body = '|'
+        ret = ""
         for k in sorted(self.distribution.keys()):
-            head += str(round(k,2)) + '\t|'
+            ret += str(round(k,2)) + ':\t'
             if self.type == "histogram":
-                body += str(round(self.distribution[k] / self.count,3)) + '\t|'
+                ret += str(round(self.distribution[k] / self.count,3))
             else:
-                body += str(round(self.distribution[k], 3)) + '\t|'
-        return head + '\n' + body
+                ret += str(round(self.distribution[k], 6))
+            ret += '\n'
+        return ret
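With build_cdf_qtl in place, cummulative and quantile become lazy dictionary lookups: the CDF maps each bin to its accumulated density, and the quantile table inverts it through a SortedCollection over the accumulated values. A small sketch (the constructor keywords type, uod and nbins are inferred from the attributes used above and should be treated as assumptions):

    from pyFTS.probabilistic import ProbabilityDistribution

    dist = ProbabilityDistribution.ProbabilityDistribution(
        type="histogram", uod=[0, 100], nbins=20)  # kwargs assumed from this diff
    dist.append([12, 14, 15, 40, 42, 45, 47, 80])  # accumulate bin counts

    print(dist.cummulative(45))         # P(X <= 45); builds the CDF on first call
    print(dist.quantile([0.05, 0.95]))  # bin values at the 5% and 95% quantiles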

View File

@@ -54,8 +54,8 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def lhs_membership(self,x):
         mv = []
-        for set in self.LHS:
-            mv.append(set.membership(x))
+        for count, set in enumerate(self.LHS):
+            mv.append(set.membership(x[count]))
         min_mv = np.prod(mv)
         return min_mv
@@ -92,6 +92,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         self.has_probability_forecasting = True
         self.is_high_order = True
         self.auto_update = kwargs.get('update',False)
+        self.interval_method = kwargs.get('interval_method','extremum')
+        self.alpha = kwargs.get('alpha', 0.05)

     def train(self, data, sets, order=1,parameters='Fuzzy'):
@@ -374,6 +376,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def forecastInterval(self, data, **kwargs):

+        if 'method' in kwargs:
+            self.interval_method = kwargs.get('method','quantile')
+
+        if 'alpha' in kwargs:
+            self.alpha = kwargs.get('alpha', 0.05)
+
         ndata = np.array(self.doTransformations(data))

         l = len(ndata)
@@ -382,105 +390,112 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         for k in np.arange(self.order - 1, l):
-            # print(k)
-
-            affected_flrgs = []
-            affected_flrgs_memberships = []
-            norms = []
-            up = []
-            lo = []
-
-            # Find the sets with membership > 0 for each lag
-            count = 0
-            lags = {}
-            if self.order > 1:
-                subset = ndata[k - (self.order - 1): k + 1]
-                for instance in subset:
-                    mb = FuzzySet.fuzzyInstance(instance, self.sets)
-                    tmp = np.argwhere(mb)
-                    idx = np.ravel(tmp)  # flatten the array
-
-                    if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                        if math.isclose(instance, self.sets[0].lower) or instance < self.sets[0].lower:
-                            idx = [0]
-                        elif math.isclose(instance, self.sets[-1].upper) or instance > self.sets[-1].upper:
-                            idx = [len(self.sets) - 1]
-                        else:
-                            raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
-                                            (self.sets[0].lower, self.sets[-1].upper, instance))
-
-                    lags[count] = idx
-                    count += 1
-
-                # Build the tree with all possible paths
-                root = tree.FLRGTreeNode(None)
-                self.buildTree(root, lags, 0)
-
-                # Trace the possible paths and build the PFLRG's
-                for p in root.paths():
-                    path = list(reversed(list(filter(None.__ne__, p))))
-                    flrg = hofts.HighOrderFLRG(self.order)
-                    for kk in path: flrg.appendLHS(self.sets[kk])
-
-                    assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])
-
-                    affected_flrgs.append(flrg)
-                    # Find the general membership of the FLRG
-                    affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
+            if self.interval_method == 'extremum':
+                self.interval_extremum(k, ndata, ret)
             else:
-                mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)  # get all membership values
-                tmp = np.argwhere(mv)  # get the indices of values > 0
-                idx = np.ravel(tmp)  # flatten the array
-
-                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                    if math.isclose(ndata[k], self.sets[0].lower) or ndata[k] < self.sets[0].lower:
-                        idx = [0]
-                    elif math.isclose(ndata[k], self.sets[-1].upper) or ndata[k] > self.sets[-1].upper:
-                        idx = [len(self.sets) - 1]
-                    else:
-                        raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
-                                        (self.sets[0].lower, self.sets[-1].upper, ndata[k]))
-
-                for kk in idx:
-                    flrg = hofts.HighOrderFLRG(self.order)
-                    flrg.appendLHS(self.sets[kk])
-                    affected_flrgs.append(flrg)
-                    affected_flrgs_memberships.append(mv[kk])
-
-            for count, flrg in enumerate(affected_flrgs):
-                # find the bounds of each FLRG, weighted by probability and membership
-                norm = self.get_flrg_global_probability(flrg) * affected_flrgs_memberships[count]
-                if norm == 0:
-                    norm = self.get_flrg_global_probability(flrg)  # * 0.001
-                up.append(norm * self.getUpper(flrg))
-                lo.append(norm * self.getLower(flrg))
-                norms.append(norm)
-
-            # generate the interval
-            norm = sum(norms)
-            if norm == 0:
-                ret.append([0, 0])
-            else:
-                lo_ = sum(lo) / norm
-                up_ = sum(up) / norm
-                ret.append([lo_, up_])
+                self.interval_quantile(k, ndata, ret)

         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)

         return ret

+    def interval_quantile(self, k, ndata, ret):
+        dist = self.forecastDistribution(ndata)
+        lo_qt = dist[0].quantile(self.alpha)
+        up_qt = dist[0].quantile(1.0 - self.alpha)
+        ret.append([lo_qt, up_qt])
+
+    def interval_extremum(self, k, ndata, ret):
+        affected_flrgs = []
+        affected_flrgs_memberships = []
+        norms = []
+        up = []
+        lo = []
+
+        # Find the sets with membership > 0 for each lag
+        count = 0
+        lags = {}
+        if self.order > 1:
+            subset = ndata[k - (self.order - 1): k + 1]
+            for instance in subset:
+                mb = FuzzySet.fuzzyInstance(instance, self.sets)
+                tmp = np.argwhere(mb)
+                idx = np.ravel(tmp)  # flatten the array
+
+                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                    if math.isclose(instance, self.sets[0].lower) or instance < self.sets[0].lower:
+                        idx = [0]
+                    elif math.isclose(instance, self.sets[-1].upper) or instance > self.sets[-1].upper:
+                        idx = [len(self.sets) - 1]
+                    else:
+                        raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
+                                        (self.sets[0].lower, self.sets[-1].upper, instance))
+
+                lags[count] = idx
+                count += 1
+
+            # Build the tree with all possible paths
+            root = tree.FLRGTreeNode(None)
+            self.buildTree(root, lags, 0)
+
+            # Trace the possible paths and build the PFLRG's
+            for p in root.paths():
+                path = list(reversed(list(filter(None.__ne__, p))))
+                flrg = hofts.HighOrderFLRG(self.order)
+                for kk in path: flrg.appendLHS(self.sets[kk])
+
+                assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])
+
+                affected_flrgs.append(flrg)
+                # Find the general membership of the FLRG
+                affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
+        else:
+            mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)  # get all membership values
+            tmp = np.argwhere(mv)  # get the indices of values > 0
+            idx = np.ravel(tmp)  # flatten the array
+
+            if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                if math.isclose(ndata[k], self.sets[0].lower) or ndata[k] < self.sets[0].lower:
+                    idx = [0]
+                elif math.isclose(ndata[k], self.sets[-1].upper) or ndata[k] > self.sets[-1].upper:
+                    idx = [len(self.sets) - 1]
+                else:
+                    raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
+                                    (self.sets[0].lower, self.sets[-1].upper, ndata[k]))
+
+            for kk in idx:
+                flrg = hofts.HighOrderFLRG(self.order)
+                flrg.appendLHS(self.sets[kk])
+                affected_flrgs.append(flrg)
+                affected_flrgs_memberships.append(mv[kk])
+
+        for count, flrg in enumerate(affected_flrgs):
+            # find the bounds of each FLRG, weighted by probability and membership
+            norm = self.get_flrg_global_probability(flrg) * affected_flrgs_memberships[count]
+            if norm == 0:
+                norm = self.get_flrg_global_probability(flrg)  # * 0.001
+            up.append(norm * self.getUpper(flrg))
+            lo.append(norm * self.getLower(flrg))
+            norms.append(norm)
+
+        # generate the interval
+        norm = sum(norms)
+        if norm == 0:
+            ret.append([0, 0])
+        else:
+            lo_ = sum(lo) / norm
+            up_ = sum(up) / norm
+            ret.append([lo_, up_])
+
     def forecastDistribution(self, data, **kwargs):

-        smooth = kwargs.get("smooth", "histogram")
+        smooth = kwargs.get("smooth", "none")
         nbins = kwargs.get("num_bins", 100)

         ndata = np.array(self.doTransformations(data))
@@ -513,8 +528,6 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         return ret

     def forecastAhead(self, data, steps, **kwargs):
         ret = [data[k] for k in np.arange(len(data) - self.order, len(data))]
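The quantile mode reduces an interval request to two quantile lookups on the one-step-ahead distribution, so forecastInterval(..., method='quantile', alpha=0.05) returns the central 90% band [q_0.05, q_0.95]. Per step, interval_quantile computes essentially this (a sketch with assumed local names model, ndata and alpha):

    dist = model.forecastDistribution(ndata)[0]  # one-step-ahead distribution
    interval = [dist.quantile(alpha), dist.quantile(1.0 - alpha)]  # coverage 1 - 2*alpha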

View File

@@ -38,19 +38,24 @@ from pyFTS.benchmarks import benchmarks as bchmk
 #uod = [10162, 21271]

 enrollments_fs1 = Grid.GridPartitioner(enrollments, 6)
-for s in enrollments_fs1.sets:
-    print(s) #.partition_function(uod, 100))
+#for s in enrollments_fs1.sets:
+#    print(s) #.partition_function(uod, 100))

 pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1)
 pfts1_enrollments.train(enrollments, None, 1)
 pfts1_enrollments.shortname = "1st Order"

-print(pfts1_enrollments)
+#print(pfts1_enrollments)
+
+tmp = pfts1_enrollments.forecastDistribution([15000])
+
+#print(tmp[0])
+
+print(tmp[0].quantile([0.05, 0.95]))

 #pfts1_enrollments.AprioriPDF
-norm = pfts1_enrollments.global_frequency_count
-uod = pfts1_enrollments.get_UoD()
-print(uod)
+#norm = pfts1_enrollments.global_frequency_count
+#uod = pfts1_enrollments.get_UoD()

 #for k in sorted(pfts1_enrollments.flrgs.keys())
 #    flrg = pfts1_enrollments.flrgs[k]
 #    tmp = flrg.get_LHSprobability(15000, norm, uod, 100)
@@ -62,23 +67,7 @@ print(uod)
 #print(flrg.get_LHSprobability(15000, norm, uod, 100))
 # print(sum([flrg.get_LHSprobability(k, norm, uod, 100) for k in np.linspace(uod[0],uod[1],100)]))

-print("P(T+1 | T")
-
-sets = pfts1_enrollments.setsDict
-
-t = 15000
-pf = 0.0
-for t1 in np.linspace(uod[0], uod[1], 100):
-    num = []
-    den = []
-    for s in sorted(pfts1_enrollments.flrgs.keys()):
-        flrg = pfts1_enrollments.flrgs[s]
-        pk = flrg.get_LHSprobability(t, norm, uod, 100)
-        wi = flrg.get_RHS_conditional_probability(t1, sets, uod, 100)
-        num.append(wi * pk)
-        den.append(pk)
-    pt1 = sum(num)/sum(den)
-    pf += pt1
-    print(str(round(t1,0)) + ": " + str(round(pt1, 3)))  #/sum(den))
-print(pf)
-
 '''
 pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2")
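The edited test exercises the new path end to end: forecastDistribution([15000]) yields one ProbabilityDistribution whose 5%/95% quantiles should agree with the quantile interval for the same input. A consistency-check sketch (not part of the commit):

    dist = pfts1_enrollments.forecastDistribution([15000])[0]
    print(dist.quantile([0.05, 0.95]))  # [q_0.05, q_0.95] of the one-step distribution
    print(pfts1_enrollments.forecastInterval([15000], method='quantile', alpha=0.05))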