From 1ec03d34de299eab6bfeaf48d3bc0efe976677e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Wed, 20 Sep 2017 14:31:45 -0300
Subject: [PATCH] PWFTS: new one-step-ahead probabilistic and quantile-interval
 forecasts

---
 pyFTS/benchmarks/benchmarks.py                 |  46 ++--
 .../probabilistic/ProbabilityDistribution.py   |  81 ++++---
 pyFTS/pwfts.py                                 | 203 ++++++++++--------
 pyFTS/tests/pwfts.py                           |  35 ++-
 4 files changed, 203 insertions(+), 162 deletions(-)

diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py
index 41441fa..6a35fcb 100644
--- a/pyFTS/benchmarks/benchmarks.py
+++ b/pyFTS/benchmarks/benchmarks.py
@@ -497,6 +497,20 @@ def print_interval_statistics(original, models):
     print(ret)
 
 
+def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1):
+    lower = [kk[0] for kk in intervals]
+    upper = [kk[1] for kk in intervals]
+    mi = min(lower) * 0.95
+    ma = max(upper) * 1.05
+    for k in np.arange(0, order):
+        lower.insert(0, None)
+        upper.insert(0, None)
+    if typeonlegend: label += " (Interval)"
+    axis.plot(lower, color=color, label=label, ls=ls, linewidth=linewidth)
+    axis.plot(upper, color=color, ls=ls, linewidth=linewidth)
+    return [mi, ma]
+
+
 def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
                          points=True, intervals=True, linewidth=1.5):
     """
@@ -526,34 +540,28 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
 
     for count, fts in enumerate(models, start=0):
         if fts.has_point_forecasting and points:
-            forecasted = fts.forecast(original)
-            if isinstance(forecasted, np.ndarray):
-                forecasted = forecasted.tolist()
-            mi.append(min(forecasted) * 0.95)
-            ma.append(max(forecasted) * 1.05)
+            forecasts = fts.forecast(original)
+            if isinstance(forecasts, np.ndarray):
+                forecasts = forecasts.tolist()
+            mi.append(min(forecasts) * 0.95)
+            ma.append(max(forecasts) * 1.05)
             for k in np.arange(0, fts.order):
-                forecasted.insert(0, None)
+                forecasts.insert(0, None)
             lbl = fts.shortname + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
             if typeonlegend: lbl += " (Point)"
-            ax.plot(forecasted, color=colors[count], label=lbl, ls="-", linewidth=linewidth)
+            ax.plot(forecasts, color=colors[count], label=lbl, ls="-", linewidth=linewidth)
 
         if fts.has_interval_forecasting and intervals:
-            forecasted = fts.forecastInterval(original)
-            lower = [kk[0] for kk in forecasted]
-            upper = [kk[1] for kk in forecasted]
-            mi.append(min(lower) * 0.95)
-            ma.append(max(upper) * 1.05)
-            for k in np.arange(0, fts.order):
-                lower.insert(0, None)
-                upper.insert(0, None)
+            forecasts = fts.forecastInterval(original)
             lbl = fts.shortname + " " + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
-            if typeonlegend: lbl += " (Interval)"
             if not points and intervals:
                 ls = "-"
             else:
                 ls = "--"
-            ax.plot(lower, color=colors[count], label=lbl, ls=ls, linewidth=linewidth)
-            ax.plot(upper, color=colors[count], ls=ls, linewidth=linewidth)
+            tmpmi, tmpma = plot_interval(ax, forecasts, fts.order, label=lbl, typeonlegend=typeonlegend,
+                                         color=colors[count], ls=ls, linewidth=linewidth)
+            mi.append(tmpmi)
+            ma.append(tmpma)
 
     handles0, labels0 = ax.get_legend_handles_labels()
     lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
@@ -825,8 +833,10 @@ def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to
     cb = fig.colorbar(pc, ax=ax)
     cb.set_label('Density')
 
+from pyFTS.common import Transformations
+
 
 def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from):
     from matplotlib.patches import Rectangle
     from matplotlib.collections import PatchCollection
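The refactored plotting path above delegates interval rendering to the new plot_interval helper, which can also be called directly. A minimal usage sketch, assuming a trained pyFTS model whose forecastInterval returns a list of [lower, upper] pairs; the names model and test_data are illustrative placeholders, not part of this patch:

    import matplotlib.pyplot as plt
    from pyFTS.benchmarks import benchmarks as bchmk

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.plot(test_data, color='black', label='Original')   # observed series
    intervals = model.forecastInterval(test_data)         # [[lower, upper], ...]
    # draw the interval envelope, shifted right by model.order lags
    mi, ma = bchmk.plot_interval(ax, intervals, model.order, label=model.shortname,
                                 color='blue', typeonlegend=True)
    ax.set_ylim([mi, ma])   # helper returns a padded [min, max] for the y-axis
    ax.legend(loc=2)
    plt.show()
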
diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py
index c826b2a..f1eb149 100644
--- a/pyFTS/probabilistic/ProbabilityDistribution.py
+++ b/pyFTS/probabilistic/ProbabilityDistribution.py
@@ -26,11 +26,16 @@ class ProbabilityDistribution(object):
         if self.bins is None:
             self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
-            self.resolution = (self.uod[1] - self.uod[0])/self.nbins
             self.labels = [str(k) for k in self.bins]
 
-        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        if self.uod is not None:
+            self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
+
+        self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        self.quantile_index = None
         self.distribution = {}
+        self.cdf = None
+        self.qtl = None
         self.count = 0
         for k in self.bins:
             self.distribution[k] = 0
@@ -44,13 +49,13 @@ class ProbabilityDistribution(object):
         self.name = kwargs.get("name", "")
 
     def set(self, value, density):
-        k = self.index.find_ge(np.round(value,3))
+        k = self.bin_index.find_ge(value)
         self.distribution[k] = density
 
     def append(self, values):
         if self.type == "histogram":
             for k in values:
-                v = self.index.find_ge(k)
+                v = self.bin_index.find_ge(k)
                 self.distribution[v] += 1
                 self.count += 1
         else:
@@ -63,7 +68,7 @@ class ProbabilityDistribution(object):
     def appendInterval(self, intervals):
         if self.type == "histogram":
             for interval in intervals:
-                for k in self.index.inside(interval[0], interval[1]):
+                for k in self.bin_index.inside(interval[0], interval[1]):
                     self.distribution[k] += 1
                     self.count += 1
 
@@ -77,13 +82,13 @@ class ProbabilityDistribution(object):
 
         for k in values:
             if self.type == "histogram":
-                v = self.index.find_ge(k)
+                v = self.bin_index.find_ge(k)
                 ret.append(self.distribution[v] / self.count)
             elif self.type == "KDE":
                 v = self.kde.probability(k, self.data)
                 ret.append(v)
             else:
-                v = self.index.find_ge(k)
+                v = self.bin_index.find_ge(k)
                 ret.append(self.distribution[v])
 
         if scalar:
@@ -91,28 +96,51 @@ class ProbabilityDistribution(object):
 
         return ret
 
-    def cdf(self, value):
-        ret = 0
-        for k in self.bins:
-            if k < value:
-                ret += self.density(k)
-            else:
-                return ret
+    def build_cdf_qtl(self):
+        ret = 0.0
+        self.cdf = {}
+        self.qtl = {}
+        for k in sorted(self.bins):
+            ret += self.density(k)
+            if k not in self.cdf:
+                self.cdf[k] = ret
 
-        return ret
+            if str(ret) not in self.qtl:
+                self.qtl[str(ret)] = []
+
+            self.qtl[str(ret)].append(k)
+
+        _keys = [float(k) for k in sorted(self.qtl.keys())]
+
+        self.quantile_index = SortedCollection.SortedCollection(iterable=_keys)
 
     def cummulative(self, values):
+        if self.cdf is None:
+            self.build_cdf_qtl()
+
         if isinstance(values, list):
             ret = []
-            for k in values:
-                ret.append(self.cdf(k))
+            for val in values:
+                k = self.bin_index.find_ge(val)
+                ret.append(self.cdf[k])
+            return ret
         else:
-            return self.cdf(values)
+            k = self.bin_index.find_ge(values)
+            return self.cdf[k]
 
+    def quantile(self, values):
+        if self.qtl is None:
+            self.build_cdf_qtl()
 
-    def quantile(self, qt):
-        pass
+        if isinstance(values, list):
+            ret = []
+            for val in values:
+                k = self.quantile_index.find_ge(val)
+                ret.append(self.qtl[str(k)][0])
+        else:
+            k = self.quantile_index.find_ge(values)
+            ret = self.qtl[str(k)][0]
+
+        return ret
 
     def entropy(self):
         h = -sum([self.distribution[k] * np.log(self.distribution[k])
                   if self.distribution[k] > 0 else 0
@@ -157,6 +185,7 @@ class ProbabilityDistribution(object):
         return _s / len(data)
 
     def plot(self,axis=None,color="black",tam=[10, 6], title = None):
+
         if axis is None:
             fig = plt.figure(figsize=tam)
             axis = fig.add_subplot(111)
@@ -177,12 +206,12 @@ class ProbabilityDistribution(object):
         axis.set_ylabel('Probability')
 
     def __str__(self):
-        head = '|'
-        body = '|'
+        ret = ""
         for k in sorted(self.distribution.keys()):
-            head += str(round(k,2)) + '\t|'
+            ret += str(round(k,2)) + ':\t'
             if self.type == "histogram":
-                body += str(round(self.distribution[k] / self.count,3)) + '\t|'
+                ret += str(round(self.distribution[k] / self.count,3))
             else:
-                body += str(round(self.distribution[k], 3)) + '\t|'
-        return head + '\n' + body
+                ret += str(round(self.distribution[k], 6))
+            ret += '\n'
+        return ret
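The new build_cdf_qtl / cummulative / quantile trio can be exercised in isolation. A short sketch, assuming the constructor accepts the type, uod, and num_bins keywords that the attributes read in __init__ suggest; the uniform sample is illustrative:

    import numpy as np
    from pyFTS.probabilistic import ProbabilityDistribution

    # histogram distribution over an assumed universe of discourse [0, 100]
    dist = ProbabilityDistribution.ProbabilityDistribution(type='histogram',
                                                           uod=[0, 100], num_bins=20)
    dist.append(np.random.uniform(0, 100, 5000).tolist())

    print(dist.cummulative(50.0))        # P(X <= 50), close to 0.5 for uniform data
    print(dist.quantile([0.05, 0.95]))   # bin values bracketing ~90% of the mass

Note that the CDF and quantile tables are built lazily on first use, so repeated quantile queries against the same distribution reuse the cached SortedCollection index.
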
diff --git a/pyFTS/pwfts.py b/pyFTS/pwfts.py
index 068f740..5327017 100644
--- a/pyFTS/pwfts.py
+++ b/pyFTS/pwfts.py
@@ -54,8 +54,8 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
 
     def lhs_membership(self,x):
         mv = []
-        for set in self.LHS:
-            mv.append(set.membership(x))
+        for count, set in enumerate(self.LHS):
+            mv.append(set.membership(x[count]))
 
         min_mv = np.prod(mv)
         return min_mv
@@ -92,6 +92,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         self.has_probability_forecasting = True
         self.is_high_order = True
         self.auto_update = kwargs.get('update',False)
+        self.interval_method = kwargs.get('interval_method','extremum')
+        self.alpha = kwargs.get('alpha', 0.05)
 
     def train(self, data, sets, order=1,parameters='Fuzzy'):
 
@@ -374,6 +376,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
 
     def forecastInterval(self, data, **kwargs):
 
+        if 'method' in kwargs:
+            self.interval_method = kwargs.get('method','quantile')
+
+        if 'alpha' in kwargs:
+            self.alpha = kwargs.get('alpha', 0.05)
+
         ndata = np.array(self.doTransformations(data))
 
         l = len(ndata)
@@ -382,105 +390,112 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
 
         for k in np.arange(self.order - 1, l):
 
-            # print(k)
-
-            affected_flrgs = []
-            affected_flrgs_memberships = []
-            norms = []
-
-            up = []
-            lo = []
-
-            # Find the sets which membership > 0 for each lag
-            count = 0
-            lags = {}
-            if self.order > 1:
-                subset = ndata[k - (self.order - 1): k + 1]
-
-                for instance in subset:
-                    mb = FuzzySet.fuzzyInstance(instance, self.sets)
-                    tmp = np.argwhere(mb)
-                    idx = np.ravel(tmp)  # flatten the array
-
-                    if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                        if math.isclose(instance, self.sets[0].lower) or instance < self.sets[0].lower:
-                            idx = [0]
-                        elif math.isclose(instance, self.sets[-1].upper) or instance > self.sets[-1].upper:
-                            idx = [len(self.sets) - 1]
-                        else:
-                            raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
-                                            (self.sets[0].lower, self.sets[-1].upper, instance))
-
-                    lags[count] = idx
-                    count += 1
-
-                # Build the tree with all possible paths
-
-                root = tree.FLRGTreeNode(None)
-
-                self.buildTree(root, lags, 0)
-
-                # Trace the possible paths and build the PFLRG's
-
-                for p in root.paths():
-                    path = list(reversed(list(filter(None.__ne__, p))))
-                    flrg = hofts.HighOrderFLRG(self.order)
-                    for kk in path: flrg.appendLHS(self.sets[kk])
-
-                    assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])
-
-                    affected_flrgs.append(flrg)
-
-                    # Find the general membership of FLRG
-                    affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
-
+            if self.interval_method == 'extremum':
+                self.interval_extremum(k, ndata, ret)
             else:
-
-                mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)  # get all membership values
-                tmp = np.argwhere(mv)  # get the indices of values > 0
-                idx = np.ravel(tmp)  # flatten the array
-
-                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                    if math.isclose(ndata[k], self.sets[0].lower) or ndata[k] < self.sets[0].lower:
-                        idx = [0]
-                    elif math.isclose(ndata[k], self.sets[-1].upper) or ndata[k] > self.sets[-1].upper:
-                        idx = [len(self.sets) - 1]
-                    else:
-                        raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
-                                        (self.sets[0].lower, self.sets[-1].upper, ndata[k]))
-
-                for kk in idx:
-                    flrg = hofts.HighOrderFLRG(self.order)
-                    flrg.appendLHS(self.sets[kk])
-                    affected_flrgs.append(flrg)
-                    affected_flrgs_memberships.append(mv[kk])
-
-            for count, flrg in enumerate(affected_flrgs):
-                # find the bounds of each FLRG, weighted by probability and membership
-                norm = self.get_flrg_global_probability(flrg) * affected_flrgs_memberships[count]
-                if norm == 0:
-                    norm = self.get_flrg_global_probability(flrg)  # * 0.001
-                up.append(norm * self.getUpper(flrg))
-                lo.append(norm * self.getLower(flrg))
-                norms.append(norm)
-
-            # generate the interval
-            norm = sum(norms)
-            if norm == 0:
-                ret.append([0, 0])
-            else:
-                lo_ = sum(lo) / norm
-                up_ = sum(up) / norm
-                ret.append([lo_, up_])
+                self.interval_quantile(k, ndata, ret)
 
         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]], interval=True)
 
         return ret
 
+    def interval_quantile(self, k, ndata, ret):
+        # one-step-ahead distribution conditioned on the lag window ending at k
+        dist = self.forecastDistribution(ndata[k - self.order + 1: k + 1])
+        lo_qt = dist[0].quantile(self.alpha)
+        up_qt = dist[0].quantile(1.0 - self.alpha)
+        ret.append([lo_qt, up_qt])
+
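With the kwargs handled in forecastInterval above, both interval strategies are now selectable per call. A short sketch, with model and test_data as illustrative placeholders for a trained ProbabilisticWeightedFTS and a test series:

    # weighted FLRG bounds (the original behaviour)
    ext = model.forecastInterval(test_data, method='extremum')

    # alpha / (1 - alpha) quantiles of the one-step-ahead distribution
    qtl = model.forecastInterval(test_data, method='quantile', alpha=0.05)

    for (elo, eup), (qlo, qup) in zip(ext, qtl):
        print('extremum [%.2f, %.2f]  quantile [%.2f, %.2f]' % (elo, eup, qlo, qup))
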
+    def interval_extremum(self, k, ndata, ret):
+        affected_flrgs = []
+        affected_flrgs_memberships = []
+        norms = []
+        up = []
+        lo = []
+        # Find the sets which membership > 0 for each lag
+        count = 0
+        lags = {}
+        if self.order > 1:
+            subset = ndata[k - (self.order - 1): k + 1]
+
+            for instance in subset:
+                mb = FuzzySet.fuzzyInstance(instance, self.sets)
+                tmp = np.argwhere(mb)
+                idx = np.ravel(tmp)  # flatten the array
+
+                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                    if math.isclose(instance, self.sets[0].lower) or instance < self.sets[0].lower:
+                        idx = [0]
+                    elif math.isclose(instance, self.sets[-1].upper) or instance > self.sets[-1].upper:
+                        idx = [len(self.sets) - 1]
+                    else:
+                        raise Exception("Data exceeds the known bounds [%s, %s] of the universe of discourse: %s" %
+                                        (self.sets[0].lower, self.sets[-1].upper, instance))
+
+                lags[count] = idx
+                count += 1
+
+            # Build the tree with all possible paths
+
+            root = tree.FLRGTreeNode(None)
+
+            self.buildTree(root, lags, 0)
+
+            # Trace the possible paths and build the PFLRG's
+
+            for p in root.paths():
+                path = list(reversed(list(filter(None.__ne__, p))))
+                flrg = hofts.HighOrderFLRG(self.order)
+                for kk in path: flrg.appendLHS(self.sets[kk])
+
+                assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])
+
+                affected_flrgs.append(flrg)
+
+                # Find the general membership of FLRG
+                affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
+
+        else:
+
+            mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)  # get all membership values
+            tmp = np.argwhere(mv)  # get the indices of values > 0
+            idx = np.ravel(tmp)  # flatten the array
+
+            if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                if math.isclose(ndata[k], self.sets[0].lower) or ndata[k] < self.sets[0].lower:
+                    idx = [0]
+                elif math.isclose(ndata[k], self.sets[-1].upper) or ndata[k] > self.sets[-1].upper:
+                    idx = [len(self.sets) - 1]
+                else:
+                    raise Exception("Data exceeds the known bounds [%s, %s] of the universe of discourse: %s" %
+                                    (self.sets[0].lower, self.sets[-1].upper, ndata[k]))
+
+            for kk in idx:
+                flrg = hofts.HighOrderFLRG(self.order)
+                flrg.appendLHS(self.sets[kk])
+                affected_flrgs.append(flrg)
+                affected_flrgs_memberships.append(mv[kk])
+
+        for count, flrg in enumerate(affected_flrgs):
+            # find the bounds of each FLRG, weighted by probability and membership
+            norm = self.get_flrg_global_probability(flrg) * affected_flrgs_memberships[count]
+            if norm == 0:
+                norm = self.get_flrg_global_probability(flrg)  # * 0.001
+            up.append(norm * self.getUpper(flrg))
+            lo.append(norm * self.getLower(flrg))
+            norms.append(norm)
+
+        # generate the interval
+        norm = sum(norms)
+        if norm == 0:
+            ret.append([0, 0])
+        else:
+            lo_ = sum(lo) / norm
+            up_ = sum(up) / norm
+            ret.append([lo_, up_])
+
     def forecastDistribution(self, data, **kwargs):
 
-        smooth = kwargs.get("smooth", "histogram")
+        smooth = kwargs.get("smooth", "none")
        nbins = kwargs.get("num_bins", 100)
 
         ndata = np.array(self.doTransformations(data))
@@ -513,8 +528,6 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
 
         return ret
 
-
-
     def forecastAhead(self, data, steps, **kwargs):
 
         ret = [data[k] for k in np.arange(len(data) - self.order, len(data))]
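End to end, the quantile path composes forecastDistribution with the new ProbabilityDistribution.quantile. The sketch below mirrors the enrollments test that follows; train is an illustrative placeholder for any univariate series:

    from pyFTS.partitioners import Grid
    from pyFTS import pwfts

    fs = Grid.GridPartitioner(train, 10)
    model = pwfts.ProbabilisticWeightedFTS('pwfts', partitioner=fs)
    model.train(train, None, 1)

    dist = model.forecastDistribution([train[-1]])   # one-step-ahead distribution
    print(dist[0].quantile([0.05, 0.5, 0.95]))       # median plus a 90% interval
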
diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py
index 749e539..437847a 100644
--- a/pyFTS/tests/pwfts.py
+++ b/pyFTS/tests/pwfts.py
@@ -38,19 +38,24 @@ from pyFTS.benchmarks import benchmarks as bchmk
 
 #uod = [10162, 21271]
 
 enrollments_fs1 = Grid.GridPartitioner(enrollments, 6)
-for s in enrollments_fs1.sets:
-    print(s) #.partition_function(uod, 100))
+#for s in enrollments_fs1.sets:
+#    print(s) #.partition_function(uod, 100))
 
 pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1)
 pfts1_enrollments.train(enrollments, None, 1)
 pfts1_enrollments.shortname = "1st Order"
 
-print(pfts1_enrollments)
+#print(pfts1_enrollments)
+
+tmp = pfts1_enrollments.forecastDistribution([15000])
+#print(tmp[0])
+
+print(tmp[0].quantile([0.05, 0.95]))
 
 #pfts1_enrollments.AprioriPDF
-norm = pfts1_enrollments.global_frequency_count
-uod = pfts1_enrollments.get_UoD()
-print(uod)
+#norm = pfts1_enrollments.global_frequency_count
+#uod = pfts1_enrollments.get_UoD()
+
 #for k in sorted(pfts1_enrollments.flrgs.keys())
 #    flrg = pfts1_enrollments.flrgs[k]
 #    tmp = flrg.get_LHSprobability(15000, norm, uod, 100)
@@ -62,23 +67,7 @@ print(uod)
 
 #print(flrg.get_LHSprobability(15000, norm, uod, 100))
 
 #    print(sum([flrg.get_LHSprobability(k, norm, uod, 100) for k in np.linspace(uod[0],uod[1],100)]))
 
-print("P(T+1 | T")
-sets = pfts1_enrollments.setsDict
-t = 15000
-pf = 0.0
-for t1 in np.linspace(uod[0], uod[1], 100):
-    num = []
-    den = []
-    for s in sorted(pfts1_enrollments.flrgs.keys()):
-        flrg = pfts1_enrollments.flrgs[s]
-        pk = flrg.get_LHSprobability(t, norm, uod, 100)
-        wi = flrg.get_RHS_conditional_probability(t1, sets, uod, 100)
-        num.append(wi * pk)
-        den.append(pk)
-    pt1 = sum(num)/sum(den)
-    pf += pt1
-    print(str(round(t1,0)) + ": " + str(round(pt1, 3))) #/sum(den))
-print(pf)
+
 
 '''
 pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2")