From 9fb79ea0806c81babb5e5f8dd711661b5452c5bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Fri, 22 Sep 2017 21:08:26 -0300 Subject: [PATCH] PWFTS new m-step-ahead probabilistic forecasting method --- pyFTS/ensemble/multiseasonal.py | 7 +- .../probabilistic/ProbabilityDistribution.py | 2 +- pyFTS/probabilistic/kde.py | 27 ++- pyFTS/pwfts.py | 163 ++++++------------ pyFTS/tests/pwfts.py | 21 ++- 5 files changed, 83 insertions(+), 137 deletions(-) diff --git a/pyFTS/ensemble/multiseasonal.py b/pyFTS/ensemble/multiseasonal.py index ea63589..57b0dd2 100644 --- a/pyFTS/ensemble/multiseasonal.py +++ b/pyFTS/ensemble/multiseasonal.py @@ -77,6 +77,8 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS): smooth = kwargs.get("smooth", "KDE") alpha = kwargs.get("alpha", None) + uod = self.get_UoD() + for k in data.index: tmp = self.get_models_forecasts(data.ix[k]) @@ -88,9 +90,8 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS): name = str(self.indexer.get_index(data.ix[k])) - dist = ProbabilityDistribution.ProbabilityDistribution(smooth, - uod=[self.original_min, self.original_max], - data=tmp, name=name, **kwargs) + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=tmp, + name=name, **kwargs) ret.append(dist) diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py index f1eb149..d66cda8 100644 --- a/pyFTS/probabilistic/ProbabilityDistribution.py +++ b/pyFTS/probabilistic/ProbabilityDistribution.py @@ -17,7 +17,7 @@ class ProbabilityDistribution(object): self.type = type if self.type == "KDE": - self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov")) + self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov")) self.nbins = kwargs.get("num_bins", 100) diff --git a/pyFTS/probabilistic/kde.py b/pyFTS/probabilistic/kde.py index 1eadb09..460b44a 100644 --- a/pyFTS/probabilistic/kde.py +++ b/pyFTS/probabilistic/kde.py @@ -8,39 +8,38 @@ Kernel Density Estimation class KernelSmoothing(object): """Kernel Density Estimation""" - def __init__(self,h, method="epanechnikov"): + def __init__(self,h, kernel="epanechnikov"): self.h = h - self.method = method + self.kernel = kernel self.transf = Transformations.Scale(min=0,max=1) - def kernel(self, u): - if self.method == "epanechnikov": + def kernel_function(self, u): + if self.kernel == "epanechnikov": tmp = (3/4)*(1.0 - u**2) return tmp if tmp > 0 else 0 - elif self.method == "gaussian": + elif self.kernel == "gaussian": return (1.0/np.sqrt(2*np.pi))*np.exp(-0.5*u**2) - elif self.method == "uniform": + elif self.kernel == "uniform": return 0.5 - elif self.method == "triangular": + elif self.kernel == "triangular": tmp = 1.0 - np.abs(u) return tmp if tmp > 0 else 0 - elif self.method == "logistic": + elif self.kernel == "logistic": return 1.0/(np.exp(u)+2+np.exp(-u)) - elif self.method == "cosine": + elif self.kernel == "cosine": return (np.pi/4.0)*np.cos((np.pi/2.0)*u) - elif self.method == "sigmoid": + elif self.kernel == "sigmoid": return (2.0/np.pi)*(1.0/(np.exp(u)+np.exp(-u))) - elif self.method == "tophat": + elif self.kernel == "tophat": return 1 if np.abs(u) < 0.5 else 0 - elif self.method == "exponential": + elif self.kernel == "exponential": return 0.5 * np.exp(-np.abs(u)) - def probability(self, x, data): l = len(data) ndata = self.transf.apply(data) nx = self.transf.apply(x) - p = sum([self.kernel((nx - k)/self.h) for k in ndata]) / l*self.h + p = sum([self.kernel_function((nx - k)/self.h) for k in ndata]) / l*self.h return p \ No newline at end of file diff --git a/pyFTS/pwfts.py b/pyFTS/pwfts.py index 5327017..6f65f89 100644 --- a/pyFTS/pwfts.py +++ b/pyFTS/pwfts.py @@ -566,23 +566,27 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret = [] - resolution = kwargs.get('resolution',100) + method = kwargs.get('method', 2) + smooth = "KDE" if method != 4 else "none" + nbins = kwargs.get("num_bins", 100) - method = kwargs.get('method',2) + uod = self.get_UoD() + _bins = np.linspace(uod[0], uod[1], nbins).tolist() - intervals = self.forecastAheadInterval(data, steps) + if method != 4: + intervals = self.forecastAheadInterval(data, steps) + else: + l = len(data) + for k in np.arange(l - self.order, l): + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) + dist.set(data[k], 1.0) + ret.append(dist) - grid = self.getGridClean(resolution) + for k in np.arange(self.order, steps + self.order): - index = SortedCollection.SortedCollection(iterable=grid.keys()) + data = [] - if method == 1: - - grids = [] - for k in np.arange(0, steps): - grids.append(self.getGridClean(resolution)) - - for k in np.arange(self.order, steps + self.order): + if method == 1: lags = {} @@ -612,139 +616,74 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): self.buildTreeWithoutOrder(root, lags, 0) # Trace the possible paths - for p in root.paths(): path = list(reversed(list(filter(None.__ne__, p)))) - qtle = self.forecastInterval(path) + qtle = np.ravel(self.forecastInterval(path)) - grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle)) + data.extend(np.linspace(qtle[0],qtle[1],100).tolist()) - for k in np.arange(0, steps): - tmp = np.array([grids[k][q] for q in sorted(grids[k])]) - ret.append(tmp / sum(tmp)) - - elif method == 2: - - ret = [] - - for k in np.arange(self.order, steps + self.order): - - grid = self.getGridClean(resolution) - grid = self.gridCount(grid, resolution, index, intervals[k]) + elif method == 2: for qt in np.arange(0, 50, 1): # print(qt) qtle_lower = self.forecastInterval( [intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in np.arange(k - self.order, k)]) - grid = self.gridCount(grid, resolution, index, np.ravel(qtle_lower)) + qtle_lower = np.ravel(qtle_lower) + data.extend(np.linspace(qtle_lower[0], qtle_lower[1], 100).tolist()) qtle_upper = self.forecastInterval( [intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in np.arange(k - self.order, k)]) - grid = self.gridCount(grid, resolution, index, np.ravel(qtle_upper)) + qtle_upper = np.ravel(qtle_upper) + data.extend(np.linspace(qtle_upper[0], qtle_upper[1], 100).tolist()) qtle_mid = self.forecastInterval( [intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)]) - grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid)) + qtle_mid = np.ravel(qtle_mid) + data.extend(np.linspace(qtle_mid[0], qtle_mid[1], 100).tolist()) - tmp = np.array([grid[k] for k in sorted(grid) if not np.isnan(grid[k])]) - try: - ret.append(tmp / sum(tmp)) - except Exception as ex: - ret.append(0) + elif method == 3: + i = intervals[k] - else: - ret = [] + data = np.linspace(i[0],i[1],100).tolist() - for k in np.arange(self.order, steps + self.order): - grid = self.getGridClean(resolution) - grid = self.gridCount(grid, resolution, index, intervals[k]) - - tmp = np.array([grid[k] for k in sorted(grid)]) - - ret.append(tmp / sum(tmp)) - - grid = self.getGridClean(resolution) - df = pd.DataFrame(ret, columns=sorted(grid)) - return df - - def density(self, x, num_bins=100): - affected_flrgs = [] - affected_flrgs_memberships = [] - mv = FuzzySet.fuzzyInstance(x, self.sets) - tmp = np.argwhere(mv) - idx = np.ravel(tmp) - - if idx.size == 0: # the element is out of the bounds of the Universe of Discourse - if x <= self.sets[0].lower: - idx = [0] - elif x >= self.sets[-1].upper: - idx = [len(self.sets) - 1] else: - raise Exception(x) + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, bins=_bins, + uod=uod, **kwargs) + lags = {} - for kk in idx: - flrg = ProbabilisticWeightedFLRG(self.order) - flrg.appendLHS(self.sets[kk]) - affected_flrgs.append(flrg) - affected_flrgs_memberships.append(mv[kk]) + cc = 0 - total_prob = 0.0 + for dd in ret[k - self.order: k]: + vals = [float(v) for v in dd.bins if round(dd.density(v),4) > 0] + lags[cc] = sorted(vals) + cc += 1 - for count, flrg in enumerate(affected_flrgs): - _flrg = self.flrgs[flrg.strLHS()] - pk = _flrg.frequency_count / self.global_frequency_count - priori = pk * (affected_flrgs_memberships[count]) # / _flrg.partition_function(uod=self.get_UoD(), nbins=num_bins)) - #print(flrg.strLHS() + ": PK=" + str(pk) + " Priori=" + str(priori)) - #posteriori = self.get_conditional_probability(k, flrg) * priori - total_prob += priori + root = tree.FLRGTreeNode(None) - return total_prob + self.buildTreeWithoutOrder(root, lags, 0) - def AprioriPDF(self, **kwargs): - nbins = kwargs.get('num_bins', 100) - pdf = ProbabilityDistribution.ProbabilityDistribution(uod=[self.original_min, self.original_max], num_bins=nbins) - t = 0.0 + # Trace the possible paths + for p in root.paths(): + path = list(reversed(list(filter(None.__ne__, p)))) - for k in pdf.bins: - #print("BIN: " + str(k) ) - affected_flrgs = [] - affected_flrgs_memberships = [] + pk = np.prod([ret[k - self.order + o].density(path[o]) + for o in np.arange(0,self.order)]) - mv = FuzzySet.fuzzyInstance(k, self.sets) - tmp = np.argwhere(mv) - idx = np.ravel(tmp) + d = self.forecastDistribution(path)[0] - if idx.size == 0: # the element is out of the bounds of the Universe of Discourse - if k <= self.sets[0].lower: - idx = [0] - elif k >= self.sets[-1].upper: - idx = [len(self.sets) - 1] - else: - raise Exception(k) + for bin in _bins: + dist.set(bin, dist.density(bin) + pk * d.density(bin)) - for kk in idx: - flrg = ProbabilisticWeightedFLRG(self.order) - flrg.appendLHS(self.sets[kk]) - affected_flrgs.append(flrg) - affected_flrgs_memberships.append(mv[kk]) + if method != 4: + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, bins=_bins, data=data, + uod=uod, **kwargs) - total_prob = 0.0 + ret.append(dist) - for count, flrg in enumerate(affected_flrgs): - _flrg = self.flrgs[flrg.strLHS()] - pk = _flrg.frequency_count / self.global_frequency_count - priori = pk * (affected_flrgs_memberships[count] / _flrg.partition_function(uod=self.get_UoD())) - #print(flrg.strLHS() + ": PK=" + str(pk) + " Priori=" + str(priori)) - posteriori = self.get_conditional_probability(k, flrg) * priori - total_prob += posteriori + return ret - t += total_prob - pdf.set(k, total_prob) - print(t) - - return pdf def __str__(self): tmp = self.name + ":\n" diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py index 437847a..e2c9eaf 100644 --- a/pyFTS/tests/pwfts.py +++ b/pyFTS/tests/pwfts.py @@ -22,8 +22,15 @@ from numpy import random os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") +''' enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") enrollments = np.array(enrollments["Enrollments"]) +''' + +taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") +data = np.array(taiexpd["avg"][:5000]) +del(taiexpd) + import importlib import pandas as pd @@ -37,20 +44,20 @@ from pyFTS.benchmarks import benchmarks as bchmk #uod = [10162, 21271] -enrollments_fs1 = Grid.GridPartitioner(enrollments, 6) +fs1 = Grid.GridPartitioner(data[:3000], 30) #for s in enrollments_fs1.sets: # print(s) #.partition_function(uod, 100)) -pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1) -pfts1_enrollments.train(enrollments, None, 1) -pfts1_enrollments.shortname = "1st Order" +pfts1 = pwfts.ProbabilisticWeightedFTS("1", partitioner=fs1) +pfts1.train(data, None, 1) +pfts1.shortname = "1st Order" #print(pfts1_enrollments) -tmp = pfts1_enrollments.forecastDistribution([15000]) -#print(tmp[0]) +tmp = pfts1.forecastAheadDistribution(data[3000:3020],20, method=3, h=0.45, kernel="gaussian") +print(tmp[0]) -print(tmp[0].quantile([0.05, 0.95])) +#print(tmp[0].quantile([0.05, 0.95])) #pfts1_enrollments.AprioriPDF #norm = pfts1_enrollments.global_frequency_count