Commit 9fb79ea080: PWFTS new m-step-ahead probabilistic forecasting method
Parent: 1ec03d34de
@@ -77,6 +77,8 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
         smooth = kwargs.get("smooth", "KDE")
         alpha = kwargs.get("alpha", None)
 
+        uod = self.get_UoD()
+
         for k in data.index:
 
             tmp = self.get_models_forecasts(data.ix[k])
@@ -88,9 +90,8 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
 
             name = str(self.indexer.get_index(data.ix[k]))
 
-            dist = ProbabilityDistribution.ProbabilityDistribution(smooth,
-                                                                   uod=[self.original_min, self.original_max],
-                                                                   data=tmp, name=name, **kwargs)
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=tmp,
+                                                                   name=name, **kwargs)
 
             ret.append(dist)
 
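Both hunks above replace the hand-built [self.original_min, self.original_max] pair with a single self.get_UoD() call hoisted out of the loop, so the universe of discourse is computed once per forecast call. The helper already exists in the model base class; a sketch of the shape it conventionally has in pyFTS models (an assumption, the real method may pad or derive the bounds differently):

def get_UoD(self):
    # assumed shape: the universe of discourse as [lower, upper],
    # taken from the bounds of the training data seen by the model
    return [self.original_min, self.original_max]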
@@ -17,7 +17,7 @@ class ProbabilityDistribution(object):
 
         self.type = type
         if self.type == "KDE":
-            self.kde = kde.KernelSmoothing(kwargs.get("h", 10), kwargs.get("method", "epanechnikov"))
+            self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
 
         self.nbins = kwargs.get("num_bins", 100)
 
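The constructor change renames the kernel-selection kwarg from "method" to "kernel", matching the KernelSmoothing rename below, and lowers the default bandwidth from 10 to 0.5, a sensible scale once inputs are normalized to [0, 1]. A minimal usage sketch of the resulting call path; the module path follows the pyFTS.probabilistic layout, and the data, bounds, and bandwidth values here are illustrative:

from pyFTS.probabilistic import ProbabilityDistribution

# "KDE" selects kernel smoothing; h and kernel travel through **kwargs
# into kde.KernelSmoothing(h, kernel)
dist = ProbabilityDistribution.ProbabilityDistribution(
    "KDE",
    uod=[0, 100],              # universe of discourse bounds
    data=[10.0, 12.5, 11.2],   # illustrative samples to smooth
    h=0.45,                    # bandwidth, forwarded to the smoother
    kernel="gaussian")         # kernel name, forwarded to the smoother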
@@ -8,39 +8,38 @@ Kernel Density Estimation
 
 class KernelSmoothing(object):
     """Kernel Density Estimation"""
-    def __init__(self,h, method="epanechnikov"):
+    def __init__(self,h, kernel="epanechnikov"):
         self.h = h
-        self.method = method
+        self.kernel = kernel
         self.transf = Transformations.Scale(min=0,max=1)
 
-    def kernel(self, u):
-        if self.method == "epanechnikov":
+    def kernel_function(self, u):
+        if self.kernel == "epanechnikov":
             tmp = (3/4)*(1.0 - u**2)
             return tmp if tmp > 0 else 0
-        elif self.method == "gaussian":
+        elif self.kernel == "gaussian":
             return (1.0/np.sqrt(2*np.pi))*np.exp(-0.5*u**2)
-        elif self.method == "uniform":
+        elif self.kernel == "uniform":
             return 0.5
-        elif self.method == "triangular":
+        elif self.kernel == "triangular":
             tmp = 1.0 - np.abs(u)
             return tmp if tmp > 0 else 0
-        elif self.method == "logistic":
+        elif self.kernel == "logistic":
             return 1.0/(np.exp(u)+2+np.exp(-u))
-        elif self.method == "cosine":
+        elif self.kernel == "cosine":
             return (np.pi/4.0)*np.cos((np.pi/2.0)*u)
-        elif self.method == "sigmoid":
+        elif self.kernel == "sigmoid":
            return (2.0/np.pi)*(1.0/(np.exp(u)+np.exp(-u)))
-        elif self.method == "tophat":
+        elif self.kernel == "tophat":
            return 1 if np.abs(u) < 0.5 else 0
-        elif self.method == "exponential":
+        elif self.kernel == "exponential":
            return 0.5 * np.exp(-np.abs(u))
 
 
     def probability(self, x, data):
         l = len(data)
 
         ndata = self.transf.apply(data)
         nx = self.transf.apply(x)
-        p = sum([self.kernel((nx - k)/self.h) for k in ndata]) / l*self.h
+        p = sum([self.kernel_function((nx - k)/self.h) for k in ndata]) / l*self.h
 
         return p
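One caveat in probability() survives the commit: `sum(...) / l*self.h` parses as `(sum(...) / l) * self.h`, whereas the standard kernel density estimator divides by n*h, that is f(x) = (1/(n*h)) * sum_i K((x - x_i)/h). A self-contained sketch of the textbook form with the parenthesization made explicit, using a Gaussian kernel for the example:

import numpy as np

def gaussian_kernel(u):
    # standard normal density used as the smoothing kernel
    return np.exp(-0.5 * u**2) / np.sqrt(2 * np.pi)

def kde_density(x, data, h=0.5, kernel=gaussian_kernel):
    # textbook KDE: f(x) = (1 / (n*h)) * sum_i K((x - x_i) / h);
    # the parentheses around (n * h) matter: `/ n*h` would compute
    # (sum / n) * h, which is what `/ l*self.h` above evaluates to
    data = np.asarray(data, dtype=float)
    n = len(data)
    return np.sum(kernel((x - data) / h)) / (n * h)

# density estimate at x = 1.0 from a small illustrative sample
print(kde_density(1.0, [0.2, 0.8, 1.1, 1.4, 2.0], h=0.5))

Since h, the kernel sum, and the sample size are all positive, the misparse only rescales the curve by a factor of h squared, so ranked comparisons survive, but the result is not a normalized density.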
pyFTS/pwfts.py (163 lines changed)
@@ -566,24 +566,28 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
 
         ret = []
 
-        resolution = kwargs.get('resolution',100)
-
-        method = kwargs.get('method',2)
-
-        intervals = self.forecastAheadInterval(data, steps)
-        grid = self.getGridClean(resolution)
-        index = SortedCollection.SortedCollection(iterable=grid.keys())
-
-        if method == 1:
-
-            grids = []
-            for k in np.arange(0, steps):
-                grids.append(self.getGridClean(resolution))
+        method = kwargs.get('method', 2)
+        smooth = "KDE" if method != 4 else "none"
+        nbins = kwargs.get("num_bins", 100)
+
+        uod = self.get_UoD()
+        _bins = np.linspace(uod[0], uod[1], nbins).tolist()
+
+        if method != 4:
+            intervals = self.forecastAheadInterval(data, steps)
+        else:
+            l = len(data)
+            for k in np.arange(l - self.order, l):
+                dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
+                dist.set(data[k], 1.0)
+                ret.append(dist)
 
         for k in np.arange(self.order, steps + self.order):
 
+            data = []
+
+            if method == 1:
+
                 lags = {}
 
                 cc = 0
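The rewritten preamble drops the old grid/SortedCollection machinery in favor of a single bin grid (_bins) spanning the universe of discourse. Methods 1 to 3 pool point samples into data and hand them to a KDE-smoothed ProbabilityDistribution per step; method 4 skips smoothing ("none") and instead seeds the first self.order positions of ret with point-mass distributions on the last observed values, to be propagated recursively in the loop below. Method 2's sampling idea, sweeping quantile offsets across each predicted interval and pooling samples from the resulting interval forecasts, can be shown in isolation. A toy, self-contained sketch for a first-order model, where forecast_interval is a made-up stand-in for self.forecastInterval:

import numpy as np

def sweep_samples(interval, forecast_interval):
    # sweep qt over the interval's quantiles from both ends, forecast an
    # interval for each probe, and pool 100 evenly spaced samples from it
    lo, hi = interval
    data = []
    for qt in np.arange(0, 50, 1):
        q_low = forecast_interval(lo + qt * (hi - lo) / 100)
        data.extend(np.linspace(q_low[0], q_low[1], 100).tolist())
        q_up = forecast_interval(hi - qt * (hi - lo) / 100)
        data.extend(np.linspace(q_up[0], q_up[1], 100).tolist())
    return data

# stand-in forecaster: widen each probe point by +/- 1.0 (illustrative)
fi = lambda x: (x - 1.0, x + 1.0)
print(len(sweep_samples((10.0, 12.0), fi)))   # 50 probes * 2 ends * 100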
@@ -612,139 +616,74 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 self.buildTreeWithoutOrder(root, lags, 0)
 
                 # Trace the possible paths
 
                 for p in root.paths():
                     path = list(reversed(list(filter(None.__ne__, p))))
 
-                    qtle = self.forecastInterval(path)
+                    qtle = np.ravel(self.forecastInterval(path))
 
-                    grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle))
-
-            for k in np.arange(0, steps):
-                tmp = np.array([grids[k][q] for q in sorted(grids[k])])
-                ret.append(tmp / sum(tmp))
+                    data.extend(np.linspace(qtle[0],qtle[1],100).tolist())
 
             elif method == 2:
 
-                ret = []
-
-                for k in np.arange(self.order, steps + self.order):
-
-                    grid = self.getGridClean(resolution)
-                    grid = self.gridCount(grid, resolution, index, intervals[k])
-
                 for qt in np.arange(0, 50, 1):
                     # print(qt)
                     qtle_lower = self.forecastInterval(
                         [intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
                          np.arange(k - self.order, k)])
-                    grid = self.gridCount(grid, resolution, index, np.ravel(qtle_lower))
+                    qtle_lower = np.ravel(qtle_lower)
+                    data.extend(np.linspace(qtle_lower[0], qtle_lower[1], 100).tolist())
                     qtle_upper = self.forecastInterval(
                         [intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
                          np.arange(k - self.order, k)])
-                    grid = self.gridCount(grid, resolution, index, np.ravel(qtle_upper))
+                    qtle_upper = np.ravel(qtle_upper)
+                    data.extend(np.linspace(qtle_upper[0], qtle_upper[1], 100).tolist())
                     qtle_mid = self.forecastInterval(
                         [intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)])
-                    grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid))
+                    qtle_mid = np.ravel(qtle_mid)
+                    data.extend(np.linspace(qtle_mid[0], qtle_mid[1], 100).tolist())
 
-                tmp = np.array([grid[k] for k in sorted(grid) if not np.isnan(grid[k])])
-                try:
-                    ret.append(tmp / sum(tmp))
-                except Exception as ex:
-                    ret.append(0)
+            elif method == 3:
+                i = intervals[k]
+
+                data = np.linspace(i[0],i[1],100).tolist()
 
             else:
-                ret = []
-
-                for k in np.arange(self.order, steps + self.order):
-                    grid = self.getGridClean(resolution)
-                    grid = self.gridCount(grid, resolution, index, intervals[k])
-
-                    tmp = np.array([grid[k] for k in sorted(grid)])
-
-                    ret.append(tmp / sum(tmp))
-
-            grid = self.getGridClean(resolution)
-            df = pd.DataFrame(ret, columns=sorted(grid))
-            return df
+                dist = ProbabilityDistribution.ProbabilityDistribution(smooth, bins=_bins,
+                                                                       uod=uod, **kwargs)
+                lags = {}
+
+                cc = 0
+
+                for dd in ret[k - self.order: k]:
+                    vals = [float(v) for v in dd.bins if round(dd.density(v),4) > 0]
+                    lags[cc] = sorted(vals)
+                    cc += 1
+
+                root = tree.FLRGTreeNode(None)
+
+                self.buildTreeWithoutOrder(root, lags, 0)
+
+                # Trace the possible paths
+                for p in root.paths():
+                    path = list(reversed(list(filter(None.__ne__, p))))
+
+                    pk = np.prod([ret[k - self.order + o].density(path[o])
+                                  for o in np.arange(0,self.order)])
+
+                    d = self.forecastDistribution(path)[0]
+
+                    for bin in _bins:
+                        dist.set(bin, dist.density(bin) + pk * d.density(bin))
+
+            if method != 4:
+                dist = ProbabilityDistribution.ProbabilityDistribution(smooth, bins=_bins, data=data,
+                                                                       uod=uod, **kwargs)
+
+            ret.append(dist)
+
+        return ret
 
-    def density(self, x, num_bins=100):
-        affected_flrgs = []
-        affected_flrgs_memberships = []
-        mv = FuzzySet.fuzzyInstance(x, self.sets)
-        tmp = np.argwhere(mv)
-        idx = np.ravel(tmp)
-
-        if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-            if x <= self.sets[0].lower:
-                idx = [0]
-            elif x >= self.sets[-1].upper:
-                idx = [len(self.sets) - 1]
-            else:
-                raise Exception(x)
-
-        for kk in idx:
-            flrg = ProbabilisticWeightedFLRG(self.order)
-            flrg.appendLHS(self.sets[kk])
-            affected_flrgs.append(flrg)
-            affected_flrgs_memberships.append(mv[kk])
-
-        total_prob = 0.0
-
-        for count, flrg in enumerate(affected_flrgs):
-            _flrg = self.flrgs[flrg.strLHS()]
-            pk = _flrg.frequency_count / self.global_frequency_count
-            priori = pk * (affected_flrgs_memberships[count])  # / _flrg.partition_function(uod=self.get_UoD(), nbins=num_bins))
-            #print(flrg.strLHS() + ": PK=" + str(pk) + " Priori=" + str(priori))
-            #posteriori = self.get_conditional_probability(k, flrg) * priori
-            total_prob += priori
-
-        return total_prob
-
-    def AprioriPDF(self, **kwargs):
-        nbins = kwargs.get('num_bins', 100)
-        pdf = ProbabilityDistribution.ProbabilityDistribution(uod=[self.original_min, self.original_max], num_bins=nbins)
-        t = 0.0
-
-        for k in pdf.bins:
-            #print("BIN: " + str(k) )
-            affected_flrgs = []
-            affected_flrgs_memberships = []
-
-            mv = FuzzySet.fuzzyInstance(k, self.sets)
-            tmp = np.argwhere(mv)
-            idx = np.ravel(tmp)
-
-            if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                if k <= self.sets[0].lower:
-                    idx = [0]
-                elif k >= self.sets[-1].upper:
-                    idx = [len(self.sets) - 1]
-                else:
-                    raise Exception(k)
-
-            for kk in idx:
-                flrg = ProbabilisticWeightedFLRG(self.order)
-                flrg.appendLHS(self.sets[kk])
-                affected_flrgs.append(flrg)
-                affected_flrgs_memberships.append(mv[kk])
-
-            total_prob = 0.0
-
-            for count, flrg in enumerate(affected_flrgs):
-                _flrg = self.flrgs[flrg.strLHS()]
-                pk = _flrg.frequency_count / self.global_frequency_count
-                priori = pk * (affected_flrgs_memberships[count] / _flrg.partition_function(uod=self.get_UoD()))
-                #print(flrg.strLHS() + ": PK=" + str(pk) + " Priori=" + str(priori))
-                posteriori = self.get_conditional_probability(k, flrg) * priori
-                total_prob += posteriori
-
-            t += total_prob
-            pdf.set(k, total_prob)
-
-        print(t)
-
-        return pdf
 
     def __str__(self):
         tmp = self.name + ":\n"
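The method-4 branch is the genuinely new estimator in this commit. For each step k it collects, from the already computed distributions, the lag values whose density is non-negligible (the lags dict), enumerates every lag path through the tree, weights each path by pk, the product of its lag densities, and accumulates pk times the one-step conditional distribution forecastDistribution(path)[0] into the step-k distribution over the shared bins. That is a discrete mixture in the spirit of the law of total probability. A self-contained toy version of the recursion over plain dicts, with a made-up one_step() standing in for the model's conditional forecast (every name and number here is illustrative):

import numpy as np
from itertools import product

# distributions are dicts mapping bin centers to weights
bins = np.linspace(0.0, 10.0, 11)

def one_step(path):
    # illustrative conditional density: a Gaussian bump at the lag mean
    mu = float(np.mean(path))
    w = np.exp(-0.5 * ((bins - mu) / 1.5) ** 2)
    return dict(zip(bins, w / w.sum()))

def forecast_ahead(history, steps, order=1):
    # seed: point masses on the last `order` observations, mirroring
    # the dist.set(data[k], 1.0) seeding above
    dists = [{b: float(b == obs) for b in bins} for obs in history[-order:]]
    for k in range(order, steps + order):
        new = {b: 0.0 for b in bins}
        # lag values with non-negligible density, as in the lags dict
        lag_vals = [[b for b in bins if dists[k - order + o][b] > 1e-4]
                    for o in range(order)]
        for path in product(*lag_vals):            # all traced lag paths
            pk = np.prod([dists[k - order + o][path[o]]
                          for o in range(order)])  # path weight
            d = one_step(path)                     # conditional density
            for b in bins:
                new[b] += pk * d[b]                # mixture accumulation
        dists.append(new)
    return dists[order:]                           # one dict per step

print(forecast_ahead([4.0], steps=3)[0])

Because lag values below the density threshold are dropped, the accumulated mass can fall slightly short of 1; a final renormalization over the bins would restore a proper density.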
@@ -22,8 +22,15 @@ from numpy import random
 
 os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
 
+'''
 enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
 enrollments = np.array(enrollments["Enrollments"])
+'''
+
+taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+data = np.array(taiexpd["avg"][:5000])
+del(taiexpd)
+
 
 import importlib
 import pandas as pd
@@ -37,20 +44,20 @@ from pyFTS.benchmarks import benchmarks as bchmk
 
 #uod = [10162, 21271]
 
-enrollments_fs1 = Grid.GridPartitioner(enrollments, 6)
+fs1 = Grid.GridPartitioner(data[:3000], 30)
 #for s in enrollments_fs1.sets:
 #    print(s) #.partition_function(uod, 100))
 
-pfts1_enrollments = pwfts.ProbabilisticWeightedFTS("1", partitioner=enrollments_fs1)
-pfts1_enrollments.train(enrollments, None, 1)
-pfts1_enrollments.shortname = "1st Order"
+pfts1 = pwfts.ProbabilisticWeightedFTS("1", partitioner=fs1)
+pfts1.train(data, None, 1)
+pfts1.shortname = "1st Order"
 
 #print(pfts1_enrollments)
 
-tmp = pfts1_enrollments.forecastDistribution([15000])
-#print(tmp[0])
+tmp = pfts1.forecastAheadDistribution(data[3000:3020],20, method=3, h=0.45, kernel="gaussian")
+print(tmp[0])
 
-print(tmp[0].quantile([0.05, 0.95]))
+#print(tmp[0].quantile([0.05, 0.95]))
 
 #pfts1_enrollments.AprioriPDF
 #norm = pfts1_enrollments.global_frequency_count
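For reference, the updated test script condenses to the following runnable snippet; it assumes the same DataSets/TAIEX.csv file with an "avg" column and the module layout of this commit (pyFTS/pwfts.py, pyFTS/partitioners/Grid.py):

import numpy as np
import pandas as pd
from pyFTS.partitioners import Grid
from pyFTS import pwfts

taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
data = np.array(taiexpd["avg"][:5000])
del(taiexpd)

fs1 = Grid.GridPartitioner(data[:3000], 30)    # 30 fuzzy sets over the UoD
pfts1 = pwfts.ProbabilisticWeightedFTS("1", partitioner=fs1)
pfts1.train(data, None, 1)                     # first-order model
pfts1.shortname = "1st Order"

# twenty steps ahead; method, h and kernel as exercised in the script
dists = pfts1.forecastAheadDistribution(data[3000:3020], 20,
                                        method=3, h=0.45, kernel="gaussian")
print(dists[0])                                # one distribution per step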