- Probability distributions

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-02-15 18:16:13 -02:00
parent 8787d749ff
commit cb12810e0a
5 changed files with 122 additions and 74 deletions

View File

@ -145,44 +145,3 @@ def crps(targets, densities):
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
return _crps / float(l * n) return _crps / float(l * n)
def pdf(data, bins=100):
    """Estimate a discrete pdf of *data* over *bins* equally spaced grid points.

    Each sample is assigned to the smallest grid point >= the sample; per-point
    counts are then normalized to sum to 1.

    :param data: iterable of numeric samples (must be non-empty)
    :param bins: number of grid points spanning [min(data), max(data)]
    :return: dict mapping grid point -> probability
    """
    _mx = max(data)
    _mn = min(data)
    # Grid covers [min, max]; the last point equals max(data), so every
    # sample has a grid point >= it and the lookup below cannot fall off the end.
    percentiles = np.linspace(_mn, _mx, bins).tolist()
    _pdf = {k: 0 for k in percentiles}
    grid = np.asarray(percentiles)
    for k in data:
        # Index of the first grid point >= k (same semantics as a sorted
        # collection's find_ge); side='left' keeps exact matches on their own bin.
        v = percentiles[int(np.searchsorted(grid, k, side='left'))]
        _pdf[v] += 1
    norm = sum(_pdf.values())
    for k in _pdf:
        _pdf[k] /= norm
    return _pdf
def pdf_fuzzysets(data, sets):
    """Estimate a discrete pdf of *data* over the fuzzy sets in *sets*.

    The membership degree of each sample in each fuzzy set is accumulated per
    set name, and the totals are normalized so the result sums to 1.

    :param data: iterable of numeric samples
    :param sets: list of fuzzy sets with a ``name`` attribute
    :return: dict mapping fuzzy set name -> probability
    """
    accum = {fs.name: 0 for fs in sets}
    for value in data:
        memberships = FuzzySet.fuzzyInstance(value, sets)
        for pos, fs in enumerate(sets):
            accum[fs.name] += memberships[pos]
    total = sum(accum.values())
    for name in accum:
        accum[name] /= total
    return accum
def entropy(pdf):
    """Shannon entropy (in nats) of a distribution given as {value: probability}."""
    return -sum(p * np.log(p) for p in pdf.values())

View File

@ -0,0 +1,73 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet,SortedCollection
class ProbabilityDistribution(object):
    """Histogram-based empirical probability distribution over a universe of discourse.

    Samples are assigned to the smallest bin center >= the sample value; raw
    counts are kept in ``self.distribution`` and probabilities are obtained by
    dividing by the total sample count ``self.count``.
    """
    def __init__(self, name, nbins, uod, bins=None, labels=None, data=None):
        """
        :param name: label used in plots and report tables
        :param nbins: number of bins when *bins* is not supplied
        :param uod: [min, max] universe of discourse
        :param bins: optional explicit list of bin centers
        :param labels: labels matching *bins* (only used when *bins* is given)
        :param data: optional initial samples, appended immediately
        """
        self.name = name
        self.nbins = nbins
        self.uod = uod
        if bins is None:
            # Equally spaced bin centers spanning the universe of discourse.
            self.bins = np.linspace(uod[0], uod[1], nbins).tolist()
            self.labels = [str(k) for k in self.bins]
        else:
            self.bins = bins
            self.labels = labels
        # Sorted index for "first bin >= value" lookups.
        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
        self.distribution = {}
        self.count = 0
        for k in self.bins:
            self.distribution[k] = 0
        if data is not None:
            self.append(data)

    def append(self, values):
        """Accumulate *values* into the histogram, one count per sample."""
        for k in values:
            v = self.index.find_ge(k)  # raises ValueError if k exceeds the top bin
            self.distribution[v] += 1
            self.count += 1

    def density(self, values):
        """Return the estimated probability (count/total) of each value in *values*."""
        ret = []
        for k in values:
            v = self.index.find_ge(k)
            ret.append(self.distribution[v] / self.count)
        return ret

    def entropy(self):
        """Shannon entropy (in nats) of the normalized bin frequencies.

        Counts are normalized by the total sample count before taking logs;
        empty bins contribute zero.
        """
        h = 0.0
        for k in self.bins:
            c = self.distribution[k]
            if c > 0:
                p = c / self.count
                h -= p * np.log(p)
        return h

    def empiricalloglikelihood(self):
        """Sum of log raw counts over non-empty bins.

        NOTE(review): this sums log(count) rather than count * log(count/total);
        confirm the intended likelihood definition before relying on it.
        """
        _s = 0
        for k in self.bins:
            if self.distribution[k] > 0:
                _s += np.log(self.distribution[k])
        return _s

    def pseudologlikelihood(self, data):
        """Sum of log estimated densities of *data*, skipping zero densities."""
        densities = self.density(data)
        _s = 0
        for k in densities:
            if k > 0:
                _s += np.log(k)
        return _s

    def plot(self, axis=None, color="black", tam=[10, 6]):
        """Plot the normalized distribution on *axis* (a new figure when None).

        :param axis: matplotlib axis to draw on, or None
        :param color: line color
        :param tam: figure size, only used when a new figure is created
        """
        if axis is None:
            fig = plt.figure(figsize=tam)
            axis = fig.add_subplot(111)
        ys = [self.distribution[k] / self.count for k in self.bins]
        axis.plot(self.bins, ys, c=color, label=self.name)
        axis.set_xlabel('Universe of Discourse')
        axis.set_ylabel('Probability')

View File

@ -9,7 +9,7 @@ import matplotlib.cm as cmx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
# from sklearn.cross_validation import KFold # from sklearn.cross_validation import KFold
from pyFTS.benchmarks import Measures, naive, ResidualAnalysis from pyFTS.benchmarks import Measures, naive, ResidualAnalysis, ProbabilityDistribution
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts
@ -24,7 +24,8 @@ styles = ['-','--','-.',':','.']
nsty = len(styles) nsty = len(styles)
def allPointForecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True, def allPointForecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True,
series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None): series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None,
distributions=False):
if models is None: if models is None:
models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
@ -58,7 +59,7 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
mfts.appendTransformation(transformation) mfts.appendTransformation(transformation)
mfts.train(data_train, data_train_fs, order=order) mfts.train(data_train, data_train_fs, order=order)
objs.append(mfts) objs.append(mfts)
lcolors.append(colors[count % ncol]) lcolors.append(colors[(count + order) % ncol])
if statistics: if statistics:
print(getPointStatistics(data_test, objs)) print(getPointStatistics(data_test, objs))
@ -71,6 +72,21 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
intervals=False) intervals=False)
if distributions:
lcolors.insert(0,'black')
pmfs = []
pmfs.append(
ProbabilityDistribution.ProbabilityDistribution("Original", 100, [min(data_train), max(data_train)], data=data_train) )
for m in objs:
forecasts = m.forecast(data_train)
pmfs.append(
ProbabilityDistribution.ProbabilityDistribution(m.shortname, 100, [min(data_train), max(data_train)],
data=forecasts))
print(getProbabilityDistributionStatistics(pmfs,data_train))
plotProbabilityDistributions(pmfs, lcolors)
def getPointStatistics(data, models, externalmodels = None, externalforecasts = None, indexers=None): def getPointStatistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n" ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
@ -108,6 +124,17 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts =
return ret return ret
def getProbabilityDistributionStatistics(pmfs, data):
    """Build a LaTeX table body with entropy and likelihood statistics.

    :param pmfs: list of ProbabilityDistribution-like objects
    :param data: samples passed to each pmf's pseudologlikelihood
    :return: LaTeX table rows as a single string
    """
    rows = ["Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\\\ \n"]
    for pmf in pmfs:
        cells = [pmf.name,
                 str(pmf.entropy()),
                 str(pmf.empiricalloglikelihood()),
                 str(pmf.pseudologlikelihood(data))]
        rows.append(" & ".join(cells) + " \\\\ \n")
    return "".join(rows)
def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5], def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
models=None, transformation=None): models=None, transformation=None):
if models is None: if models is None:
@ -215,6 +242,18 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,
Util.showAndSaveImage(fig, file, save, lgd=legends) Util.showAndSaveImage(fig, file, save, lgd=legends)
def plotProbabilityDistributions(pmfs, lcolors):
    """Draw every distribution in *pmfs* on one shared axis with a legend.

    :param pmfs: list of ProbabilityDistribution objects (each has a plot method)
    :param lcolors: line color for each distribution, positionally matched
    """
    fig = plt.figure(figsize=[15, 7])
    ax = fig.add_subplot(111)
    for pos, pmf in enumerate(pmfs):
        pmf.plot(ax, color=lcolors[pos])
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels)
def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5], def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
models=None, transformation=None, option=2): models=None, transformation=None, option=2):
if models is None: if models is None:
@ -786,3 +825,4 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None):
plt.tight_layout() plt.tight_layout()
Util.showAndSaveImage(fig, file, save) Util.showAndSaveImage(fig, file, save)

14
fts.py
View File

@ -115,7 +115,7 @@ class FTS(object):
for child in node.getChildren(): for child in node.getChildren():
self.buildTreeWithoutOrder(child, lags, level + 1) self.buildTreeWithoutOrder(child, lags, level + 1)
def generate_data(self,bins=100): def inputoutputmapping(self,bins=100):
dim_uod = tuple([bins for k in range(0,self.order)]) dim_uod = tuple([bins for k in range(0,self.order)])
@ -136,8 +136,6 @@ class FTS(object):
for k in self.sets: for k in self.sets:
pdf_fs[k.name] = 0 pdf_fs[k.name] = 0
index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)
lags = {} lags = {}
for o in np.arange(0, self.order): for o in np.arange(0, self.order):
@ -165,15 +163,7 @@ class FTS(object):
simulation_fs[index_fs] = forecast simulation_fs[index_fs] = forecast
return [simulation_fs, simulation_uod ]
pdf_fs = Measures.pdf_fuzzysets(np.ravel(simulation_fs),self.sets)
pdf_uod = Measures.pdf(np.ravel(simulation_fs), bins=bins)
#tmp_pdf_fs = pd.DataFrame( [[pdf_fs[k] for k in sorted(pdf_fs)]], columns=[k for k in sorted(pdf_fs)])
#tmp_pdf_uod = pd.DataFrame([[pdf_uod[k] for k in sorted(pdf_uod)]], columns=[k for k in sorted(pdf_uod)])
return [pdf_fs, pdf_uod, simulation_fs, simulation_uod ]

View File

@ -39,12 +39,12 @@ fs = Grid.GridPartitionerTrimf(gauss_treino,10)
#tmp = chen.ConventionalFTS("") #tmp = chen.ConventionalFTS("")
pfts1 = pfts.ProbabilisticFTS("1") #pfts1 = pfts.ProbabilisticFTS("1")
#pfts1.appendTransformation(diff) #pfts1.appendTransformation(diff)
pfts1.train(gauss_treino,fs,2) pfts1.train(gauss_treino,fs,1)
#pfts2 = pfts.ProbabilisticFTS("n = 2") pfts2 = pfts.ProbabilisticFTS("n = 2")
#pfts2.appendTransformation(diff) #pfts2.appendTransformation(diff)
#pfts2.train(gauss_treino,fs,2) pfts2.train(gauss_treino,fs,2)
#pfts3 = pfts.ProbabilisticFTS("n = 3") #pfts3 = pfts.ProbabilisticFTS("n = 3")
#pfts3.appendTransformation(diff) #pfts3.appendTransformation(diff)
@ -54,20 +54,6 @@ pfts1.train(gauss_treino,fs,2)
#print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50)) #print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
sim_fs, sim_uod =pfts1.generate_data(bins=10)
#print(pdf_fs)
#print(sim_fs)
print(sim_uod)
print(np.ravel(sim_uod))
#print(sim_uod)
#print (Measures.pdf(gauss_treino,bins=10))