- Probability distributions

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-02-15 18:16:13 -02:00
parent 8787d749ff
commit cb12810e0a
5 changed files with 122 additions and 74 deletions

View File

@ -145,44 +145,3 @@ def crps(targets, densities):
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
return _crps / float(l * n)
def pdf(data, bins=100):
    """Estimate a discrete probability distribution of ``data``.

    The range [min(data), max(data)] is covered by ``bins`` evenly spaced
    grid points; each sample is assigned to the first grid point >= its
    value and the resulting counts are normalized to sum to 1.

    Fix: removed a leftover debug ``print(percentiles)`` statement.

    :param data: iterable of numeric samples (must be non-empty)
    :param bins: number of grid points spanning the data range
    :return: dict mapping grid point -> probability mass
    """
    _mx = max(data)
    _mn = min(data)
    percentiles = np.linspace(_mn, _mx, bins).tolist()
    # Sorted index gives O(log n) lookup of the first grid point >= sample.
    index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)
    _pdf = {k: 0 for k in percentiles}
    for sample in data:
        v = index_percentiles.find_ge(sample)
        _pdf[v] += 1
    norm = sum(_pdf.values())
    for k in _pdf:
        _pdf[k] /= norm
    return _pdf
def pdf_fuzzysets(data, sets):
    """Estimate a discrete probability distribution over fuzzy sets.

    For each sample the membership in every fuzzy set is accumulated,
    then the totals are normalized to sum to 1.

    :param data: iterable of numeric samples
    :param sets: list of fuzzy sets (each with a ``name`` attribute)
    :return: dict mapping fuzzy-set name -> probability mass
    """
    totals = {fs.name: 0 for fs in sets}
    for sample in data:
        memberships = FuzzySet.fuzzyInstance(sample, sets)
        for position, fs in enumerate(sets):
            totals[fs.name] += memberships[position]
    norm = sum(totals.values())
    for name in totals:
        totals[name] /= norm
    return totals
def entropy(pdf):
    """Shannon entropy H = -sum(p * ln p) of a discrete distribution.

    Fix: outcomes with zero probability are skipped (by the usual
    0*log(0) == 0 convention); the original fed 0 straight into
    ``np.log`` and produced nan/-inf plus a runtime warning. This also
    matches the guarded entropy in ``ProbabilityDistribution.entropy``.

    :param pdf: dict mapping outcome -> probability
    :return: entropy in nats
    """
    h = -sum([pdf[k] * np.log(pdf[k]) if pdf[k] > 0 else 0 for k in pdf])
    return h

View File

@ -0,0 +1,73 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet,SortedCollection
class ProbabilityDistribution(object):
    """Empirical probability distribution over a partitioned Universe of Discourse.

    Samples are accumulated as raw counts per bin; probabilities are
    obtained by dividing a bin's count by the total number of samples.
    """

    def __init__(self, name, nbins, uod, bins=None, labels=None, data=None):
        """
        :param name: identifier used on plot legends
        :param nbins: number of bins generated when ``bins`` is not given
        :param uod: Universe of Discourse bounds as [min, max]
        :param bins: optional explicit list of bin centers
        :param labels: optional labels matching ``bins``
        :param data: optional initial sample, accumulated via ``append``
        """
        self.name = name
        self.nbins = nbins
        self.uod = uod
        if bins is None:
            # Evenly spaced bin centers covering the whole UoD.
            self.bins = np.linspace(uod[0], uod[1], nbins).tolist()
            self.labels = [str(k) for k in self.bins]
        else:
            self.bins = bins
            self.labels = labels
        # Sorted index for O(log n) "first bin >= value" lookups.
        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
        self.distribution = {}  # bin center -> raw sample count
        self.count = 0          # total number of samples accumulated
        for k in self.bins:
            self.distribution[k] = 0
        if data is not None:
            self.append(data)

    def append(self, values):
        """Accumulate each value of the iterable into its bin's count."""
        for k in values:
            v = self.index.find_ge(k)
            self.distribution[v] += 1
            self.count += 1

    def density(self, values):
        """Return the estimated probability of each value in ``values``.

        Fix: guard against division by zero when no sample has been
        accumulated yet (the original raised ZeroDivisionError).
        """
        if self.count == 0:
            return [0.0 for _ in values]
        ret = []
        for k in values:
            v = self.index.find_ge(k)
            ret.append(self.distribution[v] / self.count)
        return ret

    def entropy(self):
        """Shannon entropy H = -sum(p * ln p) over the bin probabilities.

        Fix: the original computed -sum(c * ln c) over the raw COUNTS,
        which is not the entropy of the distribution; the counts are
        normalized by the sample total first (as ``density`` does).
        Empty bins contribute zero by the 0*log(0) == 0 convention.
        """
        if self.count == 0:
            return 0.0
        probs = [self.distribution[k] / self.count for k in self.bins]
        return -sum([p * np.log(p) if p > 0 else 0 for p in probs])

    def empiricalloglikelihood(self):
        """Sum of log raw counts over the non-empty bins.

        NOTE(review): this sums log(count), not log(probability); confirm
        whether normalization by ``self.count`` was intended here.
        """
        _s = 0
        for k in self.bins:
            if self.distribution[k] > 0:
                _s += np.log(self.distribution[k])
        return _s

    def pseudologlikelihood(self, data):
        """Sum of log estimated densities of ``data``; zero densities are skipped."""
        densities = self.density(data)
        _s = 0
        for k in densities:
            if k > 0:
                _s += np.log(k)
        return _s

    def plot(self, axis=None, color="black", tam=[10, 6]):
        """Plot the normalized distribution.

        :param axis: matplotlib axis to draw on; a new figure is created if None
        :param color: line color
        :param tam: figure size in inches, used only when a new figure is created
        """
        if axis is None:
            fig = plt.figure(figsize=tam)
            axis = fig.add_subplot(111)
        ys = [self.distribution[k] / self.count for k in self.bins]
        axis.plot(self.bins, ys, c=color, label=self.name)
        axis.set_xlabel('Universe of Discourse')
        axis.set_ylabel('Probability')

View File

@ -9,7 +9,7 @@ import matplotlib.cm as cmx
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# from sklearn.cross_validation import KFold
from pyFTS.benchmarks import Measures, naive, ResidualAnalysis
from pyFTS.benchmarks import Measures, naive, ResidualAnalysis, ProbabilityDistribution
from pyFTS.partitioners import Grid
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts
@ -24,7 +24,8 @@ styles = ['-','--','-.',':','.']
nsty = len(styles)
def allPointForecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True,
series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None):
series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None,
distributions=False):
if models is None:
models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
@ -58,7 +59,7 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
mfts.appendTransformation(transformation)
mfts.train(data_train, data_train_fs, order=order)
objs.append(mfts)
lcolors.append(colors[count % ncol])
lcolors.append(colors[(count + order) % ncol])
if statistics:
print(getPointStatistics(data_test, objs))
@ -71,6 +72,21 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
intervals=False)
if distributions:
lcolors.insert(0,'black')
pmfs = []
pmfs.append(
ProbabilityDistribution.ProbabilityDistribution("Original", 100, [min(data_train), max(data_train)], data=data_train) )
for m in objs:
forecasts = m.forecast(data_train)
pmfs.append(
ProbabilityDistribution.ProbabilityDistribution(m.shortname, 100, [min(data_train), max(data_train)],
data=forecasts))
print(getProbabilityDistributionStatistics(pmfs,data_train))
plotProbabilityDistributions(pmfs, lcolors)
def getPointStatistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
@ -108,6 +124,17 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts =
return ret
def getProbabilityDistributionStatistics(pmfs, data):
    """Build LaTeX tabular rows with entropy and likelihood statistics.

    :param pmfs: list of ProbabilityDistribution-like objects
    :param data: sample passed to each pseudo log-likelihood
    :return: LaTeX tabular body, one row per distribution
    """
    header = "Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\\\ \n"
    rows = []
    for pmf in pmfs:
        cells = [pmf.name,
                 str(pmf.entropy()),
                 str(pmf.empiricalloglikelihood()),
                 str(pmf.pseudologlikelihood(data))]
        rows.append(" & ".join(cells) + " \\\\ \n")
    return header + "".join(rows)
def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
models=None, transformation=None):
if models is None:
@ -215,6 +242,18 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,
Util.showAndSaveImage(fig, file, save, lgd=legends)
def plotProbabilityDistributions(pmfs, lcolors):
    """Plot every distribution in ``pmfs`` on a single shared axis.

    :param pmfs: list of ProbabilityDistribution objects
    :param lcolors: line color for each distribution, by position
    """
    fig = plt.figure(figsize=[15, 7])
    ax = fig.add_subplot(111)
    for position, pmf in enumerate(pmfs):
        pmf.plot(ax, color=lcolors[position])
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels)
def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
models=None, transformation=None, option=2):
if models is None:
@ -786,3 +825,4 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None):
plt.tight_layout()
Util.showAndSaveImage(fig, file, save)

14
fts.py
View File

@ -115,7 +115,7 @@ class FTS(object):
for child in node.getChildren():
self.buildTreeWithoutOrder(child, lags, level + 1)
def generate_data(self,bins=100):
def inputoutputmapping(self,bins=100):
dim_uod = tuple([bins for k in range(0,self.order)])
@ -136,8 +136,6 @@ class FTS(object):
for k in self.sets:
pdf_fs[k.name] = 0
index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)
lags = {}
for o in np.arange(0, self.order):
@ -165,15 +163,7 @@ class FTS(object):
simulation_fs[index_fs] = forecast
pdf_fs = Measures.pdf_fuzzysets(np.ravel(simulation_fs),self.sets)
pdf_uod = Measures.pdf(np.ravel(simulation_fs), bins=bins)
#tmp_pdf_fs = pd.DataFrame( [[pdf_fs[k] for k in sorted(pdf_fs)]], columns=[k for k in sorted(pdf_fs)])
#tmp_pdf_uod = pd.DataFrame([[pdf_uod[k] for k in sorted(pdf_uod)]], columns=[k for k in sorted(pdf_uod)])
return [pdf_fs, pdf_uod, simulation_fs, simulation_uod ]
return [simulation_fs, simulation_uod ]

View File

@ -39,12 +39,12 @@ fs = Grid.GridPartitionerTrimf(gauss_treino,10)
#tmp = chen.ConventionalFTS("")
pfts1 = pfts.ProbabilisticFTS("1")
#pfts1 = pfts.ProbabilisticFTS("1")
#pfts1.appendTransformation(diff)
pfts1.train(gauss_treino,fs,2)
#pfts2 = pfts.ProbabilisticFTS("n = 2")
pfts1.train(gauss_treino,fs,1)
pfts2 = pfts.ProbabilisticFTS("n = 2")
#pfts2.appendTransformation(diff)
#pfts2.train(gauss_treino,fs,2)
pfts2.train(gauss_treino,fs,2)
#pfts3 = pfts.ProbabilisticFTS("n = 3")
#pfts3.appendTransformation(diff)
@ -54,20 +54,6 @@ pfts1.train(gauss_treino,fs,2)
#print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
sim_fs, sim_uod =pfts1.generate_data(bins=10)
#print(pdf_fs)
#print(sim_fs)
print(sim_uod)
print(np.ravel(sim_uod))
#print(sim_uod)
#print (Measures.pdf(gauss_treino,bins=10))