- Probability distributions
This commit is contained in:
parent 8787d749ff
commit cb12810e0a
@@ -145,44 +145,3 @@ def crps(targets, densities):
        _crps += sum([(Ff[col][k] - Fa[col][k]) ** 2 for col in densities.columns])

    return _crps / float(l * n)


def pdf(data, bins=100):
    _mx = max(data)
    _mn = min(data)
    _pdf = {}
    percentiles = np.linspace(_mn, _mx, bins).tolist()

    print(percentiles)

    index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)

    for k in percentiles: _pdf[k] = 0

    for k in data:
        v = index_percentiles.find_ge(k)
        _pdf[v] += 1

    norm = sum(list(_pdf.values()))
    for k in _pdf: _pdf[k] /= norm

    return _pdf


def pdf_fuzzysets(data, sets):
    _pdf = {}
    for k in sets: _pdf[k.name] = 0
    for k in data:
        memberships = FuzzySet.fuzzyInstance(k, sets)
        for c, fs in enumerate(sets, start=0):
            _pdf[fs.name] += memberships[c]

    norm = sum(list(_pdf.values()))
    for k in _pdf: _pdf[k] /= norm

    return _pdf


def entropy(pdf):
    h = -sum([pdf[k] * np.log(pdf[k]) for k in pdf])
    return h
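The three helpers removed here are superseded by the new ProbabilityDistribution class added below. For reference, a minimal numpy-only sketch of what pdf() and entropy() computed (a normalized histogram and its Shannon entropy; the function names are illustrative, not part of the commit):

import numpy as np

def histogram_pmf(data, bins=100):
    # relative bin frequencies, summing to 1
    counts, edges = np.histogram(data, bins=bins)
    return counts / counts.sum()

def shannon_entropy(pmf):
    # H = -sum(p * log p), skipping empty bins where log is undefined
    pmf = np.asarray(pmf)
    pmf = pmf[pmf > 0]
    return -np.sum(pmf * np.log(pmf))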
benchmarks/ProbabilityDistribution.py (new file, 73 lines)
@@ -0,0 +1,73 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet, SortedCollection


class ProbabilityDistribution(object):
    def __init__(self, name, nbins, uod, bins=None, labels=None, data=None):
        self.name = name
        self.nbins = nbins
        self.uod = uod
        if bins is None:
            #range = (uod[1] - uod[0])/nbins
            #self.bins = np.arange(uod[0],uod[1],range).tolist()
            self.bins = np.linspace(uod[0], uod[1], nbins).tolist()
            self.labels = [str(k) for k in self.bins]
        else:
            self.bins = bins
            self.labels = labels

        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
        self.distribution = {}
        self.count = 0
        for k in self.bins: self.distribution[k] = 0

        if data is not None: self.append(data)

    def append(self, values):
        for k in values:
            v = self.index.find_ge(k)
            self.distribution[v] += 1
            self.count += 1

    def density(self, values):
        ret = []
        for k in values:
            v = self.index.find_ge(k)
            ret.append(self.distribution[v] / self.count)
        return ret

    def entropy(self):
        h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
                  for k in self.bins])
        return h

    def empiricalloglikelihood(self):
        _s = 0
        for k in self.bins:
            if self.distribution[k] > 0:
                _s += np.log(self.distribution[k])
        return _s

    def pseudologlikelihood(self, data):

        densities = self.density(data)

        _s = 0
        for k in densities:
            if k > 0:
                _s += np.log(k)
        return _s

    def plot(self, axis=None, color="black", tam=[10, 6]):
        if axis is None:
            fig = plt.figure(figsize=tam)
            axis = fig.add_subplot(111)

        ys = [self.distribution[k] / self.count for k in self.bins]

        axis.plot(self.bins, ys, c=color, label=self.name)

        axis.set_xlabel('Universe of Discourse')
        axis.set_ylabel('Probability')
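A minimal usage sketch of the new class, mirroring how the benchmark code below constructs it (the Gaussian sample is illustrative, not part of the commit):

import numpy as np
from pyFTS.benchmarks import ProbabilityDistribution

data = np.random.normal(0, 1, 1000).tolist()

# 100 bins spanning the universe of discourse [min, max], as in the benchmarks below
pmf = ProbabilityDistribution.ProbabilityDistribution(
    "Original", 100, [min(data), max(data)], data=data)

print(pmf.density([0.0, 0.5]))  # relative frequency of the bin covering each value
print(pmf.entropy())            # computed over raw bin counts, not normalized probabilities
pmf.plot()                      # line plot of bin probabilities

Note that append() assigns each value to the smallest bin greater than or equal to it via SortedCollection.find_ge, so a value above the last bin would raise ValueError.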
@@ -9,7 +9,7 @@ import matplotlib.cm as cmx
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# from sklearn.cross_validation import KFold
from pyFTS.benchmarks import Measures, naive, ResidualAnalysis
from pyFTS.benchmarks import Measures, naive, ResidualAnalysis, ProbabilityDistribution
from pyFTS.partitioners import Grid
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts
@@ -24,7 +24,8 @@ styles = ['-','--','-.',':','.']
nsty = len(styles)

def allPointForecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True,
                        series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None):
                        series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None,
                        distributions=False):

    if models is None:
        models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
@@ -58,7 +59,7 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
            mfts.appendTransformation(transformation)
            mfts.train(data_train, data_train_fs, order=order)
            objs.append(mfts)
            lcolors.append(colors[count % ncol])
            lcolors.append(colors[(count + order) % ncol])

    if statistics:
        print(getPointStatistics(data_test, objs))
@@ -71,6 +72,21 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
        plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
                           intervals=False)

    if distributions:
        lcolors.insert(0, 'black')
        pmfs = []
        pmfs.append(
            ProbabilityDistribution.ProbabilityDistribution("Original", 100, [min(data_train), max(data_train)], data=data_train))

        for m in objs:
            forecasts = m.forecast(data_train)
            pmfs.append(
                ProbabilityDistribution.ProbabilityDistribution(m.shortname, 100, [min(data_train), max(data_train)],
                                                                data=forecasts))
        print(getProbabilityDistributionStatistics(pmfs, data_train))

        plotProbabilityDistributions(pmfs, lcolors)


def getPointStatistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
    ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
@@ -108,6 +124,17 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts =
    return ret


def getProbabilityDistributionStatistics(pmfs, data):
    ret = "Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\\\ \n"
    for k in pmfs:
        ret += k.name + " & "
        ret += str(k.entropy()) + " & "
        ret += str(k.empiricalloglikelihood()) + " & "
        ret += str(k.pseudologlikelihood(data))
        ret += " \\\\ \n"
    return ret
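For intuition, a toy check of the three report columns; the four-point sample is illustrative, and the expected values follow from the class definition above:

from pyFTS.benchmarks import ProbabilityDistribution

# two bins at 0.0 and 1.0, each collecting two of the four sample points
toy = ProbabilityDistribution.ProbabilityDistribution(
    "toy", 2, [0.0, 1.0], data=[-0.5, 0.0, 0.5, 1.0])

print(toy.entropy())                    # -(2*ln 2 + 2*ln 2) ≈ -2.77; raw counts, so it can go negative
print(toy.empiricalloglikelihood())     # ln 2 + ln 2 ≈ 1.39
print(toy.pseudologlikelihood([0.25]))  # ln(2/4) ≈ -0.69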


def allIntervalForecasters(data_train, data_test, partitions, max_order=3, save=False, file=None, tam=[20, 5],
                           models=None, transformation=None):
    if models is None:
@@ -215,6 +242,18 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,

    Util.showAndSaveImage(fig, file, save, lgd=legends)


def plotProbabilityDistributions(pmfs, lcolors):
    fig = plt.figure(figsize=[15, 7])
    ax = fig.add_subplot(111)

    for k, m in enumerate(pmfs, start=0):
        m.plot(ax, color=lcolors[k])

    handles0, labels0 = ax.get_legend_handles_labels()
    ax.legend(handles0, labels0)


def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution=None, max_order=3, save=False, file=None, tam=[20, 5],
                        models=None, transformation=None, option=2):
    if models is None:
@@ -786,3 +825,4 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None):
    plt.tight_layout()

    Util.showAndSaveImage(fig, file, save)
fts.py (14 lines changed)
@@ -115,7 +115,7 @@ class FTS(object):
        for child in node.getChildren():
            self.buildTreeWithoutOrder(child, lags, level + 1)

    def generate_data(self, bins=100):
    def inputoutputmapping(self, bins=100):

        dim_uod = tuple([bins for k in range(0, self.order)])

@@ -136,8 +136,6 @@ class FTS(object):
        for k in self.sets:
            pdf_fs[k.name] = 0

        index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)

        lags = {}

        for o in np.arange(0, self.order):
@@ -165,15 +163,7 @@ class FTS(object):

            simulation_fs[index_fs] = forecast


        pdf_fs = Measures.pdf_fuzzysets(np.ravel(simulation_fs), self.sets)

        pdf_uod = Measures.pdf(np.ravel(simulation_fs), bins=bins)

        #tmp_pdf_fs = pd.DataFrame( [[pdf_fs[k] for k in sorted(pdf_fs)]], columns=[k for k in sorted(pdf_fs)])
        #tmp_pdf_uod = pd.DataFrame([[pdf_uod[k] for k in sorted(pdf_uod)]], columns=[k for k in sorted(pdf_uod)])

        return [pdf_fs, pdf_uod, simulation_fs, simulation_uod]
        return [simulation_fs, simulation_uod]
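With generate_data renamed to inputoutputmapping and the precomputed pdfs dropped from the return value, callers now build distributions from the raw simulations themselves. A sketch under the assumption that model is a trained FTS instance (note that the test script in the next hunk still calls the old generate_data name):

import numpy as np
from pyFTS.benchmarks import ProbabilityDistribution

sim_fs, sim_uod = model.inputoutputmapping(bins=10)  # model: trained FTS instance, assumed
flat = np.ravel(sim_uod)
pmf = ProbabilityDistribution.ProbabilityDistribution(
    "simulated", 10, [min(flat), max(flat)], data=flat)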
@@ -39,12 +39,12 @@ fs = Grid.GridPartitionerTrimf(gauss_treino,10)

#tmp = chen.ConventionalFTS("")

pfts1 = pfts.ProbabilisticFTS("1")
#pfts1 = pfts.ProbabilisticFTS("1")
#pfts1.appendTransformation(diff)
pfts1.train(gauss_treino,fs,2)
#pfts2 = pfts.ProbabilisticFTS("n = 2")
pfts1.train(gauss_treino,fs,1)
pfts2 = pfts.ProbabilisticFTS("n = 2")
#pfts2.appendTransformation(diff)
#pfts2.train(gauss_treino,fs,2)
pfts2.train(gauss_treino,fs,2)

#pfts3 = pfts.ProbabilisticFTS("n = 3")
#pfts3.appendTransformation(diff)
@@ -54,20 +54,6 @@ pfts1.train(gauss_treino,fs,2)

#print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))

sim_fs, sim_uod = pfts1.generate_data(bins=10)

#print(pdf_fs)

#print(sim_fs)

print(sim_uod)

print(np.ravel(sim_uod))

#print(sim_uod)

#print(Measures.pdf(gauss_treino,bins=10))