Commit cb12810e0a: Probability distributions
Parent: 8787d749ff
@@ -145,44 +145,3 @@ def crps(targets, densities):
        _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
    return _crps / float(l * n)
-
-
-def pdf(data, bins=100):
-    _mx = max(data)
-    _mn = min(data)
-    _pdf = {}
-    percentiles = np.linspace(_mn, _mx, bins).tolist()
-
-    print (percentiles)
-
-    index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)
-
-    for k in percentiles: _pdf[k] = 0
-
-    for k in data:
-        v = index_percentiles.find_ge(k)
-        _pdf[v] += 1
-
-    norm = sum(list(_pdf.values()))
-    for k in _pdf: _pdf[k] /= norm
-
-    return _pdf
-
-
-def pdf_fuzzysets(data,sets):
-    _pdf = {}
-    for k in sets: _pdf[k.name] = 0
-    for k in data:
-        memberships = FuzzySet.fuzzyInstance(k, sets)
-        for c, fs in enumerate(sets, start=0):
-            _pdf[fs.name] += memberships[c]
-
-    norm = sum(list(_pdf.values()))
-    for k in _pdf: _pdf[k] /= norm
-
-    return _pdf
-
-
-def entropy(pdf):
-    h = -sum([pdf[k] * np.log(pdf[k]) for k in pdf])
-    return h
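Note on the retained crps context lines: each step adds the squared gap between two discretized CDFs, Ff (forecast) and Fa (actual), per density column, and the return value normalizes by l * n. As a hedged LaTeX sketch (symbols lifted from the code; the CDF reading is an assumption, not stated in the commit):

    \mathrm{CRPS} \approx \frac{1}{l\,n} \sum_{k} \sum_{col} \left( F_f^{col}(k) - F_a^{col}(k) \right)^2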
benchmarks/ProbabilityDistribution.py (new file, 73 lines)
@@ -0,0 +1,73 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from pyFTS.common import FuzzySet,SortedCollection
+
+
+class ProbabilityDistribution(object):
+    def __init__(self,name,nbins,uod,bins=None,labels=None, data=None):
+        self.name = name
+        self.nbins = nbins
+        self.uod = uod
+        if bins is None:
+            #range = (uod[1] - uod[0])/nbins
+            #self.bins = np.arange(uod[0],uod[1],range).tolist()
+            self.bins = np.linspace(uod[0], uod[1], nbins).tolist()
+            self.labels = [str(k) for k in self.bins]
+        else:
+            self.bins = bins
+            self.labels = labels
+
+        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        self.distribution = {}
+        self.count = 0
+        for k in self.bins: self.distribution[k] = 0
+
+        if data is not None: self.append(data)
+
+    def append(self, values):
+        for k in values:
+            v = self.index.find_ge(k)
+            self.distribution[v] += 1
+            self.count += 1
+
+    def density(self, values):
+        ret = []
+        for k in values:
+            v = self.index.find_ge(k)
+            ret.append(self.distribution[v] / self.count)
+        return ret
+
+    def entropy(self):
+        h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
+                  for k in self.bins])
+        return h
+
+    def empiricalloglikelihood(self):
+        _s = 0
+        for k in self.bins:
+            if self.distribution[k] > 0:
+                _s += np.log(self.distribution[k])
+        return _s
+
+    def pseudologlikelihood(self, data):
+        densities = self.density(data)
+
+        _s = 0
+        for k in densities:
+            if k > 0:
+                _s += np.log(k)
+        return _s
+
+    def plot(self,axis=None,color="black",tam=[10, 6]):
+        if axis is None:
+            fig = plt.figure(figsize=tam)
+            axis = fig.add_subplot(111)
+
+        ys = [self.distribution[k]/self.count for k in self.bins]
+
+        axis.plot(self.bins, ys,c=color, label=self.name)
+
+        axis.set_xlabel('Universe of Discourse')
+        axis.set_ylabel('Probability')
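A minimal usage sketch of the new class, assuming only what the commit shows; the Gaussian sample and the variable names are illustrative, not part of the commit:

    import numpy as np
    from pyFTS.benchmarks import ProbabilityDistribution

    data = np.random.normal(0, 1, 1000).tolist()  # illustrative data

    # Histogram over 100 bins spanning the empirical universe of discourse;
    # append() counts each value into the first bin edge >= that value.
    pmf = ProbabilityDistribution.ProbabilityDistribution(
        "Original", 100, [min(data), max(data)], data=data)

    print(pmf.density([0.0]))            # relative frequency of the bin covering 0.0
    print(pmf.entropy())                 # caveat below
    print(pmf.empiricalloglikelihood())

Two behaviors worth flagging: entropy() sums distribution[k] * log(distribution[k]) over raw bin counts rather than over the normalized frequencies distribution[k] / count, so it is not the Shannon entropy of the plotted curve; and index.find_ge() raises ValueError for values above the largest bin, so density() fails on out-of-range inputs.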
@@ -9,7 +9,7 @@ import matplotlib.cm as cmx
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 # from sklearn.cross_validation import KFold
-from pyFTS.benchmarks import Measures, naive, ResidualAnalysis
+from pyFTS.benchmarks import Measures, naive, ResidualAnalysis, ProbabilityDistribution
 from pyFTS.partitioners import Grid
 from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
 from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts
@@ -24,7 +24,8 @@ styles = ['-','--','-.',':','.']
 nsty = len(styles)
 
 def allPointForecasters(data_train, data_test, partitions, max_order=3, statistics=True, residuals=True,
-                        series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None):
+                        series=True, save=False, file=None, tam=[20, 5], models=None, transformation=None,
+                        distributions=False):
     if models is None:
         models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
@@ -58,7 +59,7 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
            mfts.appendTransformation(transformation)
            mfts.train(data_train, data_train_fs, order=order)
            objs.append(mfts)
-           lcolors.append(colors[count % ncol])
+           lcolors.append(colors[(count + order) % ncol])
 
     if statistics:
         print(getPointStatistics(data_test, objs))
@@ -71,6 +72,21 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3, statisti
         plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam,
                            intervals=False)
 
+    if distributions:
+        lcolors.insert(0,'black')
+        pmfs = []
+        pmfs.append(
+            ProbabilityDistribution.ProbabilityDistribution("Original", 100, [min(data_train), max(data_train)], data=data_train) )
+
+        for m in objs:
+            forecasts = m.forecast(data_train)
+            pmfs.append(
+                ProbabilityDistribution.ProbabilityDistribution(m.shortname, 100, [min(data_train), max(data_train)],
+                                                                data=forecasts))
+        print(getProbabilityDistributionStatistics(pmfs,data_train))
+
+        plotProbabilityDistributions(pmfs, lcolors)
 
 def getPointStatistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
     ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
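A hedged sketch of how the new distributions flag would be driven, assuming the enclosing module is pyFTS's benchmarks module (imported as bchmk, as this commit's test script does); data_train and data_test are illustrative placeholders:

    from pyFTS.benchmarks import benchmarks as bchmk

    # With distributions=True, the function additionally builds one
    # ProbabilityDistribution per model from its in-sample forecasts, plus an
    # "Original" pmf from the raw training data, prints the entropy/likelihood
    # table, and overlays all the pmf plots in a single figure.
    bchmk.allPointForecasters(data_train, data_test, partitions=10,
                              statistics=True, residuals=False, series=False,
                              distributions=True)

Note that lcolors.insert(0,'black') keeps the color list aligned with pmfs: the "Original" distribution is prepended, so it takes the first color and each model keeps the color assigned during training.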
@@ -108,6 +124,17 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts =
     return ret
 
 
+def getProbabilityDistributionStatistics(pmfs, data):
+    ret = "Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\\\ \n"
+    for k in pmfs:
+        ret += k.name + " & "
+        ret += str(k.entropy()) + " & "
+        ret += str(k.empiricalloglikelihood())+ " & "
+        ret += str(k.pseudologlikelihood(data))
+        ret += " \\\\ \n"
+    return ret
+
+
 def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
                            models=None, transformation=None):
     if models is None:
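getProbabilityDistributionStatistics emits LaTeX tabular rows, mirroring getPointStatistics above. Wrapped in a tabular environment (the wrapper is an assumption; the commit produces only the rows), the output shape would be:

    \begin{tabular}{lrrr}
    Model & Entropy & Empirical Likelihood & Pseudo Likelihood \\
    Original & ... & ... & ... \\
    <model.shortname> & ... & ... & ... \\
    \end{tabular}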
@@ -215,6 +242,18 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,
 
     Util.showAndSaveImage(fig, file, save, lgd=legends)
 
 
+def plotProbabilityDistributions(pmfs,lcolors):
+    fig = plt.figure(figsize=[15, 7])
+    ax = fig.add_subplot(111)
+
+    for k,m in enumerate(pmfs,start=0):
+        m.plot(ax, color=lcolors[k])
+
+    handles0, labels0 = ax.get_legend_handles_labels()
+    ax.legend(handles0, labels0)
+
+
 def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
                         models=None, transformation=None, option=2):
     if models is None:
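A minimal sketch of the plotting helper in isolation, with hypothetical, already-fitted distributions:

    # pmf_original and pmf_forecast are hypothetical ProbabilityDistribution
    # instances; each pmf draws one labeled curve, colored positionally.
    plotProbabilityDistributions([pmf_original, pmf_forecast], ['black', 'blue'])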
@@ -786,3 +825,4 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None):
     plt.tight_layout()
 
     Util.showAndSaveImage(fig, file, save)
+
fts.py (14 changed lines)
@@ -115,7 +115,7 @@ class FTS(object):
             for child in node.getChildren():
                 self.buildTreeWithoutOrder(child, lags, level + 1)
 
-    def generate_data(self,bins=100):
+    def inputoutputmapping(self,bins=100):
 
         dim_uod = tuple([bins for k in range(0,self.order)])
 
@@ -136,8 +136,6 @@ class FTS(object):
         for k in self.sets:
             pdf_fs[k.name] = 0
 
-        index_percentiles = SortedCollection.SortedCollection(iterable=percentiles)
-
         lags = {}
 
         for o in np.arange(0, self.order):
@@ -165,15 +163,7 @@ class FTS(object):
 
             simulation_fs[index_fs] = forecast
 
-        pdf_fs = Measures.pdf_fuzzysets(np.ravel(simulation_fs),self.sets)
-
-        pdf_uod = Measures.pdf(np.ravel(simulation_fs), bins=bins)
-
-        #tmp_pdf_fs = pd.DataFrame( [[pdf_fs[k] for k in sorted(pdf_fs)]], columns=[k for k in sorted(pdf_fs)])
-        #tmp_pdf_uod = pd.DataFrame([[pdf_uod[k] for k in sorted(pdf_uod)]], columns=[k for k in sorted(pdf_uod)])
-
-        return [pdf_fs, pdf_uod, simulation_fs, simulation_uod ]
+        return [simulation_fs, simulation_uod ]
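With the rename, the generate_data call removed from the test script below would, if kept, read roughly as follows (pfts1 being a trained ProbabilisticFTS, as in that script):

    # The renamed method now returns only the two simulation grids; the pdf
    # summaries formerly built via Measures.pdf / Measures.pdf_fuzzysets were dropped.
    sim_fs, sim_uod = pfts1.inputoutputmapping(bins=10)
    print(np.ravel(sim_uod))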
@@ -39,12 +39,12 @@ fs = Grid.GridPartitionerTrimf(gauss_treino,10)
 
 #tmp = chen.ConventionalFTS("")
 
-pfts1 = pfts.ProbabilisticFTS("1")
+#pfts1 = pfts.ProbabilisticFTS("1")
 #pfts1.appendTransformation(diff)
-pfts1.train(gauss_treino,fs,2)
+pfts1.train(gauss_treino,fs,1)
-#pfts2 = pfts.ProbabilisticFTS("n = 2")
+pfts2 = pfts.ProbabilisticFTS("n = 2")
 #pfts2.appendTransformation(diff)
-#pfts2.train(gauss_treino,fs,2)
+pfts2.train(gauss_treino,fs,2)
 
 #pfts3 = pfts.ProbabilisticFTS("n = 3")
 #pfts3.appendTransformation(diff)
@@ -54,20 +54,6 @@ pfts1.train(gauss_treino,fs,2)
 
 #print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
 
-sim_fs, sim_uod =pfts1.generate_data(bins=10)
-
-#print(pdf_fs)
-
-#print(sim_fs)
-
-print(sim_uod)
-
-print(np.ravel(sim_uod))
-
-#print(sim_uod)
-
-#print (Measures.pdf(gauss_treino,bins=10))