Improving ProbabilityDistribution indexes

This commit is contained in:
Petrônio Cândido 2019-05-27 16:01:03 -03:00
parent 8e4b9b4121
commit e3f5faee2b

View File

@ -1,7 +1,8 @@
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet,SortedCollection,tree from pyFTS.common import FuzzySet
from scipy.spatial import KDTree
from pyFTS.probabilistic import kde from pyFTS.probabilistic import kde
@ -41,13 +42,13 @@ class ProbabilityDistribution(object):
self.nbins = kwargs.get("num_bins", 100) self.nbins = kwargs.get("num_bins", 100)
if self.bins is None: if self.bins is None:
self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist() self.bins = np.linspace(self.uod[0], self.uod[1], int(self.nbins)).tolist()
self.labels = [str(k) for k in self.bins] self.labels = [str(k) for k in self.bins]
if self.uod is not None: if self.uod is not None:
self.resolution = (self.uod[1] - self.uod[0]) / self.nbins self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins)) self.bin_index = KDTree(self.bins)
self.quantile_index = None self.quantile_index = None
self.distribution = {} self.distribution = {}
self.cdf = None self.cdf = None
@ -67,7 +68,8 @@ class ProbabilityDistribution(object):
:param value: A value in the universe of discourse from the distribution :param value: A value in the universe of discourse from the distribution
:param density: The probability density to assign to the value :param density: The probability density to assign to the value
""" """
k = self.bin_index.find_ge(value) _, ix = self.bin_index.query([value], 1)
k = self.labels[ix]
self.distribution[k] = density self.distribution[k] = density
def append(self, values): def append(self, values):
@ -78,7 +80,8 @@ class ProbabilityDistribution(object):
""" """
if self.type == "histogram": if self.type == "histogram":
for k in values: for k in values:
v = self.bin_index.find_ge(k) _, ix = self.bin_index.query([k], 1)
v = self.labels[ix]
self.distribution[v] += 1 self.distribution[v] += 1
self.count += 1 self.count += 1
else: else:
@ -96,8 +99,12 @@ class ProbabilityDistribution(object):
""" """
if self.type == "histogram": if self.type == "histogram":
for interval in intervals: for interval in intervals:
for k in self.bin_index.inside(interval[0], interval[1]): _, ix1 = self.bin_index.query([interval[0]], 1)
self.distribution[k] += 1 _, ix2 = self.bin_index.query([interval[1]], 1)
for k in range(ix1, ix2):
v = self.labels[k]
self.distribution[v] += 1
self.count += 1 self.count += 1
def density(self, values): def density(self, values):
@ -116,13 +123,15 @@ class ProbabilityDistribution(object):
for k in values: for k in values:
if self.type == "histogram": if self.type == "histogram":
v = self.bin_index.find_ge(k) _, ix = self.bin_index.query([k], 1)
v = self.labels[ix]
ret.append(self.distribution[v] / (self.count + 1e-5)) ret.append(self.distribution[v] / (self.count + 1e-5))
elif self.type == "KDE": elif self.type == "KDE":
v = self.kde.probability(k, self.data) v = self.kde.probability(k, self.data)
ret.append(v) ret.append(v)
else: else:
v = self.bin_index.find_ge(k) _, ix = self.bin_index.query([k], 1)
v = self.labels[ix]
ret.append(self.distribution[v]) ret.append(self.distribution[v])
if scalar: if scalar:
@ -149,7 +158,7 @@ class ProbabilityDistribution(object):
self.distribution = dist self.distribution = dist
self.labels = [str(k) for k in self.bins] self.labels = [str(k) for k in self.bins]
self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins)) self.bin_index = KDTree(self.bins)
self.quantile_index = None self.quantile_index = None
self.cdf = None self.cdf = None
self.qtl = None self.qtl = None
@ -178,7 +187,7 @@ class ProbabilityDistribution(object):
_keys = [float(k) for k in sorted(self.qtl.keys())] _keys = [float(k) for k in sorted(self.qtl.keys())]
self.quantile_index = SortedCollection.SortedCollection(iterable=_keys) self.quantile_index = KDTree(_keys)
def cumulative(self, values): def cumulative(self, values):
""" """
@ -194,10 +203,12 @@ class ProbabilityDistribution(object):
if isinstance(values, list): if isinstance(values, list):
ret = [] ret = []
for val in values: for val in values:
k = self.bin_index.find_ge(val) _, ix = self.bin_index.query([val], 1)
k = self.labels[ix]
ret.append(self.cdf[k]) ret.append(self.cdf[k])
else: else:
k = self.bin_index.find_ge(values) _, ix = self.bin_index.query([values], 1)
k = self.labels[ix]
return self.cdf[values] return self.cdf[values]
def quantile(self, values): def quantile(self, values):
@ -214,10 +225,12 @@ class ProbabilityDistribution(object):
if isinstance(values, list): if isinstance(values, list):
ret = [] ret = []
for val in values: for val in values:
k = self.quantile_index.find_ge(val) _, ix = self.quantile_index.query([val], 1)
k = self.labels[ix]
ret.append(self.qtl[str(k)][0]) ret.append(self.qtl[str(k)][0])
else: else:
k = self.quantile_index.find_ge(values) _, ix = self.quantile_index.query([values], 1)
k = self.labels[ix]
ret = self.qtl[str(k)] ret = self.qtl[str(k)]
return ret return ret