From 60785ac89f50abbe258b258c3d1f0ee46e08a94b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= <petronio.candido@gmail.com>
Date: Tue, 28 May 2019 14:32:06 -0300
Subject: [PATCH] Bugfixes in ProbabilityDistribution

---
 pyFTS/benchmarks/BSTS.py                      | 86 +++++++++++++++++++
 pyFTS/models/ensemble/ensemble.py             |  1 -
 .../probabilistic/ProbabilityDistribution.py  | 53 +++++-------
 pyFTS/probabilistic/kde.py                    | 22 +++--
 pyFTS/tests/general.py                        | 21 +----
 5 files changed, 126 insertions(+), 57 deletions(-)
 create mode 100644 pyFTS/benchmarks/BSTS.py

diff --git a/pyFTS/benchmarks/BSTS.py b/pyFTS/benchmarks/BSTS.py
new file mode 100644
index 0000000..acf1d9d
--- /dev/null
+++ b/pyFTS/benchmarks/BSTS.py
@@ -0,0 +1,86 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import numpy as np
+import pandas as pd
+import pyflux as pf
+import scipy.stats as st
+from pyFTS.common import SortedCollection, fts
+from pyFTS.probabilistic import ProbabilityDistribution
+
+
+class ARIMA(fts.FTS):
+    """
+    Façade for statsmodels.tsa.arima_model
+    """
+    def __init__(self, **kwargs):
+        super(ARIMA, self).__init__(**kwargs)
+        self.name = "BSTS"
+        self.detail = "Bayesian Structural Time Series"
+        self.is_high_order = True
+        self.has_point_forecasting = True
+        self.has_interval_forecasting = True
+        self.has_probability_forecasting = True
+        self.model = None
+        self.model_fit = None
+        self.trained_data = None
+        self.p = 1
+        self.d = 0
+        self.q = 0
+        self.benchmark_only = True
+        self.min_order = 1
+        self.alpha = kwargs.get("alpha", 0.05)
+        self.order = kwargs.get("order", (1,0,0))
+        self._decompose_order(self.order)
+        self.model = None
+
+    def _decompose_order(self, order):
+        if isinstance(order, (tuple, set, list)):
+            self.p = order[0]
+            self.d = order[1]
+            self.q = order[2]
+            self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0)
+            self.max_lag = self.order
+            self.d = len(self.transformations)
+            self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
+
+    def train(self, data, **kwargs):
+
+        if 'order' in kwargs:
+            order = kwargs.pop('order')
+            self._decompose_order(order)
+
+        if self.indexer is not None:
+            data = self.indexer.get_data(data)
+
+        try:
+            self.model =  pf.ARIMA(data=data, ar=self.p, ma=self.q, integ=self.d, family=pf.Normal())
+            self.model_fit = self.model.fit('M-H', nsims=20000)
+        except Exception as ex:
+            print(ex)
+            self.model_fit = None
+
+    def forecast(self, ndata, **kwargs):
+
+        return ret
+
+    def forecast_interval(self, data, **kwargs):
+
+
+
+        return ret
+
+    def forecast_ahead_interval(self, ndata, steps, **kwargs):
+
+        return ret
+
+    def forecast_distribution(self, data, **kwargs):
+
+
+        return ret
+
+
+    def forecast_ahead_distribution(self, data, steps, **kwargs):
+
+
+        return ret
\ No newline at end of file
diff --git a/pyFTS/models/ensemble/ensemble.py b/pyFTS/models/ensemble/ensemble.py
index ea72920..c3188a2 100644
--- a/pyFTS/models/ensemble/ensemble.py
+++ b/pyFTS/models/ensemble/ensemble.py
@@ -239,7 +239,6 @@ class EnsembleFTS(fts.FTS):
 
         return ret
 
-
     def forecast_ahead_distribution(self, data, steps, **kwargs):
         if 'method' in kwargs:
             self.point_method = kwargs.get('method','mean')
diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py
index b8a249d..9a855cd 100644
--- a/pyFTS/probabilistic/ProbabilityDistribution.py
+++ b/pyFTS/probabilistic/ProbabilityDistribution.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
-from pyFTS.common import FuzzySet
-from scipy.spatial import KDTree
+from pyFTS.common import FuzzySet,SortedCollection,tree
 from pyFTS.probabilistic import kde
 
 
@@ -32,7 +31,9 @@ class ProbabilityDistribution(object):
         data = kwargs.get("data", None)
 
         if self.type == "KDE":
-            self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
+            self.kde = kde.KernelSmoothing(h=kwargs.get("h", 0.5), kernel=kwargs.get("kernel", "epanechnikov"))
+
+        if self.data is not None:
             _min = np.nanmin(data)
             _min = _min * .7 if _min > 0 else _min * 1.3
             _max = np.nanmax(data)
@@ -42,13 +43,13 @@ class ProbabilityDistribution(object):
         self.nbins = kwargs.get("num_bins", 100)
 
         if self.bins is None:
-            self.bins = np.linspace(self.uod[0], self.uod[1], int(self.nbins)).tolist()
+            self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
             self.labels = [str(k) for k in self.bins]
 
         if self.uod is not None:
             self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
 
-        self.bin_index = KDTree(self.bins)
+        self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
         self.quantile_index = None
         self.distribution = {}
         self.cdf = None
@@ -68,8 +69,7 @@ class ProbabilityDistribution(object):
         :param value: A value in the universe of discourse from the distribution
         :param density: The probability density to assign to the value
         """
-        _, ix = self.bin_index.query([value], 1)
-        k = self.labels[ix]
+        k = self.bin_index.find_ge(value)
         self.distribution[k] = density
 
     def append(self, values):
@@ -80,8 +80,7 @@ class ProbabilityDistribution(object):
         """
         if self.type == "histogram":
             for k in values:
-                _, ix = self.bin_index.query([k], 1)
-                v = self.labels[ix]
+                v = self.bin_index.find_ge(k)
                 self.distribution[v] += 1
                 self.count += 1
         else:
@@ -99,12 +98,8 @@ class ProbabilityDistribution(object):
         """
         if self.type == "histogram":
             for interval in intervals:
-                _, ix1 = self.bin_index.query([interval[0]], 1)
-                _, ix2 = self.bin_index.query([interval[1]], 1)
-
-                for k in range(ix1, ix2):
-                    v = self.labels[k]
-                    self.distribution[v] += 1
+                for k in self.bin_index.inside(interval[0], interval[1]):
+                    self.distribution[k] += 1
                     self.count += 1
 
     def density(self, values):
@@ -123,15 +118,13 @@ class ProbabilityDistribution(object):
 
         for k in values:
             if self.type == "histogram":
-                _, ix = self.bin_index.query([k], 1)
-                v = self.labels[ix]
+                v = self.bin_index.find_ge(k)
                 ret.append(self.distribution[v] / (self.count + 1e-5))
             elif self.type == "KDE":
-                v = self.kde.probability(k, self.data)
+                v = self.kde.probability(k, data=self.data)
                 ret.append(v)
             else:
-                _, ix = self.bin_index.query([k], 1)
-                v = self.labels[ix]
+                v = self.bin_index.find_ge(k)
                 ret.append(self.distribution[v])
 
         if scalar:
@@ -158,7 +151,7 @@ class ProbabilityDistribution(object):
         self.distribution = dist
         self.labels = [str(k) for k in self.bins]
 
-        self.bin_index = KDTree(self.bins)
+        self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
         self.quantile_index = None
         self.cdf = None
         self.qtl = None
@@ -187,11 +180,11 @@ class ProbabilityDistribution(object):
 
         _keys = [float(k) for k in sorted(self.qtl.keys())]
 
-        self.quantile_index = KDTree(_keys)
+        self.quantile_index = SortedCollection.SortedCollection(iterable=_keys)
 
     def cumulative(self, values):
         """
-        Return the cumulative probability densities for the input values, 
+        Return the cumulative probability densities for the input values,
         such that F(x) = P(X <= x)
 
         :param values: A list of input values
@@ -203,17 +196,15 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                _, ix = self.bin_index.query([val], 1)
-                k = self.labels[ix]
+                k = self.bin_index.find_ge(val)
                 ret.append(self.cdf[k])
         else:
-            _, ix = self.bin_index.query([values], 1)
-            k = self.labels[ix]
+            k = self.bin_index.find_ge(values)
             return self.cdf[values]
 
     def quantile(self, values):
         """
-        Return the Universe of Discourse values in relation to the quantile input values, 
+        Return the Universe of Discourse values in relation to the quantile input values,
         such that Q(tau) = min( {x | F(x) >= tau })
 
         :param values: input values
@@ -225,12 +216,10 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                _, ix = self.quantile_index.query([val], 1)
-                k = self.labels[ix]
+                k = self.quantile_index.find_ge(val)
                 ret.append(self.qtl[str(k)][0])
         else:
-            _, ix = self.quantile_index.query([values], 1)
-            k = self.labels[ix]
+            k = self.quantile_index.find_ge(values)
             ret = self.qtl[str(k)]
 
         return ret
diff --git a/pyFTS/probabilistic/kde.py b/pyFTS/probabilistic/kde.py
index b5c668f..a77b9a2 100644
--- a/pyFTS/probabilistic/kde.py
+++ b/pyFTS/probabilistic/kde.py
@@ -10,13 +10,17 @@ import numpy as np
 
 class KernelSmoothing(object):
     """Kernel Density Estimation"""
-    def __init__(self,h, kernel="epanechnikov"):
-        self.h = h
+    def __init__(self, **kwargs):
+        self.h = kwargs.get('h',.5)
         """Width parameter"""
-        self.kernel = kernel
+        self.kernel = kwargs.get("kernel", "epanechnikov")
         """Kernel function"""
+        self.data = kwargs.get("data",None)
         self.transf = Transformations.Scale(min=0,max=1)
 
+        if self.data is not None:
+            self.data = self.transf.apply(self.data)
+
     def kernel_function(self, u):
         """
         Apply the kernel
@@ -45,7 +49,7 @@ class KernelSmoothing(object):
         elif self.kernel == "exponential":
             return 0.5 * np.exp(-np.abs(u))
 
-    def probability(self, x, data):
+    def probability(self, x, **kwargs):
         """
         Probability of the point x on data
 
@@ -53,10 +57,14 @@ class KernelSmoothing(object):
         :param data:
         :return:
         """
-        l = len(data)
 
-        ndata = self.transf.apply(data)
+        if self.data is None:
+            self.data = kwargs.get('data',None)
+            self.data = self.transf.apply(self.data)
+
+        l = len(self.data)
+
         nx = self.transf.apply(x)
-        p = sum([self.kernel_function((nx - k)/self.h) for k in ndata]) / l*self.h
+        p = sum([self.kernel_function((nx - k)/self.h) for k in self.data]) / l*self.h
 
         return p
\ No newline at end of file
diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py
index f50df95..c6c197a 100644
--- a/pyFTS/tests/general.py
+++ b/pyFTS/tests/general.py
@@ -21,25 +21,12 @@ from pyFTS.data import Enrollments, TAIEX
 x = [k for k in np.arange(-2*np.pi, 2*np.pi, 0.15)]
 y = [np.sin(k) for k in x]
 
-from pyFTS.models import hofts
-from pyFTS.partitioners import Grid, FCM, CMeans, Entropy
-from pyFTS.benchmarks import Measures
+from pyFTS.probabilistic import ProbabilityDistribution
 
-metodos = [FCM.FCMPartitioner]
-
-k = 35
-
-rows = []
-
-for contador, metodo in enumerate(metodos):
-    print(metodo)
-    part = metodo(data=y, npart=k)
-    model = hofts.HighOrderFTS(order=2, partitioner=part)
-    model.fit(y)
-    forecasts = model.predict(y)
-    for o in range(model.order):
-        forecasts.insert(0, None)
+pd = ProbabilityDistribution.ProbabilityDistribution(type='histogram', data=y, num_bins=30)
 
+print(pd.quantile([.5]))
+print(pd.cdf)
 
 '''
 model = fts.FCM_FTS(partitioner=fs, order=1)