From 8f2d2c8bcd47ee61ddf0312b57ab7c7c521491cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?=
Date: Fri, 13 Jan 2017 21:42:00 -0200
Subject: [PATCH] Optimize forecastAheadDistribution into a more reliable
 greedy method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 benchmarks/Measures.py     |   3 +
 benchmarks/benchmarks.py   |  48 ++++++---
 common/SortedCollection.py | 211 +++++++++++++++++++++++++++++++++++++
 ifts.py                    |   3 +
 partitioners/Grid.py       |   2 -
 pfts.py                    | 129 +++++++++++++++++------
 tests/__init__.py          |   0
 tests/pfts.py              |  38 +++++++
 8 files changed, 386 insertions(+), 48 deletions(-)
 create mode 100644 common/SortedCollection.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/pfts.py

diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py
index 510af99..7343403 100644
--- a/benchmarks/Measures.py
+++ b/benchmarks/Measures.py
@@ -1,3 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
 import numpy as np
 import pandas as pd

diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index bc424a6..91df83f 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -1,14 +1,19 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
 import numpy as np
 import pandas as pd
 import matplotlib as plt
 import matplotlib.colors as pltcolors
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
-from sklearn.cross_validation import KFold
+#from sklearn.cross_validation import KFold
 from pyFTS.benchmarks import Measures
 from pyFTS.partitioners import Grid
 from pyFTS.common import Membership, FuzzySet, FLR, Transformations
+import time

+current_milli_time = lambda: int(round(time.time() * 1000))

 def getIntervalStatistics(original, models):
     ret = "Model & RMSE & MAPE & Sharpness & Resolution & Coverage \\ \n"
@@ -32,6 +37,15 @@ def plotDistribution(dist):
                vmin=0, vmax=1, edgecolors=None)


+def uniquefilename(name):
+    if '.' in name:
+        tmp = name.split('.')
+        return tmp[0] + str(current_milli_time()) + '.' + tmp[1]
+    else:
+        return name + str(current_milli_time())
+
+
 def plotComparedSeries(original, models, colors, typeonlegend=False, save=False, file=None,tam=[20, 5]):
     fig = plt.figure(figsize=tam)
     ax = fig.add_subplot(111)
@@ -76,15 +90,17 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,
     ax.set_xlim([0, len(original)])

     if save:
-        fig.savefig(file)
+        plt.show()
+        fig.savefig(uniquefilename(file))
         plt.close(fig)


-def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to, interpol=False, save=False, file=None,tam=[20, 5]):
+def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to,
+                               interpol=False, save=False, file=None,tam=[20, 5], resolution=None):
     fig = plt.figure(figsize=tam)
     ax = fig.add_subplot(111)

-    percentile = (max(original) - min(original))/100
+    if resolution is None: resolution = (max(original) - min(original))/100

     mi = []
     ma = []
@@ -92,26 +108,27 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
     count = 0
     for fts in models:
         if fts.hasDistributionForecasting and distributions[count]:
-            density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], time_to, percentile)
+            density = fts.forecastAheadDistribution2(original[time_from - fts.order:time_from], time_to, resolution)

             y = density.columns
             t = len(y)

             # interpol between time_from and time_from+1
-            if interpol:
-                diffs = [density[q][0] / 50 for q in density]
-                for p in np.arange(0, 50):
-                    xx = [(time_from - 1) + 0.02 * p for q in np.arange(0, t)]
-                    alpha2 = np.array([diffs[q] * p for q in np.arange(0, t)]) * 100
-                    ax.scatter(xx, y, c=alpha2, marker='s', linewidths=0, cmap='Oranges',
-                               norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)
+            #if interpol:
+            #    diffs = [density[q][0] / 50 for q in density]
+            #    for p in np.arange(0, 50):
+            #        xx = [(time_from - 1) + 0.02 * p for q in np.arange(0, t)]
+            #        alpha2 = np.array([diffs[q] * p for q in np.arange(0, t)]) * 100
+            #        ax.scatter(xx, y, c=alpha2, marker='s', linewidths=0, cmap='Oranges',
+            #                   norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)

             for k in density.index:
                 alpha = np.array([density[q][k] for q in density]) * 100

                 x = [time_from + k for x in np.arange(0, t)]
-                ax.scatter(x, y, c=alpha, marker='s', linewidths=0, cmap='Oranges',
-                           norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)
+                for cc in np.arange(0,resolution,5):
+                    ax.scatter(x, y+cc, c=alpha, marker='s', linewidths=0, cmap='Oranges',
+                               norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)

                 if interpol and k < max(density.index):
                     diffs = [(density[q][k + 1] - density[q][k])/50 for q in density]
                     for p in np.arange(0,50):
@@ -151,7 +168,8 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
     ax.set_xlim([0, len(original)])

     if save:
-        fig.savefig(file)
+        plt.show()
+        fig.savefig(uniquefilename(file))
         plt.close(fig)


diff --git a/common/SortedCollection.py b/common/SortedCollection.py
new file mode 100644
index 0000000..f2dd8de
--- /dev/null
+++ b/common/SortedCollection.py
@@ -0,0 +1,211 @@
+from bisect import bisect_left, bisect_right
+
+#
+# Original Source Code: https://code.activestate.com/recipes/577197-sortedcollection/
+# Author: RAYMOND HETTINGER
+
+class SortedCollection(object):
+    '''Sequence sorted by a key function.
+
+    SortedCollection() is much easier to work with than using bisect() directly.
+    It supports key functions like those used in sorted(), min(), and max().
+    The result of the key function call is saved so that keys can be searched
+    efficiently.
+
+    Instead of returning an insertion-point which can be hard to interpret, the
+    five find-methods return a specific item in the sequence. They can scan for
+    exact matches, the last item less-than-or-equal to a key, or the first item
+    greater-than-or-equal to a key.
+
+    Once found, an item's ordinal position can be located with the index() method.
+    New items can be added with the insert() and insert_right() methods.
+    Old items can be deleted with the remove() method.
+
+    The usual sequence methods are provided to support indexing, slicing,
+    length lookup, clearing, copying, forward and reverse iteration, contains
+    checking, item counts, item removal, and a nice looking repr.
+
+    Finding and indexing are O(log n) operations while iteration and insertion
+    are O(n). The initial sort is O(n log n).
+
+    The key function is stored in the 'key' attribute for easy introspection or
+    so that you can assign a new key function (triggering an automatic re-sort).
+
+    In short, the class was designed to handle all of the common use cases for
+    bisect but with a simpler API and support for key functions.
+
+    >>> from pprint import pprint
+    >>> from operator import itemgetter
+
+    >>> s = SortedCollection(key=itemgetter(2))
+    >>> for record in [
+    ...         ('roger', 'young', 30),
+    ...         ('angela', 'jones', 28),
+    ...         ('bill', 'smith', 22),
+    ...         ('david', 'thomas', 32)]:
+    ...     s.insert(record)
+
+    >>> pprint(list(s))         # show records sorted by age
+    [('bill', 'smith', 22),
+     ('angela', 'jones', 28),
+     ('roger', 'young', 30),
+     ('david', 'thomas', 32)]
+
+    >>> s.find_le(29)           # find oldest person aged 29 or younger
+    ('angela', 'jones', 28)
+    >>> s.find_lt(28)           # find oldest person under 28
+    ('bill', 'smith', 22)
+    >>> s.find_gt(28)           # find youngest person over 28
+    ('roger', 'young', 30)
+
+    >>> r = s.find_ge(32)       # find youngest person aged 32 or older
+    >>> s.index(r)              # get the index of their record
+    3
+    >>> s[3]                    # fetch the record at that index
+    ('david', 'thomas', 32)
+
+    >>> s.key = itemgetter(0)   # now sort by first name
+    >>> pprint(list(s))
+    [('angela', 'jones', 28),
+     ('bill', 'smith', 22),
+     ('david', 'thomas', 32),
+     ('roger', 'young', 30)]
+
+    '''
+
+    def __init__(self, iterable=(), key=None):
+        self._given_key = key
+        key = (lambda x: x) if key is None else key
+        decorated = sorted((key(item), item) for item in iterable)
+        self._keys = [k for k, item in decorated]
+        self._items = [item for k, item in decorated]
+        self._key = key
+
+    def _getkey(self):
+        return self._key
+
+    def _setkey(self, key):
+        if key is not self._key:
+            self.__init__(self._items, key=key)
+
+    def _delkey(self):
+        self._setkey(None)
+
+    key = property(_getkey, _setkey, _delkey, 'key function')
+
+    def clear(self):
+        self.__init__([], self._key)
+
+    def copy(self):
+        return self.__class__(self, self._key)
+
+    def __len__(self):
+        return len(self._items)
+
+    def __getitem__(self, i):
+        return self._items[i]
+
+    def __iter__(self):
+        return iter(self._items)
+
+    def __reversed__(self):
+        return reversed(self._items)
+
+    def __repr__(self):
+        return '%s(%r, key=%s)' % (
+            self.__class__.__name__,
+            self._items,
+            getattr(self._given_key, '__name__', repr(self._given_key))
+        )
+
+    def __reduce__(self):
+        return self.__class__, (self._items, self._given_key)
+
+    def __contains__(self, item):
+        k = self._key(item)
+        i = bisect_left(self._keys, k)
+        j = bisect_right(self._keys, k)
+        return item in self._items[i:j]
+
+    def index(self, item):
+        'Find the position of an item. Raise ValueError if not found.'
+        k = self._key(item)
+        i = bisect_left(self._keys, k)
+        j = bisect_right(self._keys, k)
+        return self._items[i:j].index(item) + i
+
+    def count(self, item):
+        'Return number of occurrences of item'
+        k = self._key(item)
+        i = bisect_left(self._keys, k)
+        j = bisect_right(self._keys, k)
+        return self._items[i:j].count(item)
+
+    def insert(self, item):
+        'Insert a new item. If equal keys are found, add to the left'
+        k = self._key(item)
+        i = bisect_left(self._keys, k)
+        self._keys.insert(i, k)
+        self._items.insert(i, item)
+
+    def insert_right(self, item):
+        'Insert a new item. If equal keys are found, add to the right'
+        k = self._key(item)
+        i = bisect_right(self._keys, k)
+        self._keys.insert(i, k)
+        self._items.insert(i, item)
+
+    def remove(self, item):
+        'Remove first occurrence of item. Raise ValueError if not found'
+        i = self.index(item)
+        del self._keys[i]
+        del self._items[i]
+
+    def find(self, k):
+        'Return first item with a key == k. Raise ValueError if not found.'
+        i = bisect_left(self._keys, k)
+        if i != len(self) and self._keys[i] == k:
+            return self._items[i]
+        raise ValueError('No item found with key equal to: %r' % (k,))
+
+    def find_le(self, k):
+        'Return last item with a key <= k. Raise ValueError if not found.'
+        i = bisect_right(self._keys, k)
+        if i:
+            return self._items[i-1]
+        raise ValueError('No item found with key at or below: %r' % (k,))
+
+    def find_lt(self, k):
+        'Return last item with a key < k. Raise ValueError if not found.'
+        i = bisect_left(self._keys, k)
+        if i:
+            return self._items[i-1]
+        raise ValueError('No item found with key below: %r' % (k,))
+
+    def find_ge(self, k):
+        'Return first item with a key >= k. Raise ValueError if not found'
+        i = bisect_left(self._keys, k)
+        if i != len(self):
+            return self._items[i]
+        raise ValueError('No item found with key at or above: %r' % (k,))
+
+    def find_gt(self, k):
+        'Return first item with a key > k. Raise ValueError if not found'
+        i = bisect_right(self._keys, k)
+        if i != len(self):
+            return self._items[i]
+        raise ValueError('No item found with key above: %r' % (k,))
+
+    def between(self, ge, le):
+        g = bisect_left(self._keys, ge)
+        l = bisect_right(self._keys, le)
+        if g != len(self) and l != len(self):
+            return self._items[g : l]
+        raise ValueError('No item found between keys: %r and %r' % (ge, le))
+
+    def inside(self, ge, le):
+        g = bisect_right(self._keys, ge)
+        l = bisect_left(self._keys, le)
+        if g != len(self) and l != len(self):
+            return self._items[g : l]
+        raise ValueError('No item found between keys: %r and %r' % (ge, le))
\ No newline at end of file
diff --git a/ifts.py b/ifts.py
index b6f98a4..bb257ae 100644
--- a/ifts.py
+++ b/ifts.py
@@ -1,3 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
 import numpy as np
 from pyFTS.common import FuzzySet,FLR
 from pyFTS import hofts, fts, tree
diff --git a/partitioners/Grid.py b/partitioners/Grid.py
index 91bd9af..70361b9 100644
--- a/partitioners/Grid.py
+++ b/partitioners/Grid.py
@@ -11,10 +11,8 @@ def GridPartitionerTrimf(data, npart, names=None, prefix="A"):
     sets = []
     dmax = max(data)
     dmax += dmax * 0.1
-    print(dmax)
     dmin = min(data)
     dmin -= dmin * 0.1
-    print(dmin)
     dlen = dmax - dmin
     partlen = math.ceil(dlen / npart)
     #p2 = partlen / 2
diff --git a/pfts.py b/pfts.py
index de95f58..40256b4 100644
--- a/pfts.py
+++ b/pfts.py
@@ -1,10 +1,15 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
 import numpy as np
 import pandas as pd
 import math
-from pyFTS.common import FuzzySet, FLR
+from operator import itemgetter
+from pyFTS.common import FuzzySet, FLR, SortedCollection
 from pyFTS import hofts, ifts, tree

+
 class ProbabilisticFLRG(hofts.HighOrderFLRG):
     def __init__(self, order):
         super(ProbabilisticFLRG, self).__init__(order)
@@ -173,9 +178,9 @@ class ProbabilisticFTS(ifts.IntervalFTS):
             idx = np.ravel(tmp)  # flatten the array

             if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                if math.ceil(ndata[k]) <= self.sets[0].lower:
+                if ndata[k] <= self.sets[0].lower:
                     idx = [0]
-                elif math.ceil(ndata[k]) >= self.sets[-1].upper:
+                elif ndata[k] >= self.sets[-1].upper:
                     idx = [len(self.sets) - 1]
                 else:
                     raise Exception(ndata[k])
@@ -349,53 +354,111 @@ class ProbabilisticFTS(ifts.IntervalFTS):
             grid[sbin] = grid[sbin] + 1
         return grid

-    def forecastDistributionAhead2(self, data, steps, resolution):
+    def gridCountIndexed(self, grid, resolution, index, interval):
+        #print(interval)
+        for k in index.inside(interval[0],interval[1]):
+            #print(k)
+            grid[k] += 1
+        return grid
+
+    def buildTreeWithoutOrder(self, node, lags, level):
+
+        if level not in lags:
+            return
+
+        for s in lags[level]:
+            node.appendChild(tree.FLRGTreeNode(s))
+
+        for child in node.getChildren():
+            self.buildTreeWithoutOrder(child, lags, level + 1)
+
+    def forecastAheadDistribution2(self, data, steps, resolution):
         ret = []

-        intervals = self.forecastAhead(data, steps)
+        intervals = self.forecastAheadInterval(data, steps)

-        for k in np.arange(self.order, steps):
+        lags = {}

-            grid = self.getGridClean(resolution)
-            grid = self.gridCount(grid, resolution, intervals[k])
+        cc = 0

-            lags = {}
+        for i in intervals:
+            nq = 2 * cc
+            if nq == 0: nq = 1
+            if nq > 50: nq = 50
+            st = 50 / nq

-            cc = 0
-            for x in np.arange(k - self.order, k):
-                tmp = []
-                for qt in np.arange(0, 100, 5):
-                    tmp.append(intervals[x][0] + qt * (intervals[x][1] - intervals[x][0]) / 100)
-                    tmp.append(intervals[x][1] - qt * (intervals[x][1] - intervals[x][0]) / 100)
-                tmp.append(intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2)
+            quantiles = []

-                lags[cc] = tmp
+            for qt in np.arange(0, 50, st):
+                quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100))
+                quantiles.append(i[0] - qt * ((i[1] - i[0]) / 100))
+            quantiles.append(i[0] + ((i[1] - i[0]) / 2))

-                cc = cc + 1
-            # Build the tree with all possible paths
+            quantiles = list(set(quantiles))

-            root = tree.FLRGTreeNode(None)
+            quantiles.sort()

-            self.buildTree(root, lags, 0)
+            #print(quantiles)

-            # Trace the possible paths and build the PFLRG's
+            lags[cc] = quantiles

-            for p in root.paths():
-                path = list(reversed(list(filter(None.__ne__, p))))
+            cc += 1

-                subset = [kk for kk in path]
+        # Build the tree with all possible paths

-                qtle = self.forecast(subset)
-                grid = self.gridCount(grid, resolution, np.ravel(qtle))
+        root = tree.FLRGTreeNode(None)

-            tmp = np.array([grid[k] for k in sorted(grid)])
+        self.buildTreeWithoutOrder(root, lags, 0)
+
+        #print(root)
+
+        #return
+
+        # Trace the possible paths and build the PFLRG's
+
+        grid = self.getGridClean(resolution)
+
+        ##index = SortedCollection.SortedCollection(key=lambda (k,v): itemgetter(1)(v))
+
+        index = SortedCollection.SortedCollection(iterable=grid.keys())
+
+        grids = []
+        for k in np.arange(0, steps):
+            grids.append(self.getGridClean(resolution))
+
+        for p in root.paths():
+            path = list(reversed(list(filter(None.__ne__, p))))
+
+            #print(path)
+
+            for k in np.arange(self.order, steps + self.order):
+
+                sample = path[k - self.order : k]
+
+                #print(sample)
+
+                qtle = self.forecastInterval(sample)
+
+                #grids[k - self.order] = self.gridCountPoints(grids[k - self.order], resolution, np.ravel(qtle))
+
+                # grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, np.ravel(qtle))
+
+                grids[k - self.order] = self.gridCountIndexed(grids[k - self.order], resolution, index, np.ravel(qtle))
+
+        #return
+
+        #print(grid)
+
+        for k in np.arange(0, steps):
+            tmp = np.array([grids[k][q] for q in sorted(grids[k])])
             ret.append(tmp / sum(tmp))

         grid = self.getGridClean(resolution)
         df = pd.DataFrame(ret, columns=sorted(grid))

         return df
+

     def forecastAheadDistribution(self, data, steps, resolution):
         ret = []
@@ -407,14 +470,18 @@ class ProbabilisticFTS(ifts.IntervalFTS):
             grid = self.getGridClean(resolution)
             grid = self.gridCount(grid, resolution, intervals[k])

-            for qt in np.arange(1, 50, 2):
+            nq = 2 * k
+            if nq > 50: nq = 50
+            st = 50 / nq
+
+            for qt in np.arange(0, 50, st):
                 # print(qt)
                 qtle_lower = self.forecastInterval(
-                    [intervals[x][0] + qt * (intervals[x][1] - intervals[x][0]) / 100 for x in
+                    [intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100 ) for x in
                      np.arange(k - self.order, k)])
                 grid = self.gridCount(grid, resolution, np.ravel(qtle_lower))
                 qtle_upper = self.forecastInterval(
-                    [intervals[x][1] - qt * (intervals[x][1] - intervals[x][0]) / 100 for x in
+                    [intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100 ) for x in
                      np.arange(k - self.order, k)])
                 grid = self.gridCount(grid, resolution, np.ravel(qtle_upper))
                 qtle_mid = self.forecastInterval(
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/pfts.py b/tests/pfts.py
new file mode 100644
index 0000000..38d107e
--- /dev/null
+++ b/tests/pfts.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import os
+import numpy as np
+import pandas as pd
+import matplotlib as plt
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+import pandas as pd
+from pyFTS.partitioners import Grid
+from pyFTS.common import FLR,FuzzySet,Membership
+from pyFTS import fts
+from pyFTS import hofts
+from pyFTS import ifts
+from pyFTS import pfts
+from pyFTS import tree
+from pyFTS.benchmarks import benchmarks as bchmk
+
+
+os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
+
+enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
+enrollments = np.array(enrollments["Enrollments"])
+
+enrollments_fs1 = Grid.GridPartitionerTrimf(enrollments,6)
+
+pfts1_enrollments = pfts.ProbabilisticFTS("1")
+pfts1_enrollments.train(enrollments,enrollments_fs1,1)
+pfts1_enrollments.shortname = "1st Order"
+pfts2_enrollments = pfts.ProbabilisticFTS("2")
+pfts2_enrollments.dump = False
+pfts2_enrollments.shortname = "2nd Order"
+pfts2_enrollments.train(enrollments,enrollments_fs1,2)
+
+
+pfts1_enrollments.forecastAheadDistribution2(enrollments[:15],5,100)
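
For reference, a minimal standalone sketch of the grid-counting idea behind the new forecastAheadDistribution2: each forecast interval is sampled at a bounded number of quantile points and the samples are counted on a fixed-resolution grid, then each step is normalized into an empirical density. This sketch uses only NumPy, leaves out the lag tree and the re-forecasting of each path that the patch performs, and every name and value in it (interval_density, the intervals, lo, hi) is hypothetical rather than part of pyFTS.

import numpy as np

def interval_density(intervals, lo, hi, resolution, max_quantiles=50):
    # Accumulate quantile samples of each forecast interval on a fixed grid
    # and normalize each step into an empirical density (illustration only).
    edges = np.arange(lo, hi + resolution, resolution)
    densities = []
    for step, (a, b) in enumerate(intervals):
        nq = min(max(2 * step, 1), max_quantiles)           # bounded number of quantile points
        samples = a + np.linspace(0.0, 1.0, nq) * (b - a)   # evenly spaced points inside [a, b]
        counts, _ = np.histogram(samples, bins=edges)
        densities.append(counts / counts.sum())
    return edges[:-1], np.array(densities)

# hypothetical forecast intervals, one per step ahead
intervals = [(10.0, 12.0), (9.5, 13.0), (9.0, 14.5)]
grid, dens = interval_density(intervals, lo=8.0, hi=16.0, resolution=0.5)
print(dens.shape)   # (steps, grid bins); each row sums to 1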