Several bugfixes and optimizations

2017-02-01 14:54:37 -02:00 · 2017-02-01 14:54:37 -02:00 · 71c71ca9d9
commit 71c71ca9d9
parent 0cf938c2a6
6 changed files with 194 additions and 44 deletions
--- a/benchmarks/Measures.py
+++ b/benchmarks/Measures.py
@ -109,3 +109,38 @@ def coverage(targets, forecasts):
            preds.append(0)
    return np.mean(preds)

+
+def pmf_to_cdf(density):
+    ret = []
+    for row in density.index:
+        tmp = []
+        prev = 0
+        for col in density.columns:
+            prev += density[col][row]
+            tmp.append( prev )
+        ret.append(tmp)
+    df = pd.DataFrame(ret, columns=density.columns)
+    return df
+
+
+def heavyside_cdf(bins, targets):
+    ret = []
+    for t in targets:
+        result = [1 if b >= t else 0 for b in bins]
+        ret.append(result)
+    df = pd.DataFrame(ret, columns=bins)
+    return df
+
+
+# Continuous Ranked Probability Score
+def crps(targets, densities):
+    l = len(densities.columns)
+    n = len(densities.index)
+    Ff = pmf_to_cdf(densities)
+    Fa = heavyside_cdf(densities.columns, targets)
+
+    _crps = float(0.0)
+    for k in densities.index:
+        _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
+
+    return _crps / float(l * n)
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@ -5,6 +5,7 @@ import numpy as np
 import pandas as pd
 import matplotlib as plt
 import matplotlib.colors as pltcolors
+import matplotlib.cm as cmx
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 # from sklearn.cross_validation import KFold
@ -201,12 +202,71 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,

    Util.showAndSaveImage(fig, file, save, lgd=legends)

+def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
+                           models=None, transformation=None, option=2):
+    if models is None:
+        models = [pfts.ProbabilisticFTS]
+
+    if resolution is None: resolution = (max(data_train) - min(data_train)) / 100
+
+    objs = []
+
+    if transformation is not None:
+        data_train_fs = Grid.GridPartitionerTrimf(transformation.apply(data_train),partitions)
+    else:
+        data_train_fs = Grid.GridPartitionerTrimf(data_train, partitions)
+
+    lcolors = []
+
+    for count, model in Util.enumerate2(models, start=0, step=2):
+        mfts = model("")
+        if not mfts.isHighOrder:
+            if transformation is not None:
+                mfts.appendTransformation(transformation)
+            mfts.train(data_train, data_train_fs)
+            objs.append(mfts)
+            lcolors.append( colors[count % ncol] )
+        else:
+            for order in np.arange(1,max_order+1):
+                if order >= mfts.minOrder:
+                    mfts = model(" n = " + str(order))
+                    if transformation is not None:
+                        mfts.appendTransformation(transformation)
+                    mfts.train(data_train, data_train_fs, order=order)
+                    objs.append(mfts)
+                    lcolors.append(colors[count % ncol])
+
+    distributions = [False for k in objs]
+
+    distributions[0] = True
+
+    print(getDistributionStatistics(data_test[start:], objs, steps, resolution))
+
+    #plotComparedIntervalsAhead(data_test, objs, lcolors, distributions=, save=save, file=file, tam=tam,  intervals=True)
+
+
+def getDistributionStatistics(original, models, steps, resolution):
+    ret = "Model	& Order     &  Interval & Distribution	\\\\ \n"
+    for fts in models:
+        densities1 = fts.forecastAheadDistribution(original,steps,resolution, parameters=3)
+        densities2 = fts.forecastAheadDistribution(original, steps, resolution, parameters=2)
+        ret += fts.shortname + "		& "
+        ret += str(fts.order) + "		& "
+        ret += str(round(Measures.crps(original, densities1), 3)) + "		& "
+        ret += str(round(Measures.crps(original, densities2), 3)) + "	\\\\ \n"
+    return ret
+

 def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to,
-                               interpol=False, save=False, file=None, tam=[20, 5], resolution=None):
+                               interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
+                               cmap='Blues',option=2):
    fig = plt.figure(figsize=tam)
    ax = fig.add_subplot(111)

+    cm = plt.get_cmap(cmap)
+    cNorm = pltcolors.Normalize(vmin=0, vmax=1)
+    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
+
    if resolution is None: resolution = (max(original) - min(original)) / 100

    mi = []
@ -215,26 +275,44 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
    for count, fts in enumerate(models, start=0):
        if fts.hasDistributionForecasting and distributions[count]:
            density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from],
-                                                    time_to, resolution, parameters=True)
+                                                    time_to, resolution, parameters=option)

+            Y = []
+            X = []
+            C = []
+            S = []
            y = density.columns
            t = len(y)

+            ss = time_to ** 2
+
            for k in density.index:
-                alpha = np.array([density[q][k] for q in density]) * 100
+                #alpha = [scalarMap.to_rgba(density[col][k]) for col in density.columns]
+                col = [density[col][k]*5 for col in density.columns]

                x = [time_from + k for x in np.arange(0, t)]

-                for cc in np.arange(0, resolution, 5):
-                    ax.scatter(x, y + cc, c=alpha, marker='s', linewidths=0, cmap='Oranges', edgecolors=None)
-                if interpol and k < max(density.index):
-                    diffs = [(density[q][k + 1] - density[q][k]) / 50 for q in density]
-                    for p in np.arange(0, 50):
-                        xx = [time_from + k + 0.02 * p for q in np.arange(0, t)]
-                        alpha2 = np.array(
-                            [density[density.columns[q]][k] + diffs[q] * p for q in np.arange(0, t)]) * 100
-                        ax.scatter(xx, y, c=alpha2, marker='s', linewidths=0, cmap='Oranges',
-                                   norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)
+                s = [ss for x in np.arange(0, t)]
+
+                ic = resolution/10
+
+                for cc in np.arange(0, resolution, ic):
+                    Y.append(y + cc)
+                    X.append(x)
+                    C.append(col)
+                    S.append(s)
+
+            Y = np.hstack(Y)
+            X = np.hstack(X)
+            C = np.hstack(C)
+            S = np.hstack(S)
+
+            s = ax.scatter(X, Y, c=C, marker='s',s=S, linewidths=0, edgecolors=None, cmap=cmap)
+            s.set_clim([0, 1])
+            cb = fig.colorbar(s)
+
+            cb.set_label('Density')
+

        if fts.hasIntervalForecasting:
            forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
@ -276,6 +354,8 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])

+    #plt.colorbar()
+
    Util.showAndSaveImage(fig, file, save)


--- a/fts.py
+++ b/fts.py
@ -63,7 +63,6 @@ class FTS(object):

    def doTransformations(self,data,params=None,updateUoD=False):
        ndata = data
-
        if updateUoD:
            if min(data) < 0:
                self.original_min = min(data) * 1.1
--- a/partitioners/Grid.py
+++ b/partitioners/Grid.py
@ -9,18 +9,20 @@ from pyFTS.common import FuzzySet, Membership

 def GridPartitionerTrimf(data, npart, names=None, prefix="A"):
    sets = []
-    if min(data) < 0:
-        dmin = min(data) * 1.1
+    _min = min(data)
+    if _min < 0:
+        dmin = _min * 1.1
    else:
-        dmin = min(data) * 0.9
+        dmin = _min * 0.9

-    if max(data) > 0:
-        dmax = max(data) * 1.1
+    _max = max(data)
+    if _max > 0:
+        dmax = _max * 1.1
    else:
-        dmax = max(data) * 0.9
+        dmax = _max * 0.9

    dlen = dmax - dmin
-    partlen = math.ceil(dlen / npart)
+    partlen = dlen / npart

    count = 0
    for c in np.arange(dmin, dmax, partlen):
--- a/pfts.py
+++ b/pfts.py
@ -243,15 +243,15 @@ class ProbabilisticFTS(ifts.IntervalFTS):
                    idx = np.ravel(tmp)  # flatten the array

                    if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                        if instance <= self.sets[0].lower:
+                        if instance <= np.ceil(self.sets[0].lower):
                            idx = [0]
-                        elif instance >= self.sets[-1].upper:
+                        elif instance >= np.floor(self.sets[-1].upper):
                            idx = [len(self.sets) - 1]
                        else:
                            raise Exception(instance)

                    lags[count] = idx
-                    count = count + 1
+                    count += 1

                # Build the tree with all possible paths

@ -331,11 +331,16 @@ class ProbabilisticFTS(ifts.IntervalFTS):
        return ret

    def forecastAheadInterval(self, data, steps):
-        ret = [[data[k], data[k]] for k in np.arange(len(data) - self.order, len(data))]
+
+        l = len(data)
+
+        ret = [[data[k], data[k]] for k in np.arange(l - self.order, l)]

        for k in np.arange(self.order, steps+self.order):

-            if ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[-1].upper:
+            if (len(self.transformations) > 0 and ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[
+                -1].upper) or (len(self.transformations) == 0 and ret[-1][0] <= self.original_min and ret[-1][
+                1] >= self.original_max):
                ret.append(ret[-1])
            else:
                lower = self.forecastInterval([ret[x][0] for x in np.arange(k - self.order, k)])
@ -384,7 +389,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
        for child in node.getChildren():
            self.buildTreeWithoutOrder(child, lags, level + 1)

-    def forecastAheadDistribution(self, data, steps, resolution, parameters=None):
+    def forecastAheadDistribution(self, data, steps, resolution, parameters=2):

        ret = []

@ -394,7 +399,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):

        index = SortedCollection.SortedCollection(iterable=grid.keys())

-        if parameters is None:
+        if parameters == 1:

            grids = []
            for k in np.arange(0, steps):
@ -442,12 +447,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
                tmp = np.array([grids[k][q] for q in sorted(grids[k])])
                ret.append(tmp / sum(tmp))

-            grid = self.getGridClean(resolution)
-            df = pd.DataFrame(ret, columns=sorted(grid))
-            return df
-        else:
-
-            print("novo")
+        elif parameters == 2:

            ret = []

@ -474,9 +474,20 @@ class ProbabilisticFTS(ifts.IntervalFTS):

                ret.append(tmp / sum(tmp))

-            grid = self.getGridClean(resolution)
-            df = pd.DataFrame(ret, columns=sorted(grid))
-            return df
+        else:
+            ret = []
+
+            for k in np.arange(self.order, steps + self.order):
+                grid = self.getGridClean(resolution)
+                grid = self.gridCount(grid, resolution, index, intervals[k])
+
+                tmp = np.array([grid[k] for k in sorted(grid)])
+
+                ret.append(tmp / sum(tmp))
+
+        grid = self.getGridClean(resolution)
+        df = pd.DataFrame(ret, columns=sorted(grid))
+        return df


    def __str__(self):
--- a/tests/pfts.py
+++ b/tests/pfts.py
@ -13,23 +13,46 @@ from pyFTS.partitioners import Grid
 from pyFTS.common import FLR,FuzzySet,Membership,Transformations
 from pyFTS import fts,hofts,ifts,pfts,tree, chen
 from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import Measures
+from numpy import random
+
+gauss_treino = random.normal(0,1.0,1600)
+gauss_teste = random.normal(0,1.0,400)


 os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")

-enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
-enrollments = np.array(enrollments["Enrollments"])
+#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
+#enrollments = np.array(enrollments["Enrollments"])

-#diff = Transformations.Differential(1)
+#taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+#taiex_treino = np.array(taiex["avg"][2500:3900])
+#taiex_teste = np.array(taiex["avg"][3901:4500])

-fs = Grid.GridPartitionerTrimf(enrollments,6)
+#nasdaq = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
+#nasdaq_treino = np.array(nasdaq["avg"][0:1600])
+#nasdaq_teste = np.array(nasdaq["avg"][1601:2000])
+
+diff = Transformations.Differential(1)
+
+fs = Grid.GridPartitionerTrimf(gauss_treino,7)

 #tmp = chen.ConventionalFTS("")

 pfts1 = pfts.ProbabilisticFTS("1")
 #pfts1.appendTransformation(diff)
-pfts1.train(enrollments,fs,1)
+pfts1.train(gauss_treino,fs,1)
+pfts2 = pfts.ProbabilisticFTS("n = 2")
+#pfts2.appendTransformation(diff)
+pfts2.train(gauss_treino,fs,2)
+
+pfts3 = pfts.ProbabilisticFTS("n = 3")
+#pfts3.appendTransformation(diff)
+pfts3.train(gauss_treino,fs,3)
+
+densities1 = pfts1.forecastAheadDistribution(gauss_teste[:50],2,1.50, parameters=2)
+
+#print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
+

-#bchmk.plotComparedIntervalsAhead(enrollments,[pfts1], ["blue"],[True],5,10)

-pfts1.forecastAheadDistribution(enrollments,5,1, parameters=True)