From 537e7dcfe3991a4c16bf510cfb52c9bb608d7d5a Mon Sep 17 00:00:00 2001
From: Petrônio Cândido de Lima e Silva
Date: Tue, 9 May 2017 10:27:47 -0300
Subject: [PATCH] - Issue #3 - Code documentation with PEP 257 compliance
 - Several bugfixes in benchmark methods and optimizations

---
 benchmarks/Util.py                   | 18 ++++++++++------
 benchmarks/arima.py                  |  7 +++++--
 benchmarks/distributed_benchmarks.py |  3 +++
 common/Util.py                       | 31 ++++++++++++++++++++++++++++
 tests/general.py                     |  8 +++++++
 5 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/benchmarks/Util.py b/benchmarks/Util.py
index 9bead11..5e2f1fe 100644
--- a/benchmarks/Util.py
+++ b/benchmarks/Util.py
@@ -141,9 +141,15 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
             print("Error saving ", k)
             print("Exception ", ex)
     columns = point_dataframe_analytic_columns(experiments)
-    dat = pd.DataFrame(ret, columns=columns)
-    if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
-    return dat
+    try:
+        dat = pd.DataFrame(ret, columns=columns)
+        if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
+        return dat
+    except Exception as ex:
+        print(ex)
+        print(experiments)
+        print(columns)
+        print(ret)
 
 
 def cast_dataframe_to_sintetic_point(infile, outfile, experiments):
@@ -193,9 +199,9 @@ def analytical_data_columns(experiments):
     return data_columns
 
 
-def plot_dataframe_point(file_synthetic, file_analytic, experiments):
+def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
 
-    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
+    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=tam)
 
     axes[0].set_title('RMSE')
     axes[1].set_title('SMAPE')
@@ -216,7 +222,7 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments):
     times = []
     labels = []
 
-    for b in bests.keys():
+    for b in sorted(bests.keys()):
         best = bests[b]
         tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                       & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
diff --git a/benchmarks/arima.py b/benchmarks/arima.py
index 437f642..f61f6f0 100644
--- a/benchmarks/arima.py
+++ b/benchmarks/arima.py
@@ -32,8 +32,11 @@ class ARIMA(fts.FTS):
         self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")"
 
         old_fit = self.model_fit
-        self.model = stats_arima(data, order=(self.p, self.d, self.q))
-        self.model_fit = self.model.fit(disp=0)
+        try:
+            self.model = stats_arima(data, order=(self.p, self.d, self.q))
+            self.model_fit = self.model.fit(disp=0)
+        except Exception:
+            self.model_fit = None
 
     def ar(self, data):
         return data.dot(self.model_fit.arparams)
diff --git a/benchmarks/distributed_benchmarks.py b/benchmarks/distributed_benchmarks.py
index 988a1bd..d358405 100644
--- a/benchmarks/distributed_benchmarks.py
+++ b/benchmarks/distributed_benchmarks.py
@@ -35,6 +35,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
     from pyFTS import yu,chen,hofts,ifts,pwfts,ismailefendi,sadaei, song, cheng, hwang
     from pyFTS.partitioners import Grid, Entropy, FCM
     from pyFTS.benchmarks import Measures, naive, arima, quantreg
+    from pyFTS.common import Transformations
 
     tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
@@ -46,6 +47,8 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
 
     tmp3 = [Measures.get_point_statistics]
 
+    tmp5 = [Transformations.Differential]
+
     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
     else:
diff --git a/common/Util.py b/common/Util.py
index aebce10..af71d9f 100644
--- a/common/Util.py
+++ b/common/Util.py
@@ -16,6 +16,13 @@ def uniquefilename(name):
 
 
 def showAndSaveImage(fig,file,flag,lgd=None):
+    """
+    Show an image and save it to a file
+    :param fig: Matplotlib Figure object
+    :param file: filename used to save the picture
+    :param flag: if True the image will be shown and saved
+    :param lgd: legend
+    """
     if flag:
         plt.show()
         if lgd is not None:
@@ -30,7 +37,16 @@ def enumerate2(xs, start=0, step=1):
         yield (start, x)
         start += step
 
+
 def sliding_window(data, windowsize, train=0.8, inc=0.1):
+    """
+    Sliding window method of cross validation for time series
+    :param data: the entire dataset
+    :param windowsize: window size
+    :param train: percentage of the window used for training the models
+    :param inc: percentage of the window used to slide forward at each step
+    :return: window count, training set, test set
+    """
     l = len(data)
     ttrain = int(round(windowsize * train, 0))
     ic = int(round(windowsize * inc, 0))
@@ -43,15 +59,30 @@
 
 
 def persist_obj(obj, file):
+    """
+    Persist an object to the filesystem. This function depends on the dill package
+    :param obj: the object in memory
+    :param file: file name to store the object
+    """
     with open(file, 'wb') as _file:
         dill.dump(obj, _file)
 
+
 def load_obj(file):
+    """
+    Load into memory an object stored on the filesystem. This function depends on the dill package
+    :param file: file name where the object is stored
+    :return: the loaded object
+    """
     with open(file, 'rb') as _file:
         obj = dill.load(_file)
     return obj
 
 def persist_env(file):
+    """
+    Persist the entire environment to a file. This function depends on the dill package
+    :param file: file name to store the environment
+    """
     dill.dump_session(file)
 
 def load_env(file):
diff --git a/tests/general.py b/tests/general.py
index 3881cf1..6e79d5d 100644
--- a/tests/general.py
+++ b/tests/general.py
@@ -82,6 +82,14 @@ bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
                            dump=True, save=True, file="experiments/taiex_point_analytic.csv",
                            nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
 
+diff = Transformations.Differential(1)
+
+bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
+                           partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
+                           partitions= np.arange(10,200,step=10), transformation=diff,
+                           dump=True, save=True, file="experiments/taiex_point_analytic_diff.csv",
+                           nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
+
 #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])
 
 #parallel_util.explore_partitioners(taiex,20)
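
For reference, a minimal sketch of how the sliding_window cross-validation and dill persistence helpers documented above might be used together. The synthetic series and the file name "experiment.pkl" are illustrative assumptions, not part of pyFTS; signatures follow the definitions in common/Util.py.

import numpy as np
from pyFTS.common import Util

# Hypothetical stand-in series; any 1-D sequence works here.
data = np.random.normal(0, 1.0, 2000)

# Walk the series with a 1000-point window: 80% of each window for training,
# sliding forward by 10% of the window size at each step.
# sliding_window is a generator yielding (window count, training set, test set).
for count, train, test in Util.sliding_window(data, 1000, train=0.8, inc=0.1):
    print(count, len(train), len(test))

# Persist an arbitrary object with dill and load it back.
Util.persist_obj({"windowsize": 1000}, "experiment.pkl")  # assumed file name
obj = Util.load_obj("experiment.pkl")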