- Issue #3 - Code documentation with PEP 257 compliance

- Several bug fixes and optimizations in the benchmark methods
2017-05-09 10:27:47 -03:00 · 2017-05-09 10:27:47 -03:00 · 537e7dcfe3
commit 537e7dcfe3
parent 8df4f9c749
5 changed files with 59 additions and 8 deletions
--- a/benchmarks/Util.py
+++ b/benchmarks/Util.py
@ -141,9 +141,15 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
                print("Erro ao salvar ", k)
                print("Exceção ", ex)
        columns = point_dataframe_analytic_columns(experiments)
+    try:
        dat = pd.DataFrame(ret, columns=columns)
        if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
        return dat
+    except Exception as ex:
+        print(ex)
+        print(experiments)
+        print(columns)
+        print(ret)


 def cast_dataframe_to_sintetic_point(infile, outfile, experiments):
@ -193,9 +199,9 @@ def analytical_data_columns(experiments):
    return data_columns


-def plot_dataframe_point(file_synthetic, file_analytic, experiments):
+def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):

-    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
+    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=tam)

    axes[0].set_title('RMSE')
    axes[1].set_title('SMAPE')
@ -216,7 +222,7 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments):
    times = []
    labels = []

-    for b in bests.keys():
+    for b in sorted(bests.keys()):
        best = bests[b]
        tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
--- a/benchmarks/arima.py
+++ b/benchmarks/arima.py
@ -32,8 +32,11 @@ class ARIMA(fts.FTS):
        self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")"

        old_fit = self.model_fit
+        try:
            self.model =  stats_arima(data, order=(self.p, self.d, self.q))
            self.model_fit = self.model.fit(disp=0)
+        except:
+            self.model_fit = None

    def ar(self, data):
        return data.dot(self.model_fit.arparams)
--- a/benchmarks/distributed_benchmarks.py
+++ b/benchmarks/distributed_benchmarks.py
@ -35,6 +35,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
    from pyFTS import yu,chen,hofts,ifts,pwfts,ismailefendi,sadaei, song, cheng, hwang
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, naive, arima, quantreg
+    from pyFTS.common import Transformations

    tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
@ -46,6 +47,8 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo

    tmp3 = [Measures.get_point_statistics]

+    tmp5 = [Transformations.Differential]
+
    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
    else:
--- a/common/Util.py
+++ b/common/Util.py
@ -16,6 +16,13 @@ def uniquefilename(name):


 def showAndSaveImage(fig,file,flag,lgd=None):
+    """
+    Show an image and save it to a file
+    :param fig: Matplotlib Figure object
+    :param file: filename to save the picture
+    :param flag: if True the image will be saved
+    :param lgd: legend
+    """
    if flag:
        plt.show()
        if lgd is not None:
@ -30,7 +37,16 @@ def enumerate2(xs, start=0, step=1):
        yield (start, x)
        start += step

+
 def sliding_window(data, windowsize, train=0.8, inc=0.1):
+    """
+    Sliding window method of cross validation for time series
+    :param data: the entire dataset
+    :param windowsize: window size
+    :param train: fraction of the window size used for training the models
+    :param inc: fraction of the window size used to slide the window
+    :return: window count, training set, test set
+    """
    l = len(data)
    ttrain = int(round(windowsize * train, 0))
    ic = int(round(windowsize * inc, 0))
@ -43,15 +59,30 @@ def sliding_window(data, windowsize, train=0.8, inc=0.1):


 def persist_obj(obj, file):
+    """
+    Persist an object to the filesystem. This function depends on the dill package
+    :param obj: object in memory
+    :param file: file name to store the object
+    """
    with open(file, 'wb') as _file:
        dill.dump(obj, _file)

+
 def load_obj(file):
+    """
+    Load into memory an object stored on the filesystem. This function depends on the dill package
+    :param file: file name where the object is stored
+    :return: object
+    """
    with open(file, 'rb') as _file:
        obj = dill.load(_file)
    return obj

 def persist_env(file):
+    """
+    Persist an entire environment to a file. This function depends on the dill package
+    :param file: file name to store the environment
+    """
    dill.dump_session(file)

 def load_env(file):
--- a/tests/general.py
+++ b/tests/general.py
@ -82,6 +82,14 @@ bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
                     dump=True, save=True, file="experiments/taiex_point_analytic.csv",
                     nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])

+diff = Transformations.Differential(1)
+
+bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
+                     partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
+                     partitions= np.arange(10,200,step=10), transformation=diff,
+                     dump=True, save=True, file="experiments/taiex_point_analytic_diff.csv",
+                     nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
+
 #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])

 #parallel_util.explore_partitioners(taiex,20)