From 537e7dcfe3991a4c16bf510cfb52c9bb608d7d5a Mon Sep 17 00:00:00 2001
From: Petrônio Cândido de Lima e Silva
Date: Tue, 9 May 2017 10:27:47 -0300
Subject: [PATCH] - Issue #3 - Code documentation with PEP 257 compliance
 - Several bugfixes in benchmark methods and optimizations

---
 benchmarks/Util.py                   | 18 ++++++++++------
 benchmarks/arima.py                  |  7 +++++--
 benchmarks/distributed_benchmarks.py |  3 +++
 common/Util.py                       | 31 ++++++++++++++++++++++++++++
 tests/general.py                     |  8 +++++++
 5 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/benchmarks/Util.py b/benchmarks/Util.py
index 9bead11..5e2f1fe 100644
--- a/benchmarks/Util.py
+++ b/benchmarks/Util.py
@@ -141,9 +141,15 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
             print("Error saving ", k)
             print("Exception ", ex)
     columns = point_dataframe_analytic_columns(experiments)
-    dat = pd.DataFrame(ret, columns=columns)
-    if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
-    return dat
+    try:
+        dat = pd.DataFrame(ret, columns=columns)
+        if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
+        return dat
+    except Exception as ex:
+        print(ex)
+        print(experiments)
+        print(columns)
+        print(ret)
 
 
 def cast_dataframe_to_sintetic_point(infile, outfile, experiments):
@@ -193,9 +199,9 @@ def analytical_data_columns(experiments):
     return data_columns
 
 
-def plot_dataframe_point(file_synthetic, file_analytic, experiments):
+def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
 
-    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
+    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=tam)
 
     axes[0].set_title('RMSE')
     axes[1].set_title('SMAPE')
@@ -216,7 +222,7 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments):
     times = []
     labels = []
 
-    for b in bests.keys():
+    for b in sorted(bests.keys()):
         best = bests[b]
         tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                       & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
diff --git a/benchmarks/arima.py b/benchmarks/arima.py
index 437f642..f61f6f0 100644
--- a/benchmarks/arima.py
+++ b/benchmarks/arima.py
@@ -32,8 +32,11 @@ class ARIMA(fts.FTS):
         self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")"
 
         old_fit = self.model_fit
-        self.model = stats_arima(data, order=(self.p, self.d, self.q))
-        self.model_fit = self.model.fit(disp=0)
+        try:
+            self.model = stats_arima(data, order=(self.p, self.d, self.q))
+            self.model_fit = self.model.fit(disp=0)
+        except Exception:
+            self.model_fit = None
 
     def ar(self, data):
         return data.dot(self.model_fit.arparams)
diff --git a/benchmarks/distributed_benchmarks.py b/benchmarks/distributed_benchmarks.py
index 988a1bd..d358405 100644
--- a/benchmarks/distributed_benchmarks.py
+++ b/benchmarks/distributed_benchmarks.py
@@ -35,6 +35,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
     from pyFTS import yu,chen,hofts,ifts,pwfts,ismailefendi,sadaei, song, cheng, hwang
     from pyFTS.partitioners import Grid, Entropy, FCM
     from pyFTS.benchmarks import Measures, naive, arima, quantreg
+    from pyFTS.common import Transformations
 
     tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
@@ -46,6 +47,8 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
 
     tmp3 = [Measures.get_point_statistics]
 
+    tmp5 = [Transformations.Differential]
+
     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
     else:
diff --git a/common/Util.py b/common/Util.py
index aebce10..af71d9f 100644
--- a/common/Util.py
+++ b/common/Util.py
@@ -16,6 +16,13 @@ def uniquefilename(name):
 
 
 def showAndSaveImage(fig,file,flag,lgd=None):
+    """
+    Show an image and save it to a file
+    :param fig: Matplotlib Figure object
+    :param file: filename used to save the picture
+    :param flag: if True the image will be shown and saved
+    :param lgd: legend
+    """
     if flag:
         plt.show()
         if lgd is not None:
@@ -30,7 +37,16 @@ def enumerate2(xs, start=0, step=1):
         yield (start, x)
         start += step
 
+
 def sliding_window(data, windowsize, train=0.8, inc=0.1):
+    """
+    Sliding window method of cross validation for time series
+    :param data: the entire dataset
+    :param windowsize: window size
+    :param train: percentage of the window used for training the models
+    :param inc: percentage of the window used to slide forward at each step
+    :return: window count, training set, test set
+    """
     l = len(data)
     ttrain = int(round(windowsize * train, 0))
     ic = int(round(windowsize * inc, 0))
@@ -43,15 +59,30 @@
 
 
 def persist_obj(obj, file):
+    """
+    Persist an object to the filesystem. This function depends on the dill package
+    :param obj: the object in memory
+    :param file: file name to store the object
+    """
     with open(file, 'wb') as _file:
         dill.dump(obj, _file)
 
+
 def load_obj(file):
+    """
+    Load into memory an object stored on the filesystem. This function depends on the dill package
+    :param file: file name where the object is stored
+    :return: the loaded object
+    """
     with open(file, 'rb') as _file:
         obj = dill.load(_file)
     return obj
 
 def persist_env(file):
+    """
+    Persist the entire environment to a file. This function depends on the dill package
+    :param file: file name to store the environment
+    """
     dill.dump_session(file)
 
 def load_env(file):
diff --git a/tests/general.py b/tests/general.py
index 3881cf1..6e79d5d 100644
--- a/tests/general.py
+++ b/tests/general.py
@@ -82,6 +82,14 @@ bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
                            dump=True, save=True, file="experiments/taiex_point_analytic.csv",
                            nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
 
+diff = Transformations.Differential(1)
+
+bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
+                           partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
+                           partitions= np.arange(10,200,step=10), transformation=diff,
+                           dump=True, save=True, file="experiments/taiex_point_analytic_diff.csv",
+                           nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
+
 #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])
 
 #parallel_util.explore_partitioners(taiex,20)
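
For reference, a minimal sketch of how the sliding_window cross-validation and dill persistence helpers documented above might be used together. The synthetic series and the file name "experiment.pkl" are illustrative assumptions, not part of pyFTS; signatures follow the definitions in common/Util.py.

import numpy as np
from pyFTS.common import Util

# Hypothetical stand-in series; any 1-D sequence works here.
data = np.random.normal(0, 1.0, 2000)

# Walk the series with a 1000-point window: 80% of each window for training,
# sliding forward by 10% of the window size at each step.
# sliding_window is a generator yielding (window count, training set, test set).
for count, train, test in Util.sliding_window(data, 1000, train=0.8, inc=0.1):
    print(count, len(train), len(test))

# Persist an arbitrary object with dill and load it back.
Util.persist_obj({"windowsize": 1000}, "experiment.pkl")  # assumed file name
obj = Util.load_obj("experiment.pkl")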