From dd78b7e559b7679dc988310a7c315076cbfd3ca7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= <petronio.candido@gmail.com>
Date: Fri, 18 Jan 2019 15:25:18 -0200
Subject: [PATCH] Evolutive optimizations

---
 pyFTS/common/Util.py             |  1 +
 pyFTS/hyperparam/Evolutionary.py | 29 ++++++++++++++++++-----------
 pyFTS/tests/hyperparam.py        | 30 ++++++++++++++++++++++--------
 3 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/pyFTS/common/Util.py b/pyFTS/common/Util.py
index b73b0b2..71ead6a 100644
--- a/pyFTS/common/Util.py
+++ b/pyFTS/common/Util.py
@@ -8,6 +8,7 @@ import dill
 import numpy as np
 
 
+
 def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
     if axis is None and rules_by_axis is None:
         rows = 1
diff --git a/pyFTS/hyperparam/Evolutionary.py b/pyFTS/hyperparam/Evolutionary.py
index 59f4d1c..452e08e 100644
--- a/pyFTS/hyperparam/Evolutionary.py
+++ b/pyFTS/hyperparam/Evolutionary.py
@@ -13,6 +13,7 @@ from pyFTS.partitioners import Grid, Entropy  # , Huarng
 from pyFTS.models import hofts
 from pyFTS.common import Membership
 from pyFTS.hyperparam import Util as hUtil
+from pyFTS.distributed import dispy
 
 
 #
@@ -437,37 +438,42 @@ def GeneticAlgorithm(dataset, **kwargs):
         if no_improvement_count == mgen:
             break
 
-    if collect_statistics:
-        return best, generation_statistics
-    else:
-        return best
+
+    return best, statistics
 
 
 def cluster_method(dataset, **kwargs):
     from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm
 
     inicio = time.time()
-    ret = GeneticAlgorithm(dataset, **kwargs)
+    ret, statistics = GeneticAlgorithm(dataset, **kwargs)
     fim = time.time()
     ret['time'] = fim - inicio
     ret['size'] = ret['len_lags']
-    return ret
+    return ret, statistics
 
 
 def process_jobs(jobs, datasetname, conn):
     for job in jobs:
-        result = job()
+        result,statistics = job()
         if job.status == dispy.DispyJob.Finished and result is not None:
             print("Processing result of {}".format(result))
 
             log_result(conn, datasetname, result)
-                
+
+            persist_statistics(statistics)
 
         else:
             print(job.exception)
             print(job.stdout)
 
 
+def persist_statistics(statistics):
+    import json
+    with open('statistics.txt', 'w') as file:
+        file.write(json.dumps(statistics))
+
+
 def log_result(conn, datasetname, result):
     metrics = ['rmse', 'size', 'time']
     for metric in metrics:
@@ -490,8 +496,9 @@ def execute(datasetname, dataset, **kwargs):
     if not distributed:
         ret = []
         for i in range(experiments):
-            result = cluster_method(dataset, **kwargs)
+            result, statistics = cluster_method(dataset, **kwargs)
             log_result(conn, datasetname, result)
+            persist_statistics(statistics)
             ret.append(result)
 
         return result
@@ -499,7 +506,7 @@ def execute(datasetname, dataset, **kwargs):
     elif distributed=='dispy':
         nodes = kwargs.get('nodes', ['127.0.0.1'])
 
-        cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
+        cluster, http_server = dispy.start_dispy_cluster(cluster_method, nodes=nodes)
 
 
         jobs = []
@@ -511,4 +518,4 @@ def execute(datasetname, dataset, **kwargs):
 
         process_jobs(jobs, datasetname, conn)
 
-        Util.stop_dispy_cluster(cluster, http_server)
+        dispy.stop_dispy_cluster(cluster, http_server)
diff --git a/pyFTS/tests/hyperparam.py b/pyFTS/tests/hyperparam.py
index a50edb2..b9aaa11 100644
--- a/pyFTS/tests/hyperparam.py
+++ b/pyFTS/tests/hyperparam.py
@@ -67,6 +67,7 @@ print(ret)
 
 from pyFTS.hyperparam import Evolutionary
 
+"""
 from pyFTS.data import SONDA
 
 data = np.array(SONDA.get_data('glo_avg'))
@@ -77,15 +78,28 @@ dataset = data[:1000000]
 
 del(data)
 
-ret, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=30, npop=20, pcruz=.5,
-                                                pmut=.3, window_size=800000, collect_statistics=True,
-                                                parameters={'distributed': 'spark',
-                                                            'url': 'spark://192.168.0.106:7077'})
+"""
 
-import json
+import pandas as pd
+df = pd.read_csv('https://query.data.world/s/i7eb73c4rluf2luasppsyxaurx5ol7', sep=';')
+dataset = df['glo_avg'].values
 
-print(ret)
+from pyFTS.models import hofts
+from pyFTS.partitioners import Grid
+from pyFTS.benchmarks import Measures
 
-with open('statistics.txt', 'w') as file:
-     file.write(json.dumps(statistics)) # use `json.loads` to do the reverse
+from time import  time
+
+t1 = time()
+
+
+Evolutionary.execute('SONDA', dataset,
+                     ngen=20, mgen=5, npop=15, pcruz=.5, pmut=.3,
+                     window_size=35000, train_rate=.6, increment_rate=1,
+                     collect_statistics=True, experiments=1)
+                     #distributed='dispy', nodes=['192.168.0.110','192.168.0.106','192.168.0.107'])
+
+t2 = time()
+
+print(t2 - t1)