Evolutionary optimizations

This commit is contained in:
Petrônio Cândido 2019-01-18 15:25:18 -02:00
parent 2ce04b1031
commit dd78b7e559
3 changed files with 41 additions and 19 deletions

View File

@ -8,6 +8,7 @@ import dill
import numpy as np
def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
if axis is None and rules_by_axis is None:
rows = 1

View File

@ -13,6 +13,7 @@ from pyFTS.partitioners import Grid, Entropy # , Huarng
from pyFTS.models import hofts
from pyFTS.common import Membership
from pyFTS.hyperparam import Util as hUtil
from pyFTS.distributed import dispy
#
@ -437,37 +438,42 @@ def GeneticAlgorithm(dataset, **kwargs):
if no_improvement_count == mgen:
break
if collect_statistics:
return best, generation_statistics
else:
return best
return best, statistics
def cluster_method(dataset, **kwargs):
    """Run one Genetic Algorithm hyperparameter-search experiment on *dataset*.

    :param dataset: time series data handed to the GA
    :param kwargs: forwarded verbatim to GeneticAlgorithm (ngen, npop, pcruz, ...)
    :return: tuple ``(ret, statistics)`` where ``ret`` is the best individual
        found (augmented with ``'time'`` and ``'size'`` entries) and
        ``statistics`` are the per-generation statistics collected by the GA.
    """
    # Imported lazily so this function can be serialized and shipped to
    # remote dispy workers without requiring module-level state.
    from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm

    inicio = time.time()
    # GeneticAlgorithm returns (best_individual, statistics); calling it
    # exactly once — the old pre-statistics call site was dead diff residue.
    ret, statistics = GeneticAlgorithm(dataset, **kwargs)
    fim = time.time()

    ret['time'] = fim - inicio
    ret['size'] = ret['len_lags']
    return ret, statistics
def process_jobs(jobs, datasetname, conn):
    """Collect finished dispy jobs, persisting their results and statistics.

    :param jobs: submitted dispy jobs; each returns ``(result, statistics)``
    :param datasetname: dataset label passed through to log_result
    :param conn: open database connection used by log_result
    """
    for job in jobs:
        # job() blocks until the remote execution completes; call it exactly
        # once per job (the duplicate bare `result = job()` line was stale
        # diff residue that would have run every job twice).
        result, statistics = job()
        if job.status == dispy.DispyJob.Finished and result is not None:
            print("Processing result of {}".format(result))
            log_result(conn, datasetname, result)
            persist_statistics(statistics)
        else:
            # Surface remote failures for debugging instead of swallowing them
            print(job.exception)
            print(job.stdout)
def persist_statistics(statistics, filename='statistics.txt'):
    """Serialize *statistics* to *filename* as JSON.

    Use ``json.loads`` on the file contents to recover the data.

    :param statistics: JSON-serializable statistics object (e.g. the
        per-generation statistics returned by GeneticAlgorithm)
    :param filename: destination path; defaults to ``'statistics.txt'``
        to preserve the original hard-coded behavior
    """
    import json
    with open(filename, 'w') as file:
        file.write(json.dumps(statistics))
def log_result(conn, datasetname, result):
metrics = ['rmse', 'size', 'time']
for metric in metrics:
@ -490,8 +496,9 @@ def execute(datasetname, dataset, **kwargs):
if not distributed:
ret = []
for i in range(experiments):
result = cluster_method(dataset, **kwargs)
result, statistics = cluster_method(dataset, **kwargs)
log_result(conn, datasetname, result)
persist_statistics(statistics)
ret.append(result)
return result
@ -499,7 +506,7 @@ def execute(datasetname, dataset, **kwargs):
elif distributed=='dispy':
nodes = kwargs.get('nodes', ['127.0.0.1'])
cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
cluster, http_server = dispy.start_dispy_cluster(cluster_method, nodes=nodes)
jobs = []
@ -511,4 +518,4 @@ def execute(datasetname, dataset, **kwargs):
process_jobs(jobs, datasetname, conn)
Util.stop_dispy_cluster(cluster, http_server)
dispy.stop_dispy_cluster(cluster, http_server)

View File

@ -67,6 +67,7 @@ print(ret)
from pyFTS.hyperparam import Evolutionary
"""
from pyFTS.data import SONDA
data = np.array(SONDA.get_data('glo_avg'))
@ -77,15 +78,28 @@ dataset = data[:1000000]
del(data)
ret, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=30, npop=20, pcruz=.5,
pmut=.3, window_size=800000, collect_statistics=True,
parameters={'distributed': 'spark',
'url': 'spark://192.168.0.106:7077'})
"""
import json
import pandas as pd
df = pd.read_csv('https://query.data.world/s/i7eb73c4rluf2luasppsyxaurx5ol7', sep=';')
dataset = df['glo_avg'].values
print(ret)
from pyFTS.models import hofts
from pyFTS.partitioners import Grid
from pyFTS.benchmarks import Measures
with open('statistics.txt', 'w') as file:
file.write(json.dumps(statistics)) # use `json.loads` to do the reverse
from time import time
t1 = time()
Evolutionary.execute('SONDA', dataset,
ngen=20, mgen=5, npop=15, pcruz=.5, pmut=.3,
window_size=35000, train_rate=.6, increment_rate=1,
collect_statistics=True, experiments=1)
#distributed='dispy', nodes=['192.168.0.110','192.168.0.106','192.168.0.107'])
t2 = time()
print(t2 - t1)