From dd78b7e559b7679dc988310a7c315076cbfd3ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Fri, 18 Jan 2019 15:25:18 -0200 Subject: [PATCH] Evolutive optimizations --- pyFTS/common/Util.py | 1 + pyFTS/hyperparam/Evolutionary.py | 29 ++++++++++++++++++----------- pyFTS/tests/hyperparam.py | 30 ++++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/pyFTS/common/Util.py b/pyFTS/common/Util.py index b73b0b2..71ead6a 100644 --- a/pyFTS/common/Util.py +++ b/pyFTS/common/Util.py @@ -8,6 +8,7 @@ import dill import numpy as np + def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1): if axis is None and rules_by_axis is None: rows = 1 diff --git a/pyFTS/hyperparam/Evolutionary.py b/pyFTS/hyperparam/Evolutionary.py index 59f4d1c..452e08e 100644 --- a/pyFTS/hyperparam/Evolutionary.py +++ b/pyFTS/hyperparam/Evolutionary.py @@ -13,6 +13,7 @@ from pyFTS.partitioners import Grid, Entropy # , Huarng from pyFTS.models import hofts from pyFTS.common import Membership from pyFTS.hyperparam import Util as hUtil +from pyFTS.distributed import dispy # @@ -437,37 +438,42 @@ def GeneticAlgorithm(dataset, **kwargs): if no_improvement_count == mgen: break - if collect_statistics: - return best, generation_statistics - else: - return best + + return best, statistics def cluster_method(dataset, **kwargs): from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm inicio = time.time() - ret = GeneticAlgorithm(dataset, **kwargs) + ret, statistics = GeneticAlgorithm(dataset, **kwargs) fim = time.time() ret['time'] = fim - inicio ret['size'] = ret['len_lags'] - return ret + return ret, statistics def process_jobs(jobs, datasetname, conn): for job in jobs: - result = job() + result,statistics = job() if job.status == dispy.DispyJob.Finished and result is not None: print("Processing result of {}".format(result)) log_result(conn, datasetname, result) - + + persist_statistics(statistics) else: print(job.exception) print(job.stdout) +def persist_statistics(statistics): + import json + with open('statistics.txt', 'w') as file: + file.write(json.dumps(statistics)) + + def log_result(conn, datasetname, result): metrics = ['rmse', 'size', 'time'] for metric in metrics: @@ -490,8 +496,9 @@ def execute(datasetname, dataset, **kwargs): if not distributed: ret = [] for i in range(experiments): - result = cluster_method(dataset, **kwargs) + result, statistics = cluster_method(dataset, **kwargs) log_result(conn, datasetname, result) + persist_statistics(statistics) ret.append(result) return result @@ -499,7 +506,7 @@ def execute(datasetname, dataset, **kwargs): elif distributed=='dispy': nodes = kwargs.get('nodes', ['127.0.0.1']) - cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes) + cluster, http_server = dispy.start_dispy_cluster(cluster_method, nodes=nodes) jobs = [] @@ -511,4 +518,4 @@ def execute(datasetname, dataset, **kwargs): process_jobs(jobs, datasetname, conn) - Util.stop_dispy_cluster(cluster, http_server) + dispy.stop_dispy_cluster(cluster, http_server) diff --git a/pyFTS/tests/hyperparam.py b/pyFTS/tests/hyperparam.py index a50edb2..b9aaa11 100644 --- a/pyFTS/tests/hyperparam.py +++ b/pyFTS/tests/hyperparam.py @@ -67,6 +67,7 @@ print(ret) from pyFTS.hyperparam import Evolutionary +""" from pyFTS.data import SONDA data = np.array(SONDA.get_data('glo_avg')) @@ -77,15 +78,28 @@ dataset = data[:1000000] del(data) -ret, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=30, npop=20, pcruz=.5, - pmut=.3, window_size=800000, collect_statistics=True, - parameters={'distributed': 'spark', - 'url': 'spark://192.168.0.106:7077'}) +""" -import json +import pandas as pd +df = pd.read_csv('https://query.data.world/s/i7eb73c4rluf2luasppsyxaurx5ol7', sep=';') +dataset = df['glo_avg'].values -print(ret) +from pyFTS.models import hofts +from pyFTS.partitioners import Grid +from pyFTS.benchmarks import Measures -with open('statistics.txt', 'w') as file: - file.write(json.dumps(statistics)) # use `json.loads` to do the reverse +from time import time + +t1 = time() + + +Evolutionary.execute('SONDA', dataset, + ngen=20, mgen=5, npop=15, pcruz=.5, pmut=.3, + window_size=35000, train_rate=.6, increment_rate=1, + collect_statistics=True, experiments=1) + #distributed='dispy', nodes=['192.168.0.110','192.168.0.106','192.168.0.107']) + +t2 = time() + +print(t2 - t1)