Evolutionary optimizations

Petrônio Cândido 2019-01-18 15:25:18 -02:00
parent 2ce04b1031
commit dd78b7e559
3 changed files with 41 additions and 19 deletions

View File

@@ -8,6 +8,7 @@ import dill
 import numpy as np
 def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
     if axis is None and rules_by_axis is None:
         rows = 1
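
For context, plot_rules draws the rule base of a trained model as a matplotlib figure. A minimal sketch of a call, assuming plot_rules lives in pyFTS.common.Util; the series and hyperparameters are illustrative:

import numpy as np
from pyFTS.common import Util
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

data = np.random.normal(0, 1, 1000)  # illustrative series
partitioner = Grid.GridPartitioner(data=data, npart=10)
model = hofts.HighOrderFTS(partitioner=partitioner, order=2)
model.fit(data)

# 5x5-inch figure, up to rules_by_axis rules per subplot, two columns
Util.plot_rules(model, size=[5, 5], rules_by_axis=10, columns=2)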

View File

@@ -13,6 +13,7 @@ from pyFTS.partitioners import Grid, Entropy # , Huarng
 from pyFTS.models import hofts
 from pyFTS.common import Membership
 from pyFTS.hyperparam import Util as hUtil
+from pyFTS.distributed import dispy
 #
@@ -437,37 +438,42 @@ def GeneticAlgorithm(dataset, **kwargs):
         if no_improvement_count == mgen:
             break
-    return best, generation_statistics
+    if collect_statistics:
+        return best, statistics
+    else:
+        return best
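
The return contract of GeneticAlgorithm now depends on the collect_statistics flag, so callers must match their unpacking to it. A minimal sketch of both call styles, assuming dataset is a numeric series as in the test script below; the hyperparameter values are illustrative:

from pyFTS.hyperparam import Evolutionary

# collect_statistics=True: a (best, statistics) pair is returned
best, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=20, npop=15,
                                                 pcruz=.5, pmut=.3,
                                                 collect_statistics=True)

# flag off: only the best individual is returned
best = Evolutionary.GeneticAlgorithm(dataset, ngen=20, npop=15,
                                     pcruz=.5, pmut=.3)

Note that cluster_method below unpacks two values unconditionally, so it only works when collect_statistics is truthy.
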
 def cluster_method(dataset, **kwargs):
     from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm
     inicio = time.time()
-    ret = GeneticAlgorithm(dataset, **kwargs)
+    ret, statistics = GeneticAlgorithm(dataset, **kwargs)
     fim = time.time()
     ret['time'] = fim - inicio
     ret['size'] = ret['len_lags']
-    return ret
+    return ret, statistics
 def process_jobs(jobs, datasetname, conn):
     for job in jobs:
-        result = job()
+        result, statistics = job()
         if job.status == dispy.DispyJob.Finished and result is not None:
             print("Processing result of {}".format(result))
             log_result(conn, datasetname, result)
+            persist_statistics(statistics)
         else:
             print(job.exception)
             print(job.stdout)
+def persist_statistics(statistics):
+    import json
+    with open('statistics.txt', 'w') as file:
+        file.write(json.dumps(statistics))
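
A caveat: persist_statistics opens statistics.txt in 'w' mode, so each job or experiment overwrites the previous one and only the last run's statistics survive. A hypothetical append-mode variant (not part of this commit) that keeps one JSON document per line:

def persist_statistics(statistics, path='statistics.txt'):
    import json
    # 'a' appends instead of truncating; one JSON document per line
    with open(path, 'a') as file:
        file.write(json.dumps(statistics) + '\n')

json.dumps also requires JSON-serializable contents, so any numpy scalars in the statistics would need a float(...) conversion first.
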
 def log_result(conn, datasetname, result):
     metrics = ['rmse', 'size', 'time']
     for metric in metrics:
@@ -490,8 +496,9 @@ def execute(datasetname, dataset, **kwargs):
     if not distributed:
         ret = []
         for i in range(experiments):
-            result = cluster_method(dataset, **kwargs)
+            result, statistics = cluster_method(dataset, **kwargs)
             log_result(conn, datasetname, result)
+            persist_statistics(statistics)
             ret.append(result)
         return result
@@ -499,7 +506,7 @@ def execute(datasetname, dataset, **kwargs):
     elif distributed=='dispy':
         nodes = kwargs.get('nodes', ['127.0.0.1'])
-        cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
+        cluster, http_server = dispy.start_dispy_cluster(cluster_method, nodes=nodes)
         jobs = []
@@ -511,4 +518,4 @@ def execute(datasetname, dataset, **kwargs):
     process_jobs(jobs, datasetname, conn)
-    Util.stop_dispy_cluster(cluster, http_server)
+    dispy.stop_dispy_cluster(cluster, http_server)
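
With the helpers now taken from pyFTS.distributed.dispy, a distributed run is still selected through the same execute entry point. A minimal sketch, assuming dispy workers are already running on the listed nodes; the dataset and IP addresses are illustrative, with parameters following the test script below:

from pyFTS.hyperparam import Evolutionary

Evolutionary.execute('SONDA', dataset,
                     ngen=20, mgen=5, npop=15, pcruz=.5, pmut=.3,
                     window_size=35000, train_rate=.6, increment_rate=1,
                     collect_statistics=True, experiments=3,
                     distributed='dispy',
                     nodes=['192.168.0.110', '192.168.0.106'])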

View File

@@ -67,6 +67,7 @@ print(ret)
 from pyFTS.hyperparam import Evolutionary
+"""
 from pyFTS.data import SONDA
 data = np.array(SONDA.get_data('glo_avg'))
@ -77,15 +78,28 @@ dataset = data[:1000000]
del(data) del(data)
ret, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=30, npop=20, pcruz=.5, """
pmut=.3, window_size=800000, collect_statistics=True,
parameters={'distributed': 'spark',
'url': 'spark://192.168.0.106:7077'})
import json import pandas as pd
df = pd.read_csv('https://query.data.world/s/i7eb73c4rluf2luasppsyxaurx5ol7', sep=';')
dataset = df['glo_avg'].values
print(ret) from pyFTS.models import hofts
from pyFTS.partitioners import Grid
from pyFTS.benchmarks import Measures
with open('statistics.txt', 'w') as file: from time import time
file.write(json.dumps(statistics)) # use `json.loads` to do the reverse
t1 = time()
Evolutionary.execute('SONDA', dataset,
ngen=20, mgen=5, npop=15, pcruz=.5, pmut=.3,
window_size=35000, train_rate=.6, increment_rate=1,
collect_statistics=True, experiments=1)
#distributed='dispy', nodes=['192.168.0.110','192.168.0.106','192.168.0.107'])
t2 = time()
print(t2 - t1)
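
Because every run persists its collected statistics through persist_statistics, the output can be inspected once the benchmark finishes. A minimal sketch, assuming the script above ran in the current directory and wrote statistics.txt:

import json

with open('statistics.txt') as file:
    statistics = json.loads(file.read())

print(type(statistics), len(statistics))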