Evolutionary optimizations

This commit is contained in:
Petrônio Cândido 2019-01-18 15:25:18 -02:00
parent 2ce04b1031
commit dd78b7e559
3 changed files with 41 additions and 19 deletions

View File

@ -8,6 +8,7 @@ import dill
import numpy as np
def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
if axis is None and rules_by_axis is None:
rows = 1

View File

@ -13,6 +13,7 @@ from pyFTS.partitioners import Grid, Entropy # , Huarng
from pyFTS.models import hofts
from pyFTS.common import Membership
from pyFTS.hyperparam import Util as hUtil
from pyFTS.distributed import dispy
#
@ -437,37 +438,42 @@ def GeneticAlgorithm(dataset, **kwargs):
if no_improvement_count == mgen:
break
if collect_statistics:
return best, generation_statistics
else:
return best
return best, statistics
def cluster_method(dataset, **kwargs):
    """Run one Genetic Algorithm hyperparameter-search experiment on *dataset*.

    :param dataset: time series data handed to the GA
    :param kwargs: forwarded verbatim to GeneticAlgorithm (ngen, npop, pcruz, ...)
    :return: tuple ``(ret, statistics)`` where ``ret`` is the best individual
        found (augmented with ``'time'`` and ``'size'`` entries) and
        ``statistics`` are the per-generation statistics collected by the GA.
    """
    # Imported lazily so this function can be serialized and shipped to
    # remote dispy workers without requiring module-level state.
    from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm

    inicio = time.time()
    # GeneticAlgorithm returns (best_individual, statistics); calling it
    # exactly once — the old pre-statistics call site was dead diff residue.
    ret, statistics = GeneticAlgorithm(dataset, **kwargs)
    fim = time.time()

    ret['time'] = fim - inicio
    ret['size'] = ret['len_lags']
    return ret, statistics
def process_jobs(jobs, datasetname, conn):
    """Collect finished dispy jobs, persisting their results and statistics.

    :param jobs: submitted dispy jobs; each returns ``(result, statistics)``
    :param datasetname: dataset label passed through to log_result
    :param conn: open database connection used by log_result
    """
    for job in jobs:
        # job() blocks until the remote execution completes; call it exactly
        # once per job (the duplicate bare `result = job()` line was stale
        # diff residue that would have run every job twice).
        result, statistics = job()
        if job.status == dispy.DispyJob.Finished and result is not None:
            print("Processing result of {}".format(result))
            log_result(conn, datasetname, result)
            persist_statistics(statistics)
        else:
            # Surface remote failures for debugging instead of swallowing them
            print(job.exception)
            print(job.stdout)
def persist_statistics(statistics, filename='statistics.txt'):
    """Serialize *statistics* to *filename* as JSON.

    Use ``json.loads`` on the file contents to recover the data.

    :param statistics: JSON-serializable statistics object (e.g. the
        per-generation statistics returned by GeneticAlgorithm)
    :param filename: destination path; defaults to ``'statistics.txt'``
        to preserve the original hard-coded behavior
    """
    import json
    with open(filename, 'w') as file:
        file.write(json.dumps(statistics))
def log_result(conn, datasetname, result):
metrics = ['rmse', 'size', 'time']
for metric in metrics:
@ -490,8 +496,9 @@ def execute(datasetname, dataset, **kwargs):
if not distributed:
ret = []
for i in range(experiments):
result = cluster_method(dataset, **kwargs)
result, statistics = cluster_method(dataset, **kwargs)
log_result(conn, datasetname, result)
persist_statistics(statistics)
ret.append(result)
return result
@ -499,7 +506,7 @@ def execute(datasetname, dataset, **kwargs):
elif distributed=='dispy':
nodes = kwargs.get('nodes', ['127.0.0.1'])
cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
cluster, http_server = dispy.start_dispy_cluster(cluster_method, nodes=nodes)
jobs = []
@ -511,4 +518,4 @@ def execute(datasetname, dataset, **kwargs):
process_jobs(jobs, datasetname, conn)
Util.stop_dispy_cluster(cluster, http_server)
dispy.stop_dispy_cluster(cluster, http_server)

View File

@ -67,6 +67,7 @@ print(ret)
from pyFTS.hyperparam import Evolutionary
"""
from pyFTS.data import SONDA
data = np.array(SONDA.get_data('glo_avg'))
@ -77,15 +78,28 @@ dataset = data[:1000000]
del(data)
ret, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=30, npop=20, pcruz=.5,
pmut=.3, window_size=800000, collect_statistics=True,
parameters={'distributed': 'spark',
'url': 'spark://192.168.0.106:7077'})
"""
import json
import pandas as pd
df = pd.read_csv('https://query.data.world/s/i7eb73c4rluf2luasppsyxaurx5ol7', sep=';')
dataset = df['glo_avg'].values
print(ret)
from pyFTS.models import hofts
from pyFTS.partitioners import Grid
from pyFTS.benchmarks import Measures
with open('statistics.txt', 'w') as file:
file.write(json.dumps(statistics)) # use `json.loads` to do the reverse
from time import time
t1 = time()
Evolutionary.execute('SONDA', dataset,
ngen=20, mgen=5, npop=15, pcruz=.5, pmut=.3,
window_size=35000, train_rate=.6, increment_rate=1,
collect_statistics=True, experiments=1)
#distributed='dispy', nodes=['192.168.0.110','192.168.0.106','192.168.0.107'])
t2 = time()
print(t2 - t1)