GridSearch improvements

This commit is contained in:
Petrônio Cândido 2018-11-14 00:42:59 -02:00
parent 2db3b0311e
commit edceece6e2
2 changed files with 56 additions and 39 deletions

View File

@ -42,7 +42,7 @@ def metodo_cluster(individual, train, test):
partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf) partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf)
model = hofts.HighOrderFTS(partitioner=partitioner, model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
lags=individual['lags'], lags=individual['lags'],
alpha_cut=individual['alpha'], alpha_cut=individual['alpha'],
order=individual['order']) order=individual['order'])
@ -51,7 +51,31 @@ def metodo_cluster(individual, train, test):
rmse, mape, u = Measures.get_point_statistics(test, model) rmse, mape, u = Measures.get_point_statistics(test, model)
return individual, rmse size = len(model)
return individual, rmse, size
def process_jobs(jobs, datasetname, conn):
    """Wait for a batch of cluster jobs and store their metrics.

    Each job is called to obtain its outcome, which on success is expected
    to be the ``(individual, rmse, size)`` tuple. For every successful job
    two records are written through ``hUtil.insert_hyperparam``: one for
    the ``'rmse'`` measure and one for the ``'size'`` measure. For any other
    job the job's exception and captured stdout are printed instead.

    :param jobs: iterable of submitted job objects; each must be callable
        and expose ``status``, ``exception`` and ``stdout``
    :param datasetname: dataset identifier stored as the first record field
    :param conn: connection handle forwarded to ``hUtil.insert_hyperparam``
    """
    for job in jobs:
        # Calling the job yields its outcome. A job that did not complete
        # may yield None rather than a 3-tuple, so the outcome must be
        # checked before unpacking; otherwise the unpacking raises
        # TypeError and the diagnostics below are never printed.
        outcome = job()
        if outcome is None:
            result, rmse, size = None, None, None
        else:
            result, rmse, size = outcome

        if job.status == dispy.DispyJob.Finished and result is not None:
            print(result)
            # One record per measure; every other field is shared.
            for measure, value in (('rmse', rmse), ('size', size)):
                record = (datasetname, 'GridSearch', 'WHOFTS', None, result['mf'],
                          result['order'], result['partitioner'], result['npart'],
                          result['alpha'], str(result['lags']), measure, value)
                hUtil.insert_hyperparam(record, conn)
        else:
            print(job.exception)
            print(job.stdout)
def execute(hyperparams, datasetname, train, test, **kwargs): def execute(hyperparams, datasetname, train, test, **kwargs):
@ -76,19 +100,26 @@ def execute(hyperparams, datasetname, train, test, **kwargs):
for hp in keys_sorted for hp in keys_sorted
] ]
for instance in product(*hp_values): cluster, http_server = Util.start_dispy_cluster(metodo_cluster, nodes=nodes)
conn = hUtil.open_hyperparam_db('hyperparam.db')
for ct, instance in enumerate(product(*hp_values)):
partitions = instance[index['partitions']] partitions = instance[index['partitions']]
partitioner = instance[index['partitioner']] partitioner = instance[index['partitioner']]
mf = instance[index['mf']] mf = instance[index['mf']]
alpha_cut = instance[index['alpha']] alpha_cut = instance[index['alpha']]
order = instance[index['order']] order = instance[index['order']]
count = 0
for lag1 in lags: # o é o lag1 for lag1 in lags: # o é o lag1
_lags = [lag1] _lags = [lag1]
count += 1
if order > 1: if order > 1:
for lag2 in lags: # o é o lag1 for lag2 in lags: # o é o lag1
_lags2 = [lag1, lag1+lag2] _lags2 = [lag1, lag1+lag2]
count += 1
if order > 2: if order > 2:
for lag3 in lags: # o é o lag1 for lag3 in lags: # o é o lag1
count += 1
_lags3 = [lag1, lag1 + lag2, lag1 + lag2+lag3 ] _lags3 = [lag1, lag1 + lag2, lag1 + lag2+lag3 ]
individuals.append(dict_individual(mf, partitioner, partitions, order, _lags3, alpha_cut)) individuals.append(dict_individual(mf, partitioner, partitions, order, _lags3, alpha_cut))
else: else:
@ -97,31 +128,17 @@ def execute(hyperparams, datasetname, train, test, **kwargs):
else: else:
individuals.append(dict_individual(mf, partitioner, partitions, order, _lags, alpha_cut)) individuals.append(dict_individual(mf, partitioner, partitions, order, _lags, alpha_cut))
if count > 50:
jobs = []
cluster, http_server = Util.start_dispy_cluster(metodo_cluster, nodes=nodes) for ind in individuals:
job = cluster.submit(ind, train, test)
jobs.append(job)
jobs = [] process_jobs(jobs, datasetname, conn)
for ind in individuals: count = 0
job = cluster.submit(ind, train, test)
jobs.append(job)
individuals = []
conn = hUtil.open_hyperparam_db('hyperparam.db')
for job in jobs:
result, rmse = job()
if job.status == dispy.DispyJob.Finished and result is not None:
print(result)
record = (datasetname, 'GridSearch', 'HOFTS', None, result['mf'],
result['order'], result['partitioner'], result['npart'],
result['alpha'], str(result['lags']), 'rmse', rmse)
hUtil.insert_hyperparam(record, conn)
else:
print(job.exception)
print(job.stdout)
Util.stop_dispy_cluster(cluster, http_server) Util.stop_dispy_cluster(cluster, http_server)

View File

@ -1,4 +1,4 @@
import numpy as np
from pyFTS.hyperparam import GridSearch from pyFTS.hyperparam import GridSearch
def get_train_test(): def get_train_test():
@ -12,15 +12,15 @@ def get_train_test():
return 'Malaysia.temperature', train, test return 'Malaysia.temperature', train, test
hyperparams = { hyperparams = {
'order':[1], 'order':[1, 2, 3],
'partitions':[10, 15], 'partitions': np.arange(10,100,3),
'partitioner': [1], 'partitioner': [1,2],
'mf': [1], 'mf': [1, 2, 3, 4],
'lags': [1, 2, 3], 'lags': np.arange(1,35,2),
'alpha': [.1, .2, .5] 'alpha': np.arange(0,.5, .05)
} }
nodes = ['192.168.0.110','192.168.0.106'] nodes = ['192.168.0.110','192.168.0.106', '192.168.0.107']
ds, train, test = get_train_test() ds, train, test = get_train_test()