Evolutive hyperparam

Petrônio Cândido 2018-12-03 10:10:39 -02:00
parent 4029b4223d
commit cf24e88b8a
4 changed files with 376 additions and 10 deletions

View File

@@ -0,0 +1,338 @@
import numpy as np
import pandas as pd
import math
import time
from functools import reduce
from operator import itemgetter
import dispy
import random
from pyFTS.common import Util
from pyFTS.benchmarks import Measures
from pyFTS.partitioners import Grid, Entropy # , Huarng
from pyFTS.models import hofts
from pyFTS.common import Membership
from pyFTS.hyperparam import Util as hUtil
# Builds an individual (genotype); also used to assemble offspring after the genetic operators
def genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse):
ind = dict(mf=mf, npart=npart, partitioner=partitioner, order=order, alpha=alpha, lags=lags, len_lags=len_lags,
rmse=rmse)
return ind
# Generates a random individual
def random_genotype():
order = random.randint(1, 3)
return genotype(
random.randint(1, 4),
random.randint(10, 100),
random.randint(1, 2),
order,
random.uniform(0, .5),
sorted(random.sample(range(1, 50), order)),
[],
[]
)
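# Example of a sampled individual (hypothetical values):
# {'mf': 2, 'npart': 37, 'partitioner': 1, 'order': 2, 'alpha': 0.21,
#  'lags': [3, 11], 'len_lags': [], 'rmse': []}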
# Generates a population of size n
def initial_population(n):
pop = []
for i in range(n):
pop.append(random_genotype())
return pop
# Evaluation: builds and trains the model encoded by an individual
def phenotype(individual, train):
try:
if individual['mf'] == 1:
mf = Membership.trimf
elif individual['mf'] == 2:
mf = Membership.trapmf
elif individual['mf'] == 3 and individual['partitioner'] != 2:
mf = Membership.gaussmf
else:
mf = Membership.trimf
if individual['partitioner'] == 1:
partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
elif individual['partitioner'] == 2:
partitioner = Entropy.EntropyPartitioner(data=train, npart=individual['npart'], func=mf)
model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
lags=individual['lags'],
alpha_cut=individual['alpha'],
order=individual['order'])
model.fit(train)
return model
except Exception as ex:
print("EXCEPTION!", str(ex), str(individual))
return None
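# Genotype encoding (cf. random_genotype): mf 1=trimf, 2=trapmf, 3=gaussmf
# (Gaussian is only used with the Grid partitioner); partitioner 1=Grid,
# 2=Entropy.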
def evaluation1(dataset, individual):
from pyFTS.common import Util
from pyFTS.benchmarks import Measures
try:
results = []
lengths = []
for count, train, test in Util.sliding_window(dataset, 800, train=.8, inc=.25):
model = phenotype(individual, train)
if model is None:
                return np.inf, np.inf
rmse, _, _ = Measures.get_point_statistics(test, model)
lengths.append(len(model))
results.append(rmse)
_lags = sum(model.lags) * 100
rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])
return len_lags, rmse
except Exception as ex:
print("EXCEPTION!", str(ex), str(individual))
        return np.inf, np.inf
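# Note: both objectives are scalarized over the sliding windows:
# 'rmse' = .6*mean(RMSE) + .4*std(RMSE) (accuracy plus stability), and
# 'len_lags' = .4*mean(model size) + .6*(100*sum(lags)) (parsimony).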
def tournament(population, objective):
n = len(population) - 1
r1 = random.randint(0, n) if n > 2 else 0
r2 = random.randint(0, n) if n > 2 else 1
ix = r1 if population[r1][objective] < population[r2][objective] else r2
return population[ix]
def selection1(population):
    # Double tournament: two rounds by 'rmse', then a final round by 'len_lags'
    pai1 = tournament(population, 'rmse')
    pai2 = tournament(population, 'rmse')
    finalista = tournament([pai1, pai2], 'len_lags')
    return finalista
def lag_crossover2(best, worst):
order = int(round(.7 * best['order'] + .3 * worst['order']))
lags = []
min_order = min(best['order'], worst['order'])
max_order = best if best['order'] > min_order else worst
for k in np.arange(0, order):
if k < min_order:
lags.append(int(round(.7 * best['lags'][k] + .3 * worst['lags'][k])))
else:
lags.append(max_order['lags'][k])
for k in range(1, order):
while lags[k - 1] >= lags[k]:
lags[k] += random.randint(1, 10)
return order, lags
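# Worked example (hypothetical parents): best has order=3, lags=[1, 5, 9];
# worst has order=1, lags=[2]. Child order = round(.7*3 + .3*1) = 2; the first
# lag = round(.7*1 + .3*2) = 1, the second is copied from the higher-order
# parent (5), and the final loop repairs any ordering violations.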
# Crossover
def crossover(pais):
if pais[0]['rmse'] < pais[1]['rmse']:
best = pais[0]
worst = pais[1]
else:
best = pais[1]
worst = pais[0]
npart = int(round(.7 * best['npart'] + .3 * worst['npart']))
alpha = float(.7 * best['alpha'] + .3 * worst['alpha'])
rnd = random.uniform(0, 1)
mf = best['mf'] if rnd < .7 else worst['mf']
rnd = random.uniform(0, 1)
partitioner = best['partitioner'] if rnd < .7 else worst['partitioner']
order, lags = lag_crossover2(best, worst)
rmse = []
len_lags = []
filho = genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse)
return filho
# Mutation (pmut, the mutation probability, is applied in the main loop)
def mutation_lags(lags, order):
    # Draw a fresh sorted sample, then nudge the lags inherited from the parent
    new = sorted(random.sample(range(1, 50), order))
    for lag in np.arange(min(order, len(lags))):
        new[lag] = min(50, max(1, int(lags[lag] + np.random.normal(0, 0.5))))
    if order > 1:
        # Repair: keep the lags strictly increasing
        for k in np.arange(1, order):
            while new[k] <= new[k - 1]:
                new[k] = int(new[k] + np.random.randint(1, 5))
    return new
def mutation(individual):
individual['npart'] = min(50, max(3, int(individual['npart'] + np.random.normal(0, 2))))
individual['alpha'] = min(.5, max(0, individual['alpha'] + np.random.normal(0, .1)))
individual['mf'] = random.randint(1, 2)
individual['partitioner'] = random.randint(1, 2)
individual['order'] = min(5, max(1, int(individual['order'] + np.random.normal(0, 0.5))))
    # Mutate the lags via mutation_lags, consistently with the (possibly new) order
    individual['lags'] = mutation_lags(individual['lags'], individual['order'])
#individual['lags'] = sorted(random.sample(range(1, 50), individual['order']))
return individual
# Elitism
def elitism(population, new_population):
    # Take the best individual of the current population
population = sorted(population, key=itemgetter('rmse'))
best = population[0]
    # Sort the new population and replace its worst individual with that best one
new_population = sorted(new_population, key=itemgetter('rmse'))
new_population[-1] = best
    # Sort again and return the population ordered by 'rmse'
new_population = sorted(new_population, key=itemgetter('rmse'))
return new_population
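# Note: since the best current individual replaces the worst offspring, the
# best 'rmse' in the population never worsens between generations.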
def genetico(dataset, ngen, npop, pcruz, pmut, option=1):
    new_populacao = []
    # Generate the initial population
populacao = initial_population(npop)
    # Evaluate the initial population
result = [evaluation1(dataset, k) for k in populacao]
for i in range(npop):
if option == 1:
populacao[i]['len_lags'], populacao[i]['rmse'] = result[i]
else:
populacao[i]['rmse'] = result[i]
    # Generations
for i in range(ngen):
        # Iterate to build the new population
for j in range(int(npop / 2)):
            # Parent selection
pais = []
pais.append(selection1(populacao))
pais.append(selection1(populacao))
            # Crossover with probability pcruz
rnd = random.uniform(0, 1)
filho1 = crossover(pais) if pcruz > rnd else pais[0]
rnd = random.uniform(0, 1)
filho2 = crossover(pais) if pcruz > rnd else pais[1]
            # Mutation with probability pmut
rnd = random.uniform(0, 1)
filho11 = mutation(filho1) if pmut > rnd else filho1
rnd = random.uniform(0, 1)
filho22 = mutation(filho2) if pmut > rnd else filho2
            # Insert the offspring into the new population
new_populacao.append(filho11)
new_populacao.append(filho22)
result = [evaluation1(dataset, k) for k in new_populacao]
        for k in range(len(new_populacao)):
            new_populacao[k]['len_lags'], new_populacao[k]['rmse'] = result[k]
populacao = elitism(populacao, new_populacao)
new_populacao = []
melhorT = sorted(populacao, key=lambda item: item['rmse'])[0]
return melhorT
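# Minimal local usage sketch (assumes `data` is a univariate series of ~1000
# points, e.g. a list or 1-d numpy array):
#   best = genetico(data, ngen=30, npop=20, pcruz=.8, pmut=.2)
#   print(best['rmse'], best['order'], best['lags'])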
def cluster_method(dataset, ngen, npop, pcruz, pmut, option=1):
print(ngen, npop, pcruz, pmut, option)
from pyFTS.hyperparam.Evolutionary import genetico
inicio = time.time()
ret = genetico(dataset, ngen, npop, pcruz, pmut, option)
fim = time.time()
ret['time'] = fim - inicio
ret['size'] = ret['len_lags']
return ret
def process_jobs(jobs, datasetname, conn):
for job in jobs:
result = job()
if job.status == dispy.DispyJob.Finished and result is not None:
print("Processing result of {}".format(result))
metrics = ['rmse', 'size', 'time']
for metric in metrics:
record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
result['order'], result['partitioner'], result['npart'],
result['alpha'], str(result['lags']), metric, result[metric])
print(record)
hUtil.insert_hyperparam(record, conn)
else:
print(job.exception)
print(job.stdout)
def execute(datasetname, dataset, **kwargs):
nodes = kwargs.get('nodes', ['127.0.0.1'])
cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
conn = hUtil.open_hyperparam_db('hyperparam.db')
ngen = kwargs.get('ngen', 70)
npop = kwargs.get('npop', 20)
pcruz = kwargs.get('pcruz', .8)
pmut = kwargs.get('pmut', .2)
option = kwargs.get('option', 1)
jobs = []
for i in range(kwargs.get('experiments', 30)):
print("Experiment {}".format(i))
job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
jobs.append(job)
process_jobs(jobs, datasetname, conn)
Util.stop_dispy_cluster(cluster, http_server)
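
A minimal sketch of driving the new module locally, without the dispy cluster (the dataset choice and parameter values are illustrative; cluster_method and genetico are defined above):

from pyFTS.data import Malaysia
from pyFTS.hyperparam import Evolutionary

# Illustrative: the first ~1000 points of the Malaysia temperature series,
# the same dataset used by the updated test script below
data = Malaysia.get_data('temperature')[:1000]

# Run the GA in-process; the result is the best individual found,
# annotated with 'time' (seconds) and 'size' (the 'len_lags' objective)
ret = Evolutionary.cluster_method(data, 10, 10, .8, .2)
print(ret['rmse'], ret['order'], ret['lags'], ret['time'])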

View File

@@ -54,7 +54,8 @@ def cluster_method(individual, train, test):
size = len(model)
return individual, rmse, size, mape, u
def process_jobs(jobs, datasetname, conn):
for job in jobs:
result, rmse, size, mape, u = job()

View File

@@ -1,15 +1,15 @@
import numpy as np
-from pyFTS.hyperparam import GridSearch
+from pyFTS.hyperparam import GridSearch, Evolutionary
-def get_train_test():
+def get_dataset():
    from pyFTS.data import Malaysia
    ds = Malaysia.get_data('temperature')[:1000]
    # ds = pd.read_csv('Malaysia.csv',delimiter=',' )[['temperature']].values[:2000].flatten().tolist()
-    train = ds[:800]
-    test = ds[800:]
+    #train = ds[:800]
+    #test = ds[800:]
-    return 'Malaysia.temperature', train, test
+    return 'Malaysia.temperature', ds #train, test
"""
hyperparams = {
@@ -20,7 +20,7 @@ hyperparams = {
'lags': np.arange(1,35,2),
'alpha': np.arange(.0, .5, .05)
}
"""
hyperparams = {
'order':[3], #[1, 2],
@@ -30,9 +30,13 @@ hyperparams = {
'lags': np.arange(1, 10),
'alpha': [.0, .3, .5]
}
"""
nodes = ['192.168.0.106', '192.168.0.110', '192.168.0.107']
-ds, train, test = get_train_test()
+datsetname, dataset = get_dataset()
-GridSearch.execute(hyperparams, ds, train, test, nodes=nodes)
+#GridSearch.execute(hyperparams, ds, train, test, nodes=nodes)
+#Evolutionary.cluster_method(dataset, 70, 20, .8, .3, 1)
+Evolutionary.execute(datsetname, dataset, nodes=nodes, ngen=50, npop=30)

View File

@@ -17,8 +17,30 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
data = [[12, 100], [13, 200]]
for k in data:
k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
df = pd.DataFrame(data, columns=['data', 'glo_avg'])
#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
#print(forecasts)
f = lambda x: x + pd.to_timedelta(1, unit='h')
for ix, row in df.iterrows():
print(row['data'])
print(f(row['data']))
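# Illustrative behavior: each row's timestamp is printed followed by the same
# timestamp shifted one hour (e.g. 2018-01-01 12:00:00, then
# 2018-01-01 13:00:00), i.e. the per-step increment the 'Hour' generator
# would feed to predict().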
# Multivariate time series
'''
train_mv = {}
test_mv = {}
@@ -131,3 +153,4 @@ for ct, key in enumerate(models.keys()):
Util.persist_obj(model, model.shortname)
del(model)
'''