From cf24e88b8a601461f0fca238c9cacbf70cc8e289 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Mon, 3 Dec 2018 10:10:39 -0200
Subject: [PATCH] Evolutionary hyperparameter optimization

---
 pyFTS/hyperparam/Evolutionary.py | 338 +++++++++++++++++++++++++++++++
 pyFTS/hyperparam/GridSearch.py   |   3 +-
 pyFTS/tests/hyperparam.py        |  22 +-
 pyFTS/tests/multivariate.py      |  23 +++
 4 files changed, 376 insertions(+), 10 deletions(-)
 create mode 100644 pyFTS/hyperparam/Evolutionary.py

diff --git a/pyFTS/hyperparam/Evolutionary.py b/pyFTS/hyperparam/Evolutionary.py
new file mode 100644
index 0000000..1cccfb7
--- /dev/null
+++ b/pyFTS/hyperparam/Evolutionary.py
@@ -0,0 +1,338 @@
+import numpy as np
+import pandas as pd
+import math
+import time
+from functools import reduce
+from operator import itemgetter
+import dispy
+
+import random
+from pyFTS.common import Util
+from pyFTS.benchmarks import Measures
+from pyFTS.partitioners import Grid, Entropy  # , Huarng
+from pyFTS.models import hofts
+from pyFTS.common import Membership
+from pyFTS.hyperparam import Util as hUtil
+
+
+# Builds an individual; also used to assemble offspring after the genetic operators
+def genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse):
+    ind = dict(mf=mf, npart=npart, partitioner=partitioner, order=order, alpha=alpha,
+               lags=lags, len_lags=len_lags, rmse=rmse)
+    return ind
+
+
+# Generates a random individual
+def random_genotype():
+    order = random.randint(1, 3)
+    return genotype(
+        random.randint(1, 4),
+        random.randint(10, 100),
+        random.randint(1, 2),
+        order,
+        random.uniform(0, .5),
+        sorted(random.sample(range(1, 50), order)),
+        [],
+        []
+    )
+
+
+# Generates an initial population of size n
+def initial_population(n):
+    return [random_genotype() for _ in range(n)]
+
+
+# Phenotype mapping: builds and trains the model encoded by an individual
+def phenotype(individual, train):
+    try:
+        if individual['mf'] == 1:
+            mf = Membership.trimf
+        elif individual['mf'] == 2:
+            mf = Membership.trapmf
+        elif individual['mf'] == 3 and individual['partitioner'] != 2:
+            mf = Membership.gaussmf
+        else:
+            mf = Membership.trimf
+
+        if individual['partitioner'] == 1:
+            partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
+        elif individual['partitioner'] == 2:
+            partitioner = Entropy.EntropyPartitioner(data=train, npart=individual['npart'], func=mf)
+
+        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
+                                           lags=individual['lags'],
+                                           alpha_cut=individual['alpha'],
+                                           order=individual['order'])
+
+        model.fit(train)
+
+        return model
+
+    except Exception as ex:
+        print("EXCEPTION!", str(ex), str(individual))
+        return None
+
+
+# Evaluation function: returns the parsimony (len_lags) and accuracy (rmse)
+# objectives, both to be minimized
+def evaluation1(dataset, individual):
+    from pyFTS.common import Util
+    from pyFTS.benchmarks import Measures
+
+    try:
+        results = []
+        lengths = []
+
+        for count, train, test in Util.sliding_window(dataset, 800, train=.8, inc=.25):
+            model = phenotype(individual, train)
+
+            if model is None:
+                return np.inf, np.inf
+
+            rmse, _, _ = Measures.get_point_statistics(test, model)
+            lengths.append(len(model))
+
+            results.append(rmse)
+
+        _lags = sum(model.lags) * 100
+
+        rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
+        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])
+
+        return len_lags, rmse
+
+    except Exception as ex:
+        print("EXCEPTION!", str(ex), str(individual))
+        return np.inf, np.inf
+
+
+# Binary tournament selection on the given objective
+def tournament(population, objective):
+    n = len(population) - 1
+
+    r1 = random.randint(0, n) if n > 2 else 0
+    r2 = random.randint(0, n) if n > 2 else 1
+    ix = r1 if population[r1][objective] < population[r2][objective] else r2
+    return population[ix]
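+
+
+# A minimal usage sketch of the pieces above (assuming a numeric series
+# `dataset` with at least ~1000 points, since evaluation1 slides 800-point
+# windows over it):
+#
+#   ind = random_genotype()
+#   model = phenotype(ind, dataset[:800])
+#   len_lags, rmse = evaluation1(dataset, ind)
+#
+# Both objectives are minimized: rmse blends the mean and the dispersion of
+# the window errors, len_lags blends rule-base size and total lag span.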
+
+
+# Parent selection: two tournaments on accuracy, then a runoff on parsimony
+def selection1(population):
+    pai1 = tournament(population, 'rmse')
+    pai2 = tournament(population, 'rmse')
+
+    finalista = tournament([pai1, pai2], 'len_lags')
+
+    return finalista
+
+
+# Lag crossover: weighted average of the lags shared by both parents, the
+# longer parent's remaining lags afterwards, repaired to stay strictly increasing
+def lag_crossover2(best, worst):
+    order = int(round(.7 * best['order'] + .3 * worst['order']))
+    lags = []
+
+    min_order = min(best['order'], worst['order'])
+
+    max_order = best if best['order'] > min_order else worst
+
+    for k in np.arange(0, order):
+        if k < min_order:
+            lags.append(int(round(.7 * best['lags'][k] + .3 * worst['lags'][k])))
+        else:
+            lags.append(max_order['lags'][k])
+
+    for k in range(1, order):
+        while lags[k - 1] >= lags[k]:
+            lags[k] += random.randint(1, 10)
+
+    return order, lags
+
+
+# Crossover: the fitter parent contributes with weight .7, the other with .3
+def crossover(pais):
+    if pais[0]['rmse'] < pais[1]['rmse']:
+        best = pais[0]
+        worst = pais[1]
+    else:
+        best = pais[1]
+        worst = pais[0]
+
+    npart = int(round(.7 * best['npart'] + .3 * worst['npart']))
+    alpha = float(.7 * best['alpha'] + .3 * worst['alpha'])
+
+    rnd = random.uniform(0, 1)
+    mf = best['mf'] if rnd < .7 else worst['mf']
+
+    rnd = random.uniform(0, 1)
+    partitioner = best['partitioner'] if rnd < .7 else worst['partitioner']
+
+    order, lags = lag_crossover2(best, worst)
+
+    rmse = []
+    len_lags = []
+
+    filho = genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse)
+
+    return filho
+
+
+# Lag mutation: Gaussian perturbation of the existing lags, repaired to stay
+# strictly increasing
+def mutation_lags(lags, order):
+    new = sorted(random.sample(range(1, 50), order))
+    for lag in range(min(len(lags), order)):
+        new[lag] = min(50, max(1, int(lags[lag] + np.random.normal(0, 0.5))))
+
+    if order > 1:
+        for k in np.arange(1, order):
+            while new[k] <= new[k - 1]:
+                new[k] = int(new[k] + np.random.randint(1, 5))
+
+    return new
+
+
+# Mutation: perturbs the numeric genes with Gaussian noise and resamples the
+# categorical ones
+def mutation(individual):
+    individual['npart'] = min(50, max(3, int(individual['npart'] + np.random.normal(0, 2))))
+    individual['alpha'] = min(.5, max(0, individual['alpha'] + np.random.normal(0, .1)))
+    individual['mf'] = random.randint(1, 2)
+    individual['partitioner'] = random.randint(1, 2)
+    individual['order'] = min(5, max(1, int(individual['order'] + np.random.normal(0, 0.5))))
+    individual['lags'] = mutation_lags(individual['lags'], individual['order'])
+
+    return individual
+
+
+# Elitism: the best individual of the current population replaces the worst
+# individual of the new one
+def elitism(population, new_population):
+    # Best individual of the current population
+    population = sorted(population, key=itemgetter('rmse'))
+    best = population[0]
+
+    # Sort the new population and overwrite its worst individual
+    new_population = sorted(new_population, key=itemgetter('rmse'))
+    new_population[-1] = best
+
+    # Sort again, now including the elite individual
+    return sorted(new_population, key=itemgetter('rmse'))
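+
+
+# A minimal sketch of one variation step with hand-built parents (field
+# values follow random_genotype(); rmse must hold a scalar before crossover,
+# since the fitter parent gets the .7 weight):
+#
+#   a = genotype(1, 30, 1, 2, .1, [1, 4], [], 10.0)
+#   b = genotype(2, 50, 1, 3, .3, [2, 5, 9], [], 12.0)
+#   child = mutation(crossover([a, b]))
+#
+# lag_crossover2 and mutation_lags both repair the lag list, so
+# child['lags'] always stays strictly increasing.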
+
+
+def genetico(dataset, ngen, npop, pcruz, pmut, option=1):
+    new_populacao = []
+
+    # Generate and evaluate the initial population
+    populacao = initial_population(npop)
+    result = [evaluation1(dataset, k) for k in populacao]
+
+    for i in range(npop):
+        if option == 1:
+            populacao[i]['len_lags'], populacao[i]['rmse'] = result[i]
+        else:
+            populacao[i]['rmse'] = result[i]
+
+    # Generations
+    for i in range(ngen):
+        # Build the new population
+        for j in range(int(npop / 2)):
+            # Parent selection
+            pais = [selection1(populacao), selection1(populacao)]
+
+            # Crossover with probability pcruz
+            rnd = random.uniform(0, 1)
+            filho1 = crossover(pais) if pcruz > rnd else pais[0]
+            rnd = random.uniform(0, 1)
+            filho2 = crossover(pais) if pcruz > rnd else pais[1]
+
+            # Mutation with probability pmut
+            rnd = random.uniform(0, 1)
+            filho11 = mutation(filho1) if pmut > rnd else filho1
+            rnd = random.uniform(0, 1)
+            filho22 = mutation(filho2) if pmut > rnd else filho2
+
+            # Insert the children into the new population
+            new_populacao.append(filho11)
+            new_populacao.append(filho22)
+
+        result = [evaluation1(dataset, k) for k in new_populacao]
+
+        for i in range(len(new_populacao)):
+            new_populacao[i]['len_lags'], new_populacao[i]['rmse'] = result[i]
+
+        populacao = elitism(populacao, new_populacao)
+
+        new_populacao = []
+
+    # Return the best individual found
+    melhorT = sorted(populacao, key=lambda item: item['rmse'])[0]
+
+    return melhorT
+
+
+def cluster_method(dataset, ngen, npop, pcruz, pmut, option=1):
+    print(ngen, npop, pcruz, pmut, option)
+
+    # Local import so dispy can ship this function to the cluster nodes
+    from pyFTS.hyperparam.Evolutionary import genetico
+
+    inicio = time.time()
+    ret = genetico(dataset, ngen, npop, pcruz, pmut, option)
+    fim = time.time()
+    ret['time'] = fim - inicio
+    ret['size'] = ret['len_lags']
+    return ret
+
+
+def process_jobs(jobs, datasetname, conn):
+    for job in jobs:
+        result = job()
+        if job.status == dispy.DispyJob.Finished and result is not None:
+            print("Processing result of {}".format(result))
+
+            metrics = ['rmse', 'size', 'time']
+
+            for metric in metrics:
+                record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
+                          result['order'], result['partitioner'], result['npart'],
+                          result['alpha'], str(result['lags']), metric, result[metric])
+
+                print(record)
+
+                hUtil.insert_hyperparam(record, conn)
+
+        else:
+            print(job.exception)
+            print(job.stdout)
+
+
+def execute(datasetname, dataset, **kwargs):
+    nodes = kwargs.get('nodes', ['127.0.0.1'])
+
+    cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
+    conn = hUtil.open_hyperparam_db('hyperparam.db')
+
+    ngen = kwargs.get('ngen', 70)
+    npop = kwargs.get('npop', 20)
+    pcruz = kwargs.get('pcruz', .8)
+    pmut = kwargs.get('pmut', .2)
+    option = kwargs.get('option', 1)
+
+    jobs = []
+
+    for i in range(kwargs.get('experiments', 30)):
+        print("Experiment {}".format(i))
+        job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
+        jobs.append(job)
+
+    process_jobs(jobs, datasetname, conn)
+
+    Util.stop_dispy_cluster(cluster, http_server)
diff --git a/pyFTS/hyperparam/GridSearch.py b/pyFTS/hyperparam/GridSearch.py
index 5110f54..9922721 100644
--- a/pyFTS/hyperparam/GridSearch.py
+++ b/pyFTS/hyperparam/GridSearch.py
@@ -54,7 +54,8 @@ def cluster_method(individual, train, test):
         size = len(model)
 
         return individual, rmse, size, mape, u
-    
+
+
 def process_jobs(jobs, datasetname, conn):
     for job in jobs:
         result, rmse, size, mape, u = job()
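For a quick smoke test of the new module without a dispy cluster, the evolutionary loop can also be driven in-process; a minimal sketch (not part of the patch), using the same Malaysia dataset as the test script below:

    from pyFTS.data import Malaysia
    from pyFTS.hyperparam import Evolutionary

    dataset = Malaysia.get_data('temperature')[:1000]

    # genetico runs the whole search locally and returns the best genotype found
    best = Evolutionary.genetico(dataset, ngen=5, npop=10, pcruz=.8, pmut=.2)
    print(best['order'], best['lags'], best['rmse'])
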
diff --git a/pyFTS/tests/hyperparam.py b/pyFTS/tests/hyperparam.py
index 4b897c3..1b3c34a 100644
--- a/pyFTS/tests/hyperparam.py
+++ b/pyFTS/tests/hyperparam.py
@@ -1,15 +1,15 @@
 import numpy as np
 
-from pyFTS.hyperparam import GridSearch
+from pyFTS.hyperparam import GridSearch, Evolutionary
 
-def get_train_test():
+def get_dataset():
     from pyFTS.data import Malaysia
     ds = Malaysia.get_data('temperature')[:1000]
     # ds = pd.read_csv('Malaysia.csv',delimiter=',' )[['temperature']].values[:2000].flatten().tolist()
-    train = ds[:800]
-    test = ds[800:]
+    #train = ds[:800]
+    #test = ds[800:]
 
-    return 'Malaysia.temperature', train, test
+    return 'Malaysia.temperature', ds  #train, test
 
 """
 hyperparams = {
@@ -20,7 +20,7 @@ hyperparams = {
     'lags': np.arange(1,35,2),
     'alpha': np.arange(.0, .5, .05)
 }
-"""
+
 
 hyperparams = {
     'order':[3], #[1, 2],
@@ -30,9 +30,13 @@ hyperparams = {
     'lags': np.arange(1, 10),
     'alpha': [.0, .3, .5]
 }
-
+"""
 nodes = ['192.168.0.106', '192.168.0.110', '192.168.0.107']
 
-ds, train, test = get_train_test()
+datasetname, dataset = get_dataset()
 
-GridSearch.execute(hyperparams, ds, train, test, nodes=nodes)
+#GridSearch.execute(hyperparams, ds, train, test, nodes=nodes)
+
+#Evolutionary.cluster_method(dataset, 70, 20, .8, .3, 1)
+
+Evolutionary.execute(datasetname, dataset, nodes=nodes, ngen=50, npop=30)
diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py
index 1f73595..c47e07c 100644
--- a/pyFTS/tests/multivariate.py
+++ b/pyFTS/tests/multivariate.py
@@ -17,8 +17,30 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 
+
+model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS1solarorder2knn3')
+
+data = [[12, 100], [13, 200]]
+
+for k in data:
+    k[0] = pd.to_datetime('2018-01-01 {}:00:00'.format(k[0]), format='%Y-%m-%d %H:%M:%S')
+
+df = pd.DataFrame(data, columns=['data', 'glo_avg'])
+
+#forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x: x + pd.to_timedelta(1, unit='h')})
+
+#print(forecasts)
+
+f = lambda x: x + pd.to_timedelta(1, unit='h')
+
+for ix, row in df.iterrows():
+    print(row['data'])
+    print(f(row['data']))
+
+
 # Multivariate time series
+'''
 train_mv = {}
 test_mv = {}
@@ -131,3 +153,4 @@ for ct, key in enumerate(models.keys()):
     Util.persist_obj(model, model.shortname)
 
     del(model)
+'''
\ No newline at end of file
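
The multivariate snippet above exercises the generators mechanism needed for multi-step-ahead forecasts: every variable other than the target needs a callable that derives its next value from the current one. A minimal sketch (not part of the patch) of the hour generator used in the commented-out predict call:

    import pandas as pd

    # advances a timestamp by one hour, producing the next value of the time variable
    next_hour = lambda x: x + pd.to_timedelta(1, unit='h')

    print(next_hour(pd.to_datetime('2018-01-01 12:00:00')))  # 2018-01-01 13:00:00

    # model.predict(df, steps_ahead=24, generators={'Hour': next_hour})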