From ad1205121deeb1ce555b634c4f73b1a50767c14f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Wed, 25 Dec 2019 22:11:08 -0300 Subject: [PATCH] Random Search in hyperparam --- pyFTS/common/fts.py | 4 -- pyFTS/hyperparam/mvfts.py | 76 +++++++++++++++++++++++++++- pyFTS/hyperparam/random_search.py | 84 +++++++++++++++++++++++++++++++ pyFTS/partitioners/partitioner.py | 10 ++-- pyFTS/tests/hyperparam.py | 16 +++++- 5 files changed, 178 insertions(+), 12 deletions(-) create mode 100644 pyFTS/hyperparam/random_search.py diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py index df9bbe3..b0c66e5 100644 --- a/pyFTS/common/fts.py +++ b/pyFTS/common/fts.py @@ -410,10 +410,6 @@ class FTS(object): if save: Util.persist_obj(self, file_path) - if 'statistics' in kw: - kwargs['statistics'] = kw['statistics'] - print(kwargs['statistics']) - def clone_parameters(self, model): """ diff --git a/pyFTS/hyperparam/mvfts.py b/pyFTS/hyperparam/mvfts.py index 20e9816..b4faf41 100644 --- a/pyFTS/hyperparam/mvfts.py +++ b/pyFTS/hyperparam/mvfts.py @@ -29,7 +29,7 @@ from pyFTS.common import Membership from pyFTS.models import hofts, ifts, pwfts from pyFTS.hyperparam import Util as hUtil from pyFTS.distributed import dispy as dUtil -from pyFTS.hyperparam import Evolutionary +from pyFTS.hyperparam import Evolutionary, random_search as RS from pyFTS.models.multivariate import mvfts, wmvfts, variable from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime @@ -315,6 +315,7 @@ def crossover_variable_params(best, worst, var): param = {'partitioner': partitioner, 'npart': npart, 'alpha': alpha, 'mf': mf} return param + def mutation(individual, **kwargs): """ Mutation operator @@ -356,6 +357,51 @@ def mutation(individual, **kwargs): return individual +def mutation_random_search(individual, **kwargs): + """ + Mutation operator + + :param individual: an individual genotype + :param pmut: individual probability o + :return: + """ + + vars = kwargs.get('variables', None) + tvar = kwargs.get('target_variable', None) + l = len(vars) + + il = len(individual['explanatory_variables']) + # + if il > 1: + for l in range(il): + il = len(individual['explanatory_variables']) + rnd = random.uniform(0, 1) + if rnd > .5: + rnd = random.randint(0, il-1) + val = individual['explanatory_variables'][rnd] + individual['explanatory_variables'].remove(val) + individual['explanatory_params'].pop(rnd) + else: + rnd = random.randint(0, l-1) + while rnd in individual['explanatory_variables']: + rnd = random.randint(0, l-1) + individual['explanatory_variables'].append(rnd) + individual['explanatory_params'].append(random_param(vars[rnd])) + + for ct in np.arange(len(individual['explanatory_variables'])): + rnd = random.uniform(0, 1) + if rnd > .5: + mutate_variable_params(individual['explanatory_params'][ct], vars[ct]) + + rnd = random.uniform(0, 1) + if rnd > .5: + mutate_variable_params(individual['target_params'], tvar) + + individual['f1'] = None + individual['f2'] = None + + return individual + def mutate_variable_params(param, var): if var['type']=='common': @@ -458,3 +504,31 @@ def log_result(datasetname, fts_method, result): file.write(json.dumps(result)) print(result) + + +def random_search(datasetname, dataset, **kwargs): + experiments = kwargs.get('experiments', 30) + + distributed = kwargs.get('distributed', False) + + fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS) + shortname = str(fts_method.__module__).split('.')[-1] + + kwargs['mutation_operator'] = mutation_random_search + kwargs['evaluation_operator'] = evaluate + kwargs['random_individual'] = random_genotype + + ret = [] + for i in np.arange(experiments): + print("Experiment {}".format(i)) + + start = time.time() + ret, statistics = RS.execute (dataset, **kwargs) + end = time.time() + ret['time'] = end - start + experiment = {'individual': ret, 'statistics': statistics} + + ret = process_experiment(shortname, experiment, datasetname) + + + return ret \ No newline at end of file diff --git a/pyFTS/hyperparam/random_search.py b/pyFTS/hyperparam/random_search.py new file mode 100644 index 0000000..3f94995 --- /dev/null +++ b/pyFTS/hyperparam/random_search.py @@ -0,0 +1,84 @@ +""" +Simple Random Search Hyperparameter Optimization +""" + +from pyFTS.hyperparam import Evolutionary +import time + +__measures = ['f1', 'f2', 'rmse', 'size'] + + +def execute( dataset, **kwargs): + """ + Batch execution of Random Search Hyperparameter Optimization + + :param datasetname: + :param dataset: The time series to optimize the FTS + :keyword ngen: An integer value with the maximum number of generations, default value: 30 + :keyword mgen: An integer value with the maximum number of generations without improvement to stop, default value 7 + :keyword fts_method: The FTS method to optimize + :keyword parameters: dict with model specific arguments for fts_method + :keyword random_individual: create an random genotype + :keyword evalutation_operator: a function that receives a dataset and an individual and return its fitness + :keyword mutation_operator: a function that receives one individual and return a changed individual + :keyword window_size: An integer value with the the length of scrolling window for train/test on dataset + :keyword train_rate: A float value between 0 and 1 with the train/test split ([0,1]) + :keyword increment_rate: A float value between 0 and 1 with the the increment of the scrolling window, + relative to the window_size ([0,1]) + :keyword collect_statistics: A boolean value indicating to collect statistics for each generation + :keyword distributed: A value indicating it the execution will be local and sequential (distributed=False), + or parallel and distributed (distributed='dispy' or distributed='spark') + :keyword cluster: If distributed='dispy' the list of cluster nodes, else if distributed='spark' it is the master node + :return: the best genotype + """ + + ngen = kwargs.get('ngen',30) + mgen = kwargs.get('mgen', 7) + + kwargs['pmut'] = 1.0 + + random_individual = kwargs.get('random_individual', Evolutionary.random_genotype) + evaluation_operator = kwargs.get('evaluation_operator', Evolutionary.evaluate) + mutation_operator = kwargs.get('mutation_operator', Evolutionary.mutation) + + no_improvement_count = 0 + + individual = random_individual(**kwargs) + + stat = {} + + stat[0] = {} + + ret = evaluation_operator(dataset, individual, **kwargs) + for key in __measures: + individual[key] = ret[key] + stat[0][key] = ret[key] + + print(individual) + + for i in range(1,ngen+1): + print("GENERATION {} {}".format(i, time.time())) + + new = mutation_operator(individual, **kwargs) + ret = evaluation_operator(dataset, new, **kwargs) + new_stat = {} + for key in __measures: + new[key] = ret[key] + new_stat[key] = ret[key] + + print(new) + + if new['f1'] <= individual['f1'] and new['f2'] <= individual['f2']: + individual = new + no_improvement_count = 0 + stat[i] = new_stat + print(individual) + else: + stat[i] = stat[i-1] + no_improvement_count += 1 + print("WITHOUT IMPROVEMENT {}".format(no_improvement_count)) + + if no_improvement_count == mgen: + break + + return individual, stat diff --git a/pyFTS/partitioners/partitioner.py b/pyFTS/partitioners/partitioner.py index 15deb91..064feb4 100644 --- a/pyFTS/partitioners/partitioner.py +++ b/pyFTS/partitioners/partitioner.py @@ -162,12 +162,10 @@ class Partitioner(object): nearest = self.search(data, type='index') mv = np.zeros(self.partitions) - try: - for ix in nearest: - tmp = self[ix].membership(data) - mv[ix] = tmp if tmp >= alpha_cut else 0. - except: - print(ix) + for ix in nearest: + tmp = self[ix].membership(data) + mv[ix] = tmp if tmp >= alpha_cut else 0. + ix = np.ravel(np.argwhere(mv > 0.)) diff --git a/pyFTS/tests/hyperparam.py b/pyFTS/tests/hyperparam.py index 18b46b1..b93c6bd 100644 --- a/pyFTS/tests/hyperparam.py +++ b/pyFTS/tests/hyperparam.py @@ -56,6 +56,20 @@ explanatory_variables =[ target_variable = {'name': 'Load', 'data_label': 'load', 'type': 'common'} nodes=['192.168.28.38'] + +deho_mv.random_search(datsetname, dataset, + ngen=200, mgen=200, + window_size=2000, train_rate=.9, increment_rate=1, + experiments=1, + fts_method=wmvfts.WeightedMVFTS, + variables=explanatory_variables, + target_variable=target_variable, + #distributed='dispy', nodes=nodes, + parameters=dict(num_batches=5) + #parameters=dict(distributed='dispy', nodes=nodes, num_batches=5) + ) + +''' deho_mv.execute(datsetname, dataset, ngen=20, npop=15,psel=0.6, pcross=.5, pmut=.3, window_size=2000, train_rate=.9, increment_rate=1, @@ -67,7 +81,7 @@ deho_mv.execute(datsetname, dataset, parameters=dict(num_batches=5) #parameters=dict(distributed='dispy', nodes=nodes, num_batches=5) ) - +''' ''' ret = Evolutionary.execute(datsetname, dataset, ngen=30, npop=20,psel=0.6, pcross=.5, pmut=.3,