Optimizations on hyperparam and spark
commit 2ce04b1031
parent a0af21d4b9
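In short: the Spark trainer now creates twice as many partitions per node, the evolutionary phenotype is pinned to the Grid partitioner, the genetic algorithm gains timestamped progress logging, and the hyperparameter test script is rewritten to run the GA over Spark on the SONDA 'glo_avg' series and dump the collected statistics to JSON.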
@@ -180,7 +180,7 @@ def distributed_train(model, data, url=SPARK_ADDR, app='pyFTS'):

     func = lambda x: slave_train_univariate(x, **parameters)

-    flrgs = context.parallelize(data).repartition(nodes*2).mapPartitions(func)
+    flrgs = context.parallelize(data).repartition(nodes*4).mapPartitions(func)

     for k in flrgs.collect():
         model.append_rule(k[1])
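
The only change here is the repartition factor: the input is split into nodes*4 partitions instead of nodes*2, so each worker cycles through more, smaller partitions and slow partitions hurt overall throughput less. A minimal sketch of the pattern, assuming a local SparkContext and a toy stand-in for slave_train_univariate (both hypothetical, not part of this commit):

    from pyspark import SparkContext

    sc = SparkContext('local[4]', 'pyFTS-sketch')
    nodes = 3  # stand-in for the cluster size used by distributed_train

    def train_partition(rows):
        # placeholder for slave_train_univariate: each partition yields
        # (key, value) pairs that the driver folds back into one model
        yield ('count', len(list(rows)))

    data = list(range(10000))
    flrgs = (sc.parallelize(data)
               .repartition(nodes * 4)  # the factor this commit raises from 2 to 4
               .mapPartitions(train_partition))

    for k in flrgs.collect():
        print(k[1])
    sc.stop()
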
@@ -88,10 +88,10 @@ def phenotype(individual, train, parameters={}):
     else:
         mf = Membership.trimf

-    if individual['partitioner'] == 1:
-        partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
-    elif individual['partitioner'] == 2:
-        partitioner = Entropy.EntropyPartitioner(data=train, npart=individual['npart'], func=mf)
+    #if individual['partitioner'] == 1:
+    partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
+    #elif individual['partitioner'] == 2:
+    #    partitioner = Entropy.EntropyPartitioner(data=train, npart=individual['npart'], func=mf)

     model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                        lags=individual['lags'],
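
With the if/elif commented out, phenotype ignores the individual's partitioner gene and always builds a Grid partitioner; the Entropy branch is dead code. A minimal standalone sketch of the model this phenotype produces, with illustrative npart and lags values (not taken from the commit):

    import numpy as np
    from pyFTS.common import Membership
    from pyFTS.partitioners import Grid
    from pyFTS.models import hofts

    train = np.sin(np.linspace(0, 10 * np.pi, 1000)) + 2  # toy series

    # same call shape as the hunk above: Grid partitioner with a
    # triangular membership function feeding a weighted high-order FTS
    partitioner = Grid.GridPartitioner(data=train, npart=35, func=Membership.trimf)
    model = hofts.WeightedHighOrderFTS(partitioner=partitioner, lags=[1, 2, 3])
    model.fit(train)
    print(model.predict(train[-10:]))
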
@@ -372,11 +372,12 @@ def GeneticAlgorithm(dataset, **kwargs):
     last_best = population[0]
     best = population[1]

+    print("Evaluating initial population {}".format(time.time()))
     for individual in population:
         individual['len_lags'], individual['rmse'] = evaluate(dataset, individual, **kwargs)

     for i in range(ngen):
-        print("GENERATION {}".format(i))
+        print("GENERATION {} {}".format(i, time.time()))

         generation_statistics = {}

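Both prints are timing instrumentation: one timestamp before the initial population is evaluated and one on each generation banner, so per-generation wall-clock cost can be read straight off the log.
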
@@ -403,7 +404,6 @@ def GeneticAlgorithm(dataset, **kwargs):
             if collect_statistics:
                 _f1.append(f1)
                 _f2.append(f2)
-            #print('eval {}'.format(individual))

         if collect_statistics:
             generation_statistics['population'] = {'f1': np.nanmedian(_f1), 'f2': np.nanmedian(_f2)}
@@ -426,13 +426,13 @@ def GeneticAlgorithm(dataset, **kwargs):

         if last_best['rmse'] <= best['rmse'] and last_best['len_lags'] <= best['len_lags']:
             no_improvement_count += 1
-            #print("WITHOUT IMPROVEMENT {}".format(no_improvement_count))
+            print("WITHOUT IMPROVEMENT {}".format(no_improvement_count))
             pmut += .05
         else:
             no_improvement_count = 0
             pcruz = kwargs.get('pcruz', .5)
             pmut = kwargs.get('pmut', .3)
-            #print(best)
+            print(best)

         if no_improvement_count == mgen:
             break
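Enabling these prints makes the stagnation schedule observable: each generation without improvement bumps pmut by .05, an improving generation resets pcruz and pmut to their configured defaults, and the run stops early after mgen stagnant generations.
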
@@ -30,7 +30,7 @@ hyperparams = {
 'lags': np.arange(1, 10),
 'alpha': [.0, .3, .5]
 }
-"""
+
 nodes = ['192.168.0.106', '192.168.0.110', '192.168.0.107']

 datsetname, dataset = get_dataset()
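This hunk and the two that follow rework the test script by moving the block-comment boundaries: the closing """ after the hyperparams dict is dropped here, the stray ''' markers below are blanked, and the trailing #''' becomes """, so everything from the old grid-search example through the Evolutionary.execute call ends up inside one docstring and only the new Spark run at the bottom of the file stays active.
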
@@ -39,7 +39,7 @@ datsetname, dataset = get_dataset()

 #Evolutionary.cluster_method(dataset, 70, 20, .8, .3, 1)

-'''
+
 from pyFTS.models import hofts
 from pyFTS.partitioners import Grid
 from pyFTS.benchmarks import Measures
@@ -56,11 +56,36 @@ Measures.get_point_statistics(dataset[800:1000], model)

 print(model)

-'''
+
 ret = Evolutionary.execute(datsetname, dataset,
                            ngen=30, npop=20, pcruz=.5, pmut=.3,
                            window_size=800, experiments=30)
-#parameters={'distributed': 'spark', 'url': 'spark://192.168.0.106:7077'})
+parameters={'distributed': 'spark', 'url': 'spark://192.168.0.106:7077'})

 print(ret)
-#'''
+"""
+
+from pyFTS.hyperparam import Evolutionary
+
+from pyFTS.data import SONDA
+
+data = np.array(SONDA.get_data('glo_avg'))
+
+data = data[~(np.isnan(data) | np.equal(data, 0.0))]
+
+dataset = data[:1000000]
+
+del(data)
+
+ret, statistics = Evolutionary.GeneticAlgorithm(dataset, ngen=30, npop=20, pcruz=.5,
+                                                pmut=.3, window_size=800000, collect_statistics=True,
+                                                parameters={'distributed': 'spark',
+                                                            'url': 'spark://192.168.0.106:7077'})
+
+import json
+
+print(ret)
+
+with open('statistics.txt', 'w') as file:
+    file.write(json.dumps(statistics))  # use `json.loads` to do the reverse
+
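
The newly active driver filters NaNs and zeros out of one million points of the SONDA 'glo_avg' series, runs the GA distributed over Spark with an 800000-point window, and serializes the collected statistics. Reading them back, as the inline comment suggests:

    import json

    with open('statistics.txt') as file:
        statistics = json.loads(file.read())  # the reverse of json.dumps above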