Bugfixes and improvements in MVFTS and DEHO
This commit is contained in:
parent
9f41a49ad9
commit
876de2721d
@ -332,7 +332,7 @@ class FTS(object):
|
|||||||
|
|
||||||
dump = kwargs.get('dump', None)
|
dump = kwargs.get('dump', None)
|
||||||
|
|
||||||
num_batches = kwargs.get('num_batches', 10)
|
num_batches = kwargs.get('num_batches', None)
|
||||||
|
|
||||||
save = kwargs.get('save_model', False) # save model on disk
|
save = kwargs.get('save_model', False) # save model on disk
|
||||||
|
|
||||||
@ -345,6 +345,8 @@ class FTS(object):
|
|||||||
batch_save_interval = kwargs.get('batch_save_interval', 10)
|
batch_save_interval = kwargs.get('batch_save_interval', 10)
|
||||||
|
|
||||||
if distributed is not None and distributed:
|
if distributed is not None and distributed:
|
||||||
|
if num_batches is None:
|
||||||
|
num_batches = 10
|
||||||
|
|
||||||
if distributed == 'dispy':
|
if distributed == 'dispy':
|
||||||
from pyFTS.distributed import dispy
|
from pyFTS.distributed import dispy
|
||||||
|
@ -30,5 +30,4 @@ def get_dataframe():
|
|||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
@ -69,13 +69,16 @@ def initial_population(n, **kwargs):
|
|||||||
:param n: the size of the population
|
:param n: the size of the population
|
||||||
:return: a list with n random individuals
|
:return: a list with n random individuals
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
create_random_individual = kwargs.get('random_individual', random_genotype)
|
||||||
|
|
||||||
pop = []
|
pop = []
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
pop.append(random_genotype(**kwargs))
|
pop.append(create_random_individual(**kwargs))
|
||||||
return pop
|
return pop
|
||||||
|
|
||||||
|
|
||||||
def phenotype(individual, train, fts_method, parameters={}):
|
def phenotype(individual, train, fts_method, parameters={}, **kwargs):
|
||||||
"""
|
"""
|
||||||
Instantiate the genotype, creating a fitted model with the genotype hyperparameters
|
Instantiate the genotype, creating a fitted model with the genotype hyperparameters
|
||||||
|
|
||||||
@ -96,10 +99,10 @@ def phenotype(individual, train, fts_method, parameters={}):
|
|||||||
else:
|
else:
|
||||||
mf = Membership.trimf
|
mf = Membership.trimf
|
||||||
|
|
||||||
#if individual['partitioner'] == 1:
|
if individual['partitioner'] == 1:
|
||||||
partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
|
partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
|
||||||
#elif individual['partitioner'] == 2:
|
elif individual['partitioner'] == 2:
|
||||||
# partitioner = Entropy.EntropyPartitioner(data=train, npart=individual['npart'], func=mf)
|
partitioner = Entropy.EntropyPartitioner(data=train, npart=individual['npart'], func=mf)
|
||||||
|
|
||||||
model = fts_method(partitioner=partitioner,
|
model = fts_method(partitioner=partitioner,
|
||||||
lags=individual['lags'],
|
lags=individual['lags'],
|
||||||
@ -243,6 +246,8 @@ def crossover(population, **kwargs):
|
|||||||
|
|
||||||
n = len(population) - 1
|
n = len(population) - 1
|
||||||
|
|
||||||
|
r1, r2 = 0, 0
|
||||||
|
while r1 == r2:
|
||||||
r1 = random.randint(0, n)
|
r1 = random.randint(0, n)
|
||||||
r2 = random.randint(0, n)
|
r2 = random.randint(0, n)
|
||||||
|
|
||||||
@ -304,9 +309,6 @@ def mutation(individual, **kwargs):
|
|||||||
:param pmut: individual probability o
|
:param pmut: individual probability o
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
import numpy.random
|
|
||||||
|
|
||||||
print('mutation')
|
|
||||||
|
|
||||||
individual['npart'] = min(50, max(3, int(individual['npart'] + np.random.normal(0, 4))))
|
individual['npart'] = min(50, max(3, int(individual['npart'] + np.random.normal(0, 4))))
|
||||||
individual['alpha'] = min(.5, max(0, individual['alpha'] + np.random.normal(0, .5)))
|
individual['alpha'] = min(.5, max(0, individual['alpha'] + np.random.normal(0, .5)))
|
||||||
@ -572,6 +574,7 @@ def execute(datasetname, dataset, **kwargs):
|
|||||||
:keyword parameters: dict with model specific arguments for fts_method
|
:keyword parameters: dict with model specific arguments for fts_method
|
||||||
:keyword elitism: A boolean value indicating if the best individual must always survive to next population
|
:keyword elitism: A boolean value indicating if the best individual must always survive to next population
|
||||||
:keyword initial_operator: a function that receives npop and return a random population with size npop
|
:keyword initial_operator: a function that receives npop and return a random population with size npop
|
||||||
|
:keyword random_individual: a function that receives the keyword arguments and returns a random genotype
|
||||||
:keyword evalutation_operator: a function that receives a dataset and an individual and return its fitness
|
:keyword evalutation_operator: a function that receives a dataset and an individual and return its fitness
|
||||||
:keyword selection_operator: a function that receives the whole population and return a selected individual
|
:keyword selection_operator: a function that receives the whole population and return a selected individual
|
||||||
:keyword crossover_operator: a function that receives the whole population and return a descendent individual
|
:keyword crossover_operator: a function that receives the whole population and return a descendent individual
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
"""
|
"""
|
||||||
Common facilities for hyperparameter tunning
|
Common facilities for hyperparameter optimization
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
|
|
||||||
def open_hyperparam_db(name):
|
def open_hyperparam_db(name):
|
||||||
"""
|
"""
|
||||||
Open a connection with a Sqlite database designed to store benchmark results.
|
Open a connection with a Sqlite database designed to store benchmark results.
|
||||||
|
@ -1,25 +1,58 @@
|
|||||||
"""
|
"""
|
||||||
Distributed Evolutionary Hyperparameter Optimization (DEHO) for MVFTS
|
Distributed Evolutionary Hyperparameter Optimization (DEHO) for MVFTS
|
||||||
|
|
||||||
|
variables: A list of dictionaries, where each dictionary contains
|
||||||
|
- name: Variable name
|
||||||
|
- data_label: data label
|
||||||
|
- type: common | seasonal
|
||||||
|
- seasonality:
|
||||||
|
|
||||||
|
target_variable
|
||||||
|
|
||||||
|
genotype: A dictionary containing
|
||||||
|
- variables: a list with the selected variables, each instance is the index of a variable in variables
|
||||||
|
- params: a list of dictionaries, where each dictionary contains {mf, npart, partitioner, alpha}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import math
|
import math
|
||||||
|
import time
|
||||||
import random
|
import random
|
||||||
|
import logging
|
||||||
|
from pyFTS.common import Util
|
||||||
|
from pyFTS.benchmarks import Measures
|
||||||
|
from pyFTS.partitioners import Grid, Entropy # , Huarng
|
||||||
|
from pyFTS.common import Membership
|
||||||
|
from pyFTS.models import hofts, ifts, pwfts
|
||||||
|
from pyFTS.hyperparam import Util as hUtil
|
||||||
|
from pyFTS.distributed import dispy as dUtil
|
||||||
from pyFTS.hyperparam import Evolutionary
|
from pyFTS.hyperparam import Evolutionary
|
||||||
|
from pyFTS.models.multivariate import mvfts, wmvfts, variable
|
||||||
|
from pyFTS.models.seasonal import partitioner as seasonal
|
||||||
|
from pyFTS.models.seasonal.common import DateTime
|
||||||
|
|
||||||
|
|
||||||
def genotype(vars, params, tparams, f1=None, f2=None):
    """
    Assemble the genotype of an individual

    :param vars: the selected explanatory variables (the callers in this module pass a list of indexes)
    :param params: hyperparameters of the explanatory variables, one entry {mf, npart, partitioner, alpha} per variable
    :param tparams: hyperparameters {mf, npart, partitioner, alpha} of the target variable
    :param f1: accuracy fitness value
    :param f2: parsimony fitness value
    :return: the genotype, a dictionary with all hyperparameters
    """
    return {
        'explanatory_variables': vars,
        'explanatory_params': params,
        'target_params': tparams,
        'f1': f1,
        'f2': f2,
    }
|
||||||
|
|
||||||
|
|
||||||
@ -29,21 +62,365 @@ def random_genotype(**kwargs):
|
|||||||
|
|
||||||
:return: the genotype, a dictionary with all hyperparameters
|
:return: the genotype, a dictionary with all hyperparameters
|
||||||
"""
|
"""
|
||||||
order = random.randint(1, 3)
|
vars = kwargs.get('variables',None)
|
||||||
lags = [k for k in np.arange(1, order+1)]
|
|
||||||
|
tvar = kwargs.get('target_variable',None)
|
||||||
|
|
||||||
|
l = len(vars)
|
||||||
|
|
||||||
|
nvar = np.random.randint(1,l,1) # the number of variables
|
||||||
|
|
||||||
|
explanatory_variables = np.unique(np.random.randint(0, l, nvar)).tolist() #indexes of the variables
|
||||||
|
|
||||||
|
explanatory_params = []
|
||||||
|
|
||||||
|
for v in explanatory_variables:
|
||||||
|
param = {
|
||||||
|
'mf': random.randint(1, 4),
|
||||||
|
'npart': random.randint(10, 50),
|
||||||
|
'partitioner': 1, #random.randint(1, 2),
|
||||||
|
'alpha': random.uniform(0, .5)
|
||||||
|
}
|
||||||
|
explanatory_params.append(param)
|
||||||
|
|
||||||
|
target_params = {
|
||||||
|
'mf': random.randint(1, 4),
|
||||||
|
'npart': random.randint(10, 50),
|
||||||
|
'partitioner': 1, #random.randint(1, 2),
|
||||||
|
'alpha': random.uniform(0, .5)
|
||||||
|
}
|
||||||
|
|
||||||
return genotype(
|
return genotype(
|
||||||
random.randint(1, 4),
|
explanatory_variables,
|
||||||
random.randint(10, 100),
|
explanatory_params,
|
||||||
random.randint(1, 2),
|
target_params
|
||||||
order,
|
|
||||||
random.uniform(0, .5),
|
|
||||||
lags,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def phenotype(individual, train, fts_method, parameters=None, **kwargs):
    """
    Instantiate the genotype, creating a fitted multivariate model with the genotype hyperparameters

    :param individual: a genotype, as built by genotype()
    :param train: the training dataset, handed to every Variable and to the fit method
    :param fts_method: the callable that builds the model from explanatory_variables and target_variable
    :param parameters: dict with model specific arguments, forwarded to fts_method and to fit
    :keyword variables: list of dictionaries describing the candidate variables
                        (name, data_label, type and, for seasonal variables, seasonality)
    :keyword target_variable: dictionary describing the target variable (name, data_label)
    :return: a fitted model
    :raises ValueError: if variables/target_variable are missing or a variable has an unknown type
    """
    # A shared mutable default would leak state between calls.
    parameters = {} if parameters is None else parameters

    variables = kwargs.get('variables', None)
    tvar = kwargs.get('target_variable', None)

    if variables is None or tvar is None:
        raise ValueError("The 'variables' and 'target_variable' keyword arguments are required")

    explanatory_vars = []

    for ct, vix in enumerate(individual['explanatory_variables']):
        var = variables[vix]
        params = individual['explanatory_params'][ct]

        mf = phenotype_mf(params)

        if var['type'] == 'common':
            partitioner = phenotype_partitioner(params)
            tmp = variable.Variable(var['name'], data_label=var['data_label'], alias=var['name'],
                                    partitioner=partitioner,
                                    partitioner_specific={'mf': mf}, npart=params['npart'],
                                    alpha_cut=params['alpha'],
                                    data=train)
        elif var['type'] == 'seasonal':
            # Seasonal variables always use the time grid partitioner,
            # whatever the partitioner code in the genotype says.
            sp = {'seasonality': var['seasonality'], 'mf': mf}
            tmp = variable.Variable(var['name'], data_label=var['data_label'], alias=var['name'],
                                    partitioner=seasonal.TimeGridPartitioner,
                                    partitioner_specific=sp, npart=params['npart'],
                                    alpha_cut=params['alpha'],
                                    data=train)
        else:
            # Without this branch the previous variable would be appended twice
            # (or an UnboundLocalError raised on the first iteration).
            raise ValueError("Unknown variable type: {}".format(var['type']))

        explanatory_vars.append(tmp)

    tparams = individual['target_params']

    partitioner = phenotype_partitioner(tparams)
    mf = phenotype_mf(tparams)

    target_var = variable.Variable(tvar['name'], data_label=tvar['data_label'], alias=tvar['name'],
                                   partitioner=partitioner,
                                   partitioner_specific={'mf': mf}, npart=tparams['npart'],
                                   alpha_cut=tparams['alpha'],
                                   data=train)

    model = fts_method(explanatory_variables=explanatory_vars, target_variable=target_var, **parameters)
    model.fit(train, **parameters)

    return model
|
||||||
|
|
||||||
|
|
||||||
|
def phenotype_partitioner(params):
    """
    Translate the partitioner code of a variable genotype into the partitioner class

    :param params: dictionary with the variable hyperparameters; only 'partitioner' is read
                   (1 = Grid.GridPartitioner, 2 = Entropy.EntropyPartitioner)
    :return: the partitioner class (not an instance)
    :raises ValueError: if the partitioner code is unknown
    """
    code = params['partitioner']
    if code == 1:
        return Grid.GridPartitioner
    elif code == 2:
        return Entropy.EntropyPartitioner
    # Fail with an explicit message instead of an UnboundLocalError on the return.
    raise ValueError("Unknown partitioner code: {}".format(code))
|
||||||
|
|
||||||
|
|
||||||
|
def phenotype_mf(params):
    """
    Translate the membership function code of a variable genotype into the membership function

    :param params: dictionary with the variable hyperparameters; 'mf' is always read and
                   'partitioner' is read only when 'mf' is 3
    :return: Membership.trapmf for code 2, Membership.gaussmf for code 3 (unless the
             partitioner code is 2) and Membership.trimf for everything else
    """
    code = params['mf']

    if code == 2:
        return Membership.trapmf

    if code == 3 and params['partitioner'] != 2:
        return Membership.gaussmf

    # Code 1, any other code, and code 3 combined with partitioner 2 all use the triangular function.
    return Membership.trimf
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :keyword window_size: The length of scrolling window for train/test on dataset, default value: 800
    :keyword train_rate: The train/test split ([0,1]), default value: .8
    :keyword increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]), default value: .2
    :keyword fts_method: The MVFTS method to evaluate, default value: wmvfts.WeightedMVFTS
    :keyword parameters: dict with model specific arguments for fts_method and its fit method
    :keyword target_variable: dictionary describing the target variable; its 'data_label' selects the test column
    :return: a dictionary with the keys f1 (accuracy fitness), f2 (parsimony fitness), rmse and size.
             All values are infinite when no window could be evaluated.
    """
    # The dependencies are imported locally, presumably so the function is
    # self-contained when it is handed to a dispy cluster (see execute).
    import logging
    import numpy as np
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import wmvfts

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    tvar = kwargs.get('target_variable', None)

    # An individual that already carries both fitness values is not evaluated again.
    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    failure = {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}

    errors = []
    lengths = []

    # fts_method and parameters are passed explicitly to phenotype, so they are
    # removed from the forwarded kwargs. Both may be absent (defaults above).
    kwargs2 = kwargs.copy()
    kwargs2.pop('fts_method', None)
    kwargs2.pop('parameters', None)

    for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):

        try:
            model = phenotype(individual, train, fts_method=fts_method, parameters=parameters, **kwargs2)

            forecasts = model.predict(test)

            rmse = Measures.rmse(test[tvar['data_label']].values[model.max_lag:], forecasts[:-1])
            lengths.append(len(model))

            errors.append(rmse)

        except Exception as ex:
            logging.exception("Error")

            lengths.append(np.nan)
            errors.append(np.nan)

    # np.nansum treats NaN as zero, so an individual that failed on every window
    # would otherwise receive f1 == 0, the best possible fitness.
    if len(errors) == 0 or np.all(np.isnan(errors)):
        return failure

    try:
        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception as ex:
        logging.exception("Error")
        return failure
|
||||||
|
|
||||||
|
|
||||||
|
def crossover(population, **kwargs):
    """
    Crossover operation between two parents

    Two distinct individuals are drawn at random; the one with the smaller f1 is the
    'best' parent and is favoured (probability .7) at every choice.

    :param population: the original population, with at least two evaluated individuals
    :return: a genotype, with f1 and f2 unset
    :raises ValueError: if the population has fewer than two individuals
    """
    import random

    # With a single individual the draw below could never produce two distinct indexes.
    if len(population) < 2:
        raise ValueError("Crossover requires a population with at least two individuals")

    n = len(population) - 1

    r1, r2 = 0, 0
    while r1 == r2:
        r1 = random.randint(0, n)
        r2 = random.randint(0, n)

    if population[r1]['f1'] < population[r2]['f1']:
        best = population[r1]
        worst = population[r2]
    else:
        best = population[r2]
        worst = population[r1]

    best_vars = best['explanatory_variables']
    worst_vars = worst['explanatory_variables']

    rnd = random.uniform(0, 1)
    nvar = len(best_vars) if rnd < .7 else len(worst_vars)

    explanatory_variables = []
    explanatory_params = []
    for ct in range(nvar):
        # ct < nvar guarantees that at least one parent has a variable at this position.
        if ct < len(best_vars) and ct < len(worst_vars):
            rnd = random.uniform(0, 1)
            ix = best_vars[ct] if rnd < .7 else worst_vars[ct]
        elif ct < len(best_vars):
            ix = best_vars[ct]
        else:
            ix = worst_vars[ct]

        # Each variable is used at most once in the descendent.
        if ix in explanatory_variables:
            continue

        if ix in best_vars and ix in worst_vars:
            bix = best_vars.index(ix)
            wix = worst_vars.index(ix)
            param = crossover_variable_params(best['explanatory_params'][bix], worst['explanatory_params'][wix])
        elif ix in best_vars:
            # Copy: mutation changes the parameters in place and must not touch the parent.
            param = dict(best['explanatory_params'][best_vars.index(ix)])
        else:
            param = dict(worst['explanatory_params'][worst_vars.index(ix)])

        explanatory_variables.append(ix)
        explanatory_params.append(param)

    tparams = crossover_variable_params(best['target_params'], worst['target_params'])

    descendent = genotype(explanatory_variables, explanatory_params, tparams, None, None)

    return descendent
|
||||||
|
|
||||||
|
|
||||||
|
def crossover_variable_params(best, worst):
    """
    Combine the hyperparameters of one variable taken from two parents

    npart and alpha are the weighted average of both parents (.7 best, .3 worst).
    mf and partitioner are each inherited from the best parent with probability .7,
    otherwise from the worst parent.

    :param best: variable hyperparameters {mf, npart, partitioner, alpha} of the best parent
    :param worst: variable hyperparameters {mf, npart, partitioner, alpha} of the worst parent
    :return: a new dictionary {partitioner, npart, alpha, mf}
    """
    def inherit(key):
        # One independent uniform draw per inherited attribute.
        return best[key] if random.uniform(0, 1) < .7 else worst[key]

    blended_npart = int(round(.7 * best['npart'] + .3 * worst['npart']))
    blended_alpha = float(.7 * best['alpha'] + .3 * worst['alpha'])

    # The draw order (mf first, partitioner second) is kept.
    chosen_mf = inherit('mf')
    chosen_partitioner = inherit('partitioner')

    return {
        'partitioner': chosen_partitioner,
        'npart': blended_npart,
        'alpha': blended_alpha,
        'mf': chosen_mf,
    }
|
||||||
|
|
||||||
|
def mutation(individual, **kwargs):
    """
    Mutation operator

    The hyperparameters of each explanatory variable, and those of the target variable,
    are each mutated with probability .5. The individual is changed in place and its
    fitness values are reset.

    :param individual: an individual genotype
    :param kwargs: not used by this operator
    :return: the same individual, with f1 and f2 set to None
    """
    for position in range(len(individual['explanatory_variables'])):
        if random.uniform(0, 1) > .5:
            mutate_variable_params(individual['explanatory_params'][position])

    if random.uniform(0, 1) > .5:
        mutate_variable_params(individual['target_params'])

    # The cached fitness no longer describes the mutated genotype.
    individual['f1'] = individual['f2'] = None

    return individual
|
||||||
|
|
||||||
|
|
||||||
|
def mutate_variable_params(param):
    """
    Mutate, in place, the hyperparameters of one variable

    npart receives gaussian noise (standard deviation 4) and is clipped to [3, 50];
    alpha receives gaussian noise (standard deviation .5) and is clipped to [0, .5];
    mf and partitioner are redrawn uniformly from 1..4 and 1..2.

    :param param: dictionary with the variable hyperparameters {mf, npart, partitioner, alpha}
    :return: None, the dictionary is modified in place
    """
    noisy_npart = int(param['npart'] + np.random.normal(0, 4))
    param['npart'] = min(50, max(3, noisy_npart))

    noisy_alpha = param['alpha'] + np.random.normal(0, .5)
    param['alpha'] = min(.5, max(0, noisy_alpha))

    param['mf'] = random.randint(1, 4)
    param['partitioner'] = random.randint(1, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def execute(datasetname, dataset, **kwargs):
    """
    Batch execution of Distributed Evolutionary Hyperparameter Optimization (DEHO) for multivariate methods

    :param datasetname: name of the dataset, used in the names of the output files
    :param dataset: The time series to optimize the FTS
    :keyword experiments: An integer value with the number of independent executions, default value: 30
    :keyword ngen: An integer value with the maximum number of generations, default value: 30
    :keyword mgen: An integer value with the maximum number of generations without improvement to stop, default value 7
    :keyword npop: An integer value with the population size, default value: 20
    :keyword pcross: A float value between 0 and 1 with the probability of crossover, default: .5
    :keyword psel: A float value between 0 and 1 with the probability of selection, default: .5
    :keyword pmut: A float value between 0 and 1 with the probability of mutation, default: .3
    :keyword fts_method: The MVFTS method to optimize, default value: wmvfts.WeightedMVFTS
    :keyword parameters: dict with model specific arguments for fts_method
    :keyword elitism: A boolean value indicating if the best individual must always survive to next population
    :keyword selection_operator: a function that receives the whole population and return a selected individual
    :keyword window_size: An integer value with the the length of scrolling window for train/test on dataset
    :keyword train_rate: A float value between 0 and 1 with the train/test split ([0,1])
    :keyword increment_rate: A float value between 0 and 1 with the the increment of the scrolling window,
                             relative to the window_size ([0,1])
    :keyword collect_statistics: A boolean value indicating to collect statistics for each generation
    :keyword distributed: A value indicating it the execution will be local and sequential (distributed=False),
                          or parallel and distributed (distributed='dispy' or distributed='spark')
    :keyword nodes: If distributed='dispy' the list of cluster nodes, default value: ['127.0.0.1']
    :return: the best genotype of the last experiment (an empty list if experiments is 0)
    """

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    # The default is stored in kwargs so that the evaluation operator receives the same
    # method that names the result file. The former default was a univariate method.
    fts_method = kwargs.setdefault('fts_method', wmvfts.WeightedMVFTS)
    shortname = str(fts_method.__module__).split('.')[-1]

    # The multivariate operators of this module always replace user supplied ones.
    kwargs['mutation_operator'] = mutation
    kwargs['crossover_operator'] = crossover
    kwargs['evaluation_operator'] = evaluate
    kwargs['random_individual'] = random_genotype

    cluster, http_server = None, None
    if distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    try:
        for i in np.arange(experiments):
            print("Experiment {}".format(i))

            start = time.time()
            ret, statistics = Evolutionary.GeneticAlgorithm(dataset, **kwargs)
            end = time.time()
            ret['time'] = end - start
            experiment = {'individual': ret, 'statistics': statistics}

            ret = process_experiment(shortname, experiment, datasetname)
    finally:
        # Release the cluster even when an experiment fails.
        if distributed == 'dispy':
            dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
|
||||||
|
|
||||||
|
|
||||||
|
def process_experiment(fts_method, result, datasetname):
    """
    Persist the results of a DEHO execution: the best individual through log_result
    and the generation statistics through persist_statistics

    :param fts_method: short name of the optimized method, used in the result file name
    :param result: dictionary with the keys 'individual' and 'statistics'
    :param datasetname: name of the dataset, used in the output file names
    :return: the best individual, result['individual']
    """
    best_individual = result['individual']
    log_result(datasetname, fts_method, best_individual)

    persist_statistics(datasetname, result['statistics'])

    return best_individual
|
||||||
|
|
||||||
|
|
||||||
|
def persist_statistics(datasetname, statistics):
    """
    Write the generation statistics, serialized as JSON, to statistics_<datasetname>.json
    in the current working directory, replacing any existing file

    :param datasetname: name of the dataset, used in the file name
    :param statistics: JSON serializable statistics
    """
    import json

    target = 'statistics_{}.json'.format(datasetname)
    with open(target, 'w') as handle:
        serialized = json.dumps(statistics)
        handle.write(serialized)
|
||||||
|
|
||||||
|
|
||||||
|
def log_result(datasetname, fts_method, result):
    """
    Write the best individual, serialized as JSON, to result_<fts_method><datasetname>.json
    in the current working directory and print it on the standard output

    :param datasetname: name of the dataset, used in the file name
    :param fts_method: short name of the optimized method, used in the file name
    :param result: JSON serializable individual
    """
    import json

    target = 'result_{}{}.json'.format(fts_method, datasetname)
    with open(target, 'w') as handle:
        serialized = json.dumps(result)
        handle.write(serialized)

    print(result)
|
||||||
|
@ -19,7 +19,7 @@ class FLR(object):
|
|||||||
self.RHS = set
|
self.RHS = set
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "{} -> {}".format([self.LHS[k] for k in self.LHS.keys()], self.RHS)
|
return "{} -> {}".format([k for k in self.LHS.values()], self.RHS)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ from pyFTS.partitioners import Grid
|
|||||||
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
|
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
|
||||||
from itertools import product
|
from itertools import product
|
||||||
from types import LambdaType
|
from types import LambdaType
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -75,19 +76,19 @@ class MVFTS(fts.FTS):
|
|||||||
for path in product_dict(**lags):
|
for path in product_dict(**lags):
|
||||||
flr = MVFLR.FLR()
|
flr = MVFLR.FLR()
|
||||||
|
|
||||||
for var, fset in path.items():
|
flr.LHS = path
|
||||||
flr.set_lhs(var, fset)
|
|
||||||
|
#for var, fset in path.items():
|
||||||
|
# flr.set_lhs(var, fset)
|
||||||
|
|
||||||
if len(flr.LHS.keys()) == len(self.explanatory_variables):
|
if len(flr.LHS.keys()) == len(self.explanatory_variables):
|
||||||
flrs.append(flr)
|
flrs.append(flr)
|
||||||
else:
|
|
||||||
print(flr)
|
|
||||||
|
|
||||||
return flrs
|
return flrs
|
||||||
|
|
||||||
def generate_flrs(self, data):
|
def generate_flrs(self, data):
|
||||||
flrs = []
|
flrs = []
|
||||||
for ct in range(1, len(data.index)):
|
for ct in np.arange(1, len(data.index)):
|
||||||
ix = data.index[ct-1]
|
ix = data.index[ct-1]
|
||||||
data_point = self.format_data( data.loc[ix] )
|
data_point = self.format_data( data.loc[ix] )
|
||||||
|
|
||||||
@ -99,8 +100,9 @@ class MVFTS(fts.FTS):
|
|||||||
|
|
||||||
for flr in tmp_flrs:
|
for flr in tmp_flrs:
|
||||||
for v, s in target:
|
for v, s in target:
|
||||||
flr.set_rhs(s)
|
new_flr = deepcopy(flr)
|
||||||
flrs.append(flr)
|
new_flr.set_rhs(s)
|
||||||
|
flrs.append(new_flr)
|
||||||
|
|
||||||
return flrs
|
return flrs
|
||||||
|
|
||||||
@ -113,7 +115,6 @@ class MVFTS(fts.FTS):
|
|||||||
|
|
||||||
self.flrgs[flrg.get_key()].append_rhs(flr.RHS)
|
self.flrgs[flrg.get_key()].append_rhs(flr.RHS)
|
||||||
|
|
||||||
|
|
||||||
def train(self, data, **kwargs):
|
def train(self, data, **kwargs):
|
||||||
|
|
||||||
ndata = self.apply_transformations(data)
|
ndata = self.apply_transformations(data)
|
||||||
|
@ -72,55 +72,54 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
|||||||
partlen = self.season.value / self.partitions
|
partlen = self.season.value / self.partitions
|
||||||
pl2 = partlen / 2
|
pl2 = partlen / 2
|
||||||
|
|
||||||
count = 0
|
for count, midpoint in enumerate(np.arange(self.min, self.max, partlen)):
|
||||||
for c in np.arange(self.min, self.max, partlen):
|
|
||||||
set_name = self.get_name(count)
|
set_name = self.get_name(count)
|
||||||
if self.membership_function == Membership.trimf:
|
if self.membership_function == Membership.trimf:
|
||||||
if c == self.min:
|
if midpoint == self.min or count == 0:
|
||||||
tmp = Composite(set_name, superset=True, **kwargs)
|
tmp = Composite(set_name, superset=True, **kwargs)
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[self.season.value - pl2, self.season.value,
|
[self.season.value - pl2, self.season.value,
|
||||||
self.season.value + pl2], self.season.value, alpha=1,
|
self.season.value + pl2], self.season.value, alpha=1,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[c - partlen, c, c + partlen], c,
|
[midpoint - partlen, midpoint, midpoint + partlen], midpoint,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.centroid = c
|
tmp.centroid = midpoint
|
||||||
sets[set_name] = tmp
|
sets[set_name] = tmp
|
||||||
elif c == self.max - partlen:
|
elif midpoint == self.max - partlen or count == self.partitions - 1:
|
||||||
tmp = Composite(set_name, superset=True, **kwargs)
|
tmp = Composite(set_name, superset=True, **kwargs)
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[-pl2, 0.0,
|
[-pl2, 0.0,
|
||||||
pl2], 0.0, alpha=1,
|
pl2], 0.0, alpha=1,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[c - partlen, c, c + partlen], c,
|
[midpoint - partlen, midpoint, midpoint + partlen], midpoint,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.centroid = c
|
tmp.centroid = midpoint
|
||||||
sets[set_name] = tmp
|
sets[set_name] = tmp
|
||||||
else:
|
else:
|
||||||
sets[set_name] = FuzzySet(self.season, set_name, Membership.trimf,
|
sets[set_name] = FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[c - partlen, c, c + partlen], c,
|
[midpoint - partlen, midpoint, midpoint + partlen], midpoint,
|
||||||
**kwargs)
|
**kwargs)
|
||||||
elif self.membership_function == Membership.gaussmf:
|
elif self.membership_function == Membership.gaussmf:
|
||||||
sets[set_name] = FuzzySet(self.season, set_name, Membership.gaussmf, [c, partlen / 3], c,
|
sets[set_name] = FuzzySet(self.season, set_name, Membership.gaussmf, [midpoint, partlen / 3], midpoint,
|
||||||
**kwargs)
|
**kwargs)
|
||||||
elif self.membership_function == Membership.trapmf:
|
elif self.membership_function == Membership.trapmf:
|
||||||
q = partlen / 4
|
q = partlen / 4
|
||||||
if c == self.min:
|
if midpoint == self.min:
|
||||||
tmp = Composite(set_name, superset=True)
|
tmp = Composite(set_name, superset=True)
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[self.season.value - pl2, self.season.value,
|
[self.season.value - pl2, self.season.value,
|
||||||
self.season.value + 0.0000001], 0,
|
self.season.value + 0.0000001], 0,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trapmf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trapmf,
|
||||||
[c - partlen, c - q, c + q, c + partlen], c,
|
[midpoint - partlen, midpoint - q, midpoint + q, midpoint + partlen], midpoint,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.centroid = c
|
tmp.centroid = midpoint
|
||||||
sets[set_name] = tmp
|
sets[set_name] = tmp
|
||||||
else:
|
else:
|
||||||
sets[set_name] = FuzzySet(self.season, set_name, Membership.trapmf,
|
sets[set_name] = FuzzySet(self.season, set_name, Membership.trapmf,
|
||||||
[c - partlen, c - q, c + q, c + partlen], c,
|
[midpoint - partlen, midpoint - q, midpoint + q, midpoint + partlen], midpoint,
|
||||||
**kwargs)
|
**kwargs)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
@ -169,9 +169,12 @@ class Partitioner(object):
|
|||||||
if method == 'fuzzy' and mode == 'vector':
|
if method == 'fuzzy' and mode == 'vector':
|
||||||
return mv
|
return mv
|
||||||
elif method == 'fuzzy' and mode == 'sets':
|
elif method == 'fuzzy' and mode == 'sets':
|
||||||
|
try:
|
||||||
ix = np.ravel(np.argwhere(mv > 0.))
|
ix = np.ravel(np.argwhere(mv > 0.))
|
||||||
sets = [self.ordered_sets[i] for i in ix]
|
sets = [self.ordered_sets[i] for i in ix if i < self.partitions]
|
||||||
return sets
|
return sets
|
||||||
|
except Exception as ex:
|
||||||
|
return None
|
||||||
elif method == 'maximum' and mode == 'sets':
|
elif method == 'maximum' and mode == 'sets':
|
||||||
mx = max(mv)
|
mx = max(mv)
|
||||||
ix = np.ravel(np.argwhere(mv == mx))
|
ix = np.ravel(np.argwhere(mv == mx))
|
||||||
|
@ -1,20 +1,23 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pyFTS.hyperparam import GridSearch, Evolutionary
|
from pyFTS.hyperparam import GridSearch, Evolutionary, mvfts as deho_mv
|
||||||
from pyFTS.models import pwfts
|
from pyFTS.models import pwfts
|
||||||
|
from pyFTS.models.multivariate import mvfts, wmvfts
|
||||||
|
from pyFTS.models.seasonal.common import DateTime
|
||||||
|
|
||||||
|
|
||||||
def get_dataset():
|
def get_dataset():
|
||||||
from pyFTS.data import SONDA
|
#from pyFTS.data import SONDA
|
||||||
#from pyFTS.data import Malaysia
|
from pyFTS.data import Malaysia
|
||||||
|
|
||||||
data = [k for k in SONDA.get_data('ws_10m') if k > 0.1 and k != np.nan and k is not None]
|
#data = [k for k in SONDA.get_data('ws_10m') if k > 0.1 and k != np.nan and k is not None]
|
||||||
data = [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
|
#data = [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
|
||||||
#data = pd.read_csv('https://query.data.world/s/6xfb5useuotbbgpsnm5b2l3wzhvw2i', sep=';')
|
#data = pd.read_csv('https://query.data.world/s/6xfb5useuotbbgpsnm5b2l3wzhvw2i', sep=';')
|
||||||
#data = Malaysia.get_data('temperature')
|
data = Malaysia.get_dataframe()
|
||||||
|
data['time'] = pd.to_datetime(data["time"], format='%m/%d/%y %I:%M %p')
|
||||||
|
|
||||||
return 'SONDA.ws_10m', data
|
#return 'SONDA.ws_10m', data
|
||||||
#return 'Malaysia.temperature', data #train, test
|
return 'Malaysia', data.iloc[:5000] #train, test
|
||||||
#return 'Malaysia.temperature', data # train, test
|
#return 'Malaysia.temperature', data # train, test
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -43,6 +46,30 @@ datsetname, dataset = get_dataset()
|
|||||||
#GridSearch.execute(hyperparams, datsetname, dataset, nodes=nodes,
|
#GridSearch.execute(hyperparams, datsetname, dataset, nodes=nodes,
|
||||||
# window_size=10000, train_rate=.9, increment_rate=1,)
|
# window_size=10000, train_rate=.9, increment_rate=1,)
|
||||||
|
|
||||||
|
explanatory_variables =[
|
||||||
|
{'name': 'Load', 'data_label': 'load', 'type': 'common'},
|
||||||
|
{'name': 'Temperature', 'data_label': 'temperature', 'type': 'common'},
|
||||||
|
{'name': 'Daily', 'data_label': 'time', 'type': 'seasonal', 'seasonality': DateTime.minute_of_day, 'npart': 24 },
|
||||||
|
{'name': 'Weekly', 'data_label': 'time', 'type': 'seasonal', 'seasonality': DateTime.day_of_week, 'npart': 7 },
|
||||||
|
#{'name': 'Monthly', 'data_label': 'time', 'type': 'seasonal', 'seasonality': DateTime.day_of_month, 'npart': 4 },
|
||||||
|
{'name': 'Yearly', 'data_label': 'time', 'type': 'seasonal', 'seasonality': DateTime.day_of_year, 'npart': 12 }
|
||||||
|
]
|
||||||
|
|
||||||
|
target_variable = {'name': 'Load', 'data_label': 'load', 'type': 'common'}
|
||||||
|
nodes=['192.168.28.38']
|
||||||
|
deho_mv.execute(datsetname, dataset,
|
||||||
|
ngen=10, npop=10,psel=0.6, pcross=.5, pmut=.3,
|
||||||
|
window_size=5000, train_rate=.9, increment_rate=1,
|
||||||
|
experiments=1,
|
||||||
|
fts_method=wmvfts.WeightedMVFTS,
|
||||||
|
variables=explanatory_variables,
|
||||||
|
target_variable=target_variable,
|
||||||
|
distributed='dispy', nodes=nodes,
|
||||||
|
#parameters=dict(num_batches=5)
|
||||||
|
#parameters=dict(distributed='dispy', nodes=nodes, num_batches=5)
|
||||||
|
)
|
||||||
|
|
||||||
|
'''
|
||||||
ret = Evolutionary.execute(datsetname, dataset,
|
ret = Evolutionary.execute(datsetname, dataset,
|
||||||
ngen=30, npop=20,psel=0.6, pcross=.5, pmut=.3,
|
ngen=30, npop=20,psel=0.6, pcross=.5, pmut=.3,
|
||||||
window_size=10000, train_rate=.9, increment_rate=.3,
|
window_size=10000, train_rate=.9, increment_rate=.3,
|
||||||
@ -50,7 +77,7 @@ ret = Evolutionary.execute(datsetname, dataset,
|
|||||||
fts_method=pwfts.ProbabilisticWeightedFTS,
|
fts_method=pwfts.ProbabilisticWeightedFTS,
|
||||||
database_file='experiments.db',
|
database_file='experiments.db',
|
||||||
distributed='dispy', nodes=nodes)
|
distributed='dispy', nodes=nodes)
|
||||||
|
'''
|
||||||
#res = GridSearch.cluster_method({'mf':1, 'partitioner': 1, 'npart': 10, 'lags':[1], 'alpha': 0.0, 'order': 1},
|
#res = GridSearch.cluster_method({'mf':1, 'partitioner': 1, 'npart': 10, 'lags':[1], 'alpha': 0.0, 'order': 1},
|
||||||
# dataset, window_size = 10000, train_rate = .9, increment_rate = 1)
|
# dataset, window_size = 10000, train_rate = .9, increment_rate = 1)
|
||||||
|
|
||||||
|
@ -16,33 +16,39 @@ from pyFTS.models.seasonal.common import DateTime
|
|||||||
from pyFTS.models.multivariate import common, variable, mvfts
|
from pyFTS.models.multivariate import common, variable, mvfts
|
||||||
from pyFTS.partitioners import Grid
|
from pyFTS.partitioners import Grid
|
||||||
from pyFTS.common import Membership
|
from pyFTS.common import Membership
|
||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from pyFTS.data import NASDAQ
|
from pyFTS.data import Malaysia, Enrollments
|
||||||
|
|
||||||
train_data = NASDAQ.get_data()[:2000]
|
df = Malaysia.get_dataframe()
|
||||||
test_data = NASDAQ.get_data()[2000:3000]
|
df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
|
||||||
|
|
||||||
from pyFTS.partitioners import Grid
|
train_mv = df.iloc[:4500]
|
||||||
|
test_mv = df.iloc[4500:5000]
|
||||||
|
|
||||||
partitioner = Grid.GridPartitioner(data=train_data, npart=35)
|
del(df)
|
||||||
|
|
||||||
from pyFTS.models import pwfts, hofts
|
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
||||||
|
|
||||||
#model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner, order=2)
|
vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||||
#from pyFTS.models.incremental import TimeVariant
|
data=train_mv, partitioner_specific=sp, alpha_cut=.3)
|
||||||
|
|
||||||
#model = TimeVariant.Retrainer(partitioner_method=Grid.GridPartitioner, partitioner_params={'npart': 35},
|
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
|
||||||
# fts_method=pwfts.ProbabilisticWeightedFTS, fts_params={}, order=2 ,
|
partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
|
||||||
# batch_size=100, window_length=500)
|
data=train_mv, alpha_cut=.3)
|
||||||
|
|
||||||
model = hofts.HighOrderFTS(partitioner=partitioner, order=2)
|
vload = variable.Variable("Load", data_label="load", alias='load',
|
||||||
model.fit(train_data)
|
partitioner=Grid.GridPartitioner, npart=20, func=Membership.trimf,
|
||||||
|
data=train_mv, alpha_cut=.3)
|
||||||
|
|
||||||
print(model.predict(test_data, steps_ahead=10))
|
model = mvfts.MVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload)
|
||||||
|
#fs = Grid.GridPartitioner(data=Enrollments.get_data(), npart=10)
|
||||||
|
#print(fs)
|
||||||
|
#model = pwfts.ProbabilisticWeightedFTS(partitioner=vload.partitioner, order=2)
|
||||||
|
model.fit(train_mv) #, num_batches=10) #, distributed='dispy',nodes=['192.168.0.110'])
|
||||||
|
#model.fit(Enrollments.get_data()) #, num_batches=20) #, distributed='dispy',nodes=['192.168.0.110'])
|
||||||
|
|
||||||
|
print(model)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
def sample_by_hour(data):
|
def sample_by_hour(data):
|
||||||
|
Loading…
Reference in New Issue
Block a user