Fuzzy Cognitive Methods for FTS

This commit is contained in:
Petrônio Cândido 2019-04-26 17:28:43 -03:00
parent ebdcfe228f
commit 27b7cf59f8
6 changed files with 526 additions and 1 deletions

18
pyFTS/fcm/Activations.py Normal file
View File

@ -0,0 +1,18 @@
import numpy as np
def step(x):
    """
    Step activation: 0 for x <= 0, 1 otherwise.

    :param x: a scalar input
    :return: 0 or 1
    """
    return 0 if x <= 0 else 1
def sigmoid(x):
    """
    Logistic sigmoid activation 1 / (1 + e^-x), element-wise on arrays.

    :param x: scalar or NumPy array
    :return: value(s) in (0, 1)
    """
    neg_exp = np.exp(-x)
    return 1 / (1 + neg_exp)
def softmax(x):
    """
    Softmax activation over all elements of x (flattened).

    Numerically stable: the maximum is subtracted before exponentiating,
    which avoids overflow for large inputs. Softmax is invariant under a
    constant shift of its inputs, so the result is unchanged.

    :param x: array-like of activation values
    :return: 1-D NumPy array of the same number of elements, summing to 1
    """
    flat = np.asarray(x, dtype=float).flatten()
    shifted = np.exp(flat - np.max(flat))
    return shifted / np.sum(shifted)

425
pyFTS/fcm/GA.py Normal file
View File

@ -0,0 +1,425 @@
import numpy as np
import pandas as pd
import math
import time
from functools import reduce
from operator import itemgetter
import dispy
import random
from pyFTS.common import Util
from pyFTS.benchmarks import Measures
from pyFTS.partitioners import Grid, Entropy # , Huarng
from pyFTS.models import hofts
from pyFTS.common import Membership
from pyFTS.hyperparam import Util as hUtil
from pyFTS.distributed import dispy as dUtil
from pyFTS.fcm import common, fts
# Module-level configuration shared by the GA operators below.
# Expected keys: 'num_concepts', 'order', 'partitioner'.
parameters = {}


def genotype():
    '''
    Create an individual genotype for the FCM weight-optimization GA.

    Reads the module-level ``parameters`` dict, which must contain:

    - ``num_concepts``: number of concepts (fuzzy sets) of the cognitive map
    - ``order``: model order (number of lags, one weight matrix per lag)

    :return: the genotype, a dict whose 'weights' entry holds ``order``
             matrices of shape (num_concepts, num_concepts) sampled
             from N(0, 0.5)
    '''
    num_concepts = parameters['num_concepts']
    order = parameters['order']
    ind = dict(weights=[np.random.normal(0, .5, (num_concepts, num_concepts))
                        for k in range(order)])
    return ind
def random_genotype():
    '''
    Build a brand-new random individual.

    Thin wrapper kept for symmetry with the hyperparam GA API.

    :return: a genotype dict (see :func:`genotype`)
    '''
    return genotype()
#
def initial_population(n):
    '''
    Create a random population of size n.

    :param n: the size of the population
    :return: a list with n random individuals
    '''
    return [random_genotype() for _ in range(n)]
def phenotype(individual, train):
    '''
    Instantiate the genotype, creating an FCM-FTS model carrying the
    individual's weight matrices.

    Reads 'partitioner' and 'order' from the module-level ``parameters``.

    :param individual: a genotype (dict with a 'weights' entry)
    :param train: the training dataset (present for API compatibility;
                  the weights come directly from the genotype)
    :return: an FCM_FTS model configured with the individual's weights
    '''
    model = fts.FCM_FTS(partitioner=parameters['partitioner'],
                        order=parameters['order'])
    model.fcm.weights = individual['weights']
    return model
def evaluate(dataset, individual, **kwargs):
    '''
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :return: a dict with the accuracy (RMSE) fitness value
    '''
    # Local imports keep the function self-contained when it is shipped
    # to remote dispy workers.
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    errors = []
    windows = Util.sliding_window(dataset, window_size,
                                  train=train_rate, inc=increment_rate)
    for count, train, test in windows:
        model = phenotype(individual, train)
        if model is None:
            raise Exception("Phenotype returned None")
        model.uod_clip = False
        forecasts = model.predict(test)
        # The last forecast points one step past the test window, so it is
        # dropped; the first max_lag test points have no matching forecast.
        errors.append(Measures.rmse(test[model.max_lag:], forecasts[:-1]))

    return {'rmse': np.nanmean(errors)}
def tournament(population, objective):
    '''
    Simple binary tournament selection strategy.

    Two candidate indexes are drawn (uniformly at random for populations
    larger than 3, otherwise the first two individuals) and the candidate
    with the lower objective value wins.

    :param population: the population (list of genotype dicts)
    :param objective: key of the objective compared on (e.g. 'rmse')
    :return: the winning individual
    '''
    last = len(population) - 1
    if last > 2:
        first = random.randint(0, last)
        second = random.randint(0, last)
    else:
        first, second = 0, 1
    if population[first][objective] < population[second][objective]:
        winner = first
    else:
        winner = second
    return population[winner]
def crossover(parents):
    '''
    Crossover operation between two parents.

    For each lag k the descendant's weight matrix is a fixed convex
    combination of the parents' matrices: 0.7 * parent0 + 0.3 * parent1.

    :param parents: a list whose first two elements are genotypes
    :return: a new genotype carrying the combined weight matrices
    '''
    descendent = genotype()
    for k in range(parameters['order']):
        weights1 = parents[0]['weights'][k]
        weights2 = parents[1]['weights'][k]
        # Vectorized form of the original element-by-element loop:
        # same arithmetic per cell, without the Python-level ndenumerate
        # pass and reshape (and without the unused local `import random`).
        descendent['weights'][k] = .7 * weights1 + .3 * weights2
    return descendent
def mutation(individual, pmut):
    '''
    Mutation operator.

    For each lag, with probability pmut, perturbs a random number of weight
    matrix entries with Gaussian noise N(0, 0.5), clipping each mutated
    entry to the interval [-1, 1]. The individual is modified in place.

    :param individual: genotype to mutate (modified in place)
    :param pmut: probability of mutating each lag's weight matrix
    :return: the mutated individual
    '''
    for k in range(parameters['order']):
        rows, cols = individual['weights'][k].shape
        rnd = random.uniform(0, 1)
        if rnd < pmut:
            num_mutations = random.randint(1, parameters['num_concepts'] ** 2)
            for q in range(num_mutations):
                row = random.randint(0, rows - 1)
                col = random.randint(0, cols - 1)
                # Draw a scalar sample: the original used
                # np.random.normal(0, .5, 1), a size-1 array, and assigning
                # that to a single cell is deprecated in NumPy 1.25 and an
                # error in NumPy 2.x.
                perturbed = individual['weights'][k][row, col] + np.random.normal(0, .5)
                individual['weights'][k][row, col] = np.clip(perturbed, -1, 1)
    return individual
def elitism(population, new_population):
    '''
    Elitism operation: guarantee that the best individual of the previous
    generation survives into the next one.

    Both populations are ranked by ascending 'rmse'; when the new
    generation's best is strictly worse than the previous best, the
    previous best is prepended to the new population.

    :param population: previous generation
    :param new_population: candidate next generation
    :return: the next generation sorted by 'rmse', possibly with the
             previous best prepended
    '''
    by_rmse = itemgetter('rmse')
    previous_best = sorted(population, key=by_rmse)[0]
    ranked = sorted(new_population, key=by_rmse)
    if by_rmse(ranked[0]) > by_rmse(previous_best):
        ranked.insert(0, previous_best)
    return ranked
def GeneticAlgorithm(dataset, **kwargs):
    '''
    Genetic algorithm to optimize the weight matrices of an FCM-FTS model.

    :param dataset: time series used for fitness evaluation
    :param ngen: Max number of generations
    :param mgen: Max number of generations without improvement
    :param npop: Population size
    :param pcruz: Probability of crossover
    :param pmut: Probability of mutation
    :param distributed: False for local evaluation, or 'dispy' to evaluate
                        individuals on a dispy cluster
    :param cluster: dispy cluster object (required when distributed == 'dispy')
    :param collect_statistics: when True (default), collect per-generation statistics
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :return: a tuple (best genotype, list of per-generation statistics)
    '''
    statistics = []
    ngen = kwargs.get('ngen',30)
    mgen = kwargs.get('mgen', 7)
    npop = kwargs.get('npop',20)
    pcruz = kwargs.get('pcruz',.5)
    pmut = kwargs.get('pmut',.3)
    distributed = kwargs.get('distributed', False)
    if distributed == 'dispy':
        # pop so the cluster object is not forwarded to the remote evaluate()
        # NOTE(review): cluster may be None if the caller forgot to pass it —
        # the submit() calls below would then fail; confirm callers always set it
        cluster = kwargs.pop('cluster', None)
    collect_statistics = kwargs.get('collect_statistics', True)
    no_improvement_count = 0
    new_population = []
    population = initial_population(npop)
    # last_best/best start as arbitrary (not yet evaluated) individuals;
    # they are replaced with real ranked values after the first generation
    last_best = population[0]
    best = population[1]
    print("Evaluating initial population {}".format(time.time()))
    if not distributed:
        for individual in population:
            ret = evaluate(dataset, individual, **kwargs)
            individual['rmse'] = ret['rmse']
    elif distributed=='dispy':
        # fan the initial evaluations out to the dispy cluster,
        # tagging each job with the individual's index
        jobs = []
        for ct, individual in enumerate(population):
            job = cluster.submit(dataset, individual, **kwargs)
            job.id = ct
            jobs.append(job)
        for job in jobs:
            result = job()  # blocks until the job finishes
            if job.status == dispy.DispyJob.Finished and result is not None:
                population[job.id]['rmse'] = result['rmse']
            else:
                print(job.exception)
                print(job.stdout)
    for i in range(ngen):
        print("GENERATION {} {}".format(i, time.time()))
        generation_statistics = {}
        # Selection: binary tournaments fill half of the mating pool twice
        for j in range(int(npop / 2)):
            new_population.append(tournament(population, 'rmse'))
            new_population.append(tournament(population, 'rmse'))
        # Crossover
        # NOTE(review): crossover() only reads parents[0] and parents[1], so
        # passing the whole new_population means the same first two individuals
        # are crossed every time — confirm whether random parent selection
        # (as in the hyperparam GA) was intended
        new = []
        for j in range(int(npop * pcruz)):
            new.append(crossover(new_population))
        new_population.extend(new)
        # Mutation (in place, per individual)
        for ct, individual in enumerate(new_population):
            new_population[ct] = mutation(individual, pmut)
        # Evaluation of the candidate generation
        if collect_statistics:
            stats = {}
            for key in ['rmse']:
                stats[key] = []
        if not distributed:
            for individual in new_population:
                ret = evaluate(dataset, individual, **kwargs)
                for key in ['rmse']:
                    individual[key] = ret[key]
                    if collect_statistics: stats[key].append(ret[key])
        elif distributed == 'dispy':
            jobs = []
            for ct, individual in enumerate(new_population):
                job = cluster.submit(dataset, individual, **kwargs)
                job.id = ct
                jobs.append(job)
            for job in jobs:
                print('job id {}'.format(job.id))
                result = job()
                if job.status == dispy.DispyJob.Finished and result is not None:
                    for key in ['rmse']:
                        new_population[job.id][key] = result[key]
                        if collect_statistics: stats[key].append(result[key])
                else:
                    print(job.exception)
                    print(job.stdout)
        if collect_statistics:
            mean_stats = {key: np.nanmedian(stats[key]) for key in ['rmse'] }
            generation_statistics['population'] = mean_stats
        # Elitism: keep the best of the previous generation, then truncate
        population = elitism(population, new_population)
        population = population[:npop]
        new_population = []
        last_best = best
        best = population[0]
        if collect_statistics:
            generation_statistics['best'] = {key: best[key] for key in ['rmse']}
            statistics.append(generation_statistics)
        if last_best['rmse'] <= best['rmse']:
            # no improvement: raise mutation pressure adaptively
            no_improvement_count += 1
            print("WITHOUT IMPROVEMENT {}".format(no_improvement_count))
            pmut += .05
        else:
            # improvement: reset the stagnation counter and the operator rates
            no_improvement_count = 0
            pcruz = kwargs.get('pcruz', .5)
            pmut = kwargs.get('pmut', .3)
            print(best)
        if no_improvement_count == mgen:
            break
    return best, statistics
def process_experiment(result, datasetname, conn):
    '''
    Post-process a single GA experiment: echo the result and hand back the
    winning individual. Database logging and statistics persistence are
    currently disabled.

    :param result: dict with 'individual' and 'statistics' entries
    :param datasetname: dataset label (unused while logging is disabled)
    :param conn: hyperparameter DB connection (unused while logging is disabled)
    :return: the experiment's best individual
    '''
    best = result['individual']
    print(result)
    #log_result(conn, datasetname, result['individual'])
    #persist_statistics(result['statistics'])
    return best
def persist_statistics(statistics):
    '''
    Dump the GA run statistics as JSON into a timestamped text file in the
    current working directory.

    :param statistics: list of per-generation statistics dicts
    '''
    import json
    filename = 'statistics{}.txt'.format(time.time())
    with open(filename, 'w') as file:
        file.write(json.dumps(statistics))
def log_result(conn, datasetname, result):
    # Persist one database row per metric into the hyperparameter DB.
    # NOTE(review): currently dead code — the call in process_experiment is
    # commented out. The keys read here ('mf', 'partitioner', 'npart',
    # 'alpha', 'lags') do not exist on the FCM genotype built by genotype()
    # (which only carries 'weights' and 'rmse'); this looks copied from the
    # WHOFTS hyperparam GA — confirm the record layout before re-enabling.
    metrics = ['rmse', 'size', 'time']
    for metric in metrics:
        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])
        print(record)
        hUtil.insert_hyperparam(record, conn)
def execute(datasetname, dataset, **kwargs):
    '''
    Run a batch of independent GA experiments over the dataset, processing
    each result, optionally with distributed (dispy) fitness evaluation.

    :param datasetname: dataset label used for logging
    :param dataset: time series used for fitness evaluation
    :param experiments: number of independent GA runs (default 30)
    :param distributed: False, or 'dispy' to evaluate on a dispy cluster
    :param nodes: list of dispy node addresses (when distributed == 'dispy')
    :return: the processed individual of the last experiment (see note below)
    '''
    conn = hUtil.open_hyperparam_db('hyperparam.db')
    experiments = kwargs.get('experiments', 30)
    distributed = kwargs.get('distributed', False)
    if distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        # the cluster ships evaluate() to the workers; it is handed to
        # GeneticAlgorithm through kwargs['cluster']
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster
    # NOTE(review): ret starts as a list but is rebound on every iteration,
    # so only the LAST experiment's individual is returned — confirm whether
    # accumulating all experiment results was intended.
    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))
        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        # wall-clock duration of this GA run, attached to the best individual
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}
        ret = process_experiment(experiment, datasetname, conn)
    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)
    return ret

0
pyFTS/fcm/__init__.py Normal file
View File

18
pyFTS/fcm/common.py Normal file
View File

@ -0,0 +1,18 @@
from pyFTS.fcm import Activations
import numpy as np
class FuzzyCognitiveMap(object):
    """
    A (high-order) Fuzzy Cognitive Map whose concepts are the fuzzy sets of
    a partitioner. Activation aggregates, over each lag, the dot product of
    the concepts' membership values with that lag's weight matrix, and then
    applies an activation function.
    """

    def __init__(self, **kwargs):
        super(FuzzyCognitiveMap, self).__init__()
        # number of lags aggregated by activate()
        self.order = kwargs.get('order', 1)
        # partitioner whose fuzzy sets play the role of the map's concepts
        self.concepts = kwargs.get('partitioner', None)
        # one (num_concepts x num_concepts) weight matrix per lag,
        # fitted externally (e.g. by the GA)
        self.weights = []
        # activation applied to the aggregated dot products
        self.activation_function = kwargs.get('func', Activations.sigmoid)

    def activate(self, concepts):
        """
        Compute the map's activation for the given lagged membership vectors.

        :param concepts: per-lag membership vectors (indexable by lag)
        :return: activation-function output over the summed dot products
        """
        accumulated = np.zeros(len(self.concepts))
        for lag in range(self.order):
            accumulated += np.dot(np.array(concepts[lag]).T, self.weights[lag])
        return self.activation_function(accumulated)

30
pyFTS/fcm/fts.py Normal file
View File

@ -0,0 +1,30 @@
from pyFTS.common import fts
from pyFTS.models import hofts
from pyFTS.fcm import common
import numpy as np
class FCM_FTS(hofts.HighOrderFTS):
    """
    High-order FTS method whose forecasting rule is a Fuzzy Cognitive Map
    over the fuzzy sets of the partitioner; point forecasts are the
    activation-weighted average of the sets' midpoints.
    """

    def __init__(self, **kwargs):
        super(FCM_FTS, self).__init__(**kwargs)
        # underlying fuzzy cognitive map; its weights are fitted
        # externally (e.g. by the GA module)
        self.fcm = common.FuzzyCognitiveMap(**kwargs)

    def forecast(self, ndata, **kwargs):
        """
        Produce point forecasts for the series ndata.

        :param ndata: input time series (at least ``order`` samples)
        :return: list of point forecasts, one per window of length ``order``
                 (the last one extends one step past the input)
        """
        forecasts = []
        midpoints = np.array([fset.centroid for fset in self.partitioner])
        for t in np.arange(self.order, len(ndata) + 1):
            window = ndata[t - self.order: t]
            memberships = self.partitioner.fuzzyfy(window, mode='vector')
            activations = self.fcm.activate(memberships)
            # defuzzify: activation-weighted average of the set midpoints
            forecasts.append(np.dot(midpoints, activations) / np.sum(activations))
        return forecasts

View File

@ -14,6 +14,39 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
from pyFTS.common import Transformations, Membership
from pyFTS.fcm import fts, common, GA
from pyFTS.data import Enrollments, TAIEX
import pandas as pd
# Load the experiment dataset from data.world and keep the first 12000
# samples of the 'glo_avg' column (presumably an average solar radiation
# series — TODO confirm against the dataset source).
df = pd.read_csv('https://query.data.world/s/7zfy4d5uep7wbgf56k4uu5g52dmvap', sep=';')
data = df['glo_avg'].values[:12000]
# Uniform grid partitioner with 35 triangular fuzzy sets over the data UoD.
fs = Grid.GridPartitioner(data=data, npart=35, func=Membership.trimf)
# GA configuration: num_concepts must match the partitioner's npart.
GA.parameters['num_concepts'] = 35
GA.parameters['order'] = 2
GA.parameters['partitioner'] = fs
# NOTE(review): the label says 'TAIEX' but the series loaded above is the
# 'glo_avg' dataset — confirm the intended experiment name.
GA.execute('TAIEX', data)
'''
model = fts.FCM_FTS(partitioner=fs, order=1)
model.fcm.weights = np.array([
[1, 1, 0, -1, -1],
[1, 1, 1, 0, -1],
[0, 1, 1, 1, 0],
[-1, 0, 1, 1, 1],
[-1, -1, 0, 1, 1]
])
print(data)
print(model.forecast(data))
'''
'''
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
@ -80,7 +113,7 @@ forecasts = model.predict(test_mv, type='multivariate', generators={'data': time
print(forecasts)
'''
'''
from pyFTS.data import lorentz
df = lorentz.get_dataframe(iterations=5000)
@ -104,3 +137,4 @@ model.fit(train)
forecasts = model.predict(test, type='multivariate', steps_ahead=20)
print(forecasts)
'''