Fuzzy Cognitive Methods for FTS
This commit is contained in:
parent
ebdcfe228f
commit
27b7cf59f8
18
pyFTS/fcm/Activations.py
Normal file
18
pyFTS/fcm/Activations.py
Normal file
@ -0,0 +1,18 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def step(x):
    '''
    Binary step activation: 0 for non-positive inputs, 1 for positive inputs.

    Scalars keep the original behaviour and return a plain 0 or 1. Array-like
    inputs are handled element-wise, so the function can be applied to a whole
    vector of values, as the other activation functions of this module can.

    :param x: a scalar or an array-like of values
    :return: 0 or 1 for a scalar input; a numpy array of 0s and 1s with the
             same shape as x for an array-like input
    '''
    if np.ndim(x) == 0:
        return 0 if x <= 0 else 1

    # A bare ``if`` on an array is ambiguous and raises, so threshold element-wise.
    return np.where(np.asarray(x) <= 0, 0, 1)
|
||||
|
||||
|
||||
def sigmoid(x):
    '''
    Logistic (sigmoid) activation function.

    :param x: a scalar or a numpy array
    :return: 1 / (1 + exp(-x)), computed element-wise when x is an array
    '''
    decay = np.exp(-x)
    return 1 / (1 + decay)
|
||||
|
||||
|
||||
def softmax(x):
    '''
    Softmax activation computed over all the elements of x.

    The input is flattened, so the result is always one-dimensional and its
    elements sum to 1. The maximum is subtracted before exponentiation; this
    leaves the result mathematically unchanged but prevents the overflow
    (and the resulting NaN values) that large inputs would otherwise cause.

    :param x: a numpy array or array-like of any shape
    :return: a 1-d numpy array with one probability per element of x
             (an empty array when x is empty)
    '''
    values = np.asarray(x, dtype=float).flatten()

    if values.size == 0:
        return values

    exponentials = np.exp(values - values.max())
    return exponentials / exponentials.sum()
|
||||
|
425
pyFTS/fcm/GA.py
Normal file
425
pyFTS/fcm/GA.py
Normal file
@ -0,0 +1,425 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import math
|
||||
import time
|
||||
from functools import reduce
|
||||
from operator import itemgetter
|
||||
import dispy
|
||||
|
||||
import random
|
||||
from pyFTS.common import Util
|
||||
from pyFTS.benchmarks import Measures
|
||||
from pyFTS.partitioners import Grid, Entropy # , Huarng
|
||||
from pyFTS.models import hofts
|
||||
from pyFTS.common import Membership
|
||||
from pyFTS.hyperparam import Util as hUtil
|
||||
from pyFTS.distributed import dispy as dUtil
|
||||
|
||||
from pyFTS.fcm import common, fts
|
||||
|
||||
|
||||
parameters = {}
|
||||
|
||||
#
|
||||
def genotype():
    '''
    Create an individual genotype.

    The genotype holds one square weight matrix per lag. The matrix side
    ('num_concepts') and the number of matrices ('order') are read from the
    module-level ``parameters`` dictionary. Each weight is drawn from a normal
    distribution with mean 0 and standard deviation 0.5.

    :return: the genotype, a dictionary with the key 'weights' (list of matrices)
    '''
    side = parameters['num_concepts']
    lags = parameters['order']

    matrices = []
    for _ in range(lags):
        matrices.append(np.random.normal(0, .5, (side, side)))

    return {'weights': matrices}
|
||||
|
||||
|
||||
def random_genotype():
    '''
    Create a random genotype.

    Thin wrapper around genotype(), which already samples random weights.

    :return: the genotype, a dictionary with the key 'weights'
    '''
    individual = genotype()
    return individual
|
||||
|
||||
|
||||
#
|
||||
def initial_population(n):
    '''
    Create a random population of size n.

    :param n: the size of the population
    :return: a list with n random individuals
    '''
    return [random_genotype() for _ in range(n)]
|
||||
|
||||
|
||||
def phenotype(individual, train):
    '''
    Instantiate the genotype: build an FCM_FTS model that uses the genotype weights.

    The partitioner and the model order come from the module-level
    ``parameters`` dictionary. No fitting is performed here; the weights of the
    cognitive map are taken directly from the individual.

    :param individual: a genotype (dictionary with the key 'weights')
    :param train: the training dataset (currently not used by this function)
    :return: an FCM_FTS model whose cognitive map holds the individual's weights
    '''
    model = fts.FCM_FTS(partitioner=parameters['partitioner'],
                        order=parameters['order'])

    model.fcm.weights = individual['weights']

    return model
|
||||
|
||||
|
||||
|
||||
def evaluate(dataset, individual, **kwargs):
    '''
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset (default 800)
    :param train_rate: The train/test split ([0,1]) (default 0.8)
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) (default 0.2)
    :return: a dictionary {'rmse': value}, where value is the NaN-ignoring mean
             of the RMSE measured on the test part of each window
    '''
    # NOTE(review): the imports are local, presumably so that the function is
    # self-contained when it is shipped to remote dispy nodes -- confirm.
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    windows = Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate)

    errors = []

    for _, train, test in windows:
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        model.uod_clip = False

        forecasts = model.predict(test)

        # The last forecast is dropped and the first max_lag observations are
        # skipped so that both sequences are aligned.
        errors.append(Measures.rmse(test[model.max_lag:], forecasts[:-1]))

    return {'rmse': np.nanmean(errors)}
|
||||
|
||||
|
||||
|
||||
def tournament(population, objective):
    '''
    Simple binary tournament selection strategy.

    Two competitors are drawn from the population and the one with the lowest
    value of the objective wins (the second competitor wins ties).

    With a single individual that individual is returned. With exactly two
    individuals both compete. With three or more, the competitors are drawn
    uniformly at random (independently, so the same individual may be drawn
    twice) from the whole population.

    :param population: the population, a list of dictionaries
    :param objective: the key of the objective to be considered on tournament
    :return: the winning individual (the same object held by the population)
    :raises IndexError: if the population is empty
    '''
    size = len(population)

    if size < 2:
        return population[0]

    if size == 2:
        first, second = 0, 1
    else:
        first = random.randrange(size)
        second = random.randrange(size)

    if population[first][objective] < population[second][objective]:
        winner = first
    else:
        winner = second

    return population[winner]
|
||||
|
||||
|
||||
|
||||
def crossover(parents):
    '''
    Crossover operation between two parents.

    For every lag, each weight of the descendent is the fixed blend
    0.7 * (first parent weight) + 0.3 * (second parent weight).
    Only parents[0] and parents[1] are used.

    :param parents: a list with two genotypes
    :return: a genotype
    '''
    # The descendent starts as a fresh genotype; its weights are overwritten below.
    child = genotype()

    for lag in range(parameters['order']):
        dominant = parents[0]['weights'][lag]
        recessive = parents[1]['weights'][lag]

        blended = [.7*dominant[row, col] + .3*recessive[row, col]
                   for (row, col), _ in np.ndenumerate(dominant)]

        child['weights'][lag] = np.array(blended).reshape(dominant.shape)

    return child
|
||||
|
||||
|
||||
def mutation(individual, pmut):
    '''
    Mutation operator.

    Each weight matrix of the individual is mutated with probability pmut.
    A mutated matrix receives between 1 and (rows * cols) perturbations. Each
    perturbation picks a random cell, adds Gaussian noise (mean 0, standard
    deviation 0.5) to it and clips the result to the interval [-1, 1].

    The individual received is NOT modified: the mutation is applied to a copy.
    Selection may return the same dictionary object that is still part of the
    current population (possibly more than once), so mutating in place would
    silently alter those individuals, including the current best one.

    :param individual: a genotype, a dictionary with the key 'weights'
                       (a list of 2-d weight matrices)
    :param pmut: probability of mutating each weight matrix
    :return: a new genotype with the (possibly) mutated weights; the other keys
             are copied from the individual
    '''
    mutant = dict(individual)
    mutant['weights'] = [np.array(matrix, dtype=float) for matrix in individual['weights']]

    for matrix in mutant['weights']:
        if random.uniform(0, 1) >= pmut:
            continue

        rows, cols = matrix.shape

        num_mutations = random.randint(1, rows * cols)

        for _ in range(num_mutations):
            row = random.randint(0, rows - 1)
            col = random.randint(0, cols - 1)

            # A scalar draw: assigning a 1-element array to a single cell is deprecated.
            perturbed = matrix[row, col] + np.random.normal(0, .5)
            matrix[row, col] = np.clip(perturbed, -1, 1)

    return mutant
|
||||
|
||||
|
||||
def elitism(population, new_population):
    '''
    Elitism operation: guarantee that the best individual of the current
    population survives into the next one.

    The new population is returned sorted by ascending 'rmse'. When its best
    individual is worse than the best of the current population, the latter is
    inserted at the head of the list (so the list grows by one element; no
    individual is removed here).

    :param population: the current population (list of dictionaries with the key 'rmse')
    :param new_population: the candidate population (same format)
    :return: a new sorted list; the lists received are not modified
    '''
    by_error = itemgetter('rmse')

    champion = sorted(population, key=by_error)[0]

    ranked = sorted(new_population, key=by_error)

    if ranked[0]["rmse"] > champion["rmse"]:
        ranked.insert(0, champion)

    return ranked
|
||||
|
||||
|
||||
def _evaluate_population(population, dataset, distributed, cluster, kwargs, verbose=False):
    '''
    Evaluate every individual of a population, storing the result in its 'rmse' key.

    :param population: list of genotypes to be evaluated (updated in place)
    :param dataset: the evaluation dataset
    :param distributed: False for local evaluation or 'dispy' for cluster evaluation
    :param cluster: the dispy cluster (only used when distributed == 'dispy')
    :param kwargs: dictionary with the keyword arguments forwarded to evaluate
    :param verbose: when True, print the id of each distributed job before waiting for it
    :return: list with the rmse values successfully computed
    '''
    scores = []

    if not distributed:
        for individual in population:
            ret = evaluate(dataset, individual, **kwargs)
            individual['rmse'] = ret['rmse']
            scores.append(ret['rmse'])

    elif distributed == 'dispy':
        jobs = []

        for ct, individual in enumerate(population):
            job = cluster.submit(dataset, individual, **kwargs)
            job.id = ct  # the position of the individual, used to store the result
            jobs.append(job)

        for job in jobs:
            if verbose:
                print('job id {}'.format(job.id))
            result = job()
            if job.status == dispy.DispyJob.Finished and result is not None:
                population[job.id]['rmse'] = result['rmse']
                scores.append(result['rmse'])
            else:
                print(job.exception)
                print(job.stdout)

    return scores


def GeneticAlgorithm(dataset, **kwargs):
    '''
    Genetic algorithm for the optimization of the FCM weight matrices.

    Each generation performs tournament selection, crossover, mutation,
    evaluation and elitism. The search stops after ngen generations or after
    mgen consecutive generations without improvement of the best rmse. The
    mutation probability grows by 0.05 after every generation without
    improvement and is restored when an improvement happens.

    :param dataset: the evaluation dataset
    :param ngen: Max number of generations (default 30)
    :param mgen: Max number of generations without improvement (default 7)
    :param npop: Population size (default 20)
    :param pcruz: Probability of crossover (default 0.5)
    :param pmut: Probability of mutation (default 0.3)
    :param distributed: False (default) for local evaluation or 'dispy'
    :param cluster: the dispy cluster, required when distributed == 'dispy'
    :param collect_statistics: when True (default) collect per-generation statistics
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :return: a tuple (best genotype, list with the statistics of each generation)
    '''
    statistics = []

    ngen = kwargs.get('ngen', 30)
    mgen = kwargs.get('mgen', 7)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .5)
    pmut = kwargs.get('pmut', .3)
    distributed = kwargs.get('distributed', False)

    # The cluster is removed from kwargs so that it is not forwarded to the jobs.
    cluster = kwargs.pop('cluster', None) if distributed == 'dispy' else None

    collect_statistics = kwargs.get('collect_statistics', True)

    no_improvement_count = 0

    population = initial_population(npop)

    print("Evaluating initial population {}".format(time.time()))
    _evaluate_population(population, dataset, distributed, cluster, kwargs)

    # Start from the real best individual, so that the first generation is
    # compared against a meaningful reference.
    best = min(population, key=itemgetter('rmse'))

    for i in range(ngen):
        print("GENERATION {} {}".format(i, time.time()))

        generation_statistics = {}

        # Selection
        new_population = []
        for j in range(int(npop / 2)):
            new_population.append(tournament(population, 'rmse'))
            new_population.append(tournament(population, 'rmse'))

        # Crossover: every descendent gets its own random pair of parents
        # (crossover only looks at the first two elements of the list received).
        new = []
        if len(new_population) >= 2:
            for j in range(int(npop * pcruz)):
                new.append(crossover(random.sample(new_population, 2)))
        new_population.extend(new)

        # Mutation
        new_population = [mutation(individual, pmut) for individual in new_population]

        # Evaluation
        scores = _evaluate_population(new_population, dataset, distributed, cluster,
                                      kwargs, verbose=True)

        if collect_statistics:
            generation_statistics['population'] = {'rmse': np.nanmedian(scores)}

        # Elitism
        population = elitism(population, new_population)[:npop]

        last_best = best
        best = population[0]

        if collect_statistics:
            generation_statistics['best'] = {'rmse': best['rmse']}
            statistics.append(generation_statistics)

        if last_best['rmse'] <= best['rmse']:
            no_improvement_count += 1
            print("WITHOUT IMPROVEMENT {}".format(no_improvement_count))
            pmut += .05
        else:
            no_improvement_count = 0
            pcruz = kwargs.get('pcruz', .5)
            pmut = kwargs.get('pmut', .3)
            print(best)

        if no_improvement_count == mgen:
            break

    return best, statistics
|
||||
|
||||
|
||||
def process_experiment(result, datasetname, conn):
    '''
    Print the outcome of an experiment and return its best individual.

    :param result: dictionary with the keys 'individual' and 'statistics'
    :param datasetname: name of the dataset (currently not used)
    :param conn: database connection (currently not used)
    :return: the value stored in result['individual']
    '''
    print(result)

    # Persisting the individual and the statistics is currently disabled.
    best_individual = result['individual']

    return best_individual
|
||||
|
||||
|
||||
def persist_statistics(statistics):
    '''
    Write the statistics, serialized as JSON, to a text file in the current
    working directory. The file name contains the current timestamp.

    :param statistics: a JSON-serializable object
    '''
    import json

    filename = 'statistics{}.txt'.format(time.time())

    with open(filename, 'w') as handle:
        serialized = json.dumps(statistics)
        handle.write(serialized)
|
||||
|
||||
|
||||
def log_result(conn, datasetname, result):
    '''
    Print and insert into the hyperparameter database one record per metric
    ('rmse', 'size' and 'time').

    NOTE(review): the record reads the keys 'mf', 'order', 'partitioner',
    'npart', 'alpha', 'lags' and 'size' from the result; the genotypes created
    by genotype() in this module only have 'weights' -- confirm before
    enabling the call to this function.

    :param conn: connection to the hyperparameter database
    :param datasetname: name of the dataset
    :param result: dictionary with the hyperparameters and the metrics
    '''
    for metric in ('rmse', 'size', 'time'):
        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])

        print(record)

        hUtil.insert_hyperparam(record, conn)
|
||||
|
||||
|
||||
def execute(datasetname, dataset, **kwargs):
    '''
    Run the genetic algorithm several times over the dataset.

    When distributed == 'dispy' a cluster is started before the experiments
    and is always stopped afterwards, even if an experiment raises.

    :param datasetname: name of the dataset
    :param dataset: the evaluation dataset
    :param experiments: number of runs of the genetic algorithm (default 30)
    :param distributed: False (default) for local evaluation or 'dispy'
    :param nodes: list of dispy nodes (default ['127.0.0.1'])
    :param kwargs: the remaining arguments are forwarded to GeneticAlgorithm
    :return: the best individual of the LAST experiment, with the extra key
             'time' (elapsed seconds); an empty list when no experiment is run
    '''
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    cluster = http_server = None

    if distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []

    try:
        for i in np.arange(experiments):
            print("Experiment {}".format(i))

            start = time.time()
            individual, statistics = GeneticAlgorithm(dataset, **kwargs)
            end = time.time()
            individual['time'] = end - start
            experiment = {'individual': individual, 'statistics': statistics}

            ret = process_experiment(experiment, datasetname, conn)
    finally:
        # Release the cluster even when an experiment fails.
        if distributed == 'dispy':
            dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
|
||||
|
0
pyFTS/fcm/__init__.py
Normal file
0
pyFTS/fcm/__init__.py
Normal file
18
pyFTS/fcm/common.py
Normal file
18
pyFTS/fcm/common.py
Normal file
@ -0,0 +1,18 @@
|
||||
from pyFTS.fcm import Activations
|
||||
import numpy as np
|
||||
|
||||
|
||||
class FuzzyCognitiveMap(object):
    '''
    Fuzzy Cognitive Map with one weight matrix per lag.
    '''

    def __init__(self, **kwargs):
        '''
        :param order: number of lags, i.e. of weight matrices used by activate (default 1)
        :param partitioner: the concepts of the map; only its length is used here (default None)
        :param func: the activation function (default Activations.sigmoid)
        '''
        super(FuzzyCognitiveMap, self).__init__()
        self.order = kwargs.get('order',1)
        self.concepts = kwargs.get('partitioner',None)
        # The weights start empty and must be assigned before calling activate.
        self.weights = []
        self.activation_function = kwargs.get('func', Activations.sigmoid)

    def activate(self, concepts):
        '''
        Compute the activation of the map.

        :param concepts: sequence where concepts[k] holds the concept values of the k-th lag
        :return: the activation function applied to the sum, over the lags, of
                 the dot product between the lag values and the lag weights
        '''
        accumulated = np.zeros(len(self.concepts))

        for lag in range(self.order):
            lag_values = np.array(concepts[lag])
            accumulated += np.dot(lag_values.T, self.weights[lag])

        return self.activation_function(accumulated)
|
||||
|
30
pyFTS/fcm/fts.py
Normal file
30
pyFTS/fcm/fts.py
Normal file
@ -0,0 +1,30 @@
|
||||
from pyFTS.common import fts
|
||||
from pyFTS.models import hofts
|
||||
from pyFTS.fcm import common
|
||||
import numpy as np
|
||||
|
||||
|
||||
class FCM_FTS(hofts.HighOrderFTS):
    '''
    High order FTS whose forecasts are produced by a Fuzzy Cognitive Map.
    '''

    def __init__(self, **kwargs):
        '''
        The keyword arguments are forwarded both to the parent class and to
        the FuzzyCognitiveMap stored in self.fcm.
        '''
        super(FCM_FTS, self).__init__(**kwargs)
        self.fcm = common.FuzzyCognitiveMap(**kwargs)

    def forecast(self, ndata, **kwargs):
        '''
        Forecast one value for each window of self.order consecutive values of ndata.

        Each window is fuzzyfied, the cognitive map is activated with the
        result, and the forecast is the average of the centroids of the fuzzy
        sets weighted by the activations.

        :param ndata: the input data
        :return: a list with len(ndata) - order + 1 forecasts
        '''
        midpoints = np.array([fset.centroid for fset in self.partitioner])

        forecasts = []

        for end in range(self.order, len(ndata) + 1):
            window = ndata[end - self.order: end]

            memberships = self.partitioner.fuzzyfy(window, mode='vector')

            activation = self.fcm.activate(memberships)

            forecasts.append(np.dot(midpoints, activation) / np.sum(activation))

        return forecasts
|
@ -14,6 +14,39 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
|
||||
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
|
||||
from pyFTS.common import Transformations, Membership
|
||||
|
||||
from pyFTS.fcm import fts, common, GA
|
||||
|
||||
from pyFTS.data import Enrollments, TAIEX
|
||||
|
||||
import pandas as pd
|
||||
# Download the series used to exercise the genetic optimizer of the FCM-FTS weights.
df = pd.read_csv('https://query.data.world/s/7zfy4d5uep7wbgf56k4uu5g52dmvap', sep=';')

# Only the first 12000 values of the 'glo_avg' column are used.
data = df['glo_avg'].values[:12000]

fs = Grid.GridPartitioner(data=data, npart=35, func=Membership.trimf)

# 'num_concepts' must match the number of partitions of the partitioner.
GA.parameters.update(num_concepts=35, order=2, partitioner=fs)

# NOTE(review): the experiment is labelled 'TAIEX' although the data comes
# from the 'glo_avg' column of the CSV above -- confirm the intended name.
GA.execute('TAIEX', data)
|
||||
|
||||
|
||||
'''
|
||||
model = fts.FCM_FTS(partitioner=fs, order=1)
|
||||
|
||||
model.fcm.weights = np.array([
|
||||
[1, 1, 0, -1, -1],
|
||||
[1, 1, 1, 0, -1],
|
||||
[0, 1, 1, 1, 0],
|
||||
[-1, 0, 1, 1, 1],
|
||||
[-1, -1, 0, 1, 1]
|
||||
])
|
||||
|
||||
print(data)
|
||||
print(model.forecast(data))
|
||||
'''
|
||||
'''
|
||||
dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
|
||||
|
||||
@ -80,7 +113,7 @@ forecasts = model.predict(test_mv, type='multivariate', generators={'data': time
|
||||
print(forecasts)
|
||||
|
||||
'''
|
||||
|
||||
'''
|
||||
from pyFTS.data import lorentz
|
||||
df = lorentz.get_dataframe(iterations=5000)
|
||||
|
||||
@ -104,3 +137,4 @@ model.fit(train)
|
||||
forecasts = model.predict(test, type='multivariate', steps_ahead=20)
|
||||
|
||||
print(forecasts)
|
||||
'''
|
Loading…
Reference in New Issue
Block a user