# pyFTS/fcm/Activations.py -- activation functions for Fuzzy Cognitive Maps,
# followed by the genotype helpers of pyFTS/fcm/GA.py (recovered from a
# whitespace-mangled patch; reformatted to conventional Python).
import numpy as np


def step(x):
    """Heaviside step activation: 0 for x <= 0, else 1."""
    return 0 if x <= 0 else 1


def sigmoid(x):
    """Logistic sigmoid activation, 1 / (1 + exp(-x))."""
    return 1 / (1 + np.exp(-x))


def softmax(x):
    """Numerically stable softmax over all elements of x (flattened).

    Fix: the original exponentiated the raw values, which overflows to
    inf/nan for moderately large inputs; shifting by the maximum is the
    standard stabilization and leaves the result mathematically unchanged.
    """
    v = np.asarray(x, dtype=float).flatten()
    e = np.exp(v - np.max(v))  # shift by max to avoid overflow
    return e / e.sum()


# module-level GA configuration: expected keys are 'num_concepts', 'order'
# and 'partitioner', set by the caller before running the GA
parameters = {}


def genotype():
    """Create an individual genotype for the FCM weight search.

    Reads 'num_concepts' and 'order' from the module-level ``parameters``
    dict.

    :return: a dict with key 'weights': a list of ``order`` matrices of
        shape (num_concepts, num_concepts) drawn from N(0, 0.5)
    """
    num_concepts = parameters['num_concepts']
    order = parameters['order']
    return dict(weights=[np.random.normal(0, .5, (num_concepts, num_concepts))
                         for _ in range(order)])


def random_genotype():
    """Create a random genotype (alias of :func:`genotype`)."""
    return genotype()


def initial_population(n):
    """Create a random population of size n.

    :param n: the size of the population
    :return: a list with n random individuals
    """
    return [random_genotype() for _ in range(n)]


def phenotype(individual, train):
    """Instantiate a genotype as a (not yet fitted) FCM_FTS model.

    :param individual: a genotype (dict with 'weights')
    :param train: the training dataset (currently unused; kept for
        interface compatibility with the other pyFTS GA modules)
    :return: an FCM_FTS model carrying the individual's weight matrices
    """
    # local import keeps this module importable without the full pyFTS tree
    from pyFTS.fcm import fts

    partitioner = parameters['partitioner']
    order = parameters['order']

    model = fts.FCM_FTS(partitioner=partitioner, order=order)
    model.fcm.weights = individual['weights']

    return model
import random

import numpy as np


def evaluate(dataset, individual, **kwargs):
    '''
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :return: a dict {'rmse': value} with the accuracy fitness value
        (mean RMSE across all sliding windows); the original docstring
        wrongly advertised a (len_lags, rmse) tuple
    '''
    # local imports keep the function self-contained for dispy workers
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    errors = []

    for count, train, test in Util.sliding_window(dataset, window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        # allow forecasts outside the known Universe of Discourse
        model.uod_clip = False

        forecasts = model.predict(test)

        # align forecasts with targets: drop the trailing forecast so both
        # sequences cover test[max_lag:]
        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
        errors.append(rmse)

    return {'rmse': np.nanmean(errors)}


def tournament(population, objective):
    '''
    Simple binary tournament selection: draw two competitors uniformly at
    random and return the one with the lower (better) objective value.

    Fix: the original guarded the draws with ``if n > 2``, which froze the
    competitors to indices 0 and 1 for populations of up to three
    individuals, so any third individual could never be selected.

    :param population: list of evaluated genotypes (dicts)
    :param objective: key of the fitness value to compare (e.g. 'rmse')
    :return: the winning individual
    '''
    n = len(population) - 1

    r1 = random.randint(0, n)
    r2 = random.randint(0, n)
    ix = r1 if population[r1][objective] < population[r2][objective] else r2
    return population[ix]
import random

import numpy as np
from operator import itemgetter


def crossover(parents):
    '''
    Weighted-average crossover: each descendant weight is
    0.7 * parent1 + 0.3 * parent2, element-wise, for every lag matrix.

    :param parents: a list with (at least) two genotypes
    :return: a new genotype
    '''
    descendent = genotype()

    for k in range(parameters['order']):
        new_weight = []
        weights1 = parents[0]['weights'][k]
        weights2 = parents[1]['weights'][k]

        for (row, col), _ in np.ndenumerate(weights1):
            new_weight.append(.7 * weights1[row, col] + .3 * weights2[row, col])

        descendent['weights'][k] = np.array(new_weight).reshape(weights1.shape)

    return descendent


def mutation(individual, pmut):
    '''
    Mutation operator: with probability pmut per lag matrix, perturb a random
    number of weights with N(0, 0.5) noise, clipping each result into [-1, 1].

    :param individual: the genotype to mutate (modified in place)
    :param pmut: per-matrix mutation probability ([0,1])
    :return: the mutated individual
    '''
    for k in range(parameters['order']):
        rows, cols = individual['weights'][k].shape

        if random.uniform(0, 1) < pmut:

            num_mutations = random.randint(1, parameters['num_concepts'] ** 2)

            for _ in range(num_mutations):
                row = random.randint(0, rows - 1)
                col = random.randint(0, cols - 1)

                # Fix: draw a scalar; the original drew a size-1 array
                # (np.random.normal(0, .5, 1)), which newer numpy refuses
                # to assign into a scalar matrix cell.
                individual['weights'][k][row, col] += np.random.normal(0, .5)
                individual['weights'][k][row, col] = np.clip(
                    individual['weights'][k][row, col], -1, 1)

    return individual


def elitism(population, new_population):
    '''
    Elitism: when the new generation fails to improve on the previous best,
    re-insert that elite individual at the head of the new population.

    :param population: previous generation (each dict carries 'rmse')
    :param new_population: candidate next generation
    :return: new_population sorted by 'rmse', possibly with the old elite prepended
    '''
    population = sorted(population, key=itemgetter('rmse'))
    best = population[0]

    new_population = sorted(new_population, key=itemgetter('rmse'))
    if new_population[0]["rmse"] > best["rmse"]:
        new_population.insert(0, best)

    return new_population
def GeneticAlgorithm(dataset, **kwargs):
    '''
    Genetic algorithm that searches the FCM weight matrices of an FCM_FTS
    model (the original docstring said "hyperparameter optimization", but
    the genotype here is the set of weight matrices).

    :param dataset: training time series
    :param ngen: Max number of generations
    :param mgen: Max number of generations without improvement
    :param npop: Population size
    :param pcruz: Probability of crossover
    :param pmut: Probability of mutation
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param distributed: False for local evaluation, 'dispy' for a dispy cluster
    :return: a tuple (best genotype, list of per-generation statistics)
    '''

    statistics = []

    ngen = kwargs.get('ngen', 30)
    mgen = kwargs.get('mgen', 7)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .5)
    pmut = kwargs.get('pmut', .3)
    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        # pop, not get: the cluster handle must not be forwarded to evaluate()
        cluster = kwargs.pop('cluster', None)

    collect_statistics = kwargs.get('collect_statistics', True)

    no_improvement_count = 0

    new_population = []

    population = initial_population(npop)

    # Fix: seed both trackers with the same individual. The original used
    # population[1] for `best`, which crashes when npop == 1 and is
    # arbitrary anyway, since fitness is only known after evaluation below.
    last_best = population[0]
    best = population[0]

    print("Evaluating initial population {}".format(time.time()))
    if not distributed:
        for individual in population:
            ret = evaluate(dataset, individual, **kwargs)
            individual['rmse'] = ret['rmse']
    elif distributed == 'dispy':
        jobs = []
        for ct, individual in enumerate(population):
            job = cluster.submit(dataset, individual, **kwargs)
            job.id = ct
            jobs.append(job)
        for job in jobs:
            result = job()
            if job.status == dispy.DispyJob.Finished and result is not None:
                population[job.id]['rmse'] = result['rmse']
            else:
                print(job.exception)
                print(job.stdout)

    for i in range(ngen):
        print("GENERATION {} {}".format(i, time.time()))

        generation_statistics = {}

        # Selection: binary tournaments fill the mating pool
        for j in range(int(npop / 2)):
            new_population.append(tournament(population, 'rmse'))
            new_population.append(tournament(population, 'rmse'))

        # Crossover
        new = []
        for j in range(int(npop * pcruz)):
            new.append(crossover(new_population))
        new_population.extend(new)

        # Mutation
        for ct, individual in enumerate(new_population):
            new_population[ct] = mutation(individual, pmut)

        # Evaluation
        if collect_statistics:
            stats = {key: [] for key in ['rmse']}

        if not distributed:
            for individual in new_population:
                ret = evaluate(dataset, individual, **kwargs)
                for key in ['rmse']:
                    individual[key] = ret[key]
                    if collect_statistics: stats[key].append(ret[key])

        elif distributed == 'dispy':
            jobs = []

            for ct, individual in enumerate(new_population):
                job = cluster.submit(dataset, individual, **kwargs)
                job.id = ct
                jobs.append(job)
            for job in jobs:
                print('job id {}'.format(job.id))
                result = job()
                if job.status == dispy.DispyJob.Finished and result is not None:
                    for key in ['rmse']:
                        new_population[job.id][key] = result[key]
                        if collect_statistics: stats[key].append(result[key])
                else:
                    print(job.exception)
                    print(job.stdout)

        if collect_statistics:
            # median rather than mean: robust to diverged individuals
            mean_stats = {key: np.nanmedian(stats[key]) for key in ['rmse']}
            generation_statistics['population'] = mean_stats

        # Elitism, then truncate back to the configured population size
        population = elitism(population, new_population)
        population = population[:npop]

        new_population = []

        last_best = best
        best = population[0]

        if collect_statistics:
            generation_statistics['best'] = {key: best[key] for key in ['rmse']}

        statistics.append(generation_statistics)

        if last_best['rmse'] <= best['rmse']:
            # stagnation: raise mutation pressure to escape the plateau
            no_improvement_count += 1
            print("WITHOUT IMPROVEMENT {}".format(no_improvement_count))
            pmut += .05
        else:
            no_improvement_count = 0
            pcruz = kwargs.get('pcruz', .5)
            pmut = kwargs.get('pmut', .3)
            print(best)

        if no_improvement_count == mgen:
            break

    return best, statistics
import time

import numpy as np


def process_experiment(result, datasetname, conn):
    """Post-process one GA experiment and return the winning individual.

    Persistence is currently disabled (calls commented out); the function
    only echoes the result dict and hands back result['individual'].
    """
    print(result)
    # log_result(conn, datasetname, result['individual'])
    # persist_statistics(result['statistics'])
    return result['individual']


def persist_statistics(statistics):
    """Dump the per-generation statistics as JSON into a timestamped file."""
    import json
    with open('statistics{}.txt'.format(time.time()), 'w') as file:
        file.write(json.dumps(statistics))


def log_result(conn, datasetname, result):
    """Insert one record per metric into the hyperparameter database.

    NOTE(review): the record layout (mf/order/partitioner/npart/alpha/lags)
    was inherited from pyFTS.hyperparam's evolutionary GA; FCM individuals
    only carry 'weights', so re-enabling this will KeyError — the schema
    must be adapted before use.
    """
    metrics = ['rmse', 'size', 'time']
    for metric in metrics:
        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])

        print(record)

        hUtil.insert_hyperparam(record, conn)


def execute(datasetname, dataset, **kwargs):
    """Run several GA experiments over a dataset, returning the last winner.

    :param datasetname: label used for logging/persistence
    :param dataset: the time series handed to GeneticAlgorithm
    :param experiments: number of independent GA runs (default 30)
    :param distributed: False or 'dispy' for cluster evaluation
    :return: the individual of the last experiment (or [] if experiments == 0)
    """
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in range(experiments):  # range: a plain int counter suffices
        print("Experiment {}".format(i))

        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}

        ret = process_experiment(experiment, datasetname, conn)

    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
class FuzzyCognitiveMap(object):
    """A high-order Fuzzy Cognitive Map over a pyFTS partitioner's fuzzy sets."""

    def __init__(self, **kwargs):
        super(FuzzyCognitiveMap, self).__init__()
        # number of past activation vectors combined at each step
        self.order = kwargs.get('order', 1)
        # concepts come from a pyFTS partitioner (one concept per fuzzy set)
        self.concepts = kwargs.get('partitioner', None)
        # list of `order` square weight matrices, set externally (e.g. by the GA)
        self.weights = []
        self.activation_function = kwargs.get('func', Activations.sigmoid)

    def activate(self, concepts):
        """Propagate `order` past concept-activation vectors through the map.

        :param concepts: sequence of at least `order` activation vectors
            (presumably fuzzified memberships; one vector per lag — TODO confirm)
        :return: the activation-function output of the summed dot products
        """
        dot_products = np.zeros(len(self.concepts))
        for k in range(self.order):
            dot_products += np.dot(np.array(concepts[k]).T, self.weights[k])
        return self.activation_function(dot_products)


class FCM_FTS(hofts.HighOrderFTS):
    """High-order FTS whose rule base is replaced by a Fuzzy Cognitive Map."""

    def __init__(self, **kwargs):
        super(FCM_FTS, self).__init__(**kwargs)
        self.fcm = common.FuzzyCognitiveMap(**kwargs)

    def forecast(self, ndata, **kwargs):
        """Point forecasts: defuzzify FCM activations against set midpoints.

        :param ndata: input time series (pre-transformed by the caller)
        :return: list of len(ndata) - order + 1 point forecasts
        """
        ret = []

        midpoints = np.array([fset.centroid for fset in self.partitioner])

        for t in range(self.order, len(ndata) + 1):

            sample = ndata[t - self.order: t]

            fuzzyfied = self.partitioner.fuzzyfy(sample, mode='vector')

            activation = self.fcm.activate(fuzzyfied)

            # weighted average of midpoints; NOTE(review): with the step
            # activation this sum can be 0 and yield a division by zero —
            # confirm only strictly-positive activations are used
            final = np.dot(midpoints, activation) / np.sum(activation)

            ret.append(final)

        return ret
fts.FCM_FTS(partitioner=fs, order=1) + +model.fcm.weights = np.array([ + [1, 1, 0, -1, -1], + [1, 1, 1, 0, -1], + [0, 1, 1, 1, 0], + [-1, 0, 1, 1, 1], + [-1, -1, 0, 1, 1] +]) + +print(data) +print(model.forecast(data)) +''' ''' dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';') @@ -80,7 +113,7 @@ forecasts = model.predict(test_mv, type='multivariate', generators={'data': time print(forecasts) ''' - +''' from pyFTS.data import lorentz df = lorentz.get_dataframe(iterations=5000) @@ -104,3 +137,4 @@ model.fit(train) forecasts = model.predict(test, type='multivariate', steps_ahead=20) print(forecasts) +''' \ No newline at end of file