# pyFTS/fcm/Activations.py -- activation functions for Fuzzy Cognitive Maps,
# followed by the genotype helpers of pyFTS/fcm/GA.py (recovered from a
# whitespace-mangled patch; reformatted to conventional Python).
import numpy as np


def step(x):
    """Heaviside step activation: 0 for x <= 0, else 1."""
    return 0 if x <= 0 else 1


def sigmoid(x):
    """Logistic sigmoid activation, 1 / (1 + exp(-x))."""
    return 1 / (1 + np.exp(-x))


def softmax(x):
    """Numerically stable softmax over all elements of x (flattened).

    Fix: the original exponentiated the raw values, which overflows to
    inf/nan for moderately large inputs; shifting by the maximum is the
    standard stabilization and leaves the result mathematically unchanged.
    """
    v = np.asarray(x, dtype=float).flatten()
    e = np.exp(v - np.max(v))  # shift by max to avoid overflow
    return e / e.sum()


# module-level GA configuration: expected keys are 'num_concepts', 'order'
# and 'partitioner', set by the caller before running the GA
parameters = {}


def genotype():
    """Create an individual genotype for the FCM weight search.

    Reads 'num_concepts' and 'order' from the module-level ``parameters``
    dict.

    :return: a dict with key 'weights': a list of ``order`` matrices of
        shape (num_concepts, num_concepts) drawn from N(0, 0.5)
    """
    num_concepts = parameters['num_concepts']
    order = parameters['order']
    return dict(weights=[np.random.normal(0, .5, (num_concepts, num_concepts))
                         for _ in range(order)])


def random_genotype():
    """Create a random genotype (alias of :func:`genotype`)."""
    return genotype()


def initial_population(n):
    """Create a random population of size n.

    :param n: the size of the population
    :return: a list with n random individuals
    """
    return [random_genotype() for _ in range(n)]


def phenotype(individual, train):
    """Instantiate a genotype as a (not yet fitted) FCM_FTS model.

    :param individual: a genotype (dict with 'weights')
    :param train: the training dataset (currently unused; kept for
        interface compatibility with the other pyFTS GA modules)
    :return: an FCM_FTS model carrying the individual's weight matrices
    """
    # local import keeps this module importable without the full pyFTS tree
    from pyFTS.fcm import fts

    partitioner = parameters['partitioner']
    order = parameters['order']

    model = fts.FCM_FTS(partitioner=partitioner, order=order)
    model.fcm.weights = individual['weights']

    return model
import random

import numpy as np


def evaluate(dataset, individual, **kwargs):
    '''
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :return: a dict {'rmse': value} with the accuracy fitness value
        (mean RMSE across all sliding windows); the original docstring
        wrongly advertised a (len_lags, rmse) tuple
    '''
    # local imports keep the function self-contained for dispy workers
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    errors = []

    for count, train, test in Util.sliding_window(dataset, window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        # allow forecasts outside the known Universe of Discourse
        model.uod_clip = False

        forecasts = model.predict(test)

        # align forecasts with targets: drop the trailing forecast so both
        # sequences cover test[max_lag:]
        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
        errors.append(rmse)

    return {'rmse': np.nanmean(errors)}


def tournament(population, objective):
    '''
    Simple binary tournament selection: draw two competitors uniformly at
    random and return the one with the lower (better) objective value.

    Fix: the original guarded the draws with ``if n > 2``, which froze the
    competitors to indices 0 and 1 for populations of up to three
    individuals, so any third individual could never be selected.

    :param population: list of evaluated genotypes (dicts)
    :param objective: key of the fitness value to compare (e.g. 'rmse')
    :return: the winning individual
    '''
    n = len(population) - 1

    r1 = random.randint(0, n)
    r2 = random.randint(0, n)
    ix = r1 if population[r1][objective] < population[r2][objective] else r2
    return population[ix]
import random

import numpy as np
from operator import itemgetter


def crossover(parents):
    '''
    Weighted-average crossover: each descendant weight is
    0.7 * parent1 + 0.3 * parent2, element-wise, for every lag matrix.

    :param parents: a list with (at least) two genotypes
    :return: a new genotype
    '''
    descendent = genotype()

    for k in range(parameters['order']):
        new_weight = []
        weights1 = parents[0]['weights'][k]
        weights2 = parents[1]['weights'][k]

        for (row, col), _ in np.ndenumerate(weights1):
            new_weight.append(.7 * weights1[row, col] + .3 * weights2[row, col])

        descendent['weights'][k] = np.array(new_weight).reshape(weights1.shape)

    return descendent


def mutation(individual, pmut):
    '''
    Mutation operator: with probability pmut per lag matrix, perturb a random
    number of weights with N(0, 0.5) noise, clipping each result into [-1, 1].

    :param individual: the genotype to mutate (modified in place)
    :param pmut: per-matrix mutation probability ([0,1])
    :return: the mutated individual
    '''
    for k in range(parameters['order']):
        rows, cols = individual['weights'][k].shape

        if random.uniform(0, 1) < pmut:

            num_mutations = random.randint(1, parameters['num_concepts'] ** 2)

            for _ in range(num_mutations):
                row = random.randint(0, rows - 1)
                col = random.randint(0, cols - 1)

                # Fix: draw a scalar; the original drew a size-1 array
                # (np.random.normal(0, .5, 1)), which newer numpy refuses
                # to assign into a scalar matrix cell.
                individual['weights'][k][row, col] += np.random.normal(0, .5)
                individual['weights'][k][row, col] = np.clip(
                    individual['weights'][k][row, col], -1, 1)

    return individual


def elitism(population, new_population):
    '''
    Elitism: when the new generation fails to improve on the previous best,
    re-insert that elite individual at the head of the new population.

    :param population: previous generation (each dict carries 'rmse')
    :param new_population: candidate next generation
    :return: new_population sorted by 'rmse', possibly with the old elite prepended
    '''
    population = sorted(population, key=itemgetter('rmse'))
    best = population[0]

    new_population = sorted(new_population, key=itemgetter('rmse'))
    if new_population[0]["rmse"] > best["rmse"]:
        new_population.insert(0, best)

    return new_population
def GeneticAlgorithm(dataset, **kwargs):
    '''
    Genetic algorithm that searches the FCM weight matrices of an FCM_FTS
    model (the original docstring said "hyperparameter optimization", but
    the genotype here is the set of weight matrices).

    :param dataset: training time series
    :param ngen: Max number of generations
    :param mgen: Max number of generations without improvement
    :param npop: Population size
    :param pcruz: Probability of crossover
    :param pmut: Probability of mutation
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param distributed: False for local evaluation, 'dispy' for a dispy cluster
    :return: a tuple (best genotype, list of per-generation statistics)
    '''

    statistics = []

    ngen = kwargs.get('ngen', 30)
    mgen = kwargs.get('mgen', 7)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .5)
    pmut = kwargs.get('pmut', .3)
    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        # pop, not get: the cluster handle must not be forwarded to evaluate()
        cluster = kwargs.pop('cluster', None)

    collect_statistics = kwargs.get('collect_statistics', True)

    no_improvement_count = 0

    new_population = []

    population = initial_population(npop)

    # Fix: seed both trackers with the same individual. The original used
    # population[1] for `best`, which crashes when npop == 1 and is
    # arbitrary anyway, since fitness is only known after evaluation below.
    last_best = population[0]
    best = population[0]

    print("Evaluating initial population {}".format(time.time()))
    if not distributed:
        for individual in population:
            ret = evaluate(dataset, individual, **kwargs)
            individual['rmse'] = ret['rmse']
    elif distributed == 'dispy':
        jobs = []
        for ct, individual in enumerate(population):
            job = cluster.submit(dataset, individual, **kwargs)
            job.id = ct
            jobs.append(job)
        for job in jobs:
            result = job()
            if job.status == dispy.DispyJob.Finished and result is not None:
                population[job.id]['rmse'] = result['rmse']
            else:
                print(job.exception)
                print(job.stdout)

    for i in range(ngen):
        print("GENERATION {} {}".format(i, time.time()))

        generation_statistics = {}

        # Selection: binary tournaments fill the mating pool
        for j in range(int(npop / 2)):
            new_population.append(tournament(population, 'rmse'))
            new_population.append(tournament(population, 'rmse'))

        # Crossover
        new = []
        for j in range(int(npop * pcruz)):
            new.append(crossover(new_population))
        new_population.extend(new)

        # Mutation
        for ct, individual in enumerate(new_population):
            new_population[ct] = mutation(individual, pmut)

        # Evaluation
        if collect_statistics:
            stats = {key: [] for key in ['rmse']}

        if not distributed:
            for individual in new_population:
                ret = evaluate(dataset, individual, **kwargs)
                for key in ['rmse']:
                    individual[key] = ret[key]
                    if collect_statistics: stats[key].append(ret[key])

        elif distributed == 'dispy':
            jobs = []

            for ct, individual in enumerate(new_population):
                job = cluster.submit(dataset, individual, **kwargs)
                job.id = ct
                jobs.append(job)
            for job in jobs:
                print('job id {}'.format(job.id))
                result = job()
                if job.status == dispy.DispyJob.Finished and result is not None:
                    for key in ['rmse']:
                        new_population[job.id][key] = result[key]
                        if collect_statistics: stats[key].append(result[key])
                else:
                    print(job.exception)
                    print(job.stdout)

        if collect_statistics:
            # median rather than mean: robust to diverged individuals
            mean_stats = {key: np.nanmedian(stats[key]) for key in ['rmse']}
            generation_statistics['population'] = mean_stats

        # Elitism, then truncate back to the configured population size
        population = elitism(population, new_population)
        population = population[:npop]

        new_population = []

        last_best = best
        best = population[0]

        if collect_statistics:
            generation_statistics['best'] = {key: best[key] for key in ['rmse']}

        statistics.append(generation_statistics)

        if last_best['rmse'] <= best['rmse']:
            # stagnation: raise mutation pressure to escape the plateau
            no_improvement_count += 1
            print("WITHOUT IMPROVEMENT {}".format(no_improvement_count))
            pmut += .05
        else:
            no_improvement_count = 0
            pcruz = kwargs.get('pcruz', .5)
            pmut = kwargs.get('pmut', .3)
            print(best)

        if no_improvement_count == mgen:
            break

    return best, statistics
import time

import numpy as np


def process_experiment(result, datasetname, conn):
    """Post-process one GA experiment and return the winning individual.

    Persistence is currently disabled (calls commented out); the function
    only echoes the result dict and hands back result['individual'].
    """
    print(result)
    # log_result(conn, datasetname, result['individual'])
    # persist_statistics(result['statistics'])
    return result['individual']


def persist_statistics(statistics):
    """Dump the per-generation statistics as JSON into a timestamped file."""
    import json
    with open('statistics{}.txt'.format(time.time()), 'w') as file:
        file.write(json.dumps(statistics))


def log_result(conn, datasetname, result):
    """Insert one record per metric into the hyperparameter database.

    NOTE(review): the record layout (mf/order/partitioner/npart/alpha/lags)
    was inherited from pyFTS.hyperparam's evolutionary GA; FCM individuals
    only carry 'weights', so re-enabling this will KeyError — the schema
    must be adapted before use.
    """
    metrics = ['rmse', 'size', 'time']
    for metric in metrics:
        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])

        print(record)

        hUtil.insert_hyperparam(record, conn)


def execute(datasetname, dataset, **kwargs):
    """Run several GA experiments over a dataset, returning the last winner.

    :param datasetname: label used for logging/persistence
    :param dataset: the time series handed to GeneticAlgorithm
    :param experiments: number of independent GA runs (default 30)
    :param distributed: False or 'dispy' for cluster evaluation
    :return: the individual of the last experiment (or [] if experiments == 0)
    """
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in range(experiments):  # range: a plain int counter suffices
        print("Experiment {}".format(i))

        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}

        ret = process_experiment(experiment, datasetname, conn)

    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
class FuzzyCognitiveMap(object):
    """A high-order Fuzzy Cognitive Map over a pyFTS partitioner's fuzzy sets."""

    def __init__(self, **kwargs):
        super(FuzzyCognitiveMap, self).__init__()
        # number of past activation vectors combined at each step
        self.order = kwargs.get('order', 1)
        # concepts come from a pyFTS partitioner (one concept per fuzzy set)
        self.concepts = kwargs.get('partitioner', None)
        # list of `order` square weight matrices, set externally (e.g. by the GA)
        self.weights = []
        self.activation_function = kwargs.get('func', Activations.sigmoid)

    def activate(self, concepts):
        """Propagate `order` past concept-activation vectors through the map.

        :param concepts: sequence of at least `order` activation vectors
            (presumably fuzzified memberships; one vector per lag — TODO confirm)
        :return: the activation-function output of the summed dot products
        """
        dot_products = np.zeros(len(self.concepts))
        for k in range(self.order):
            dot_products += np.dot(np.array(concepts[k]).T, self.weights[k])
        return self.activation_function(dot_products)


class FCM_FTS(hofts.HighOrderFTS):
    """High-order FTS whose rule base is replaced by a Fuzzy Cognitive Map."""

    def __init__(self, **kwargs):
        super(FCM_FTS, self).__init__(**kwargs)
        self.fcm = common.FuzzyCognitiveMap(**kwargs)

    def forecast(self, ndata, **kwargs):
        """Point forecasts: defuzzify FCM activations against set midpoints.

        :param ndata: input time series (pre-transformed by the caller)
        :return: list of len(ndata) - order + 1 point forecasts
        """
        ret = []

        midpoints = np.array([fset.centroid for fset in self.partitioner])

        for t in range(self.order, len(ndata) + 1):

            sample = ndata[t - self.order: t]

            fuzzyfied = self.partitioner.fuzzyfy(sample, mode='vector')

            activation = self.fcm.activate(fuzzyfied)

            # weighted average of midpoints; NOTE(review): with the step
            # activation this sum can be 0 and yield a division by zero —
            # confirm only strictly-positive activations are used
            final = np.dot(midpoints, activation) / np.sum(activation)

            ret.append(final)

        return ret
fts.FCM_FTS(partitioner=fs, order=1) + +model.fcm.weights = np.array([ + [1, 1, 0, -1, -1], + [1, 1, 1, 0, -1], + [0, 1, 1, 1, 0], + [-1, 0, 1, 1, 1], + [-1, -1, 0, 1, 1] +]) + +print(data) +print(model.forecast(data)) +''' ''' dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';') @@ -80,7 +113,7 @@ forecasts = model.predict(test_mv, type='multivariate', generators={'data': time print(forecasts) ''' - +''' from pyFTS.data import lorentz df = lorentz.get_dataframe(iterations=5000) @@ -104,3 +137,4 @@ model.fit(train) forecasts = model.predict(test, type='multivariate', steps_ahead=20) print(forecasts) +''' \ No newline at end of file