From 6aa2a6c92e4dde97250d4ca0fbb805bcb8204154 Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Mon, 16 Nov 2020 22:37:04 -0300 Subject: [PATCH 1/9] treinamento da rede --- pyFTS/models/multivariate/som.py | 63 ++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 pyFTS/models/multivariate/som.py diff --git a/pyFTS/models/multivariate/som.py b/pyFTS/models/multivariate/som.py new file mode 100644 index 0000000..f1faf67 --- /dev/null +++ b/pyFTS/models/multivariate/som.py @@ -0,0 +1,63 @@ +""" +Kohonen Self Organizing Maps for Fuzzy Time Series +""" +import pandas as pd +import SimpSOM as sps +from pyFTS.models.multivariate import wmvfts +from typing import Tuple + + +class SOMFTS: + def __init__(self, + grid_dimension: Tuple, + **kwargs): + # SOM attributes + self.net: sps.somNet = None + self.data: pd.DataFrame = None + self.grid_dimension: Tuple = grid_dimension + self.pbc = kwargs.get('PBC', True) + + # fts attributes + self.fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS) + self.order = kwargs.get('order', 2) + self.is_trained = False + + # debug attributes + self.name = 'Kohonen Self Organizing Maps FTS' + self.shortname = 'SOM-FTS' + + def __repr__(self): + status = "is trained" if self.is_trained else "not trained" + return f'{self.name}-{status}' + + def __str__(self): + return self.name + + def __del__(self): + del self.net + + def train(self, + data: pd.DataFrame, + percentage_train: float = .7, + leaning_rate: float = 0.01, + epochs: int = 10000): + self.data = data + limit = len(self.data) * percentage_train + train = data[:limit] + x, y = self.grid_dimension + self.net = sps.somNet(x, y, train, self.pbc) + self.net.train(startLearnRate=leaning_rate, + epochs=epochs) + + def save_net(self, + filename: str = "SomNet trained"): + self.net.save(filename) + + def show_grid(self, + graph_type: str = 'nodes_graph', + **kwargs): + if graph_type == 'nodes_graph': + colnum = kwargs.get('colnum', 0) + 
self.net.nodes_graph(colnum=colnum) + else: + self.net.diff_graph() \ No newline at end of file From 5e4eb03b893b0785773a087d9e4a146e34ec2a0c Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Wed, 2 Dec 2020 17:45:26 -0300 Subject: [PATCH 2/9] test save_net --- pyFTS/common/fts.py | 2 +- pyFTS/common/transformations/som.py | 72 +++++++++++++++++++ .../multivariate => partitioners}/som.py | 16 +++-- pyFTS/tests/test_SOMTransformation.py | 47 ++++++++++++ 4 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 pyFTS/common/transformations/som.py rename pyFTS/{models/multivariate => partitioners}/som.py (88%) create mode 100644 pyFTS/tests/test_SOMTransformation.py diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py index b0c66e5..ca651eb 100644 --- a/pyFTS/common/fts.py +++ b/pyFTS/common/fts.py @@ -503,7 +503,7 @@ class FTS(object): params = [ None for k in self.transformations] for c, t in enumerate(self.transformations, start=0): - ndata = t.apply(ndata,params[c]) + ndata = t.apply(ndata, params[c], ) return ndata diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py new file mode 100644 index 0000000..af66c26 --- /dev/null +++ b/pyFTS/common/transformations/som.py @@ -0,0 +1,72 @@ +""" +Kohonen Self Organizing Maps for Fuzzy Time Series +""" +import pandas as pd +import SimpSOM as sps +from pyFTS.models.multivariate import wmvfts +from typing import Tuple +from pyFTS.common.Transformations import Transformation + + +class SOMTransformation(Transformation): + def __init__(self, + grid_dimension: Tuple, + **kwargs): + # SOM attributes + self.net: sps.somNet = None + self.data: pd.DataFrame = None + self.grid_dimension: Tuple = grid_dimension + self.pbc = kwargs.get('PBC', True) + + # debug attributes + self.name = 'Kohonen Self Organizing Maps FTS' + self.shortname = 'SOM-FTS' + + # def apply(self, data, endogen_variable, param, **kwargs): #TODO(CASCALHO) MELHORAR DOCSTRING + # """ + # Transform dataset from 
M-DIMENSION to 3-dimension + # """ + # pass + + def __repr__(self): + status = "is trained" if self.is_trained else "not trained" + return f'{self.name}-{status}' + + def __str__(self): + return self.name + + def __del__(self): + del self.net + + def train(self, + data: pd.DataFrame, + percentage_train: float = .7, + leaning_rate: float = 0.01, + epochs: int = 10000): + self.data = data.values + limit = round(len(self.data) * percentage_train) + train = self.data[:limit] + x, y = self.grid_dimension + self.net = sps.somNet(x, y, train, PBC=self.pbc) + self.net.train(startLearnRate=leaning_rate, + epochs=epochs) + + def save_net(self, + filename: str = "SomNet trained"): + self.net.save(filename) + + def show_grid(self, + graph_type: str = 'nodes_graph', + **kwargs): + if graph_type == 'nodes_graph': + colnum = kwargs.get('colnum', 0) + self.net.nodes_graph(colnum=colnum) + else: + self.net.diff_graph() + + +""" +Requisitos + - apply(herdado de transformations): transforma os conjunto de dados + - inverse - não é necessária +""" diff --git a/pyFTS/models/multivariate/som.py b/pyFTS/partitioners/som.py similarity index 88% rename from pyFTS/models/multivariate/som.py rename to pyFTS/partitioners/som.py index f1faf67..059ea11 100644 --- a/pyFTS/models/multivariate/som.py +++ b/pyFTS/partitioners/som.py @@ -7,7 +7,7 @@ from pyFTS.models.multivariate import wmvfts from typing import Tuple -class SOMFTS: +class SOMPartitioner: def __init__(self, grid_dimension: Tuple, **kwargs): @@ -17,10 +17,6 @@ class SOMFTS: self.grid_dimension: Tuple = grid_dimension self.pbc = kwargs.get('PBC', True) - # fts attributes - self.fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS) - self.order = kwargs.get('order', 2) - self.is_trained = False # debug attributes self.name = 'Kohonen Self Organizing Maps FTS' @@ -60,4 +56,12 @@ class SOMFTS: colnum = kwargs.get('colnum', 0) self.net.nodes_graph(colnum=colnum) else: - self.net.diff_graph() \ No newline at end of file + 
self.net.diff_graph() + + + +""" +Requisitos + + +""" \ No newline at end of file diff --git a/pyFTS/tests/test_SOMTransformation.py b/pyFTS/tests/test_SOMTransformation.py new file mode 100644 index 0000000..e1fab22 --- /dev/null +++ b/pyFTS/tests/test_SOMTransformation.py @@ -0,0 +1,47 @@ +import unittest +from pyFTS.common.transformations.som import SOMTransformation +import pandas as pd +import os + +class MyTestCase(unittest.TestCase): + def test_apply(self): + self.assertEqual(True, False) + + def test_save_net(self): + som_transformer = self.som_transformer_trained() + + filename = 'test_net.npy' + som_transformer.save_net(filename) + files = os.listdir() + + if filename in files: + is_in_files = True + os.remove(filename) + else: + is_in_files = False + + self.assertEqual(True, is_in_files) + + def test_train(self): + self.assertEqual() + + @staticmethod + def simple_dataset(): + data = [ + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 0], + [1, 1, 1, 0, 0], + [1, 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ] + df = pd.DataFrame(data) + return df + + def som_transformer_trained(self): + data = self.simple_dataset() + som_transformer = SOMTransformation(grid_dimension=(2, 2)) + som_transformer.train(data=data, epochs=100) + return som_transformer + +if __name__ == '__main__': + unittest.main() From 41818258b227e8b1af8e50546a85d00e5188ecda Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Wed, 2 Dec 2020 18:19:54 -0300 Subject: [PATCH 3/9] teste apply --- pyFTS/common/transformations/som.py | 27 ++++++++++++++++++++------- pyFTS/tests/test_SOMTransformation.py | 16 +++++++++++++++- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index af66c26..84e4773 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -6,7 +6,7 @@ import SimpSOM as sps from pyFTS.models.multivariate import wmvfts from typing import Tuple from pyFTS.common.Transformations import 
Transformation - +from typing import List class SOMTransformation(Transformation): def __init__(self, @@ -22,14 +22,27 @@ class SOMTransformation(Transformation): self.name = 'Kohonen Self Organizing Maps FTS' self.shortname = 'SOM-FTS' - # def apply(self, data, endogen_variable, param, **kwargs): #TODO(CASCALHO) MELHORAR DOCSTRING - # """ - # Transform dataset from M-DIMENSION to 3-dimension - # """ - # pass + def apply(self, + data: pd.DataFrame, + endogen_variable=None, + names: List[str] = ['x', 'y'], + param=None, + **kwargs): #TODO(CASCALHO) MELHORAR DOCSTRING + """ + Transform dataset from M-DIMENSION to 3-dimension + """ + if self.net is None: + cols = data.columns[:-1] + train = data[cols] + self.train(data=train) + new_data = self.net.project(data.values) + new_data = pd.DataFrame(new_data, columns=names) + endogen = endogen_variable if endogen_variable is not None else data.columns[-1] + new_data[endogen] = data[endogen] + return new_data def __repr__(self): - status = "is trained" if self.is_trained else "not trained" + status = "is trained" if self.net is not None else "not trained" return f'{self.name}-{status}' def __str__(self): diff --git a/pyFTS/tests/test_SOMTransformation.py b/pyFTS/tests/test_SOMTransformation.py index e1fab22..253263f 100644 --- a/pyFTS/tests/test_SOMTransformation.py +++ b/pyFTS/tests/test_SOMTransformation.py @@ -2,10 +2,22 @@ import unittest from pyFTS.common.transformations.som import SOMTransformation import pandas as pd import os +import numpy as np class MyTestCase(unittest.TestCase): def test_apply(self): - self.assertEqual(True, False) + data = [ + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + ] + som = self.som_transformer_trained() + transformed = som.apply(data=pd.DataFrame(data)) + uniques = np.unique(transformed) + + self.assertEqual(1, len(uniques.shape)) + self.assertEqual(3, transformed.values.shape[1]) def test_save_net(self): som_transformer = self.som_transformer_trained() @@ 
-22,6 +34,8 @@ class MyTestCase(unittest.TestCase): self.assertEqual(True, is_in_files) + # def + def test_train(self): self.assertEqual() From 3b047e4f447a6572a64618049ff63a35669fd54c Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Thu, 3 Dec 2020 16:47:24 -0300 Subject: [PATCH 4/9] =?UTF-8?q?limpeza=20de=20coment=C3=A1rios?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyFTS/common/transformations/som.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 84e4773..16bd087 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -64,6 +64,7 @@ class SOMTransformation(Transformation): self.net.train(startLearnRate=leaning_rate, epochs=epochs) + def save_net(self, filename: str = "SomNet trained"): self.net.save(filename) @@ -76,10 +77,3 @@ class SOMTransformation(Transformation): self.net.nodes_graph(colnum=colnum) else: self.net.diff_graph() - - -""" -Requisitos - - apply(herdado de transformations): transforma os conjunto de dados - - inverse - não é necessária -""" From 3dd08a9937e4d6ae3110cb9d06fb3a3f306ef438 Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Thu, 3 Dec 2020 19:24:54 -0300 Subject: [PATCH 5/9] =?UTF-8?q?teste=20com=20nome=20da=20vari=C3=A1vel=20e?= =?UTF-8?q?nd=C3=B3gena?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyFTS/common/transformations/som.py | 20 +++++++++++++++---- pyFTS/tests/test_SOMTransformation.py | 28 +++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 16bd087..264ad0e 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -13,6 +13,7 @@ class SOMTransformation(Transformation): grid_dimension: Tuple, **kwargs): # SOM attributes + 
self.load_file = kwargs.get('loadFile') self.net: sps.somNet = None self.data: pd.DataFrame = None self.grid_dimension: Tuple = grid_dimension @@ -31,14 +32,17 @@ class SOMTransformation(Transformation): """ Transform dataset from M-DIMENSION to 3-dimension """ + if endogen_variable not in data.columns: + endogen_variable = None + cols = data.columns[:-1] if endogen_variable is None else [col for col in data.columns if + col != endogen_variable] if self.net is None: - cols = data.columns[:-1] train = data[cols] self.train(data=train) - new_data = self.net.project(data.values) + new_data = self.net.project(data[cols].values) new_data = pd.DataFrame(new_data, columns=names) endogen = endogen_variable if endogen_variable is not None else data.columns[-1] - new_data[endogen] = data[endogen] + new_data[endogen] = data[endogen].values return new_data def __repr__(self): @@ -56,11 +60,12 @@ class SOMTransformation(Transformation): percentage_train: float = .7, leaning_rate: float = 0.01, epochs: int = 10000): + data.dropna(inplace=True) self.data = data.values limit = round(len(self.data) * percentage_train) train = self.data[:limit] x, y = self.grid_dimension - self.net = sps.somNet(x, y, train, PBC=self.pbc) + self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file) self.net.train(startLearnRate=leaning_rate, epochs=epochs) @@ -68,6 +73,7 @@ class SOMTransformation(Transformation): def save_net(self, filename: str = "SomNet trained"): self.net.save(filename) + self.load_file = filename def show_grid(self, graph_type: str = 'nodes_graph', @@ -77,3 +83,9 @@ class SOMTransformation(Transformation): self.net.nodes_graph(colnum=colnum) else: self.net.diff_graph() + +if __name__ == '__main__': + file = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv' + df = pd.read_csv(file, index_col=0) + som = SOMTransformation(grid_dimension=(20, 20)) + new_df = som.apply(df.iloc[:50], 
endogen_variable='Appliances') \ No newline at end of file diff --git a/pyFTS/tests/test_SOMTransformation.py b/pyFTS/tests/test_SOMTransformation.py index 253263f..a9ec8f6 100644 --- a/pyFTS/tests/test_SOMTransformation.py +++ b/pyFTS/tests/test_SOMTransformation.py @@ -5,7 +5,7 @@ import os import numpy as np class MyTestCase(unittest.TestCase): - def test_apply(self): + def test_apply_without_column_names(self): data = [ [1, 1, 1, 1, 1], [1, 1, 1, 1, 1], @@ -19,6 +19,16 @@ class MyTestCase(unittest.TestCase): self.assertEqual(1, len(uniques.shape)) self.assertEqual(3, transformed.values.shape[1]) + def test_apply_with_column_names(self): + df = self.simple_dataset() + df.columns = ['a', 'b', 'c', 'd', 'e'] + som = SOMTransformation(grid_dimension=(2, 2)) + result = som.apply(df, endogen_variable='a') + result.dropna(inplace=True) + self.assertEqual(5, len(result)) + self.assertEqual(3, len(result.columns)) + + def test_save_net(self): som_transformer = self.som_transformer_trained() @@ -34,10 +44,20 @@ class MyTestCase(unittest.TestCase): self.assertEqual(True, is_in_files) - # def + def test_train_with_invalid_values_should_remove_nan_row(self): + data = [ + [1, 1, float('nan'), 1, 1], + [1, 1, 1, 1, 0], + [1, 1, 1, 0, 0], + [float('nan'), 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ] + df = pd.DataFrame(data) + som = SOMTransformation(grid_dimension=(2, 2)) + som.train(data=df) - def test_train(self): - self.assertEqual() + self.assertEqual(3, len(som.data)) + self.assertEqual(5, len(df.columns)) @staticmethod def simple_dataset(): From 27b55b1fde2ab73a66e16c0bf894e02cf6a896bb Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Mon, 7 Dec 2020 15:33:35 -0300 Subject: [PATCH 6/9] =?UTF-8?q?passagem=20de=20argumentos=20do=20m=C3=A9to?= =?UTF-8?q?do=20train=20pelo=20m=C3=A9todo=20apply?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyFTS/common/transformations/som.py | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 264ad0e..364ae3e 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -38,7 +38,7 @@ class SOMTransformation(Transformation): col != endogen_variable] if self.net is None: train = data[cols] - self.train(data=train) + self.train(data=train, **kwargs) new_data = self.net.project(data[cols].values) new_data = pd.DataFrame(new_data, columns=names) endogen = endogen_variable if endogen_variable is not None else data.columns[-1] @@ -88,4 +88,5 @@ if __name__ == '__main__': file = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv' df = pd.read_csv(file, index_col=0) som = SOMTransformation(grid_dimension=(20, 20)) - new_df = som.apply(df.iloc[:50], endogen_variable='Appliances') \ No newline at end of file + new_df = som.apply(df.iloc[:50], endogen_variable='Appliances', epochs=100, leaning_rate=0.1) + print(new_df.head()) \ No newline at end of file From 20a4b9dfef01e6944703da09caa13d824487503e Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Mon, 7 Dec 2020 15:37:35 -0300 Subject: [PATCH 7/9] =?UTF-8?q?retirada=20de=20celulas=20NaN=20por=20meio?= =?UTF-8?q?=20de=20reatribu=C3=AD=C3=A7=C3=A3o=20da=20vari=C3=A1vel=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyFTS/common/transformations/som.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 364ae3e..456cc16 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -60,7 +60,7 @@ class SOMTransformation(Transformation): percentage_train: float = .7, leaning_rate: float = 0.01, epochs: int = 10000): - data.dropna(inplace=True) + data = data.dropna() self.data = data.values limit = round(len(self.data) * 
percentage_train) train = self.data[:limit] From b9995e780a6b2da6a48bf70c51d88c56eab4967b Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Mon, 7 Dec 2020 15:38:48 -0300 Subject: [PATCH 8/9] retirada do escopo if __name__ no arquivo som.py --- pyFTS/common/transformations/som.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 456cc16..49a4203 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -83,10 +83,3 @@ class SOMTransformation(Transformation): self.net.nodes_graph(colnum=colnum) else: self.net.diff_graph() - -if __name__ == '__main__': - file = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv' - df = pd.read_csv(file, index_col=0) - som = SOMTransformation(grid_dimension=(20, 20)) - new_df = som.apply(df.iloc[:50], endogen_variable='Appliances', epochs=100, leaning_rate=0.1) - print(new_df.head()) \ No newline at end of file From 81036e506fa2efa4f48f41c1a56be2699226de0a Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Mon, 7 Dec 2020 15:56:34 -0300 Subject: [PATCH 9/9] =?UTF-8?q?docstring=20do=20m=C3=A9todo=20apply?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyFTS/common/transformations/som.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 49a4203..a5aee39 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -26,12 +26,25 @@ class SOMTransformation(Transformation): def apply(self, data: pd.DataFrame, endogen_variable=None, - names: List[str] = ['x', 'y'], + names: Tuple[str] = ('x', 'y'), param=None, - **kwargs): #TODO(CASCALHO) MELHORAR DOCSTRING + **kwargs): """ - Transform dataset from M-DIMENSION to 3-dimension + Transform a 
M-dimensional dataset into a 3-dimensional dataset, where one dimension is the endogenous variable. + If endogen_variable is None, the last column is used as the endogenous variable. + Args: + data (pd.DataFrame): M-dimensional dataset + endogen_variable (str): name of the endogenous column in data + names (Tuple): names of the new columns created by the SOM transformation. + param: + **kwargs: parameters of the SOM training process + percentage_train (float): fraction of the dataset used to train the SOM network. default: 0.7 + leaning_rate (float): learning rate of the SOM network. default: 0.01 + epochs (int): number of training epochs of the SOM network. default: 10000 + Returns: + """ + if endogen_variable not in data.columns: endogen_variable = None cols = data.columns[:-1] if endogen_variable is None else [col for col in data.columns if