From 3dd08a9937e4d6ae3110cb9d06fb3a3f306ef438 Mon Sep 17 00:00:00 2001 From: matheus_cascalho Date: Thu, 3 Dec 2020 19:24:54 -0300 Subject: [PATCH] =?UTF-8?q?teste=20com=20nome=20da=20vari=C3=A1vel=20end?= =?UTF-8?q?=C3=B3gena?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyFTS/common/transformations/som.py | 20 +++++++++++++++---- pyFTS/tests/test_SOMTransformation.py | 28 +++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/pyFTS/common/transformations/som.py b/pyFTS/common/transformations/som.py index 16bd087..264ad0e 100644 --- a/pyFTS/common/transformations/som.py +++ b/pyFTS/common/transformations/som.py @@ -13,6 +13,7 @@ class SOMTransformation(Transformation): grid_dimension: Tuple, **kwargs): # SOM attributes + self.load_file = kwargs.get('loadFile') self.net: sps.somNet = None self.data: pd.DataFrame = None self.grid_dimension: Tuple = grid_dimension @@ -31,14 +32,17 @@ class SOMTransformation(Transformation): """ Transform dataset from M-DIMENSION to 3-dimension """ + if endogen_variable not in data.columns: + endogen_variable = None + cols = data.columns[:-1] if endogen_variable is None else [col for col in data.columns if + col != endogen_variable] if self.net is None: - cols = data.columns[:-1] train = data[cols] self.train(data=train) - new_data = self.net.project(data.values) + new_data = self.net.project(data[cols].values) new_data = pd.DataFrame(new_data, columns=names) endogen = endogen_variable if endogen_variable is not None else data.columns[-1] - new_data[endogen] = data[endogen] + new_data[endogen] = data[endogen].values return new_data def __repr__(self): @@ -56,11 +60,12 @@ class SOMTransformation(Transformation): percentage_train: float = .7, leaning_rate: float = 0.01, epochs: int = 10000): + data.dropna(inplace=True) self.data = data.values limit = round(len(self.data) * percentage_train) train = self.data[:limit] x, y = self.grid_dimension - self.net = sps.somNet(x, y, train, PBC=self.pbc) + self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file) self.net.train(startLearnRate=leaning_rate, epochs=epochs) @@ -68,6 +73,7 @@ class SOMTransformation(Transformation): def save_net(self, filename: str = "SomNet trained"): self.net.save(filename) + self.load_file = filename def show_grid(self, graph_type: str = 'nodes_graph', @@ -77,3 +83,9 @@ class SOMTransformation(Transformation): self.net.nodes_graph(colnum=colnum) else: self.net.diff_graph() + +if __name__ == '__main__': + file = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv' + df = pd.read_csv(file, index_col=0) + som = SOMTransformation(grid_dimension=(20, 20)) + new_df = som.apply(df.iloc[:50], endogen_variable='Appliances') \ No newline at end of file diff --git a/pyFTS/tests/test_SOMTransformation.py b/pyFTS/tests/test_SOMTransformation.py index 253263f..a9ec8f6 100644 --- a/pyFTS/tests/test_SOMTransformation.py +++ b/pyFTS/tests/test_SOMTransformation.py @@ -5,7 +5,7 @@ import os import numpy as np class MyTestCase(unittest.TestCase): - def test_apply(self): + def test_apply_without_column_names(self): data = [ [1, 1, 1, 1, 1], [1, 1, 1, 1, 1], @@ -19,6 +19,16 @@ class MyTestCase(unittest.TestCase): self.assertEqual(1, len(uniques.shape)) self.assertEqual(3, transformed.values.shape[1]) + def test_apply_with_column_names(self): + df = self.simple_dataset() + df.columns = ['a', 'b', 'c', 'd', 'e'] + som = SOMTransformation(grid_dimension=(2, 2)) + result = som.apply(df, endogen_variable='a') + result.dropna(inplace=True) + self.assertEqual(5, len(result)) + self.assertEqual(3, len(result.columns)) + + def test_save_net(self): som_transformer = self.som_transformer_trained() @@ -34,10 +44,20 @@ class MyTestCase(unittest.TestCase): self.assertEqual(True, is_in_files) - # def + def test_train_with_invalid_values_should_remove_nan_row(self): + data = [ + [1, 1, float('nan'), 1, 1], + [1, 1, 1, 1, 0], + [1, 1, 1, 0, 0], + [float('nan'), 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ] + df = pd.DataFrame(data) + som = SOMTransformation(grid_dimension=(2, 2)) + som.train(data=df) - def test_train(self): - self.assertEqual() + self.assertEqual(3, len(som.data)) + self.assertEqual(5, len(df.columns)) @staticmethod def simple_dataset():