teste com nome da variável endógena

This commit is contained in:
matheus_cascalho 2020-12-03 19:24:54 -03:00
parent 3b047e4f44
commit 3dd08a9937
2 changed files with 40 additions and 8 deletions

View File

@ -13,6 +13,7 @@ class SOMTransformation(Transformation):
grid_dimension: Tuple,
**kwargs):
# SOM attributes
self.load_file = kwargs.get('loadFile')
self.net: sps.somNet = None
self.data: pd.DataFrame = None
self.grid_dimension: Tuple = grid_dimension
@ -31,14 +32,17 @@ class SOMTransformation(Transformation):
"""
Transform dataset from M-DIMENSION to 3-dimension
"""
if endogen_variable not in data.columns:
endogen_variable = None
cols = data.columns[:-1] if endogen_variable is None else [col for col in data.columns if
col != endogen_variable]
if self.net is None:
cols = data.columns[:-1]
train = data[cols]
self.train(data=train)
new_data = self.net.project(data.values)
new_data = self.net.project(data[cols].values)
new_data = pd.DataFrame(new_data, columns=names)
endogen = endogen_variable if endogen_variable is not None else data.columns[-1]
new_data[endogen] = data[endogen]
new_data[endogen] = data[endogen].values
return new_data
def __repr__(self):
@ -56,11 +60,12 @@ class SOMTransformation(Transformation):
percentage_train: float = .7,
leaning_rate: float = 0.01,
epochs: int = 10000):
data.dropna(inplace=True)
self.data = data.values
limit = round(len(self.data) * percentage_train)
train = self.data[:limit]
x, y = self.grid_dimension
self.net = sps.somNet(x, y, train, PBC=self.pbc)
self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file)
self.net.train(startLearnRate=leaning_rate,
epochs=epochs)
@ -68,6 +73,7 @@ class SOMTransformation(Transformation):
def save_net(self, filename: str = "SomNet trained"):
    """Save the current network and remember where it was written.

    Args:
        filename: Name handed to the network's ``save`` method; it is also
            stored on ``self.load_file`` afterwards.
    """
    network = self.net
    network.save(filename)
    # Keep the name around so a later somNet construction can pass it as loadFile.
    self.load_file = filename
def show_grid(self,
graph_type: str = 'nodes_graph',
@ -77,3 +83,9 @@ class SOMTransformation(Transformation):
self.net.nodes_graph(colnum=colnum)
else:
self.net.diff_graph()
if __name__ == '__main__':
    # Ad-hoc manual run: load the energy CSV (first column as index), take its
    # first 50 rows and pass them through a SOMTransformation built with a
    # (20, 20) grid, naming 'Appliances' as the endogenous variable.
    csv_path = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv'
    energy = pd.read_csv(csv_path, index_col=0)
    sample = energy.iloc[:50]
    transformer = SOMTransformation(grid_dimension=(20, 20))
    projected = transformer.apply(sample, endogen_variable='Appliances')

View File

@ -5,7 +5,7 @@ import os
import numpy as np
class MyTestCase(unittest.TestCase):
def test_apply(self):
def test_apply_without_column_names(self):
data = [
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
@ -19,6 +19,16 @@ class MyTestCase(unittest.TestCase):
self.assertEqual(1, len(uniques.shape))
self.assertEqual(3, transformed.values.shape[1])
def test_apply_with_column_names(self):
    """apply() with an explicitly named endogenous column keeps 5 rows and 3 columns."""
    frame = self.simple_dataset()
    frame.columns = ['a', 'b', 'c', 'd', 'e']
    transformer = SOMTransformation(grid_dimension=(2, 2))

    projected = transformer.apply(frame, endogen_variable='a')
    # Drop any row with a NaN so a misaligned endogenous column would
    # surface as missing rows in the length check below.
    projected = projected.dropna()

    row_count = len(projected)
    column_count = len(projected.columns)
    self.assertEqual(5, row_count)
    self.assertEqual(3, column_count)
def test_save_net(self):
som_transformer = self.som_transformer_trained()
@ -34,10 +44,20 @@ class MyTestCase(unittest.TestCase):
self.assertEqual(True, is_in_files)
# def
# Checks that train() stores only the NaN-free rows of the frame it is given.
def test_train_with_invalid_values_should_remove_nan_row(self):
# 5x5 fixture: the first and fourth rows each contain one NaN; the other
# three rows are complete.
data = [
[1, 1, float('nan'), 1, 1],
[1, 1, 1, 1, 0],
[1, 1, 1, 0, 0],
[float('nan'), 1, 0, 0, 0],
[1, 0, 0, 0, 0],
]
df = pd.DataFrame(data)
som = SOMTransformation(grid_dimension=(2, 2))
som.train(data=df)
# NOTE(review): the next two lines look like the removed `test_train` stub from
# the old side of this diff rather than part of this test — confirm before use.
def test_train(self):
self.assertEqual()
# Three complete rows are expected to remain in som.data, and the frame
# must still have all five columns.
self.assertEqual(3, len(som.data))
self.assertEqual(5, len(df.columns))
@staticmethod
def simple_dataset():