teste com nome da variável endógena
This commit is contained in:
parent
3b047e4f44
commit
3dd08a9937
@ -13,6 +13,7 @@ class SOMTransformation(Transformation):
|
|||||||
grid_dimension: Tuple,
|
grid_dimension: Tuple,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
# SOM attributes
|
# SOM attributes
|
||||||
|
self.load_file = kwargs.get('loadFile')
|
||||||
self.net: sps.somNet = None
|
self.net: sps.somNet = None
|
||||||
self.data: pd.DataFrame = None
|
self.data: pd.DataFrame = None
|
||||||
self.grid_dimension: Tuple = grid_dimension
|
self.grid_dimension: Tuple = grid_dimension
|
||||||
@ -31,14 +32,17 @@ class SOMTransformation(Transformation):
|
|||||||
"""
|
"""
|
||||||
Transform dataset from M-DIMENSION to 3-dimension
|
Transform dataset from M-DIMENSION to 3-dimension
|
||||||
"""
|
"""
|
||||||
|
if endogen_variable not in data.columns:
|
||||||
|
endogen_variable = None
|
||||||
|
cols = data.columns[:-1] if endogen_variable is None else [col for col in data.columns if
|
||||||
|
col != endogen_variable]
|
||||||
if self.net is None:
|
if self.net is None:
|
||||||
cols = data.columns[:-1]
|
|
||||||
train = data[cols]
|
train = data[cols]
|
||||||
self.train(data=train)
|
self.train(data=train)
|
||||||
new_data = self.net.project(data.values)
|
new_data = self.net.project(data[cols].values)
|
||||||
new_data = pd.DataFrame(new_data, columns=names)
|
new_data = pd.DataFrame(new_data, columns=names)
|
||||||
endogen = endogen_variable if endogen_variable is not None else data.columns[-1]
|
endogen = endogen_variable if endogen_variable is not None else data.columns[-1]
|
||||||
new_data[endogen] = data[endogen]
|
new_data[endogen] = data[endogen].values
|
||||||
return new_data
|
return new_data
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
@ -56,11 +60,12 @@ class SOMTransformation(Transformation):
|
|||||||
percentage_train: float = .7,
|
percentage_train: float = .7,
|
||||||
leaning_rate: float = 0.01,
|
leaning_rate: float = 0.01,
|
||||||
epochs: int = 10000):
|
epochs: int = 10000):
|
||||||
|
data.dropna(inplace=True)
|
||||||
self.data = data.values
|
self.data = data.values
|
||||||
limit = round(len(self.data) * percentage_train)
|
limit = round(len(self.data) * percentage_train)
|
||||||
train = self.data[:limit]
|
train = self.data[:limit]
|
||||||
x, y = self.grid_dimension
|
x, y = self.grid_dimension
|
||||||
self.net = sps.somNet(x, y, train, PBC=self.pbc)
|
self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file)
|
||||||
self.net.train(startLearnRate=leaning_rate,
|
self.net.train(startLearnRate=leaning_rate,
|
||||||
epochs=epochs)
|
epochs=epochs)
|
||||||
|
|
||||||
@ -68,6 +73,7 @@ class SOMTransformation(Transformation):
|
|||||||
def save_net(self,
|
def save_net(self,
|
||||||
filename: str = "SomNet trained"):
|
filename: str = "SomNet trained"):
|
||||||
self.net.save(filename)
|
self.net.save(filename)
|
||||||
|
self.load_file = filename
|
||||||
|
|
||||||
def show_grid(self,
|
def show_grid(self,
|
||||||
graph_type: str = 'nodes_graph',
|
graph_type: str = 'nodes_graph',
|
||||||
@ -77,3 +83,9 @@ class SOMTransformation(Transformation):
|
|||||||
self.net.nodes_graph(colnum=colnum)
|
self.net.nodes_graph(colnum=colnum)
|
||||||
else:
|
else:
|
||||||
self.net.diff_graph()
|
self.net.diff_graph()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
file = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv'
|
||||||
|
df = pd.read_csv(file, index_col=0)
|
||||||
|
som = SOMTransformation(grid_dimension=(20, 20))
|
||||||
|
new_df = som.apply(df.iloc[:50], endogen_variable='Appliances')
|
@ -5,7 +5,7 @@ import os
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
class MyTestCase(unittest.TestCase):
|
class MyTestCase(unittest.TestCase):
|
||||||
def test_apply(self):
|
def test_apply_without_column_names(self):
|
||||||
data = [
|
data = [
|
||||||
[1, 1, 1, 1, 1],
|
[1, 1, 1, 1, 1],
|
||||||
[1, 1, 1, 1, 1],
|
[1, 1, 1, 1, 1],
|
||||||
@ -19,6 +19,16 @@ class MyTestCase(unittest.TestCase):
|
|||||||
self.assertEqual(1, len(uniques.shape))
|
self.assertEqual(1, len(uniques.shape))
|
||||||
self.assertEqual(3, transformed.values.shape[1])
|
self.assertEqual(3, transformed.values.shape[1])
|
||||||
|
|
||||||
|
def test_apply_with_column_names(self):
|
||||||
|
df = self.simple_dataset()
|
||||||
|
df.columns = ['a', 'b', 'c', 'd', 'e']
|
||||||
|
som = SOMTransformation(grid_dimension=(2, 2))
|
||||||
|
result = som.apply(df, endogen_variable='a')
|
||||||
|
result.dropna(inplace=True)
|
||||||
|
self.assertEqual(5, len(result))
|
||||||
|
self.assertEqual(3, len(result.columns))
|
||||||
|
|
||||||
|
|
||||||
def test_save_net(self):
|
def test_save_net(self):
|
||||||
som_transformer = self.som_transformer_trained()
|
som_transformer = self.som_transformer_trained()
|
||||||
|
|
||||||
@ -34,10 +44,20 @@ class MyTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(True, is_in_files)
|
self.assertEqual(True, is_in_files)
|
||||||
|
|
||||||
# def
|
def test_train_with_invalid_values_should_remove_nan_row(self):
|
||||||
|
data = [
|
||||||
|
[1, 1, float('nan'), 1, 1],
|
||||||
|
[1, 1, 1, 1, 0],
|
||||||
|
[1, 1, 1, 0, 0],
|
||||||
|
[float('nan'), 1, 0, 0, 0],
|
||||||
|
[1, 0, 0, 0, 0],
|
||||||
|
]
|
||||||
|
df = pd.DataFrame(data)
|
||||||
|
som = SOMTransformation(grid_dimension=(2, 2))
|
||||||
|
som.train(data=df)
|
||||||
|
|
||||||
def test_train(self):
|
self.assertEqual(3, len(som.data))
|
||||||
self.assertEqual()
|
self.assertEqual(5, len(df.columns))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def simple_dataset():
|
def simple_dataset():
|
||||||
|
Loading…
Reference in New Issue
Block a user