Merge pull request #31 from PYFTS/feature/som_fts

Feature/som fts
This commit is contained in:
Petrônio Cândido de Lima e Silva 2020-12-09 15:59:15 -03:00 committed by GitHub
commit e739cd404d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 247 additions and 1 deletions

View File

@ -548,7 +548,7 @@ class FTS(object):
params = [ None for k in self.transformations] params = [ None for k in self.transformations]
for c, t in enumerate(self.transformations, start=0): for c, t in enumerate(self.transformations, start=0):
ndata = t.apply(ndata,params[c]) ndata = t.apply(ndata, params[c], )
return ndata return ndata

View File

@ -0,0 +1,98 @@
"""
Kohonen Self Organizing Maps for Fuzzy Time Series
"""
import pandas as pd
import SimpSOM as sps
from pyFTS.models.multivariate import wmvfts
from typing import Tuple
from pyFTS.common.Transformations import Transformation
from typing import List
class SOMTransformation(Transformation):
    """Transformation that projects multivariate data onto a Self Organizing Map.

    The feature columns of a dataset are replaced by the values returned by
    ``net.project`` for each row, while the endogenous column is carried over
    unchanged.
    """

    def __init__(self,
                 grid_dimension: Tuple,
                 **kwargs):
        """
        Args:
            grid_dimension (Tuple): pair of values unpacked in ``train`` and
                passed as the first two arguments of ``sps.somNet``.
            **kwargs:
                loadFile: forwarded as ``loadFile`` to ``sps.somNet``. default: None
                PBC: forwarded as ``PBC`` to ``sps.somNet``. default: True
        """
        # Initialise the base Transformation before adding SOM-specific state,
        # so whatever the base class sets up exists on this instance and the
        # names assigned below take precedence.
        super().__init__()
        # SOM attributes
        self.load_file = kwargs.get('loadFile')
        self.net: sps.somNet = None
        # NOTE: after ``train`` this holds ``data.values`` (the NaN-free rows),
        # not a DataFrame.
        self.data: pd.DataFrame = None
        self.grid_dimension: Tuple = grid_dimension
        self.pbc = kwargs.get('PBC', True)
        # debug attributes
        self.name = 'Kohonen Self Organizing Maps FTS'
        self.shortname = 'SOM-FTS'

    def apply(self,
              data: pd.DataFrame,
              endogen_variable=None,
              names: Tuple[str, ...] = ('x', 'y'),
              param=None,
              **kwargs):
        """
        Replace the feature columns of a dataset by their SOM projection.

        Every column except the endogenous one is projected with
        ``net.project``; the result is returned together with the endogenous
        column. If ``endogen_variable`` is None or is not a column of ``data``,
        the last column is used as the endogenous variable. If the network has
        not been trained yet, it is trained on the feature columns first.

        Args:
            data (pd.DataFrame): M-dimensional dataset.
            endogen_variable (str): column of ``data`` to keep untouched.
            names (Tuple): names for the new columns created by the projection.
            param: not used by this method.
            **kwargs: forwarded to ``train`` when the network is not trained yet.
                percentage_train (float): fraction of the rows used to train the SOM network. default: 0.7
                leaning_rate (float): learning rate of the SOM network (the keyword is spelled ``leaning_rate``). default: 0.01
                epochs (int): epochs of the SOM network. default: 10000

        Returns:
            pd.DataFrame: the projected columns, named after ``names``,
            followed by the endogenous column under its original name. The
            result has a fresh default index.
        """
        if endogen_variable is not None and endogen_variable in data.columns:
            endogen = endogen_variable
            cols = [col for col in data.columns if col != endogen]
        else:
            # Unknown or missing endogenous variable: fall back to the last column.
            endogen = data.columns[-1]
            cols = data.columns[:-1]
        features = data[cols]
        if self.net is None:
            self.train(data=features, **kwargs)
        projected = self.net.project(features.values)
        new_data = pd.DataFrame(projected, columns=list(names))
        # ``.values`` drops the original index so the rows are aligned by position.
        new_data[endogen] = data[endogen].values
        return new_data

    def __repr__(self):
        status = "is trained" if self.net is not None else "not trained"
        return f'{self.name}-{status}'

    def __str__(self):
        return self.name

    def __del__(self):
        # ``net`` may be missing if ``__init__`` raised before assigning it;
        # a plain ``del self.net`` would then raise inside the finalizer.
        vars(self).pop('net', None)

    def train(self,
              data: pd.DataFrame,
              percentage_train: float = .7,
              leaning_rate: float = 0.01,
              epochs: int = 10000):
        """
        Build and train the SOM network.

        Rows containing NaN are dropped, and the first ``percentage_train``
        fraction of the remaining rows is used for training.

        Args:
            data (pd.DataFrame): training dataset.
            percentage_train (float): fraction of the rows used for training. default: 0.7
            leaning_rate (float): passed as ``startLearnRate`` to the network. default: 0.01
            epochs (int): passed as ``epochs`` to the network. default: 10000
        """
        data = data.dropna()
        self.data = data.values
        limit = round(len(self.data) * percentage_train)
        train = self.data[:limit]
        x, y = self.grid_dimension
        self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file)
        self.net.train(startLearnRate=leaning_rate,
                       epochs=epochs)

    def save_net(self,
                 filename: str = "SomNet trained"):
        """
        Save the trained network and remember the file name.

        The name is stored in ``load_file``, which ``train`` passes as
        ``loadFile`` when it builds the network.

        Args:
            filename (str): name passed to ``net.save``.
        """
        self.net.save(filename)
        self.load_file = filename

    def show_grid(self,
                  graph_type: str = 'nodes_graph',
                  **kwargs):
        """
        Plot the network.

        Args:
            graph_type (str): 'nodes_graph' calls ``net.nodes_graph``; any other value calls ``net.diff_graph``.
            **kwargs:
                colnum: passed to ``net.nodes_graph``. default: 0
        """
        if graph_type == 'nodes_graph':
            colnum = kwargs.get('colnum', 0)
            self.net.nodes_graph(colnum=colnum)
        else:
            self.net.diff_graph()

67
pyFTS/partitioners/som.py Normal file
View File

@ -0,0 +1,67 @@
"""
Kohonen Self Organizing Maps for Fuzzy Time Series
"""
import pandas as pd
import SimpSOM as sps
from pyFTS.models.multivariate import wmvfts
from typing import Tuple
class SOMPartitioner:
    """Wrapper around a SimpSOM network built and trained from a dataset."""

    def __init__(self,
                 grid_dimension: Tuple,
                 **kwargs):
        """
        Args:
            grid_dimension (Tuple): pair of values unpacked in ``train`` and
                passed as the first two arguments of ``sps.somNet``.
            **kwargs:
                PBC: forwarded as ``PBC`` to ``sps.somNet``. default: True
        """
        # SOM attributes
        self.net: sps.somNet = None
        self.data: pd.DataFrame = None
        self.grid_dimension: Tuple = grid_dimension
        self.pbc = kwargs.get('PBC', True)
        # debug attributes
        self.name = 'Kohonen Self Organizing Maps FTS'
        self.shortname = 'SOM-FTS'

    @property
    def is_trained(self) -> bool:
        """True once ``train`` has created the network."""
        return getattr(self, 'net', None) is not None

    def __repr__(self):
        status = "is trained" if self.is_trained else "not trained"
        return f'{self.name}-{status}'

    def __str__(self):
        return self.name

    def __del__(self):
        # ``net`` may be missing if ``__init__`` raised before assigning it;
        # a plain ``del self.net`` would then raise inside the finalizer.
        vars(self).pop('net', None)

    def train(self,
              data: pd.DataFrame,
              percentage_train: float = .7,
              leaning_rate: float = 0.01,
              epochs: int = 10000):
        """
        Build and train the SOM network on the first rows of ``data``.

        Args:
            data (pd.DataFrame): training dataset; stored in ``self.data``.
            percentage_train (float): fraction of the rows used for training. default: 0.7
            leaning_rate (float): passed as ``startLearnRate`` to the network (the keyword is spelled ``leaning_rate``). default: 0.01
            epochs (int): passed as ``epochs`` to the network. default: 10000
        """
        self.data = data
        # A slice bound must be an integer; the raw product is a float.
        limit = round(len(data) * percentage_train)
        train = data[:limit]
        # Hand the network a plain array, as SOMTransformation.train does.
        samples = train.values if isinstance(train, pd.DataFrame) else train
        x, y = self.grid_dimension
        # PBC is passed by keyword: the 4th positional parameter of somNet is
        # not PBC, so a positional ``self.pbc`` would be misread.
        self.net = sps.somNet(x, y, samples, PBC=self.pbc)
        self.net.train(startLearnRate=leaning_rate,
                       epochs=epochs)

    def save_net(self,
                 filename: str = "SomNet trained"):
        """
        Save the trained network.

        Args:
            filename (str): name passed to ``net.save``.
        """
        self.net.save(filename)

    def show_grid(self,
                  graph_type: str = 'nodes_graph',
                  **kwargs):
        """
        Plot the network.

        Args:
            graph_type (str): 'nodes_graph' calls ``net.nodes_graph``; any other value calls ``net.diff_graph``.
            **kwargs:
                colnum: passed to ``net.nodes_graph``. default: 0
        """
        if graph_type == 'nodes_graph':
            colnum = kwargs.get('colnum', 0)
            self.net.nodes_graph(colnum=colnum)
        else:
            self.net.diff_graph()
"""
Requirements
"""

View File

@ -0,0 +1,81 @@
import unittest
from pyFTS.common.transformations.som import SOMTransformation
import pandas as pd
import os
import numpy as np
class MyTestCase(unittest.TestCase):
    """Unit tests for SOMTransformation: apply, save_net and train."""

    @staticmethod
    def simple_dataset():
        """Return a 5x5 frame whose rows hold 5, 4, 3, 2 and 1 leading ones."""
        rows = [[1] * ones + [0] * (5 - ones) for ones in range(5, 0, -1)]
        return pd.DataFrame(rows)

    def som_transformer_trained(self):
        """Return a 2x2 SOMTransformation trained for 100 epochs on the simple dataset."""
        dataset = self.simple_dataset()
        transformer = SOMTransformation(grid_dimension=(2, 2))
        transformer.train(data=dataset, epochs=100)
        return transformer

    def test_apply_without_column_names(self):
        # Four identical rows of ones, with default integer column labels.
        frame = pd.DataFrame([[1] * 5 for _ in range(4)])
        transformer = self.som_transformer_trained()
        projected = transformer.apply(data=frame)
        distinct = np.unique(projected)
        self.assertEqual(1, distinct.ndim)
        self.assertEqual(3, projected.shape[1])

    def test_apply_with_column_names(self):
        frame = self.simple_dataset()
        frame.columns = list('abcde')
        transformer = SOMTransformation(grid_dimension=(2, 2))
        projected = transformer.apply(frame, endogen_variable='a')
        projected = projected.dropna()
        self.assertEqual(5, len(projected))
        self.assertEqual(3, len(projected.columns))

    def test_save_net(self):
        transformer = self.som_transformer_trained()
        filename = 'test_net.npy'
        transformer.save_net(filename)
        was_written = filename in os.listdir()
        # Clean up before asserting so a failure does not leave the file behind.
        if was_written:
            os.remove(filename)
        self.assertTrue(was_written)

    def test_train_with_invalid_values_should_remove_nan_row(self):
        nan = float('nan')
        frame = pd.DataFrame([
            [1, 1, nan, 1, 1],
            [1, 1, 1, 1, 0],
            [1, 1, 1, 0, 0],
            [nan, 1, 0, 0, 0],
            [1, 0, 0, 0, 0],
        ])
        transformer = SOMTransformation(grid_dimension=(2, 2))
        transformer.train(data=frame)
        # Two of the five rows contain NaN, so three rows remain after training.
        self.assertEqual(3, len(transformer.data))
        self.assertEqual(5, len(frame.columns))


if __name__ == '__main__':
    unittest.main()