# 92 lines
# 3.2 KiB
# Python

"""
Kohonen Self Organizing Maps for Fuzzy Time Series
"""
import pandas as pd
import SimpSOM as sps
from pyFTS.models.multivariate import wmvfts
from typing import Tuple
from pyFTS.common.Transformations import Transformation
from typing import List
class SOMTransformation(Transformation):
    """
    Transformation that projects the feature columns of a multivariate
    dataset through a Kohonen Self Organizing Map (SimpSOM ``somNet``).

    The network is created and trained lazily on the first call to
    :meth:`apply` (or explicitly through :meth:`train`) and is then reused
    for every subsequent projection.
    """

    def __init__(self,
                 grid_dimension: Tuple,
                 **kwargs):
        """
        :param grid_dimension: pair ``(x, y)``; unpacked in :meth:`train` and
            passed as the first two positional arguments of ``sps.somNet``.
        :keyword loadFile: value forwarded to ``sps.somNet`` as ``loadFile``
            (``None`` when omitted).
        :keyword PBC: value forwarded to ``sps.somNet`` as ``PBC``
            (``True`` when omitted).
        """
        # NOTE(review): the base ``Transformation.__init__`` is not invoked
        # here -- confirm whether the base class expects it to run.

        # SOM attributes
        self.load_file = kwargs.get('loadFile')
        self.net: sps.somNet = None
        # Filled by train() with the NaN-free input as ``DataFrame.values``
        # (i.e. not a DataFrame).
        self.data = None
        self.grid_dimension: Tuple = grid_dimension
        self.pbc = kwargs.get('PBC', True)

        # debug attributes
        self.name = 'Kohonen Self Organizing Maps FTS'
        self.shortname = 'SOM-FTS'

    def apply(self,
              data: pd.DataFrame,
              endogen_variable=None,
              names: List[str] = None,
              param=None,
              **kwargs):
        """
        Project the feature columns of ``data`` through the SOM and return
        the projection together with the endogenous column.

        :param data: input dataset.
        :param endogen_variable: name of the endogenous (target) column. When
            it is ``None`` or not a column of ``data``, the last column of
            ``data`` is used instead.
        :param names: column labels for the projected values; defaults to
            ``['x', 'y']``.
        :param param: unused by this method.
        :param kwargs: forwarded to :meth:`train` when the network has not
            been trained yet; ignored otherwise.
        :return: new DataFrame (fresh default index) whose columns are
            ``names`` followed by the endogenous column copied from ``data``.
        """
        # Build the default per call instead of sharing one mutable list
        # object between every invocation.
        if names is None:
            names = ['x', 'y']

        if endogen_variable in data.columns:
            endogen = endogen_variable
            cols = [col for col in data.columns if col != endogen]
        else:
            # Unknown/absent target name: fall back to the last column.
            endogen = data.columns[-1]
            cols = data.columns[:-1]

        if self.net is None:
            self.train(data=data[cols], **kwargs)

        # Rows containing NaN are dropped for training only; the projection
        # below still receives every row of ``data``.
        projected = self.net.project(data[cols].values)
        new_data = pd.DataFrame(projected, columns=names)
        # ``.values`` bypasses index alignment with the fresh default index.
        new_data[endogen] = data[endogen].values
        return new_data

    def __repr__(self):
        status = "is trained" if self.net is not None else "not trained"
        return f'{self.name}-{status}'

    def __str__(self):
        return self.name

    def __del__(self):
        # Drop the reference to the network. ``net`` may be missing when the
        # instance was never fully initialised, so remove it tolerantly
        # rather than raising AttributeError inside the finalizer.
        self.__dict__.pop('net', None)

    def train(self,
              data: pd.DataFrame,
              percentage_train: float = .7,
              leaning_rate: float = 0.01,
              epochs: int = 10000):
        """
        Create a new ``sps.somNet`` and train it on the leading rows of
        ``data``.

        :param data: feature columns only; rows containing NaN are ignored.
            The caller's DataFrame is left untouched.
        :param percentage_train: fraction of the NaN-free rows, counted from
            the start, that is used for training.
        :param leaning_rate: passed to ``somNet.train`` as ``startLearnRate``
            (the misspelt name is kept so existing callers keep working).
        :param epochs: passed to ``somNet.train`` as ``epochs``.
        """
        # Non-mutating dropna: an in-place drop would alter the caller's
        # frame (or operate on a column-selection copy and emit pandas'
        # SettingWithCopy warning).
        clean = data.dropna()
        # Note: keeps ALL NaN-free rows, not only the training slice.
        self.data = clean.values
        limit = round(len(self.data) * percentage_train)
        train = self.data[:limit]
        x, y = self.grid_dimension
        self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file)
        self.net.train(startLearnRate=leaning_rate,
                       epochs=epochs)

    def save_net(self,
                 filename: str = "SomNet trained"):
        """
        Save the network through ``somNet.save`` and remember ``filename`` in
        ``self.load_file``, which :meth:`train` passes as ``loadFile`` the
        next time it builds a network.

        :param filename: name handed to ``somNet.save``.
        """
        self.net.save(filename)
        self.load_file = filename

    def show_grid(self,
                  graph_type: str = 'nodes_graph',
                  **kwargs):
        """
        Plot the trained grid.

        :param graph_type: ``'nodes_graph'`` calls ``somNet.nodes_graph``;
            any other value calls ``somNet.diff_graph``.
        :keyword colnum: forwarded to ``somNet.nodes_graph`` (default ``0``);
            only read when ``graph_type == 'nodes_graph'``.
        """
        if graph_type == 'nodes_graph':
            colnum = kwargs.get('colnum', 0)
            self.net.nodes_graph(colnum=colnum)
        else:
            self.net.diff_graph()
if __name__ == '__main__':
    # Demo run: project the first 50 rows of the Appliances Energy Prediction
    # dataset onto a 20x20 grid and print the head of the result.
    # The CSV location may be given as the first command-line argument; the
    # original author's local path is kept as the fallback.
    import sys

    default_csv = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv'
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv

    df = pd.read_csv(csv_path, index_col=0)
    som = SOMTransformation(grid_dimension=(20, 20))
    new_df = som.apply(df.iloc[:50], endogen_variable='Appliances', epochs=100, leaning_rate=0.1)
    print(new_df.head())