92 lines
3.2 KiB
Python
92 lines
3.2 KiB
Python
"""
|
|
Kohonen Self Organizing Maps for Fuzzy Time Series
|
|
"""
|
|
import pandas as pd
|
|
import SimpSOM as sps
|
|
from pyFTS.models.multivariate import wmvfts
|
|
from typing import Tuple
|
|
from pyFTS.common.Transformations import Transformation
|
|
from typing import List
|
|
|
|
class SOMTransformation(Transformation):
|
|
def __init__(self,
|
|
grid_dimension: Tuple,
|
|
**kwargs):
|
|
# SOM attributes
|
|
self.load_file = kwargs.get('loadFile')
|
|
self.net: sps.somNet = None
|
|
self.data: pd.DataFrame = None
|
|
self.grid_dimension: Tuple = grid_dimension
|
|
self.pbc = kwargs.get('PBC', True)
|
|
|
|
# debug attributes
|
|
self.name = 'Kohonen Self Organizing Maps FTS'
|
|
self.shortname = 'SOM-FTS'
|
|
|
|
def apply(self,
|
|
data: pd.DataFrame,
|
|
endogen_variable=None,
|
|
names: List[str] = ['x', 'y'],
|
|
param=None,
|
|
**kwargs): #TODO(CASCALHO) MELHORAR DOCSTRING
|
|
"""
|
|
Transform dataset from M-DIMENSION to 3-dimension
|
|
"""
|
|
if endogen_variable not in data.columns:
|
|
endogen_variable = None
|
|
cols = data.columns[:-1] if endogen_variable is None else [col for col in data.columns if
|
|
col != endogen_variable]
|
|
if self.net is None:
|
|
train = data[cols]
|
|
self.train(data=train, **kwargs)
|
|
new_data = self.net.project(data[cols].values)
|
|
new_data = pd.DataFrame(new_data, columns=names)
|
|
endogen = endogen_variable if endogen_variable is not None else data.columns[-1]
|
|
new_data[endogen] = data[endogen].values
|
|
return new_data
|
|
|
|
def __repr__(self):
|
|
status = "is trained" if self.net is not None else "not trained"
|
|
return f'{self.name}-{status}'
|
|
|
|
def __str__(self):
|
|
return self.name
|
|
|
|
def __del__(self):
|
|
del self.net
|
|
|
|
def train(self,
|
|
data: pd.DataFrame,
|
|
percentage_train: float = .7,
|
|
leaning_rate: float = 0.01,
|
|
epochs: int = 10000):
|
|
data.dropna(inplace=True)
|
|
self.data = data.values
|
|
limit = round(len(self.data) * percentage_train)
|
|
train = self.data[:limit]
|
|
x, y = self.grid_dimension
|
|
self.net = sps.somNet(x, y, train, PBC=self.pbc, loadFile=self.load_file)
|
|
self.net.train(startLearnRate=leaning_rate,
|
|
epochs=epochs)
|
|
|
|
|
|
def save_net(self,
|
|
filename: str = "SomNet trained"):
|
|
self.net.save(filename)
|
|
self.load_file = filename
|
|
|
|
def show_grid(self,
|
|
graph_type: str = 'nodes_graph',
|
|
**kwargs):
|
|
if graph_type == 'nodes_graph':
|
|
colnum = kwargs.get('colnum', 0)
|
|
self.net.nodes_graph(colnum=colnum)
|
|
else:
|
|
self.net.diff_graph()
|
|
|
|
if __name__ == '__main__':
|
|
file = '/home/matheus_cascalho/Documentos/matheus_cascalho/MINDS/TimeSeries_Lab/SOM/Appliance Energy Prediction/energydata_complete.csv'
|
|
df = pd.read_csv(file, index_col=0)
|
|
som = SOMTransformation(grid_dimension=(20, 20))
|
|
new_df = som.apply(df.iloc[:50], endogen_variable='Appliances', epochs=100, leaning_rate=0.1)
|
|
print(new_df.head()) |