pyFTS/pyFTS/partitioners/KMeans.py
2024-08-08 11:59:33 +04:00

101 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
import math
import random as rnd
import functools, operator
from pyFTS.common import FuzzySet, Membership
from pyFTS.partitioners import partitioner
def distance(x, y):
    """Return the Euclidean distance between two points.

    Both points may be plain numbers (one-dimensional case) or coordinate
    vectors given as a list, a tuple, or a one-dimensional numpy array.

    :param x: first point; its kind (scalar or vector) selects the formula
    :param y: second point; for vectors it must provide at least ``len(x)``
        indexable coordinates, coordinates beyond ``len(x)`` are ignored
    :return: the distance as a float (``0.0`` for two empty vectors)
    :raises IndexError: if ``y`` has fewer coordinates than ``x``
    """
    is_vector = isinstance(x, (list, tuple)) or (
        isinstance(x, np.ndarray) and x.ndim > 0
    )
    if is_vector:
        # Index into y (rather than zip) so a too-short y still raises
        # instead of being silently truncated.
        squared = sum((xi - y[i]) ** 2 for i, xi in enumerate(x))
    else:
        squared = (x - y) ** 2
    return math.sqrt(squared)
def k_means(k, dados, tam, max_iter=1000):
    """Cluster ``dados`` into ``k`` groups with Lloyd's k-means iteration.

    Centroids are seeded with ``k`` randomly drawn data points (drawn with
    replacement, so duplicates are possible). Each pass assigns every point
    to its nearest centroid and then moves each centroid to the mean of the
    points assigned to it. Iteration stops as soon as a pass changes no
    assignment, or after ``max_iter`` passes.

    :param k: number of centroids to compute
    :param dados: indexable collection of points supporting ``len()``;
        each point is a number when ``tam == 1`` or an indexable sequence
        of ``tam`` numbers otherwise
    :param tam: number of dimensions of each point
    :param max_iter: upper bound on the number of assign/update passes
    :return: list with ``k`` centroids; a centroid that never receives any
        point keeps its initial (randomly drawn) value
    :raises ValueError: if ``k > 0`` and ``dados`` is empty
    """
    n = len(dados)
    if k > 0 and n == 0:
        raise ValueError("k_means requires at least one data point")

    # Seed the centroids with random data points. Multi-dimensional points
    # are copied so the in-place centroid updates below never write into the
    # caller's data (or into another centroid drawn from the same point).
    centroides = []
    for _ in range(k):
        ponto = dados[rnd.randint(0, n - 1)]
        centroides.append(list(ponto) if tam > 1 else ponto)

    # grupos[i] is the index of the centroid assigned to dados[i];
    # -1 means "not assigned yet".
    grupos = [-1] * n

    for _ in range(max_iter):
        modificacao = False

        # Assignment step: attach every instance to its nearest centroid.
        for idx, instancia in enumerate(dados):
            melhor = grupos[idx]
            menor_dist = float('inf')
            for g, centro in enumerate(centroides):
                d = distance(instancia, centro)
                # Strict comparison: on ties the first centroid wins.
                if d < menor_dist:
                    menor_dist = d
                    melhor = g
            if melhor != grupos[idx]:
                grupos[idx] = melhor
                modificacao = True

        # Unchanged assignments reproduce exactly the same centroids, so the
        # algorithm has reached a fixed point and further passes are no-ops.
        if not modificacao:
            break

        # Update step: collect the members of every group in a single pass.
        membros = [[] for _ in range(k)]
        for idx, g in enumerate(grupos):
            if g >= 0:
                membros[g].append(dados[idx])

        # Move each non-empty group's centroid to the mean of its members.
        for g, pontos in enumerate(membros):
            total_inst = len(pontos)
            if total_inst == 0:
                continue
            if tam > 1:
                for dim in range(tam):
                    centroides[g][dim] = sum(p[dim] for p in pontos) / total_inst
            else:
                centroides[g] = sum(pontos) / total_inst

    return centroides
class KMeansPartitioner(partitioner.Partitioner):
    """Partitioner that places triangular fuzzy sets on k-means centroids."""

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base partitioner as "KMeans"."""
        super().__init__(name="KMeans", **kwargs)

    def build(self, data):
        """Build the fuzzy sets for ``data``.

        The data is clustered into ``self.partitions`` one-dimensional
        centroids; ``self.max`` and ``self.min`` are added as extra cut
        points, duplicates are removed and the points are sorted. Every
        interior point becomes the peak of a triangular set whose feet are
        its two neighbouring points (all values rounded to 3 decimals).

        :param data: the values to cluster, passed straight to ``k_means``
        :return: dict mapping each set name to its ``FuzzySet``
        """
        fuzzy_sets = {}
        extra_args = {'type': self.type, 'variable': self.variable}

        found = k_means(self.partitions, data, 1)
        # Universe bounds join the centroids; set() drops exact duplicates.
        cut_points = sorted(set(found + [self.max, self.min]))

        last_interior = len(cut_points) - 1
        for pos in range(1, last_interior):
            label = self.get_name(pos)
            lower, peak, upper = (round(v, 3) for v in cut_points[pos - 1:pos + 2])
            fuzzy_sets[label] = FuzzySet.FuzzySet(
                label, Membership.trimf, [lower, peak, upper], peak, **extra_args
            )

        return fuzzy_sets