Bugfix in partitioners.Entropy: user-supplied fuzzy set names were not being assigned correctly

Petrônio Cândido 2019-01-28 11:44:34 -02:00
parent d7d4efad60
commit 6f4bf247f8
3 changed files with 25 additions and 10 deletions


@@ -94,8 +94,8 @@ class EntropyPartitioner(partitioner.Partitioner):
         partitions.append(self.max)
         partitions = list(set(partitions))
         partitions.sort()
-        for c in np.arange(1, len(partitions) - 1):
-            _name = self.get_name(c)
+        for c in np.arange(1, len(partitions)-1):
+            _name = self.get_name(c-1)
             if self.membership_function == Membership.trimf:
                 sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf,
                                                 [partitions[c - 1], partitions[c], partitions[c + 1]], partitions[c], **kwargs)
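
Note: the loop builds one fuzzy set per interior partition point, with c running from 1 to len(partitions) - 2, while get_name() returns the label for the i-th set counting from zero. Passing c therefore skipped the first user-supplied name and read one past the end of the list on the last set; passing c - 1 lines the sets up with names[0] onwards. A minimal sketch of the off-by-one, where get_name is a hypothetical stand-in assumed to index a user-supplied names list like the base Partitioner does:

    # Hypothetical stand-in for Partitioner.get_name: index user-supplied names
    def get_name(counter, names):
        return names[counter] if names is not None else 'A' + str(counter)

    names = ['Low', 'Medium', 'High']     # one label per fuzzy set
    partitions = [0, 10, 20, 30, 40]      # boundaries: len(names) + 2 points

    # c runs over the interior points 1..3; before the fix get_name(c) gave
    # 'Medium', 'High', IndexError, while get_name(c - 1) gives
    # 'Low', 'Medium', 'High' as intended.
    for c in range(1, len(partitions) - 1):
        print(get_name(c - 1, names))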


@@ -25,14 +25,29 @@ from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia, Enrollments
 from pyFTS.partitioners import Grid
 from pyFTS.models import pwfts, tsaur
 
-dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
-dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
-
-train_uv = dataset['value'].values[:24505]
-test_uv = dataset['value'].values[24505:]
-
-partitioner = Grid.GridPartitioner(data=train_uv, npart=35)
+dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
+dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
+dataset['value'] = dataset['Daily-Max API']
+
+train_uv = dataset['value'].values[:732]
+test_uv = dataset['value'].values[732:]
+
+from itertools import product
+
+levels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
+sublevels = [str(k) for k in np.arange(0, 7)]
+names = []
+for combination in product(*[levels, sublevels]):
+    names.append(combination[0] + combination[1])
+
+print(names)
+
+#partitioner = Grid.GridPartitioner(data=train_uv, npart=35, names=names)
+partitioner = Entropy.EntropyPartitioner(data=train_uv, npart=35, names=names)
+
+print(partitioner)
 
 model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner) #, order=2, lags=[3,4])
 #model = tsaur.MarkovWeightedFTS(partitioner=partitioner)
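
Note: the product of 5 levels and 7 sublevels yields exactly 35 labels, matching npart=35, which is what exercises the fixed indexing. The script also appears to rely on Entropy being imported (e.g. from pyFTS.partitioners import Entropy), which is not shown in this hunk. A standalone sketch of the same name construction, using range in place of np.arange (equivalent here):

    from itertools import product

    levels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
    sublevels = [str(k) for k in range(7)]
    names = [lvl + sub for lvl, sub in product(levels, sublevels)]

    assert len(names) == 35          # one label per fuzzy set, matches npart
    print(names[0], names[-1])       # VeryLow0 VeryHigh6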


@@ -3,7 +3,7 @@ import pandas as pd
 import time
 from pyFTS.data import Enrollments, TAIEX, SONDA
-from pyFTS.partitioners import Grid, Simple
+from pyFTS.partitioners import Grid, Simple, Entropy
 from pyFTS.common import Util
 from pyspark import SparkConf
@@ -114,7 +114,7 @@ vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGr
                           data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')
 vvalue = variable.Variable("Pollution", data_label="value", alias='value',
-                           partitioner=Grid.GridPartitioner, npart=35, data_type=np.float64,
+                           partitioner=Entropy.EntropyPartitioner, npart=35, data_type=np.float64,
                            data=train_mv)
 fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
@@ -122,7 +122,7 @@ fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vva
 model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
                               partitioner=fs)
-model.fit(train_mv, distributed='spark', url='spark://192.168.0.106:7077')
+model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077')
 #'''
 #print(model)
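
For completeness, a minimal standalone check of the fixed behaviour, using synthetic data since the CSV paths in the diff are local to the author's machine. The EntropyPartitioner call mirrors the one in the updated test script; the 'S0'..'S34' labels are hypothetical, and since the number of sets an entropy partitioner builds depends on the data, the names list only needs to be at least as long as the resulting partition count:

    import numpy as np
    from pyFTS.partitioners import Entropy

    data = np.random.normal(50, 10, 1000)       # synthetic series
    names = ['S' + str(k) for k in range(35)]   # hypothetical labels

    part = Entropy.EntropyPartitioner(data=data, npart=35, names=names)
    print(part)   # before the fix the first label was skipped and the last set overran the list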