From 6f4bf247f8b63fb27bc1f6fba0589f809525ee7b Mon Sep 17 00:00:00 2001
From: Petrônio Cândido
Date: Mon, 28 Jan 2019 11:44:34 -0200
Subject: [PATCH] Bugfix in partitioners.Entropy, not catching fuzzy set names correctly

---
 pyFTS/partitioners/Entropy.py |  4 ++--
 pyFTS/tests/general.py        | 25 ++++++++++++++++++++-----
 pyFTS/tests/multivariate.py   |  6 +++---
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/pyFTS/partitioners/Entropy.py b/pyFTS/partitioners/Entropy.py
index 969463c..5a8ba53 100644
--- a/pyFTS/partitioners/Entropy.py
+++ b/pyFTS/partitioners/Entropy.py
@@ -94,8 +94,8 @@ class EntropyPartitioner(partitioner.Partitioner):
         partitions.append(self.max)
         partitions = list(set(partitions))
         partitions.sort()
-        for c in np.arange(1, len(partitions) - 1):
-            _name = self.get_name(c)
+        for c in np.arange(1, len(partitions)-1):
+            _name = self.get_name(c-1)
             if self.membership_function == Membership.trimf:
                 sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf,
                                                 [partitions[c - 1], partitions[c], partitions[c + 1]],partitions[c], **kwargs)
diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py
index 868a6cd..ad37b3d 100644
--- a/pyFTS/tests/general.py
+++ b/pyFTS/tests/general.py
@@ -25,14 +25,29 @@ from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia, Enrollments
 from pyFTS.partitioners import Grid
 from pyFTS.models import pwfts, tsaur
 
-dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
+dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
 
-dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
+dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
+dataset['value'] = dataset['Daily-Max API']
 
-train_uv = dataset['value'].values[:24505]
-test_uv = dataset['value'].values[24505:]
-partitioner = Grid.GridPartitioner(data=train_uv, npart=35)
+train_uv = dataset['value'].values[:732]
+test_uv = dataset['value'].values[732:]
+
+from itertools import product
+
+levels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
+sublevels = [str(k) for k in np.arange(0, 7)]
+names = []
+for combination in product(*[levels, sublevels]):
+    names.append(combination[0] + combination[1])
+
+print(names)
+
+#partitioner = Grid.GridPartitioner(data=train_uv, npart=35, names=names)
+partitioner = Entropy.EntropyPartitioner(data=train_uv,npart=35, names=names)
+
+print(partitioner)
 
 
 model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner) #, order=2, lags=[3,4])
 #model = tsaur.MarkovWeightedFTS(partitioner=partitioner)
diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py
index c2ddc0b..d2682b5 100644
--- a/pyFTS/tests/multivariate.py
+++ b/pyFTS/tests/multivariate.py
@@ -3,7 +3,7 @@ import pandas as pd
 import time
 
 from pyFTS.data import Enrollments, TAIEX, SONDA
-from pyFTS.partitioners import Grid, Simple
+from pyFTS.partitioners import Grid, Simple, Entropy
 from pyFTS.common import Util
 
 from pyspark import SparkConf
@@ -114,7 +114,7 @@ vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGr
                           data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')
 
 vvalue = variable.Variable("Pollution", data_label="value", alias='value',
-                           partitioner=Grid.GridPartitioner, npart=35, data_type=np.float64,
+                           partitioner=Entropy.EntropyPartitioner, npart=35, data_type=np.float64,
                            data=train_mv)
 
 fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
@@ -122,7 +122,7 @@ fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vva
 model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
                               partitioner=fs)
 
-model.fit(train_mv, distributed='spark', url='spark://192.168.0.106:7077')
+model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077')
 
 #'''
 #print(model)
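
For context, a minimal sketch of the indexing issue behind the Entropy.py hunk. It
assumes Partitioner.get_name(i) returns the i-th entry of the user-supplied names
list (as exercised by the names=names argument added in tests/general.py); the
names and cut points below are hypothetical, made up only for illustration.

import numpy as np

# Hypothetical stand-ins for what EntropyPartitioner computes internally.
names = ['Low', 'Medium', 'High']        # user-supplied fuzzy set names
partitions = [0.0, 1.0, 2.0, 3.0, 4.0]   # sorted cut points, including min and max

def get_name(counter, names=names):
    # Assumed behaviour of partitioner.Partitioner.get_name when custom names are given.
    return names[counter]

# One fuzzy set is built per interior cut point, so c runs from 1 to
# len(partitions) - 2 and len(partitions) - 2 names are needed.
for c in np.arange(1, len(partitions) - 1):
    # Before the fix, get_name(c) started at names[1], so names[0] was never used
    # and every label was shifted by one; get_name(c - 1) maps the k-th set to names[k].
    print(c, get_name(c - 1))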