Bugfixes and improvements in cmvfts and benchmarks.Measures

Petrônio Cândido 2019-01-28 15:38:40 -02:00
parent e36ce6692e
commit 1fce1145cc
5 changed files with 58 additions and 41 deletions


@@ -19,7 +19,7 @@ def acf(data, k):
     :param k:
     :return:
     """
-    mu = np.mean(data)
+    mu = np.nanmean(data)
     sigma = np.var(data)
     n = len(data)
     s = 0
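
Switching the series mean in acf to np.nanmean makes the estimate tolerant of missing observations, which would otherwise poison the whole computation. A minimal sketch of the difference, on hypothetical data (not from the source):

import numpy as np

# One NaN is enough to make the plain mean useless
data = np.array([1.0, 2.0, np.nan, 4.0])

print(np.mean(data))     # nan
print(np.nanmean(data))  # 2.333..., the NaN entry is simply ignored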
@@ -68,7 +68,7 @@ def mape(targets, forecasts):
     targets = np.array(targets)
     if isinstance(forecasts, list):
         forecasts = np.array(forecasts)
-    return np.mean(np.abs(np.divide((targets - forecasts), targets))) * 100
+    return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100


 def smape(targets, forecasts, type=2):
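
With np.subtract and np.nanmean, mape now degrades gracefully when the forecast series has gaps: NaN terms are dropped from the average instead of propagating. A sketch under the assumption that NaNs mark missing forecasts (values are illustrative):

import numpy as np

targets = np.array([10.0, 20.0, 40.0])
forecasts = np.array([11.0, np.nan, 38.0])  # one gap in the forecasts

# Same formula as the patched mape(); the NaN term is excluded from the mean
mape = np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
print(mape)  # 7.5 - the average of the 10% and 5% errors, ignoring the gap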
@@ -85,11 +85,11 @@ def smape(targets, forecasts, type=2):
     if isinstance(forecasts, list):
         forecasts = np.array(forecasts)
     if type == 1:
-        return np.mean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
+        return np.nanmean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
     elif type == 2:
-        return np.mean(np.abs(forecasts - targets) / (abs(forecasts) + abs(targets))) * 100
+        return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return sum(np.abs(forecasts - targets)) / sum(forecasts + targets)
+        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)


 def mape_interval(targets, forecasts):
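
The three branches correspond to common sMAPE variants: type 1 divides each error by the mean of forecast and target, type 2 by the sum of their absolute values (scaled to percent), and the fallback computes a cumulative sum-over-sum ratio. A quick numeric sanity check with illustrative values:

import numpy as np

targets = np.array([100.0, 200.0])
forecasts = np.array([110.0, 180.0])
d = np.abs(forecasts - targets)

print(np.nanmean(d / ((forecasts + targets) / 2)))                  # type 1, ~0.10
print(np.nanmean(d / (np.abs(forecasts) + np.abs(targets))) * 100)  # type 2, ~5.0
print(np.sum(d) / np.sum(forecasts + targets))                      # fallback, ~0.05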
@@ -114,9 +114,9 @@ def UStatistic(targets, forecasts):
     naive = []
     y = []
     for k in np.arange(0, l - 1):
-        y.append((forecasts[k] - targets[k]) ** 2)
-        naive.append((targets[k + 1] - targets[k]) ** 2)
+        y.append(np.subtract(forecasts[k], targets[k]) ** 2)
+        naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)

-    return np.sqrt(sum(y) / sum(naive))
+    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))


 def TheilsInequality(targets, forecasts):
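
UStatistic compares the model's squared errors against those of the naïve random-walk forecast (predict the previous observation); values below 1 mean the model beats that baseline. A self-contained sketch of the same computation, vectorized:

import numpy as np

def u_statistic(targets, forecasts):
    # Theil's U: model error relative to the naive "no change" forecast
    targets = np.asarray(targets, dtype=float)
    forecasts = np.asarray(forecasts, dtype=float)
    y = np.subtract(forecasts[:-1], targets[:-1]) ** 2
    naive = np.subtract(targets[1:], targets[:-1]) ** 2
    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))

print(u_statistic([1, 2, 3, 5], [1.1, 2.2, 2.9, 5.2]))  # 0.1, far better than naive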
@@ -188,7 +188,7 @@ def coverage(targets, forecasts):
             preds.append(1)
         else:
             preds.append(0)
-    return np.mean(preds)
+    return np.nanmean(preds)


 def pinball(tau, target, forecast):
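
coverage scores interval forecasts: each target that falls inside its predicted interval contributes a 1, each miss a 0, and the mean of those indicators is the empirical coverage rate. A minimal sketch with hypothetical intervals (not the pyFTS internals):

import numpy as np

targets = [10.0, 15.0, 22.0]
intervals = [(8, 12), (16, 18), (20, 25)]  # (lower, upper) interval forecasts

preds = [1 if lo <= t <= hi else 0 for t, (lo, hi) in zip(targets, intervals)]
print(np.nanmean(preds))  # 0.666... - two of the three targets are covered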
@@ -201,9 +201,9 @@ def pinball(tau, target, forecast):
     :return: float, distance of forecast to the tau-quantile of the target
     """
     if target >= forecast:
-        return (target - forecast) * tau
+        return np.subtract(target, forecast) * tau
     else:
-        return (forecast - target) * (1 - tau)
+        return np.subtract(forecast, target) * (1 - tau)


 def pinball_mean(tau, targets, forecasts):
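
The pinball (quantile) loss penalizes under- and over-forecasts asymmetrically: with tau = 0.9 an under-forecast costs 0.9 per unit while an over-forecast costs only 0.1, which is what makes it a proper score for quantile forecasts. A quick check of both branches:

import numpy as np

def pinball(tau, target, forecast):
    # Same rule as the patched function above
    if target >= forecast:
        return np.subtract(target, forecast) * tau
    return np.subtract(forecast, target) * (1 - tau)

print(pinball(0.9, 10.0, 8.0))   # 1.8 - under-forecast, heavy penalty
print(pinball(0.9, 10.0, 12.0))  # 0.2 - over-forecast, light penalty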


@@ -526,7 +526,7 @@ class FTS(object):
             for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)):
                 tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
         else:
-            for r in self.model.flrgs:
+            for r in self.flrgs:
                 tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
         return tmp


@@ -9,6 +9,7 @@ import numpy as np
 from pyFTS.common import FuzzySet, FLR, fts, flrg
 from itertools import product
+

 class HighOrderFLRG(flrg.FLRG):
     """Conventional High Order Fuzzy Logical Relationship Group"""

     def __init__(self, order, **kwargs):
@@ -184,6 +185,8 @@ class HighOrderFTS(fts.FTS):
         explain = kwargs.get('explain', False)

+        fuzzyfied = kwargs.get('fuzzyfied', False)
+
         ret = []

         l = len(ndata) if not explain else self.max_lag + 1
@@ -191,26 +194,31 @@ class HighOrderFTS(fts.FTS):
         if l < self.max_lag:
             return ndata

-        for k in np.arange(self.max_lag, l+1):
+        for k in np.arange(self.max_lag, l):
+
+            sample = ndata[k - self.max_lag: k]

             if explain:
                 print("Fuzzyfication \n")

-            if not kwargs.get('fuzzyfied', False):
-                flrgs = self.generate_lhs_flrg(ndata[k - self.max_lag: k], explain)
+            if not fuzzyfied:
+                flrgs = self.generate_lhs_flrg(sample, explain)
             else:
-                flrgs = self.generate_lhs_flrg_fuzzyfied(ndata[k - self.max_lag: k], explain)
+                flrgs = self.generate_lhs_flrg_fuzzyfied(sample, explain)

             if explain:
                 print("Rules:\n")

-            tmp = []
+            midpoints = []
+            memberships = []

             for flrg in flrgs:
                 if flrg.get_key() not in self.flrgs:
                     if len(flrg.LHS) > 0:
                         mp = self.partitioner.sets[flrg.LHS[-1]].centroid
-                        tmp.append(mp)
+                        mv = self.partitioner.sets[flrg.LHS[-1]].membership(sample[-1]) if not fuzzyfied else None
+                        midpoints.append(mp)
+                        memberships.append(mv)

                         if explain:
                             print("\t {} -> {} (Naïve)\t Midpoint: {}\n".format(str(flrg.LHS), flrg.LHS[-1],
@@ -218,12 +226,15 @@ class HighOrderFTS(fts.FTS):
                 else:
                     flrg = self.flrgs[flrg.get_key()]
                     mp = flrg.get_midpoint(self.partitioner.sets)
-                    tmp.append(mp)
+                    mv = flrg.get_membership(sample, self.partitioner.sets) if not fuzzyfied else None
+                    midpoints.append(mp)
+                    memberships.append(mv)

                     if explain:
                         print("\t {} \t Midpoint: {}\n".format(str(flrg), mp))
+                        print("\t {} \t Membership: {}\n".format(str(flrg), mv))

-            final = np.nanmean(tmp)
+            final = np.dot(midpoints, memberships) if not fuzzyfied else np.nanmean(midpoints)
             ret.append(final)

             if explain:
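
The change from np.nanmean(tmp) to np.dot(midpoints, memberships) replaces an unweighted average of rule midpoints with a membership-weighted sum, so rules that match the input sample more strongly pull the point forecast harder. A standalone sketch of the two aggregations (the values are illustrative, not taken from the model):

import numpy as np

midpoints = np.array([10.0, 20.0, 30.0])  # midpoints of the matched rules
memberships = np.array([0.7, 0.2, 0.1])   # membership of the sample in each rule

print(np.nanmean(midpoints))           # 20.0 - old behavior, every rule counts equally
print(np.dot(midpoints, memberships))  # 14.0 - new behavior, weighted by membership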


@@ -27,7 +27,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
         self.order = kwargs.get("order", 2)
         self.lags = kwargs.get("lags", None)
-        self.alpha_cut = kwargs.get('alpha_cut', 0.25)
+        self.alpha_cut = kwargs.get('alpha_cut', 0.0)

         self.shortname = "ClusteredMVFTS"
         self.name = "Clustered Multivariate FTS"
@@ -38,7 +38,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
         ndata = []
         for index, row in data.iterrows():
             data_point = self.format_data(row)
-            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, alpha_cut=self.alpha_cut))
+            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
+                                                           alpha_cut=self.alpha_cut))

         return ndata
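
alpha_cut is the minimum membership a fuzzy set needs for a data point to be assigned to it during fuzzification; lowering the default from 0.25 to 0.0 means every set with nonzero membership is kept. A hedged sketch of what an alpha-cut does, generic rather than the pyFTS internals:

# Hypothetical memberships of one data point in four clustered fuzzy sets
memberships = {'A': 0.6, 'B': 0.3, 'C': 0.2, 'D': 0.0}

def alpha_cut(memberships, alpha):
    # Keep only the sets whose membership exceeds the cut
    return [k for k, mv in memberships.items() if mv > alpha]

print(alpha_cut(memberships, 0.25))  # ['A', 'B']      - old default
print(alpha_cut(memberships, 0.0))   # ['A', 'B', 'C'] - new default keeps weak matches too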


@@ -14,14 +14,7 @@ import os
 os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
 os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'

 #'''

-data = SONDA.get_dataframe()
-data = data[['datahora','glo_avg']]
-data = data[~(np.isnan(data['glo_avg']) | np.equal(data['glo_avg'], 0.0))]
-
-train = data.iloc[:1500000]
-test = data.iloc[1500000:]

 from pyFTS.models.multivariate import common, variable, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
@@ -96,12 +89,14 @@ from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, grid
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime

-dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
-dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
+dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
+dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
+dataset['value'] = dataset['Daily-Max API']

-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
+train_mv = dataset.iloc[:732]
+test_mv = dataset.iloc[732:]

 sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']}
@@ -109,26 +104,36 @@ vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner,
                          data=train_mv, partitioner_specific=sp)

-print(vday.partitioner)
+sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
+
+vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp)

-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')

 vvalue = variable.Variable("Pollution", data_label="value", alias='value',
-                           partitioner=Entropy.EntropyPartitioner, npart=35, data_type=np.float64,
+                           partitioner=Grid.GridPartitioner, npart=35,
                            data=train_mv)

-fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
+fs = grid.GridCluster(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue)

-print(len(fs.sets))

 #model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue)

-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
-                              partitioner=fs)
+model = cmvfts.ClusteredMVFTS(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue,
+                              partitioner=fs, knn=5, order=2)

 model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077')

 #'''

 #print(model)

+print(len(fs.sets))
+
+from pyFTS.benchmarks import Measures
+
+print(Measures.get_point_statistics(test_mv, model))
+
+#print(model)

 '''
 def fun(x):
     return (x, x % 2)
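
The newly added evaluation lines score the fitted model on the held-out test set. If the usual pyFTS convention holds, Measures.get_point_statistics returns the point-forecast accuracy metrics defined above (RMSE, SMAPE and Theil's U); a hedged usage sketch, assuming that return order:

from pyFTS.benchmarks import Measures

# Assumption: returns [rmse, smape, u]; test_mv and model come from the script above
rmse, smape, u = Measures.get_point_statistics(test_mv, model)
print("RMSE: {}, SMAPE: {}, U: {}".format(rmse, smape, u))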