Bugfixes and improvements in cmvfts and benchmarks.Measures
This commit is contained in:
parent
e36ce6692e
commit
1fce1145cc
@@ -19,7 +19,7 @@ def acf(data, k):
     :param k:
     :return:
     """
-    mu = np.mean(data)
+    mu = np.nanmean(data)
     sigma = np.var(data)
     n = len(data)
     s = 0
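The switch from `np.mean` to `np.nanmean` here (and in the metrics below) makes the statistics tolerant of missing values. A minimal sketch, not part of the commit, showing the difference:

```python
import numpy as np

data = [1.0, 2.0, np.nan, 4.0]

print(np.mean(data))     # nan -- a single NaN poisons the plain mean
print(np.nanmean(data))  # 2.333... -- NaN entries are simply skipped
```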
@@ -68,7 +68,7 @@ def mape(targets, forecasts):
         targets = np.array(targets)
     if isinstance(forecasts, list):
         forecasts = np.array(forecasts)
-    return np.mean(np.abs(np.divide((targets - forecasts), targets))) * 100
+    return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100


 def smape(targets, forecasts, type=2):
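Using `np.subtract`/`np.divide` together with `np.nanmean` makes the MAPE NaN-aware: rows with missing targets produce NaN ratios that the mean then ignores. A small illustrative sketch (not library code):

```python
import numpy as np

targets = np.array([10.0, np.nan, 40.0])
forecasts = np.array([11.0, 18.0, 44.0])

errors = np.abs(np.divide(np.subtract(targets, forecasts), targets))
print(errors)                    # [0.1 nan 0.1]
print(np.nanmean(errors) * 100)  # 10.0 -- the NaN row is ignored
```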
@@ -85,11 +85,11 @@ def smape(targets, forecasts, type=2):
     if isinstance(forecasts, list):
         forecasts = np.array(forecasts)
     if type == 1:
-        return np.mean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
+        return np.nanmean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
     elif type == 2:
-        return np.mean(np.abs(forecasts - targets) / (abs(forecasts) + abs(targets))) * 100
+        return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return sum(np.abs(forecasts - targets)) / sum(forecasts + targets)
+        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)


 def mape_interval(targets, forecasts):
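The three `type` branches are different SMAPE conventions. A quick comparison on toy data (a sketch, independent of the library):

```python
import numpy as np

targets = np.array([100.0, 200.0, 300.0])
forecasts = np.array([110.0, 190.0, 330.0])

# type=1: |F-T| / ((F+T)/2), averaged -- lives on a 0..2 scale
print(np.nanmean(np.abs(forecasts - targets) / ((forecasts + targets) / 2)))
# type=2: |F-T| / (|F|+|T|), averaged and scaled to a percentage
print(np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + np.abs(targets))) * 100)
# else: ratio of total absolute error to total level
print(np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets))
```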
@@ -114,9 +114,9 @@ def UStatistic(targets, forecasts):
     naive = []
     y = []
     for k in np.arange(0, l - 1):
-        y.append((forecasts[k] - targets[k]) ** 2)
-        naive.append((targets[k + 1] - targets[k]) ** 2)
-    return np.sqrt(sum(y) / sum(naive))
+        y.append(np.subtract(forecasts[k], targets[k]) ** 2)
+        naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
+    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))


 def TheilsInequality(targets, forecasts):
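`UStatistic` is Theil's U: the RMSE of the forecasts divided by the RMSE of the naive random-walk forecast, so U < 1 means the model beats simple persistence. A vectorized sketch of the same computation (illustrative, not the library code):

```python
import numpy as np

def u_statistic(targets, forecasts):
    # Ratio of model error to the naive "tomorrow equals today" error.
    targets = np.asarray(targets, dtype=float)
    forecasts = np.asarray(forecasts, dtype=float)
    y = np.subtract(forecasts[:-1], targets[:-1]) ** 2
    naive = np.subtract(targets[1:], targets[:-1]) ** 2
    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))

print(u_statistic([1, 2, 3, 5], [1.1, 2.2, 2.9, 5.5]))  # 0.1 -- far better than persistence
```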
@@ -188,7 +188,7 @@ def coverage(targets, forecasts):
             preds.append(1)
         else:
             preds.append(0)
-    return np.mean(preds)
+    return np.nanmean(preds)


 def pinball(tau, target, forecast):
@@ -201,9 +201,9 @@ def pinball(tau, target, forecast):
     :return: float, distance of forecast to the tau-quantile of the target
     """
     if target >= forecast:
-        return (target - forecast) * tau
+        return np.subtract(target, forecast) * tau
     else:
-        return (forecast - target) * (1 - tau)
+        return np.subtract(forecast, target) * (1 - tau)


 def pinball_mean(tau, targets, forecasts):
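Pinball (quantile) loss penalizes under- and over-forecasts asymmetrically, which is what makes it a proper score for quantile forecasts. A toy illustration of the asymmetry (a sketch mirroring the function above):

```python
import numpy as np

def pinball(tau, target, forecast):
    # Under-forecasts cost tau per unit, over-forecasts (1 - tau) per unit.
    if target >= forecast:
        return np.subtract(target, forecast) * tau
    else:
        return np.subtract(forecast, target) * (1 - tau)

# With tau = 0.9 an under-forecast is nine times as expensive as an
# over-forecast, pushing the optimum toward the 90th percentile.
print(pinball(0.9, 10.0, 8.0))   # 1.8 (forecast below target)
print(pinball(0.9, 10.0, 12.0))  # 0.2 (forecast above target)
```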
@@ -526,7 +526,7 @@ class FTS(object):
             for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)):
                 tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
         else:
-            for r in self.model.flrgs:
+            for r in self.flrgs:
                 tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
         return tmp
@@ -9,6 +9,7 @@ import numpy as np
 from pyFTS.common import FuzzySet, FLR, fts, flrg
 from itertools import product
 
+
 class HighOrderFLRG(flrg.FLRG):
     """Conventional High Order Fuzzy Logical Relationship Group"""
     def __init__(self, order, **kwargs):
@@ -184,6 +185,8 @@ class HighOrderFTS(fts.FTS):
 
         explain = kwargs.get('explain', False)
 
+        fuzzyfied = kwargs.get('fuzzyfied', False)
+
         ret = []
 
         l = len(ndata) if not explain else self.max_lag + 1
@@ -191,26 +194,31 @@ class HighOrderFTS(fts.FTS):
         if l < self.max_lag:
             return ndata
 
-        for k in np.arange(self.max_lag, l+1):
+        for k in np.arange(self.max_lag, l):
 
+            sample = ndata[k - self.max_lag: k]
+
             if explain:
                 print("Fuzzyfication \n")
 
-            if not kwargs.get('fuzzyfied', False):
-                flrgs = self.generate_lhs_flrg(ndata[k - self.max_lag: k], explain)
+            if not fuzzyfied:
+                flrgs = self.generate_lhs_flrg(sample, explain)
             else:
-                flrgs = self.generate_lhs_flrg_fuzzyfied(ndata[k - self.max_lag: k], explain)
+                flrgs = self.generate_lhs_flrg_fuzzyfied(sample, explain)
 
             if explain:
                 print("Rules:\n")
 
-            tmp = []
+            midpoints = []
+            memberships = []
             for flrg in flrgs:
 
                 if flrg.get_key() not in self.flrgs:
                     if len(flrg.LHS) > 0:
                         mp = self.partitioner.sets[flrg.LHS[-1]].centroid
-                        tmp.append(mp)
+                        mv = self.partitioner.sets[flrg.LHS[-1]].membership(sample[-1]) if not fuzzyfied else None
+                        midpoints.append(mp)
+                        memberships.append(mv)
 
                         if explain:
                             print("\t {} -> {} (Naïve)\t Midpoint: {}\n".format(str(flrg.LHS), flrg.LHS[-1],
@@ -218,12 +226,15 @@ class HighOrderFTS(fts.FTS):
                 else:
                     flrg = self.flrgs[flrg.get_key()]
                     mp = flrg.get_midpoint(self.partitioner.sets)
-                    tmp.append(mp)
+                    mv = flrg.get_membership(sample, self.partitioner.sets) if not fuzzyfied else None
+                    midpoints.append(mp)
+                    memberships.append(mv)
 
                     if explain:
                         print("\t {} \t Midpoint: {}\n".format(str(flrg), mp))
+                        print("\t {} \t Membership: {}\n".format(str(flrg), mv))
 
-            final = np.nanmean(tmp)
+            final = np.dot(midpoints, memberships) if not fuzzyfied else np.nanmean(midpoints)
             ret.append(final)
 
             if explain:
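This is the substantive change in `HighOrderFTS.forecast`: instead of averaging the midpoints of all matched rules, each midpoint is now weighted by how strongly the current sample activates the rule's left-hand side (falling back to the plain mean when pre-fuzzyfied input carries no membership grades). A toy illustration of the effect (not pyFTS code):

```python
import numpy as np

# Three matched rules: their defuzzified midpoints and the membership of
# the current sample in each rule's LHS (here they happen to sum to 1;
# np.dot is an unnormalized weighted sum when they do not).
midpoints = np.array([10.0, 15.0, 20.0])
memberships = np.array([0.7, 0.2, 0.1])

print(np.nanmean(midpoints))           # 15.0 -- old: every rule counts equally
print(np.dot(midpoints, memberships))  # 12.0 -- new: weakly activated rules contribute little
```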
@@ -27,7 +27,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
 
         self.order = kwargs.get("order", 2)
         self.lags = kwargs.get("lags", None)
-        self.alpha_cut = kwargs.get('alpha_cut', 0.25)
+        self.alpha_cut = kwargs.get('alpha_cut', 0.0)
 
         self.shortname = "ClusteredMVFTS"
         self.name = "Clustered Multivariate FTS"
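Lowering the default `alpha_cut` from 0.25 to 0.0 means that, out of the box, fuzzification no longer discards sets the data point only weakly belongs to. A toy sketch of what an alpha-cut does (illustrative only; the set names are made up):

```python
# Membership grades of one data point in three hypothetical fuzzy sets.
memberships = {'A1': 0.05, 'A2': 0.30, 'A3': 0.65}

def alpha_cut(memberships, alpha):
    # Keep only the sets whose membership exceeds the threshold.
    return {k: v for k, v in memberships.items() if v > alpha}

print(alpha_cut(memberships, 0.25))  # {'A2': 0.3, 'A3': 0.65} -- old default
print(alpha_cut(memberships, 0.0))   # all three sets survive -- new default
```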
@@ -38,7 +38,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
         ndata = []
         for index, row in data.iterrows():
             data_point = self.format_data(row)
-            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, alpha_cut=self.alpha_cut))
+            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
+                                                           alpha_cut=self.alpha_cut))
 
         return ndata
 
@@ -14,14 +14,7 @@ import os
 os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
 os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
 #'''
-data = SONDA.get_dataframe()
-
-data = data[['datahora','glo_avg']]
-
-data = data[~(np.isnan(data['glo_avg']) | np.equal(data['glo_avg'], 0.0))]
-
-train = data.iloc[:1500000]
-test = data.iloc[1500000:]
+
 
 from pyFTS.models.multivariate import common, variable, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
@@ -96,12 +89,14 @@ from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, g
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 
-dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
+dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
 
-dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
+dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
+dataset['value'] = dataset['Daily-Max API']
+
 
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
+train_mv = dataset.iloc[:732]
+test_mv = dataset.iloc[732:]
 
 sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']}
 
@@ -109,26 +104,36 @@ vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.Ti
                          data=train_mv, partitioner_specific=sp)
 
 
-print(vday.partitioner)
+sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
+
+vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp)
 
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')
 
 vvalue = variable.Variable("Pollution", data_label="value", alias='value',
-                           partitioner=Entropy.EntropyPartitioner, npart=35, data_type=np.float64,
+                           partitioner=Grid.GridPartitioner, npart=35,
                            data=train_mv)
 
-fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
+fs = grid.GridCluster(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue)
+
+print(len(fs.sets))
 
 #model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
-                              partitioner=fs)
+model = cmvfts.ClusteredMVFTS(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue,
+                              partitioner=fs, knn=5, order=2)
 
 model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077')
 #'''
 #print(model)
 
+print(len(fs.sets))
+
+from pyFTS.benchmarks import Measures
+print(Measures.get_point_statistics(test_mv, model))
+
+#print(model)
+
 '''
 def fun(x):
     return (x, x % 2)
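The evaluation added at the end of the test script replaces manual inspection with the benchmark module. A usage sketch, under the assumption that `Measures.get_point_statistics` returns the point-forecast triple RMSE, SMAPE and Theil's U in that order:

```python
from pyFTS.benchmarks import Measures

# test_mv and model as defined in the script above; the unpacking below
# assumes the (rmse, smape, u) return order of this pyFTS version.
rmse, smape, u = Measures.get_point_statistics(test_mv, model)
print("RMSE: {}  SMAPE: {}  Theil's U: {}".format(rmse, smape, u))
```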