Bugfixes and improvements in cmvfts and benchmarks.Measures
This commit is contained in:
parent e36ce6692e
commit 1fce1145cc
pyFTS/benchmarks/Measures.py

@@ -19,7 +19,7 @@ def acf(data, k):
     :param k:
     :return:
     """
-    mu = np.mean(data)
+    mu = np.nanmean(data)
     sigma = np.var(data)
     n = len(data)
     s = 0
@@ -68,7 +68,7 @@ def mape(targets, forecasts):
     targets = np.array(targets)
     if isinstance(forecasts, list):
         forecasts = np.array(forecasts)
-    return np.mean(np.abs(np.divide((targets - forecasts), targets))) * 100
+    return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100


def smape(targets, forecasts, type=2):
@@ -85,11 +85,11 @@ def smape(targets, forecasts, type=2):
     if isinstance(forecasts, list):
         forecasts = np.array(forecasts)
     if type == 1:
-        return np.mean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
+        return np.nanmean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
     elif type == 2:
-        return np.mean(np.abs(forecasts - targets) / (abs(forecasts) + abs(targets))) * 100
+        return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return sum(np.abs(forecasts - targets)) / sum(forecasts + targets)
+        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)


def mape_interval(targets, forecasts):
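A quick sketch (not part of the commit) of why these np.mean to np.nanmean swaps matter: pyFTS point forecasts can carry NaNs (for instance, no forecast exists for the first lags of a high-order model), and a single NaN makes np.mean return NaN for the whole score. The arrays below are made up for illustration:

```python
import numpy as np

targets   = np.array([10.0, 12.0, 11.0, 13.0])
forecasts = np.array([np.nan, 11.5, 11.2, 12.4])  # NaN where no forecast exists

np.mean(np.abs((targets - forecasts) / targets)) * 100     # nan: one NaN poisons the score
np.nanmean(np.abs((targets - forecasts) / targets)) * 100  # ~3.5: NaNs are ignored
```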
@@ -114,9 +114,9 @@ def UStatistic(targets, forecasts):
     naive = []
     y = []
     for k in np.arange(0, l - 1):
-        y.append((forecasts[k] - targets[k]) ** 2)
-        naive.append((targets[k + 1] - targets[k]) ** 2)
-    return np.sqrt(sum(y) / sum(naive))
+        y.append(np.subtract(forecasts[k], targets[k]) ** 2)
+        naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
+    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))


def TheilsInequality(targets, forecasts):
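For context, UStatistic computes Theil's U, which normalizes the model's squared errors by those of the naive random-walk forecast ("tomorrow equals today"); U below 1 means the model beats that baseline. A self-contained sketch with invented numbers:

```python
import numpy as np

targets   = np.array([10.0, 12.0, 11.0, 13.0, 12.5])
forecasts = np.array([10.5, 11.0, 11.5, 12.0, 12.0])

num = np.sum((forecasts[:-1] - targets[:-1]) ** 2)  # model squared errors
den = np.sum((targets[1:] - targets[:-1]) ** 2)     # naive random-walk errors
u = np.sqrt(num / den)                              # ~0.52 here: better than naive
```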
@@ -188,7 +188,7 @@ def coverage(targets, forecasts):
             preds.append(1)
         else:
             preds.append(0)
-    return np.mean(preds)
+    return np.nanmean(preds)


def pinball(tau, target, forecast):
@@ -201,9 +201,9 @@ def pinball(tau, target, forecast):
     :return: float, distance of forecast to the tau-quantile of the target
     """
     if target >= forecast:
-        return (target - forecast) * tau
+        return np.subtract(target, forecast) * tau
     else:
-        return (forecast - target) * (1 - tau)
+        return np.subtract(forecast, target) * (1 - tau)


def pinball_mean(tau, targets, forecasts):
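The pinball (quantile) loss implemented here penalizes under-forecasts with weight tau and over-forecasts with weight 1 - tau, so minimizing it pushes the forecast toward the tau-quantile of the target rather than the mean. A tiny standalone sketch of the asymmetry:

```python
def pinball_loss(tau, target, forecast):
    # under-forecast (target above): weight tau; over-forecast: weight 1 - tau
    if target >= forecast:
        return (target - forecast) * tau
    return (forecast - target) * (1 - tau)

pinball_loss(0.9, 100.0, 90.0)   # 9.0 -> missing low is expensive at tau = 0.9
pinball_loss(0.9, 100.0, 110.0)  # 1.0 -> missing high is cheap at tau = 0.9
```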
pyFTS/common/fts.py

@@ -526,7 +526,7 @@ class FTS(object):
             for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)):
                 tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
         else:
-            for r in self.model.flrgs:
+            for r in self.flrgs:
                 tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
         return tmp
pyFTS/models/hofts.py

@@ -9,6 +9,7 @@ import numpy as np
 from pyFTS.common import FuzzySet, FLR, fts, flrg
+from itertools import product


 class HighOrderFLRG(flrg.FLRG):
     """Conventional High Order Fuzzy Logical Relationship Group"""
     def __init__(self, order, **kwargs):
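The new itertools.product import supports the fuzzyfied input path changed below: when each lag of an input sample can activate several fuzzy sets, every candidate high-order rule LHS is one combination of sets across the lags. A simplified illustration (set names and the list shape are assumptions, not the library's exact internals):

```python
from itertools import product

# sets activated at each of 3 lags of one fuzzyfied sample
lags = [['A1', 'A2'], ['A2'], ['A2', 'A3']]

# one candidate rule LHS per combination, e.g. ('A1', 'A2', 'A2')
candidates = list(product(*lags))  # 2 * 1 * 2 = 4 combinations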
@@ -184,6 +185,8 @@ class HighOrderFTS(fts.FTS):
         explain = kwargs.get('explain', False)

+        fuzzyfied = kwargs.get('fuzzyfied', False)
+
         ret = []

         l = len(ndata) if not explain else self.max_lag + 1
@@ -191,26 +194,31 @@ class HighOrderFTS(fts.FTS):
         if l < self.max_lag:
             return ndata

-        for k in np.arange(self.max_lag, l+1):
+        for k in np.arange(self.max_lag, l):
+
+            sample = ndata[k - self.max_lag: k]

             if explain:
                 print("Fuzzyfication \n")

-            if not kwargs.get('fuzzyfied', False):
-                flrgs = self.generate_lhs_flrg(ndata[k - self.max_lag: k], explain)
+            if not fuzzyfied:
+                flrgs = self.generate_lhs_flrg(sample, explain)
             else:
-                flrgs = self.generate_lhs_flrg_fuzzyfied(ndata[k - self.max_lag: k], explain)
+                flrgs = self.generate_lhs_flrg_fuzzyfied(sample, explain)

             if explain:
                 print("Rules:\n")

-            tmp = []
+            midpoints = []
+            memberships = []
             for flrg in flrgs:

                 if flrg.get_key() not in self.flrgs:
                     if len(flrg.LHS) > 0:
                         mp = self.partitioner.sets[flrg.LHS[-1]].centroid
-                        tmp.append(mp)
+                        mv = self.partitioner.sets[flrg.LHS[-1]].membership(sample[-1]) if not fuzzyfied else None
+                        midpoints.append(mp)
+                        memberships.append(mv)

                         if explain:
                             print("\t {} -> {} (Naïve)\t Midpoint: {}\n".format(str(flrg.LHS), flrg.LHS[-1],
@@ -218,12 +226,15 @@ class HighOrderFTS(fts.FTS):
                 else:
                     flrg = self.flrgs[flrg.get_key()]
                     mp = flrg.get_midpoint(self.partitioner.sets)
-                    tmp.append(mp)
+                    mv = flrg.get_membership(sample, self.partitioner.sets) if not fuzzyfied else None
+                    midpoints.append(mp)
+                    memberships.append(mv)

                     if explain:
                         print("\t {} \t Midpoint: {}\n".format(str(flrg), mp))
+                        print("\t {} \t Membership: {}\n".format(str(flrg), mv))

-            final = np.nanmean(tmp)
+            final = np.dot(midpoints, memberships) if not fuzzyfied else np.nanmean(midpoints)
             ret.append(final)

             if explain:
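A sketch (with invented numbers) of the forecasting change in this hunk: instead of averaging the midpoints of all matched rules, the point forecast now weights each rule's midpoint by the membership of the current sample in that rule's LHS, falling back to the plain nanmean only when the input is already fuzzyfied and memberships are unavailable:

```python
import numpy as np

midpoints   = np.array([10.0, 20.0])  # consequent midpoints of the matched rules
memberships = np.array([0.75, 0.25])  # membership of the sample in each rule's LHS

np.nanmean(midpoints)           # 15.0 -> old behavior, every rule counts equally
np.dot(midpoints, memberships)  # 12.5 -> new behavior, stronger rules dominate
# (np.dot is a weighted average only when the memberships sum to 1)
```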
pyFTS/models/multivariate/cmvfts.py

@@ -27,7 +27,7 @@ class ClusteredMVFTS(mvfts.MVFTS):

         self.order = kwargs.get("order", 2)
         self.lags = kwargs.get("lags", None)
-        self.alpha_cut = kwargs.get('alpha_cut', 0.25)
+        self.alpha_cut = kwargs.get('alpha_cut', 0.0)

         self.shortname = "ClusteredMVFTS"
         self.name = "Clustered Multivariate FTS"
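What the alpha_cut default change means in practice: during fuzzification, sets whose membership falls below alpha_cut are discarded, so lowering the default from 0.25 to 0.0 keeps every set with nonzero membership and leaves fewer samples without any matching rule. A simplified stand-in for common.fuzzyfy_instance_clustered (the set names and values here are made up):

```python
memberships = {'c0': 0.05, 'c1': 0.60, 'c2': 0.35}  # cluster memberships of one sample

def fuzzyfy(memberships, alpha_cut):
    # keep only the sets whose membership clears the alpha-cut threshold
    return [s for s, mv in memberships.items() if mv > alpha_cut]

fuzzyfy(memberships, 0.25)  # ['c1', 'c2']        -> old default drops weak matches
fuzzyfy(memberships, 0.0)   # ['c0', 'c1', 'c2']  -> new default keeps them all
```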
@@ -38,7 +38,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
         ndata = []
         for index, row in data.iterrows():
             data_point = self.format_data(row)
-            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, alpha_cut=self.alpha_cut))
+            ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
+                                                           alpha_cut=self.alpha_cut))

         return ndata
pyFTS/tests/multivariate.py

@@ -14,14 +14,7 @@ import os
 os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
 os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
-#'''
-data = SONDA.get_dataframe()
-
-data = data[['datahora','glo_avg']]
-
-data = data[~(np.isnan(data['glo_avg']) | np.equal(data['glo_avg'], 0.0))]
-
-train = data.iloc[:1500000]
-test = data.iloc[1500000:]

 from pyFTS.models.multivariate import common, variable, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
@@ -96,12 +89,14 @@ from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, g
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime

-dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
+dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')

-dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
+dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
+dataset['value'] = dataset['Daily-Max API']

-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
+train_mv = dataset.iloc[:732]
+test_mv = dataset.iloc[732:]

 sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']}
@@ -109,26 +104,36 @@ vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.Ti
                            data=train_mv, partitioner_specific=sp)

-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')
+print(vday.partitioner)
+
+sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
+
+vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp)

 vvalue = variable.Variable("Pollution", data_label="value", alias='value',
-                           partitioner=Entropy.EntropyPartitioner, npart=35, data_type=np.float64,
+                           partitioner=Grid.GridPartitioner, npart=35,
                            data=train_mv)

-fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
+fs = grid.GridCluster(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue)

-print(len(fs.sets))
-
 #model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
-                              partitioner=fs)
+model = cmvfts.ClusteredMVFTS(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue,
+                              partitioner=fs, knn=5, order=2)

 model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077')
-#'''
-#print(model)
+
+print(len(fs.sets))
+
+from pyFTS.benchmarks import Measures
+print(Measures.get_point_statistics(test_mv, model))
+
+#print(model)

 '''
 def fun(x):
     return (x, x % 2)
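Usage note on the new benchmark call (hedged: the output order below reflects pyFTS.benchmarks.Measures as I read it, and is an assumption rather than something stated in this diff): get_point_statistics evaluates the fitted model on the held-out data and returns the point-forecast metrics RMSE, SMAPE and Theil's U, which is what ties the cmvfts changes to the Measures fixes above.

```python
# assumes `model` fitted and `test_mv` defined as in the script above
rmse, smape, u = Measures.get_point_statistics(test_mv, model)
print("RMSE: {}, SMAPE: {}, U: {}".format(rmse, smape, u))
```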