Improvements for numerical stability

Petrônio Cândido 2019-06-22 09:20:53 -03:00
parent de82af1b9d
commit 35898f338a
12 changed files with 432 additions and 148 deletions

View File

@@ -89,7 +89,7 @@ def smape(targets, forecasts, type=2):
    elif type == 2:
        return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
    else:
-       return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
+       return np.nansum(np.abs(forecasts - targets)) / np.nansum(forecasts + targets)

def mape_interval(targets, forecasts):

@@ -124,7 +124,7 @@ def UStatistic(targets, forecasts):
    for k in np.arange(0, l - 1):
        y.append(np.subtract(forecasts[k], targets[k]) ** 2)
        naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
-   return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
+   return np.sqrt(np.divide(np.nansum(y), np.nansum(naive)))

def TheilsInequality(targets, forecasts):

@@ -137,9 +137,9 @@ def TheilsInequality(targets, forecasts):
    """
    res = targets - forecasts
    t = len(res)
-   us = np.sqrt(sum([u ** 2 for u in res]))
-   ys = np.sqrt(sum([y ** 2 for y in targets]))
-   fs = np.sqrt(sum([f ** 2 for f in forecasts]))
+   us = np.sqrt(np.nansum([u ** 2 for u in res]))
+   ys = np.sqrt(np.nansum([y ** 2 for y in targets]))
+   fs = np.sqrt(np.nansum([f ** 2 for f in forecasts]))
    return us / (ys + fs)

@@ -255,12 +255,12 @@ def brier_score(targets, densities):
        try:
            v = d.bin_index.find_le(targets[ct])
-           score = sum([d.density(k) ** 2 for k in d.bins if k != v])
+           score = np.nansum([d.density(k) ** 2 for k in d.bins if k != v])
            score += (d.density(v) - 1) ** 2
            ret.append(score)
        except ValueError as ex:
-           ret.append(sum([d.density(k) ** 2 for k in d.bins]))
-   return sum(ret) / len(ret)
+           ret.append(np.nansum([d.density(k) ** 2 for k in d.bins]))
+   return np.nansum(ret) / len(ret)

def logarithm_score(targets, densities):

@@ -301,7 +301,7 @@ def crps(targets, densities):
    n = len(densities)
    for ct, df in enumerate(densities):
-       _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
+       _crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
    return _crps / n
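The recurring change in this file is the swap from the built-in sum / np.sum to np.nansum (and np.nanmean, np.nanpercentile elsewhere in this commit), so that a single NaN produced by a failed forecast no longer poisons the whole metric. A minimal sketch of the difference, using made-up residuals:

import numpy as np

residuals = np.array([0.5, np.nan, 1.5, 2.0])  # one forecast failed and yielded NaN

print(np.sum(residuals))      # nan  -> the aggregate metric becomes useless
print(np.nansum(residuals))   # 4.0  -> NaN entries are skipped
print(np.nanmean(residuals))  # 1.333... -> mean over the valid entries only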
@@ -419,6 +419,8 @@ def get_interval_statistics(data, model, **kwargs):
        forecasts = model.predict(data, **kwargs)
        ret.append(round(sharpness(forecasts), 2))
        ret.append(round(resolution(forecasts), 2))
+       if model.is_multivariate:
+           data = data[model.target_variable.data_label].values
        ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))

@@ -436,6 +438,8 @@ def get_interval_statistics(data, model, **kwargs):
        start = model.max_lag + steps_ahead - 1
        ret.append(round(sharpness(forecasts), 2))
        ret.append(round(resolution(forecasts), 2))
+       if model.is_multivariate:
+           data = data[model.target_variable.data_label].values
        ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
        ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
        ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))

@@ -502,6 +506,8 @@ def get_distribution_statistics(data, model, **kwargs):
        _s1 = time.time()
        forecasts = model.predict(data, **kwargs)
        _e1 = time.time()
+       if model.is_multivariate:
+           data = data[model.target_variable.data_label].values
        ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
        ret.append(round(_e1 - _s1, 3))
        ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))

@@ -516,6 +522,8 @@ def get_distribution_statistics(data, model, **kwargs):
        _e1 = time.time()
        start = model.max_lag + steps_ahead
+       if model.is_multivariate:
+           data = data[model.target_variable.data_label].values
        ret.append(round(crps(data[start:-1:skip], forecasts), 3))
        ret.append(round(_e1 - _s1, 3))
        ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
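The `if model.is_multivariate:` guards added above all do the same thing: for multivariate models the test data arrives as a pandas DataFrame, so the target column must be pulled out as a plain array before it is compared against the forecasts. A rough, self-contained illustration of that step (the column name and lag value are made up; in the real code they come from model.target_variable.data_label and model.max_lag):

import pandas as pd

data = pd.DataFrame({'temperature': [21.0, 22.5, 23.1], 'load': [100.0, 110.0, 120.5]})
target_label = 'load'   # stand-in for model.target_variable.data_label
max_lag = 1             # stand-in for model.max_lag

targets = data[target_label].values   # 1-D numpy array aligned with the forecasts
print(targets[max_lag:])              # [110.  120.5]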

View File

@@ -81,6 +81,105 @@ def get_benchmark_probabilistic_methods():
    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
def multivariate_sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
    from pyFTS.models.multivariate import common, variable, mvfts

    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

    distributed = __pop('distributed', False, kwargs)

    variables = __pop('variables', {}, kwargs)
    target_variable = __pop('target_variable', '', kwargs)

    type = kwargs.get("type", 'point')

    steps_ahead = __pop('steps_ahead', [1], kwargs)
    steps_ahead = [k for k in steps_ahead]

    fts_methods = __pop('methods', [], kwargs)

    if fts_methods is not None:
        methods_parameters = __pop('methods_parameters', None, kwargs)

    if type == 'point':
        experiment_method = mv_run_point2
        synthesis_method = process_point_jobs2
    elif type == 'interval':
        experiment_method = mv_run_interval2
        synthesis_method = process_interval_jobs2
    elif type == 'distribution':
        experiment_method = mv_run_probabilistic2
        synthesis_method = process_probabilistic_jobs2
    else:
        raise ValueError("Type parameter has a unkown value!")

    if distributed:
        import pyFTS.distributed.dispy as dispy

        nodes = kwargs.get("nodes", ['127.0.0.1'])
        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)

    inc = __pop("inc", 0.1, kwargs)

    file = kwargs.get('file', "benchmarks.db")
    conn = bUtil.open_benchmark_db(file)

    jobs = []
    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
        for id, fts_method in enumerate(fts_methods):
            kwargs['steps_ahead'] = max(steps_ahead)
            parameters = {}
            if methods_parameters is not None:
                parameters = methods_parameters[id]

            vars = []
            tvar = None
            for key, value in variables.items():
                var = variable.Variable(key, data=train, **value)
                vars.append(var)
                if key == target_variable:
                    tvar = var

            model = fts_method(explanatory_variables=vars, target_variable=tvar,
                               **parameters)

            if not distributed:
                try:
                    job = experiment_method(model, train, test, ct, **kwargs)
                    synthesis_method(dataset, tag, job, conn)
                except Exception as ex:
                    print('EXCEPTION! ', fts_method)
                    traceback.print_exc()
            else:
                job = cluster.submit(model, train, test, ct, **kwargs)
                job.id = id
                jobs.append(job)

    if distributed:
        for job in jobs:
            job()
            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
                tmp = job.result
                synthesis_method(dataset, tag, tmp, conn)
            else:
                print("status", job.status)
                print("result", job.result)
                print("stdout", job.stdout)
                print("stderr", job.exception)

        cluster.wait()  # wait for all jobs to finish

        dispy.stop_dispy_cluster(cluster, http_server)

    conn.close()
def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

@@ -630,6 +729,63 @@ def __build_model(fts_method, order, parameters, partitioner_method, partitions,
    return mfts, pttr
def mv_run_point2(mfts, train_data, test_data, window_key=None, **kwargs):
    import time
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.multivariate import mvfts, wmvfts, granular
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks

    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]

    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]

    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]

    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    if steps_ahead == 1:
        _start = time.time()
        _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
        _end = time.time()
        times += _end - _start

        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
               'transformation': None,
               'size': len(mfts), 'time': times,
               'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
               'steps': steps_ahead, 'method': method}
    else:
        _start = time.time()
        forecasts = mfts.predict(test_data, **kwargs)
        _end = time.time()
        times += _end - _start

        eval = Measures.get_point_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], forecasts)

        for key in eval.keys():
            eval[key]["time"] = times
            eval[key]["method"] = method

        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
               'transformation': None,
               'size': len(mfts), 'time': times,
               'window': window_key, 'steps': steps_ahead, 'method': method,
               'ahead_results': eval
               }

    return ret
def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
    import time

@@ -698,6 +854,67 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
    return ret
def mv_run_interval2(mfts, train_data, test_data, window_key=None, **kwargs):
    import time
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.multivariate import mvfts, wmvfts, granular
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks

    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]

    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]

    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]

    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)
    parameters = kwargs.get('parameters', {})

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    if steps_ahead == 1:
        _start = time.time()
        metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
        _end = time.time()
        times += _end - _start

        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
               'transformation': None,
               'size': len(mfts), 'time': times,
               'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
               'time': times, 'pinball05': metrics[3], 'pinball25': metrics[4], 'pinball75': metrics[5], 'pinball95': metrics[6],
               'winkler05': metrics[7], 'winkler25': metrics[8],
               'window': window_key, 'steps': steps_ahead, 'method': method}
    else:
        _start = time.time()
        intervals = mfts.predict(test_data, **kwargs)
        _end = time.time()
        times += _end - _start

        eval = Measures.get_interval_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], intervals)

        for key in eval.keys():
            eval[key]["time"] = times
            eval[key]["method"] = method

        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
               'transformation': None,
               'size': len(mfts), 'time': times,
               'window': window_key, 'steps': steps_ahead, 'method': method,
               'ahead_results': eval
               }

    return ret
def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
    import time
    from pyFTS.models import hofts,ifts,pwfts

@@ -760,6 +977,63 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
    return ret
def mv_run_probabilistic2(mfts, train_data, test_data, window_key=None, **kwargs):
    import time
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.multivariate import mvfts, wmvfts, granular
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks

    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]

    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]

    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]

    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)
    parameters = kwargs.get('parameters', {})

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    if steps_ahead == 1:
        _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
        times += _t1

        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
               'transformation': None,
               'size': len(mfts), 'time': times,
               'crps': _crps1, 'brier': _brier, 'window': window_key,
               'steps': steps_ahead, 'method': method}
    else:
        _start = time.time()
        distributions = mfts.predict(test_data, **kwargs)
        _end = time.time()
        times += _end - _start

        eval = Measures.get_distribution_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], distributions)

        for key in eval.keys():
            eval[key]["time"] = times
            eval[key]["method"] = method

        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
               'transformation': None,
               'size': len(mfts), 'time': times,
               'window': window_key, 'steps': steps_ahead, 'method': method,
               'ahead_results': eval
               }

    return ret
def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
    import time
    import numpy as np

View File

@@ -202,6 +202,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
    scalarMap = cm.ScalarMappable(norm=normal, cmap=cmap)

    for ct in np.arange(1, int(lq / 2) + 1):
+       try:
            y = [[data[start_at], data[start_at]]]
            for pd in probabilitydist:
                qts = pd.quantile([qt[ct - 1], qt[-ct]])
@@ -209,6 +210,8 @@ def plot_distribution2(probabilitydist, data, **kwargs):
            ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
                            facecolor=scalarMap.to_rgba(ct / lq))
+       except:
+           pass

    if kwargs.get('median',True):
        y = [data[start_at]]

View File

@@ -110,7 +110,7 @@ class EnsembleFTS(fts.FTS):
            ret = np.nanpercentile(forecasts, 50)
        elif self.point_method == 'quantile':
            alpha = kwargs.get("alpha",0.05)
-           ret = np.percentile(forecasts, alpha*100)
+           ret = np.nanpercentile(forecasts, alpha*100)
        elif self.point_method == 'exponential':
            l = len(self.models)
            if l == 1:
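With the 'quantile' point method the ensemble now defuzzifies through np.nanpercentile, so members that returned NaN are ignored instead of propagating into the point forecast. A small sketch with invented member forecasts:

import numpy as np

member_forecasts = [10.2, np.nan, 9.8, 10.5, 11.0]  # one ensemble member failed
alpha = 0.05

print(np.percentile(member_forecasts, alpha * 100))     # nan
print(np.nanpercentile(member_forecasts, alpha * 100))  # ~9.86, the 5% quantile of the valid members only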

View File

@@ -80,9 +80,9 @@ class IntervalFTS(hofts.HighOrderFTS):
                affected_flrgs_memberships.append(mv)

            # generate the interval
-           norm = sum(affected_flrgs_memberships)
-           lo_ = sum(lo) / norm
-           up_ = sum(up) / norm
+           norm = np.nansum(affected_flrgs_memberships)
+           lo_ = np.nansum(lo) / norm
+           up_ = np.nansum(up) / norm
            ret.append([lo_, up_])

        return ret

@@ -165,9 +165,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
                affected_flrgs_memberships.append(mv)

            # generate the interval
-           norm = sum(affected_flrgs_memberships)
-           lo_ = sum(lo) / norm
-           up_ = sum(up) / norm
+           norm = np.nansum(affected_flrgs_memberships)
+           lo_ = np.nansum(lo) / norm
+           up_ = np.nansum(up) / norm
            ret.append([lo_, up_])

        return ret

View File

@@ -43,7 +43,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
            for fset, mv in val:
                num.append(self.sets[fset].centroid * mv)
                den.append(mv)
-           ret.append(np.sum(num) / np.sum(den))
+           ret.append(np.nansum(num) / np.nansum(den))
        elif mode == 'both':
            num = np.mean([self.sets[fset].centroid for fset in val])
            ret.append(num)

@@ -53,7 +53,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
            for fset, mv in enumerate(val):
                num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
                den.append(mv)
-           ret.append(np.sum(num) / np.sum(den))
+           ret.append(np.nansum(num) / np.nansum(den))
        else:
            raise Exception('Unknown deffuzyfication mode')

View File

@@ -152,7 +152,7 @@ class MVFTS(fts.FTS):
            mv = np.array(mvs)
            mp = np.array(mps)

-           ret.append(np.dot(mv,mp.T)/np.sum(mv))
+           ret.append(np.dot(mv,mp.T)/np.nansum(mv))

        ret = self.target_variable.apply_inverse_transformations(ret,
                                                                  params=data[self.target_variable.data_label].values)
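The MVFTS point forecast is a membership-weighted average of rule midpoints, so a NaN-safe denominator matters here. A toy illustration of the same expression, with invented memberships and midpoints:

import numpy as np

mv = np.array([0.2, 0.7, 0.1])     # memberships of the fired rules
mp = np.array([10.0, 12.0, 15.0])  # midpoints forecast by each rule

forecast = np.dot(mv, mp.T) / np.nansum(mv)
print(forecast)  # (0.2*10 + 0.7*12 + 0.1*15) / 1.0 = 11.9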

View File

@@ -73,21 +73,21 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
    def get_midpoint(self, sets):
        '''Return the expectation of the PWFLRG, the weighted sum'''
        if self.midpoint is None:
-           self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
-                                            for s in self.RHS.keys()]))
+           self.midpoint = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
+                                               for s in self.RHS.keys()]))
        return self.midpoint

    def get_upper(self, sets):
        if self.upper is None:
-           self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
-                                         for s in self.RHS.keys()]))
+           self.upper = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
+                                            for s in self.RHS.keys()]))
        return self.upper

    def get_lower(self, sets):
        if self.lower is None:
-           self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
-                                         for s in self.RHS.keys()]))
+           self.lower = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
+                                            for s in self.RHS.keys()]))
        return self.lower

@@ -159,7 +159,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                mvs.append(mv)

-           tmp_fq = sum([lhs_mv * kk for kk in mvs if kk > 0])
+           tmp_fq = np.nansum([lhs_mv * kk for kk in mvs if kk > 0])

            self.global_frequency_count += tmp_fq

@@ -226,7 +226,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                mvs.append(mv)

-           tmp_fq = sum([lhs_mv*kk for kk in mvs if kk > 0])
+           tmp_fq = np.nansum([lhs_mv*kk for kk in mvs if kk > 0])

            self.global_frequency_count += tmp_fq

@@ -268,7 +268,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
        else:
            if len(flrg.LHS) > 0:
                pi = 1 / len(flrg.LHS)
-               ret = sum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
+               ret = np.nansum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
            else:
                ret = np.nan
        return ret

@@ -282,10 +282,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                _set = self.partitioner.sets[s]
                tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
                cond.append(tmp)
-           ret = sum(np.array(cond))
+           ret = np.nansum(np.array(cond))
        else:
            pi = 1 / len(flrg.LHS)
-           ret = sum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
+           ret = np.nansum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
        return ret

    def get_upper(self, flrg):

@@ -307,12 +307,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
    def forecast(self, data, **kwargs):
        method = kwargs.get('method','heuristic')

-       l = len(data)
+       l = len(data)+1

        ret = []

-       for k in np.arange(self.max_lag - 1, l):
-           sample = data[k - (self.max_lag - 1): k + 1]
+       for k in np.arange(self.max_lag, l):
+           sample = data[k - self.max_lag: k]

            if method == 'heuristic':
                ret.append(self.point_heuristic(sample, **kwargs))
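The forecast loop is re-indexed so that each sample is the max_lag observations immediately preceding position k (the window ends at k, exclusive), and extending l to len(data)+1 keeps the window that ends at the last observation, so a forecast one step past the end of the series is still produced. A standalone sketch of the same indexing, assuming max_lag is 3:

import numpy as np

data = [10, 11, 12, 13, 14]
max_lag = 3

l = len(data) + 1
for k in np.arange(max_lag, l):
    sample = data[k - max_lag: k]   # the lagged window that conditions the forecast for step k
    print(k, sample)
# 3 [10, 11, 12]
# 4 [11, 12, 13]
# 5 [12, 13, 14]   <- window ending at the last observation: one-step-ahead forecast past the series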
@@ -360,9 +360,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
            mp.append(norm * self.get_midpoint(flrg))
            norms.append(norm)

-       norm = sum(norms)
+       norm = np.nansum(norms)

-       final = sum(mp) / norm if norm != 0 else 0
+       final = np.nansum(mp) / norm if norm != 0 else 0

        if explain:
            print("Deffuzyfied value: {} \n".format(final))

@@ -431,12 +431,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
            norms.append(norm)

        # generate the interval
-       norm = sum(norms)
+       norm = np.nansum(norms)
        if norm == 0:
            return [0, 0]
        else:
-           lo_ = sum(lo) / norm
-           up_ = sum(up) / norm
+           lo_ = np.nansum(lo) / norm
+           up_ = np.nansum(up) / norm
            return [lo_, up_]

    def forecast_distribution(self, ndata, **kwargs):

@@ -496,7 +496,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                else:
                    num.append(0.0)
                    den.append(0.000000001)
-           pf = sum(num) / sum(den)
+           pf = np.nansum(num) / np.nansum(den)

            dist.set(bin, pf)

View File

@@ -202,9 +202,9 @@ class Partitioner(object):
            raise Exception('Unknown deffuzyfication mode')

        if mode in ('both','vector'):
-           return np.sum(num) / np.sum(den)
+           return np.nansum(num) / np.nansum(den)
        else:
-           return np.mean(num)
+           return np.nanmean(num)

    def check_bounds(self, data):
        """

View File

@@ -209,11 +209,17 @@ class ProbabilityDistribution(object):
        if isinstance(values, list):
            ret = []
            for val in values:
+               try:
                    k = self.bin_index.find_ge(val)
                    ret.append(self.cdf[k])
+               except:
+                   ret.append(np.nan)
        else:
+           try:
                k = self.bin_index.find_ge(values)
                return self.cdf[values]
+           except:
+               return np.nan

    def quantile(self, values):
        """

@@ -229,11 +235,17 @@ class ProbabilityDistribution(object):
        if isinstance(values, list):
            ret = []
            for val in values:
+               try:
                    k = self.quantile_index.find_ge(val)
                    ret.append(self.qtl[str(k)][0])
+               except:
+                   ret.append(np.nan)
        else:
+           try:
                k = self.quantile_index.find_ge(values)
                ret = self.qtl[str(k)]
+           except:
+               return np.nan

        return ret
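Both cumulative() and quantile() now wrap the sorted-index lookup in try/except, since find_ge raises ValueError for values outside the binned support; returning np.nan lets callers such as the CRPS metric and the plotting helpers degrade gracefully instead of crashing. The same guard pattern in isolation (the find_ge stand-in below is an assumption, mimicking the behaviour of the real bin_index / quantile_index):

import numpy as np
from bisect import bisect_left

bins = [0.0, 0.5, 1.0]   # pretend these are the distribution's bins

def find_ge(value):
    # smallest bin >= value; raises ValueError like the real sorted index
    pos = bisect_left(bins, value)
    if pos == len(bins):
        raise ValueError('value above the last bin')
    return bins[pos]

def safe_lookup(value):
    try:
        return find_ge(value)
    except ValueError:
        return np.nan   # out-of-range queries yield NaN instead of an exception

print(safe_lookup(0.3))   # 0.5
print(safe_lookup(2.0))   # nan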
@@ -243,7 +255,7 @@ class ProbabilityDistribution(object):
        :return:the entropy of the probability distribution
        """
-       h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
-                 for k in self.bins])
+       h = -np.nansum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
+                       for k in self.bins])
        return h

@@ -255,7 +267,7 @@ class ProbabilityDistribution(object):
        :param q: a probabilistic.ProbabilityDistribution object
        :return: Cross entropy between this probability distribution and the given distribution
        """
-       h = -sum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
-                 for k in self.bins])
+       h = -np.nansum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
+                       for k in self.bins])
        return h

@@ -267,7 +279,7 @@ class ProbabilityDistribution(object):
        :param q: a probabilistic.ProbabilityDistribution object
        :return: Kullback-Leibler divergence
        """
-       h = sum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
-                for k in self.bins])
+       h = np.nansum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
+                      for k in self.bins])
        return h

View File

@@ -7,13 +7,12 @@ from pyFTS.partitioners import Grid, Simple, Entropy
from pyFTS.common import Util
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
+from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.common import Util as cUtil
from pyFTS.models import pwfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.partitioners import Grid
from pyFTS.common import Membership
@@ -22,54 +21,45 @@ from pyFTS.common import Membership

import os

-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_uv = dataset['glo_avg'].values[:24505]
-test_uv = dataset['glo_avg'].values[24505:]
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May',
-                                                      'Jun','Jul', 'Aug','Sep','Oct',
-                                                      'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
+from pyFTS.data import SONDA, Malaysia
+
+df = Malaysia.get_dataframe()
+df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')

sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}

-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
-
-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-
-fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
-
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
-                              order=2, knn=1)
-
-#model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
-
-model.fit(train_mv)
-
-generator = lambda x : x + pd.to_timedelta(1, unit='h')
-
-forecasts = model.predict(test_mv.iloc[:3], steps_ahead=48, generators={'data': generator} )
-
-print(forecasts)
+variables = {
+    "Hour": dict(data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                 partitioner_specific=sp, alpha_cut=.3),
+    "Temperature": dict(data_label="temperature", alias='temp',
+                        partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                        alpha_cut=.25),
+    "Load": dict(data_label="load", alias='load',
+                 partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                 alpha_cut=.25)
+}
+
+methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+#methods = [granular.GranularWMVFTS]
+
+parameters = [
+    {},{},
+    dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
+         order=1, knn=1)
+]
+
+bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25,
+                                              methods=methods,
+                                              methods_parameters=parameters,
+                                              variables=variables,
+                                              target_variable='Load',
+                                              type='interval',
+                                              steps_ahead=[1],
+                                              distributed=False,
+                                              nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
+                                              file="experiments.db", dataset='Malaysia',
+                                              tag="experiments"
+                                              )

'''

View File

@@ -14,71 +14,68 @@ from pyFTS.models import pwfts,hofts,ifts
from pyFTS.models.multivariate import granular, grid
from pyFTS.partitioners import Grid, Util as pUtil
-from pyFTS.models.multivariate import common, variable, mvfts
+from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.common import Membership

-from pyFTS.data import SONDA, Malaysia
-
-df = Malaysia.get_dataframe()
-df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
-
-train_mv = df.iloc[:8000]
-test_mv = df.iloc[8000:10000]
-
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp, alpha_cut=.3)
-
-vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
-
-vload = variable.Variable("Load", data_label="load", alias='load',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
-
-order = 1
-knn = 1
-
-model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
-                                fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
-                                order=order, knn=knn)
-
-model.fit(train_mv)
-
-temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=1)
-temp_generator.fit(train_mv['temperature'].values)
-
-#print(model)
-
-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-#temp_generator = lambda x : x
-
-generators = {'time': time_generator, 'temperature': temp_generator}
-
-#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
-#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
-print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
-#
-#forecasts1 = model.predict(test_mv, type='multivariate')
-#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
-#                           steps_ahead=200)
+def sample_by_hour(data):
+    return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
+
+def sample_date_by_hour(data):
+    return [data[k] for k in np.arange(0,len(data),60)]
+
+from pyFTS.data import SONDA
+
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
+
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+
+sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')
+
+var = {
+    'datahora': sample_date_by_hour(sonda['datahora'].values),
+    'glo_avg': sample_by_hour(sonda['glo_avg'].values),
+    'ws_10m': sample_by_hour(sonda['ws_10m'].values),
+}
+
+df = pd.DataFrame(var)
+
+train_mv = df.iloc[:9000]
+test_mv = df.iloc[9000:10000]
+
+fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3])
+
+sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']}
+
+vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp, alpha_cut=.3)
+
+vmonth.partitioner.plot(ax[0])
+
+vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind',
+                         partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
+                         data=train_mv, alpha_cut=.25)
+
+vwin.partitioner.plot(ax[1])
+
+plt.tight_layout()
+
+order = 3
+knn = 2
+
+model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin,
+                                fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
+                                order=order, knn=knn)
+
+model.fit(train_mv)
+
+fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
+
+ax.plot(test_mv['ws_10m'].values[:100], label='original')
+
+forecasts = model.predict(test_mv.iloc[:100], type='distribution')
+
+Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax)

'''
from pyFTS.data import Enrollments

train = Enrollments.get_data()

fs = Grid.GridPartitioner(data=train, npart=10) #, transformation=tdiff)

model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)
model.fit(train)

print(model)

print(model.predict(train))
'''