Improvements for numerical stability
Parent: de82af1b9d · Commit: 35898f338a
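The whole commit follows one pattern: Python's built-in sum and the plain NumPy aggregations (np.sum, np.mean, np.percentile) return NaN as soon as a single NaN enters the data, so one missing value poisons an entire error metric or forecast. The NaN-aware counterparts (np.nansum, np.nanmean, np.nanpercentile) ignore missing entries instead. A minimal sketch of the difference, plain NumPy, independent of pyFTS:

import numpy as np

values = [0.2, np.nan, 0.5]
print(np.sum(values))                # nan: one NaN contaminates the total
print(np.nansum(values))             # 0.7: NaN entries are treated as zero
print(np.percentile(values, 50))     # nan
print(np.nanpercentile(values, 50))  # 0.35: NaNs are dropped before ranking

Note that np.nansum returns 0.0 even for an all-NaN input, so normalization terms such as norm = np.nansum(memberships) can still be zero; the if norm == 0 guards kept in pwfts.py below remain necessary.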
@@ -89,7 +89,7 @@ def smape(targets, forecasts, type=2):
     elif type == 2:
         return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
+        return np.nansum(np.abs(forecasts - targets)) / np.nansum(forecasts + targets)


 def mape_interval(targets, forecasts):
@@ -124,7 +124,7 @@ def UStatistic(targets, forecasts):
     for k in np.arange(0, l - 1):
         y.append(np.subtract(forecasts[k], targets[k]) ** 2)
         naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
-    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
+    return np.sqrt(np.divide(np.nansum(y), np.nansum(naive)))


 def TheilsInequality(targets, forecasts):
@@ -137,9 +137,9 @@ def TheilsInequality(targets, forecasts):
     """
     res = targets - forecasts
     t = len(res)
-    us = np.sqrt(sum([u ** 2 for u in res]))
-    ys = np.sqrt(sum([y ** 2 for y in targets]))
-    fs = np.sqrt(sum([f ** 2 for f in forecasts]))
+    us = np.sqrt(np.nansum([u ** 2 for u in res]))
+    ys = np.sqrt(np.nansum([y ** 2 for y in targets]))
+    fs = np.sqrt(np.nansum([f ** 2 for f in forecasts]))
     return us / (ys + fs)


@@ -255,12 +255,12 @@ def brier_score(targets, densities):
         try:
             v = d.bin_index.find_le(targets[ct])

-            score = sum([d.density(k) ** 2 for k in d.bins if k != v])
+            score = np.nansum([d.density(k) ** 2 for k in d.bins if k != v])
             score += (d.density(v) - 1) ** 2
             ret.append(score)
         except ValueError as ex:
-            ret.append(sum([d.density(k) ** 2 for k in d.bins]))
-    return sum(ret) / len(ret)
+            ret.append(np.nansum([d.density(k) ** 2 for k in d.bins]))
+    return np.nansum(ret) / len(ret)


 def logarithm_score(targets, densities):
@@ -301,7 +301,7 @@ def crps(targets, densities):

     n = len(densities)
     for ct, df in enumerate(densities):
-        _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
+        _crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])

     return _crps / n

@@ -419,6 +419,8 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = model.predict(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
@@ -436,6 +438,8 @@ def get_interval_statistics(data, model, **kwargs):
         start = model.max_lag + steps_ahead - 1
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
         ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
         ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
@@ -502,6 +506,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))
@@ -516,6 +522,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _e1 = time.time()

         start = model.max_lag + steps_ahead
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[start:-1:skip], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
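A quick way to see what the type-3 SMAPE change above buys, applying the formula directly with plain NumPy (toy arrays, not part of the commit):

import numpy as np

targets = np.array([10.0, 12.0, np.nan, 11.0])
forecasts = np.array([9.5, 12.5, 13.0, 10.0])

old = np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
new = np.nansum(np.abs(forecasts - targets)) / np.nansum(forecasts + targets)
print(old)  # nan: the single missing target propagates through both sums
print(new)  # ~0.0308: the pair containing the NaN is simply left out

The same reasoning applies to UStatistic and TheilsInequality: squared-error terms containing NaN are dropped rather than propagated.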
@@ -81,6 +81,105 @@ def get_benchmark_probabilistic_methods():
     return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]


+def multivariate_sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
+    from pyFTS.models.multivariate import common, variable, mvfts
+
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
+    distributed = __pop('distributed', False, kwargs)
+
+    variables = __pop('variables', {}, kwargs)
+
+    target_variable = __pop('target_variable', '', kwargs)
+
+    type = kwargs.get("type", 'point')
+
+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+
+    steps_ahead = [k for k in steps_ahead]
+
+    fts_methods = __pop('methods', [], kwargs)
+
+    if fts_methods is not None:
+        methods_parameters = __pop('methods_parameters', None, kwargs)
+
+    if type == 'point':
+        experiment_method = mv_run_point2
+        synthesis_method = process_point_jobs2
+    elif type == 'interval':
+        experiment_method = mv_run_interval2
+        synthesis_method = process_interval_jobs2
+    elif type == 'distribution':
+        experiment_method = mv_run_probabilistic2
+        synthesis_method = process_probabilistic_jobs2
+    else:
+        raise ValueError("Type parameter has an unknown value!")
+
+    if distributed:
+        import pyFTS.distributed.dispy as dispy
+
+        nodes = kwargs.get("nodes", ['127.0.0.1'])
+        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)
+
+    inc = __pop("inc", 0.1, kwargs)
+
+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
+    jobs = []
+    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
+        for id, fts_method in enumerate(fts_methods):
+
+            kwargs['steps_ahead'] = max(steps_ahead)
+            parameters = {}
+            if methods_parameters is not None:
+                parameters = methods_parameters[id]
+
+            vars = []
+            tvar = None
+
+            for key, value in variables.items():
+                var = variable.Variable(key, data=train, **value)
+                vars.append(var)
+                if key == target_variable:
+                    tvar = var
+
+            model = fts_method(explanatory_variables=vars, target_variable=tvar,
+                               **parameters)
+
+            if not distributed:
+                try:
+                    job = experiment_method(model, train, test, ct, **kwargs)
+                    synthesis_method(dataset, tag, job, conn)
+                except Exception as ex:
+                    print('EXCEPTION! ', fts_method)
+                    traceback.print_exc()
+            else:
+                job = cluster.submit(model, train, test, ct, **kwargs)
+                job.id = id
+                jobs.append(job)
+
+    if distributed:
+        for job in jobs:
+            job()
+            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
+            else:
+                print("status", job.status)
+                print("result", job.result)
+                print("stdout", job.stdout)
+                print("stderr", job.exception)
+
+        cluster.wait()  # wait for all jobs to finish
+        dispy.stop_dispy_cluster(cluster, http_server)
+
+    conn.close()
+
+
 def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
     tag = __pop('tag', None, kwargs)
     dataset = __pop('dataset', None, kwargs)
@@ -630,6 +729,63 @@ def __build_model(fts_method, order, parameters, partitioner_method, partitions,
     return mfts, pttr


+def mv_run_point2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _start = time.time()
+        _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        forecasts = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_point_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], forecasts)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):

     import time
@@ -698,6 +854,67 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
     return ret


+def mv_run_interval2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _start = time.time()
+        metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
+               'pinball05': metrics[3], 'pinball25': metrics[4], 'pinball75': metrics[5], 'pinball95': metrics[6],
+               'winkler05': metrics[7], 'winkler25': metrics[8],
+               'window': window_key, 'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        intervals = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_interval_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], intervals)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     from pyFTS.models import hofts,ifts,pwfts
@@ -760,6 +977,63 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
     return ret


+def mv_run_probabilistic2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+        times += _t1
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'crps': _crps1, 'brier': _brier, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        distributions = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_distribution_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], distributions)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     import numpy as np
@@ -202,6 +202,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
     scalarMap = cm.ScalarMappable(norm=normal, cmap=cmap)

     for ct in np.arange(1, int(lq / 2) + 1):
+        try:
             y = [[data[start_at], data[start_at]]]
             for pd in probabilitydist:
                 qts = pd.quantile([qt[ct - 1], qt[-ct]])
@@ -209,6 +210,8 @@ def plot_distribution2(probabilitydist, data, **kwargs):

             ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
                             facecolor=scalarMap.to_rgba(ct / lq))
+        except:
+            pass

     if kwargs.get('median',True):
         y = [data[start_at]]
@@ -110,7 +110,7 @@ class EnsembleFTS(fts.FTS):
             ret = np.nanpercentile(forecasts, 50)
         elif self.point_method == 'quantile':
             alpha = kwargs.get("alpha",0.05)
-            ret = np.percentile(forecasts, alpha*100)
+            ret = np.nanpercentile(forecasts, alpha*100)
         elif self.point_method == 'exponential':
             l = len(self.models)
             if l == 1:
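The 'quantile' point method fix above is the same idea in miniature: np.percentile returns NaN whenever any ensemble member produced NaN, while np.nanpercentile ranks only the valid members. A toy sketch (hypothetical member forecasts, not pyFTS API):

import numpy as np

forecasts = [10.1, np.nan, 9.8, 10.4]
alpha = 0.05
print(np.percentile(forecasts, alpha * 100))     # nan
print(np.nanpercentile(forecasts, alpha * 100))  # ~9.83: 5th percentile of the valid members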
@@ -80,9 +80,9 @@ class IntervalFTS(hofts.HighOrderFTS):
                 affected_flrgs_memberships.append(mv)

             # generate the interval
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])

         return ret
@@ -165,9 +165,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
                 affected_flrgs_memberships.append(mv)

             # generate the interval
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])

         return ret
@@ -43,7 +43,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
                 for fset, mv in val:
                     num.append(self.sets[fset].centroid * mv)
                     den.append(mv)
-                ret.append(np.sum(num) / np.sum(den))
+                ret.append(np.nansum(num) / np.nansum(den))
             elif mode == 'both':
                 num = np.mean([self.sets[fset].centroid for fset in val])
                 ret.append(num)
@@ -53,7 +53,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
                 for fset, mv in enumerate(val):
                     num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
                     den.append(mv)
-                ret.append(np.sum(num) / np.sum(den))
+                ret.append(np.nansum(num) / np.nansum(den))
             else:
                 raise Exception('Unknown deffuzyfication mode')
@@ -152,7 +152,7 @@ class MVFTS(fts.FTS):
             mv = np.array(mvs)
             mp = np.array(mps)

-            ret.append(np.dot(mv,mp.T)/np.sum(mv))
+            ret.append(np.dot(mv,mp.T)/np.nansum(mv))

         ret = self.target_variable.apply_inverse_transformations(ret,
                                                                  params=data[self.target_variable.data_label].values)
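The MVFTS point forecast above is a membership-weighted average of rule midpoints. One caveat is worth spelling out with toy numbers (mv = memberships, mp = midpoints; illustrative only, not pyFTS API):

import numpy as np

mv = np.array([0.6, np.nan, 0.4])  # memberships of the affected rules
mp = np.array([10.0, 11.0, 12.0])  # midpoints of the affected rules

print(np.dot(mv, mp.T) / np.sum(mv))     # nan
print(np.dot(mv, mp.T) / np.nansum(mv))  # still nan: np.dot propagates the NaN in the numerator

So np.nansum hardens only the denominator here; a fully NaN-proof version of this line would also need a NaN-aware numerator, for example np.nansum(mv * mp).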
@@ -73,21 +73,21 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def get_midpoint(self, sets):
         '''Return the expectation of the PWFLRG, the weighted sum'''
         if self.midpoint is None:
-            self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
+            self.midpoint = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
                                              for s in self.RHS.keys()]))

         return self.midpoint

     def get_upper(self, sets):
         if self.upper is None:
-            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
+            self.upper = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
                                           for s in self.RHS.keys()]))

         return self.upper

     def get_lower(self, sets):
         if self.lower is None:
-            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
+            self.lower = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
                                           for s in self.RHS.keys()]))

         return self.lower
@@ -159,7 +159,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                     mvs.append(mv)

-                tmp_fq = sum([lhs_mv * kk for kk in mvs if kk > 0])
+                tmp_fq = np.nansum([lhs_mv * kk for kk in mvs if kk > 0])

                 self.global_frequency_count += tmp_fq

@@ -226,7 +226,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                     mvs.append(mv)

-                tmp_fq = sum([lhs_mv*kk for kk in mvs if kk > 0])
+                tmp_fq = np.nansum([lhs_mv*kk for kk in mvs if kk > 0])

                 self.global_frequency_count += tmp_fq

@@ -268,7 +268,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         else:
             if len(flrg.LHS) > 0:
                 pi = 1 / len(flrg.LHS)
-                ret = sum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
+                ret = np.nansum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
             else:
                 ret = np.nan
         return ret
@@ -282,10 +282,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 _set = self.partitioner.sets[s]
                 tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
                 cond.append(tmp)
-            ret = sum(np.array(cond))
+            ret = np.nansum(np.array(cond))
         else:
             pi = 1 / len(flrg.LHS)
-            ret = sum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
+            ret = np.nansum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
         return ret

     def get_upper(self, flrg):
@@ -307,12 +307,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def forecast(self, data, **kwargs):
         method = kwargs.get('method','heuristic')

-        l = len(data)
+        l = len(data)+1

         ret = []

-        for k in np.arange(self.max_lag - 1, l):
-            sample = data[k - (self.max_lag - 1): k + 1]
+        for k in np.arange(self.max_lag, l):
+            sample = data[k - self.max_lag: k]

             if method == 'heuristic':
                 ret.append(self.point_heuristic(sample, **kwargs))
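The loop-bound rewrite above changes how the lag window is sliced: the old form indexes the window as the max_lag values ending at position k inclusive, the new form as the max_lag values strictly before k, with the range shifted so that both enumerate the same windows. A standalone check of that equivalence (plain Python/NumPy, independent of pyFTS):

import numpy as np

data = [10, 20, 30, 40, 50]
max_lag = 2

# old bounds: k in [max_lag - 1, len(data)), sample = data[k - (max_lag - 1) : k + 1]
old = [data[k - (max_lag - 1): k + 1] for k in np.arange(max_lag - 1, len(data))]
# new bounds: k in [max_lag, len(data) + 1), sample = data[k - max_lag : k]
new = [data[k - max_lag: k] for k in np.arange(max_lag, len(data) + 1)]

print(old == new)  # True: same windows either way

The payoff is readability: with the new bounds, sample is unambiguously the history before index k, and k names the position being forecast rather than the last observed one.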
@@ -360,9 +360,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             mp.append(norm * self.get_midpoint(flrg))
             norms.append(norm)

-        norm = sum(norms)
+        norm = np.nansum(norms)

-        final = sum(mp) / norm if norm != 0 else 0
+        final = np.nansum(mp) / norm if norm != 0 else 0

         if explain:
             print("Deffuzyfied value: {} \n".format(final))
@@ -431,12 +431,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             norms.append(norm)

         # generate the interval
-        norm = sum(norms)
+        norm = np.nansum(norms)
         if norm == 0:
             return [0, 0]
         else:
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             return [lo_, up_]

     def forecast_distribution(self, ndata, **kwargs):
@@ -496,7 +496,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     else:
                         num.append(0.0)
                         den.append(0.000000001)
-                pf = sum(num) / sum(den)
+                pf = np.nansum(num) / np.nansum(den)

                 dist.set(bin, pf)
@@ -202,9 +202,9 @@ class Partitioner(object):
             raise Exception('Unknown deffuzyfication mode')

         if mode in ('both','vector'):
-            return np.sum(num) / np.sum(den)
+            return np.nansum(num) / np.nansum(den)
         else:
-            return np.mean(num)
+            return np.nanmean(num)

     def check_bounds(self, data):
         """
@@ -209,11 +209,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                k = self.bin_index.find_ge(val)
-                ret.append(self.cdf[k])
+                try:
+                    k = self.bin_index.find_ge(val)
+                    ret.append(self.cdf[k])
+                except:
+                    ret.append(np.nan)
         else:
-            k = self.bin_index.find_ge(values)
-            return self.cdf[values]
+            try:
+                k = self.bin_index.find_ge(values)
+                return self.cdf[values]
+            except:
+                return np.nan

     def quantile(self, values):
         """
@@ -229,11 +235,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                k = self.quantile_index.find_ge(val)
-                ret.append(self.qtl[str(k)][0])
+                try:
+                    k = self.quantile_index.find_ge(val)
+                    ret.append(self.qtl[str(k)][0])
+                except:
+                    ret.append(np.nan)
         else:
-            k = self.quantile_index.find_ge(values)
-            ret = self.qtl[str(k)]
+            try:
+                k = self.quantile_index.find_ge(values)
+                ret = self.qtl[str(k)]
+            except:
+                return np.nan

         return ret

@@ -243,7 +255,7 @@ class ProbabilityDistribution(object):

         :return: the entropy of the probability distribution
         """
-        h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
+        h = -np.nansum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
         return h

@@ -255,7 +267,7 @@ class ProbabilityDistribution(object):

         :param q: a probabilistic.ProbabilityDistribution object
         :return: Cross entropy between this probability distribution and the given distribution
         """
-        h = -sum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
+        h = -np.nansum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
         return h

@@ -267,7 +279,7 @@ class ProbabilityDistribution(object):

         :param q: a probabilistic.ProbabilityDistribution object
         :return: Kullback-Leibler divergence
         """
-        h = sum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
+        h = np.nansum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
                  for k in self.bins])
         return h
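In the cross-entropy and KL-divergence hunks above, the guard tests only self.distribution[k] > 0; if the other distribution q carries a NaN bin, the term p * log(q) is NaN and the built-in sum turns the whole score into NaN, whereas np.nansum just skips that term. A toy check with hypothetical bin probabilities:

import numpy as np

p = [0.5, 0.25, 0.25]
q = [0.5, np.nan, 0.25]

terms = [p[k] * np.log(q[k]) if p[k] > 0 else 0 for k in range(len(p))]
print(-sum(terms))        # nan: the NaN bin of q poisons the total
print(-np.nansum(terms))  # ~0.69: computed over the remaining bins

np.nansum does not rescue np.log(0) = -inf terms, though; a zero bin in q still drives the score to infinity.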
@@ -7,13 +7,12 @@ from pyFTS.partitioners import Grid, Simple, Entropy
 from pyFTS.common import Util

 from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
+from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.common import Util as cUtil

 from pyFTS.models import pwfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime

-
 from pyFTS.models.multivariate import common, variable, mvfts
 from pyFTS.partitioners import Grid
 from pyFTS.common import Membership
@@ -22,54 +21,45 @@ from pyFTS.common import Membership
 import os


-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_uv = dataset['glo_avg'].values[:24505]
-test_uv = dataset['glo_avg'].values[24505:]
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May',
-                                                      'Jun','Jul', 'Aug','Sep','Oct',
-                                                      'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
+from pyFTS.data import SONDA, Malaysia
+
+df = Malaysia.get_dataframe()
+df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')

 sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}

-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
+variables = {
+    "Hour": dict(data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                 partitioner_specific=sp, alpha_cut=.3),
+    "Temperature": dict(data_label="temperature", alias='temp',
+                        partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                        alpha_cut=.25),
+    "Load": dict(data_label="load", alias='load',
+                 partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                 alpha_cut=.25)
+}

-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
-                              order=2, knn=1)
-#model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
+methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+#methods = [granular.GranularWMVFTS]

-model.fit(train_mv)
-generator = lambda x : x + pd.to_timedelta(1, unit='h')
-forecasts = model.predict(test_mv.iloc[:3], steps_ahead=48, generators={'data': generator} )
+parameters = [
+    {}, {},
+    dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
+         order=1, knn=1)
+]

-print(forecasts)
+bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25,
+                                              methods=methods,
+                                              methods_parameters=parameters,
+                                              variables=variables,
+                                              target_variable='Load',
+                                              type='interval',
+                                              steps_ahead=[1],
+                                              distributed=False,
+                                              nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
+                                              file="experiments.db", dataset='Malaysia',
+                                              tag="experiments"
+                                              )

 '''
@@ -14,71 +14,68 @@ from pyFTS.models import pwfts,hofts,ifts
 from pyFTS.models.multivariate import granular, grid
 from pyFTS.partitioners import Grid, Util as pUtil

-from pyFTS.models.multivariate import common, variable, mvfts
+from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 from pyFTS.common import Membership

-from pyFTS.data import SONDA, Malaysia
+def sample_by_hour(data):
+    return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]

-df = Malaysia.get_dataframe()
-df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
+def sample_date_by_hour(data):
+    return [data[k] for k in np.arange(0,len(data),60)]

-train_mv = df.iloc[:8000]
-test_mv = df.iloc[8000:10000]
+from pyFTS.data import SONDA

-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]

-vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')
+
+
+var = {
+    'datahora': sample_date_by_hour(sonda['datahora'].values),
+    'glo_avg': sample_by_hour(sonda['glo_avg'].values),
+    'ws_10m': sample_by_hour(sonda['ws_10m'].values),
+}
+
+df = pd.DataFrame(var)
+
+train_mv = df.iloc[:9000]
+test_mv = df.iloc[9000:10000]
+
+fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3])
+
+sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']}
+
+vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp, alpha_cut=.3)
-vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
-vload = variable.Variable("Load", data_label="load", alias='load',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)

-order = 1
-knn = 1
+vmonth.partitioner.plot(ax[0])

-model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
+vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind',
+                         partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
+                         data=train_mv, alpha_cut=.25)
+
+vwin.partitioner.plot(ax[1])
+
+plt.tight_layout()
+
+order = 3
+knn = 2
+
+model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin,
                                 fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
                                 order=order, knn=knn)

 model.fit(train_mv)

+fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
+ax.plot(test_mv['ws_10m'].values[:100], label='original')

-temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=1)
-temp_generator.fit(train_mv['temperature'].values)
+forecasts = model.predict(test_mv.iloc[:100], type='distribution')

+#print(model)
+Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax)

-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-#temp_generator = lambda x : x
-
-generators = {'time': time_generator, 'temperature': temp_generator}
-
-#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
-#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
-print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
-
-
-#
-
-#forecasts1 = model.predict(test_mv, type='multivariate')
-#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
-#                           steps_ahead=200)


 '''
 from pyFTS.data import Enrollments
 train = Enrollments.get_data()

 fs = Grid.GridPartitioner(data=train, npart=10) #, transformation=tdiff)

 model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)
 model.fit(train)
 print(model)

 print(model.predict(train))
 '''