Improvements for numerical stability

Petrônio Cândido 2019-06-22 09:20:53 -03:00
parent de82af1b9d
commit 35898f338a
12 changed files with 432 additions and 148 deletions
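Note on the recurring pattern: this commit swaps Python's built-in sum() and NumPy's np.sum / np.mean / np.percentile for their NaN-aware counterparts (np.nansum, np.nanmean, np.nanpercentile), so a single NaN from a failed forecast or an empty fuzzy set no longer poisons a whole metric or defuzzification. A minimal sketch of the difference (not part of the diff):

import numpy as np

values = [0.3, np.nan, 0.7]                      # e.g. memberships where one rule yielded NaN
print(np.sum(values))                            # nan -- one NaN poisons the aggregate
print(np.nansum(values))                         # 1.0 -- NaN entries are ignored
print(np.nanmean([1.0, np.nan, 3.0]))            # 2.0
print(np.nanpercentile([1.0, np.nan, 9.0], 50))  # 5.0

One caveat: np.nansum of an all-NaN input is 0.0, so call sites that divide by a nansum still need the explicit zero guards kept below (e.g. in the pwfts changes).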

View File

@@ -89,7 +89,7 @@ def smape(targets, forecasts, type=2):
     elif type == 2:
         return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
+        return np.nansum(np.abs(forecasts - targets)) / np.nansum(forecasts + targets)

 def mape_interval(targets, forecasts):
@@ -124,7 +124,7 @@ def UStatistic(targets, forecasts):
     for k in np.arange(0, l - 1):
         y.append(np.subtract(forecasts[k], targets[k]) ** 2)
         naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
-    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
+    return np.sqrt(np.divide(np.nansum(y), np.nansum(naive)))

 def TheilsInequality(targets, forecasts):
@ -137,9 +137,9 @@ def TheilsInequality(targets, forecasts):
"""
res = targets - forecasts
t = len(res)
us = np.sqrt(sum([u ** 2 for u in res]))
ys = np.sqrt(sum([y ** 2 for y in targets]))
fs = np.sqrt(sum([f ** 2 for f in forecasts]))
us = np.sqrt(np.nansum([u ** 2 for u in res]))
ys = np.sqrt(np.nansum([y ** 2 for y in targets]))
fs = np.sqrt(np.nansum([f ** 2 for f in forecasts]))
return us / (ys + fs)
@@ -255,12 +255,12 @@ def brier_score(targets, densities):
         try:
             v = d.bin_index.find_le(targets[ct])
-            score = sum([d.density(k) ** 2 for k in d.bins if k != v])
+            score = np.nansum([d.density(k) ** 2 for k in d.bins if k != v])
             score += (d.density(v) - 1) ** 2
             ret.append(score)
         except ValueError as ex:
-            ret.append(sum([d.density(k) ** 2 for k in d.bins]))
-    return sum(ret) / len(ret)
+            ret.append(np.nansum([d.density(k) ** 2 for k in d.bins]))
+    return np.nansum(ret) / len(ret)

 def logarithm_score(targets, densities):
@@ -301,7 +301,7 @@ def crps(targets, densities):
     n = len(densities)
     for ct, df in enumerate(densities):
-        _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
+        _crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])

     return _crps / n
@@ -419,6 +419,8 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = model.predict(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
@@ -436,6 +438,8 @@ def get_interval_statistics(data, model, **kwargs):
         start = model.max_lag + steps_ahead - 1
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
         ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
         ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
@@ -502,6 +506,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))
@@ -516,6 +522,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _e1 = time.time()
         start = model.max_lag + steps_ahead
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[start:-1:skip], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
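The new is_multivariate branches let these statistics functions accept a multivariate DataFrame directly: before computing the scalar metrics, the target column is pulled out as a plain NumPy array. A minimal sketch of the idea, with hypothetical column names (model.target_variable.data_label holds the target column name):

import pandas as pd

data = pd.DataFrame({'load': [10.0, 12.0, 11.5], 'temp': [30.1, 29.8, 30.5]})
target_label = 'load'              # what model.target_variable.data_label would hold
data = data[target_label].values   # the 1-D series the metrics actually compare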

View File

@@ -81,6 +81,105 @@ def get_benchmark_probabilistic_methods():
     return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]

+def multivariate_sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
+    from pyFTS.models.multivariate import common, variable, mvfts
+
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
+    distributed = __pop('distributed', False, kwargs)
+
+    variables = __pop('variables', {}, kwargs)
+    target_variable = __pop('target_variable', '', kwargs)
+
+    type = kwargs.get("type", 'point')
+
+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+    steps_ahead = [k for k in steps_ahead]
+
+    fts_methods = __pop('methods', [], kwargs)
+
+    if fts_methods is not None:
+        methods_parameters = __pop('methods_parameters', None, kwargs)
+
+    if type == 'point':
+        experiment_method = mv_run_point2
+        synthesis_method = process_point_jobs2
+    elif type == 'interval':
+        experiment_method = mv_run_interval2
+        synthesis_method = process_interval_jobs2
+    elif type == 'distribution':
+        experiment_method = mv_run_probabilistic2
+        synthesis_method = process_probabilistic_jobs2
+    else:
+        raise ValueError("Type parameter has an unknown value!")
+
+    if distributed:
+        import pyFTS.distributed.dispy as dispy
+        nodes = kwargs.get("nodes", ['127.0.0.1'])
+        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)
+
+    inc = __pop("inc", 0.1, kwargs)
+
+    file = kwargs.get('file', "benchmarks.db")
+    conn = bUtil.open_benchmark_db(file)
+
+    jobs = []
+    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
+        for id, fts_method in enumerate(fts_methods):
+            kwargs['steps_ahead'] = max(steps_ahead)
+            parameters = {}
+            if methods_parameters is not None:
+                parameters = methods_parameters[id]
+            vars = []
+            tvar = None
+            for key, value in variables.items():
+                var = variable.Variable(key, data=train, **value)
+                vars.append(var)
+                if key == target_variable:
+                    tvar = var
+            model = fts_method(explanatory_variables=vars, target_variable=tvar,
+                               **parameters)
+            if not distributed:
+                try:
+                    job = experiment_method(model, train, test, ct, **kwargs)
+                    synthesis_method(dataset, tag, job, conn)
+                except Exception as ex:
+                    print('EXCEPTION! ', fts_method)
+                    traceback.print_exc()
+            else:
+                job = cluster.submit(model, train, test, ct, **kwargs)
+                job.id = id
+                jobs.append(job)
+
+    if distributed:
+        for job in jobs:
+            job()
+            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
+            else:
+                print("status", job.status)
+                print("result", job.result)
+                print("stdout", job.stdout)
+                print("stderr", job.exception)
+
+        cluster.wait()  # wait for all jobs to finish
+
+        dispy.stop_dispy_cluster(cluster, http_server)
+
+    conn.close()

 def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
     tag = __pop('tag', None, kwargs)
     dataset = __pop('dataset', None, kwargs)
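A sketch of how the new multivariate entry point is intended to be called, trimmed from the updated test script at the end of this commit (the 'Load' variable spec is abbreviated; membership functions and extra variables omitted). Note that variables are passed as keyword-argument dicts, not instantiated Variable objects: the function builds them inside the sliding-window loop so each training window gets freshly fitted partitioners.

from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models.multivariate import mvfts, wmvfts
from pyFTS.partitioners import Grid

# df is assumed to be a DataFrame with 'time' and 'load' columns
bchmk.multivariate_sliding_window_benchmarks2(
    df, 10000, train=0.9, inc=0.25,
    methods=[mvfts.MVFTS, wmvfts.WeightedMVFTS],
    methods_parameters=[{}, {}],
    variables={'Load': dict(data_label='load', partitioner=Grid.GridPartitioner, npart=10)},
    target_variable='Load',
    type='interval', steps_ahead=[1],
    file='experiments.db', dataset='Malaysia', tag='experiments')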
@@ -630,6 +729,63 @@ def __build_model(fts_method, order, parameters, partitioner_method, partitions,
     return mfts, pttr

+def mv_run_point2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+        _start = time.time()
+        _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        forecasts = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_point_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], forecasts)
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret

 def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
@@ -698,6 +854,67 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
     return ret

+def mv_run_interval2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+        _start = time.time()
+        metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
+               'pinball05': metrics[3], 'pinball25': metrics[4], 'pinball75': metrics[5], 'pinball95': metrics[6],
+               'winkler05': metrics[7], 'winkler25': metrics[8],
+               'window': window_key, 'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        intervals = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_interval_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], intervals)
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret

 def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     from pyFTS.models import hofts, ifts, pwfts
@@ -760,6 +977,63 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
     return ret

+def mv_run_probabilistic2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+        _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+        times += _t1
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'crps': _crps1, 'brier': _brier, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        distributions = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_distribution_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], distributions)
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret

 def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     import numpy as np

View File

@@ -202,13 +202,16 @@ def plot_distribution2(probabilitydist, data, **kwargs):
     scalarMap = cm.ScalarMappable(norm=normal, cmap=cmap)
     for ct in np.arange(1, int(lq / 2) + 1):
-        y = [[data[start_at], data[start_at]]]
-        for pd in probabilitydist:
-            qts = pd.quantile([qt[ct - 1], qt[-ct]])
-            y.append(qts)
-        ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
-                        facecolor=scalarMap.to_rgba(ct / lq))
+        try:
+            y = [[data[start_at], data[start_at]]]
+            for pd in probabilitydist:
+                qts = pd.quantile([qt[ct - 1], qt[-ct]])
+                y.append(qts)
+            ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
+                            facecolor=scalarMap.to_rgba(ct / lq))
+        except:
+            pass
     if kwargs.get('median', True):
         y = [data[start_at]]
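This guard pairs with the ProbabilityDistribution.quantile changes later in this commit: a quantile lookup that fails for one confidence band now skips just that band instead of aborting the whole plot. The same pattern in isolation, as a hedged sketch (probabilitydist stands for any list of fitted distributions):

for dist in probabilitydist:
    try:
        band = dist.quantile([0.05, 0.95])   # may raise for degenerate distributions
    except Exception:
        continue                             # drop this band, keep plotting the rest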

View File

@@ -110,7 +110,7 @@ class EnsembleFTS(fts.FTS):
             ret = np.nanpercentile(forecasts, 50)
         elif self.point_method == 'quantile':
             alpha = kwargs.get("alpha", 0.05)
-            ret = np.percentile(forecasts, alpha*100)
+            ret = np.nanpercentile(forecasts, alpha*100)
         elif self.point_method == 'exponential':
             l = len(self.models)
             if l == 1:

View File

@@ -80,9 +80,9 @@ class IntervalFTS(hofts.HighOrderFTS):
                 affected_flrgs_memberships.append(mv)

             # build the interval
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])

         return ret

@@ -165,9 +165,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
                 affected_flrgs_memberships.append(mv)

             # build the interval
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])

         return ret
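Both interval forecasters build the bounds as membership-weighted sums normalized by the total membership, so one NaN rule output no longer drags lo_/up_ to NaN. The shape of the computation with illustrative numbers (in the real code, lo and up already hold membership-weighted terms):

import numpy as np

memberships = [0.6, np.nan, 0.4]
lo = [9.0, np.nan, 11.0]        # weighted lower bounds per affected rule
up = [10.0, np.nan, 12.0]       # weighted upper bounds per affected rule

norm = np.nansum(memberships)   # 1.0 instead of nan
lo_ = np.nansum(lo) / norm      # 20.0
up_ = np.nansum(up) / norm      # 22.0
# if every membership were NaN, norm would be 0.0 -- the pwfts changes below guard that case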

View File

@@ -43,7 +43,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
             for fset, mv in val:
                 num.append(self.sets[fset].centroid * mv)
                 den.append(mv)
-            ret.append(np.sum(num) / np.sum(den))
+            ret.append(np.nansum(num) / np.nansum(den))
         elif mode == 'both':
             num = np.mean([self.sets[fset].centroid for fset in val])
             ret.append(num)

@@ -53,7 +53,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
             for fset, mv in enumerate(val):
                 num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
                 den.append(mv)
-            ret.append(np.sum(num) / np.sum(den))
+            ret.append(np.nansum(num) / np.nansum(den))
         else:
             raise Exception('Unknown defuzzification mode')

View File

@@ -152,7 +152,7 @@ class MVFTS(fts.FTS):
             mv = np.array(mvs)
             mp = np.array(mps)

-            ret.append(np.dot(mv, mp.T) / np.sum(mv))
+            ret.append(np.dot(mv, mp.T) / np.nansum(mv))

         ret = self.target_variable.apply_inverse_transformations(ret,
                                                                  params=data[self.target_variable.data_label].values)

View File

@@ -73,21 +73,21 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def get_midpoint(self, sets):
         '''Return the expectation of the PWFLRG, the weighted sum'''
         if self.midpoint is None:
-            self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
-                                             for s in self.RHS.keys()]))
+            self.midpoint = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
+                                                for s in self.RHS.keys()]))
         return self.midpoint

     def get_upper(self, sets):
         if self.upper is None:
-            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
-                                          for s in self.RHS.keys()]))
+            self.upper = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
+                                             for s in self.RHS.keys()]))
         return self.upper

     def get_lower(self, sets):
         if self.lower is None:
-            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
-                                          for s in self.RHS.keys()]))
+            self.lower = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
+                                             for s in self.RHS.keys()]))
         return self.lower
@@ -159,7 +159,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                     mvs.append(mv)

-                tmp_fq = sum([lhs_mv * kk for kk in mvs if kk > 0])
+                tmp_fq = np.nansum([lhs_mv * kk for kk in mvs if kk > 0])

                 self.global_frequency_count += tmp_fq
@@ -226,7 +226,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                 mvs.append(mv)

-            tmp_fq = sum([lhs_mv*kk for kk in mvs if kk > 0])
+            tmp_fq = np.nansum([lhs_mv*kk for kk in mvs if kk > 0])

             self.global_frequency_count += tmp_fq
@@ -268,7 +268,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         else:
             if len(flrg.LHS) > 0:
                 pi = 1 / len(flrg.LHS)
-                ret = sum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
+                ret = np.nansum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
             else:
                 ret = np.nan
         return ret
@@ -282,10 +282,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 _set = self.partitioner.sets[s]
                 tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
                 cond.append(tmp)
-            ret = sum(np.array(cond))
+            ret = np.nansum(np.array(cond))
         else:
             pi = 1 / len(flrg.LHS)
-            ret = sum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
+            ret = np.nansum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
         return ret

     def get_upper(self, flrg):
@@ -307,12 +307,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def forecast(self, data, **kwargs):
         method = kwargs.get('method', 'heuristic')

-        l = len(data)
+        l = len(data) + 1

         ret = []

-        for k in np.arange(self.max_lag - 1, l):
-            sample = data[k - (self.max_lag - 1): k + 1]
+        for k in np.arange(self.max_lag, l):
+            sample = data[k - self.max_lag: k]

             if method == 'heuristic':
                 ret.append(self.point_heuristic(sample, **kwargs))
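The loop rewrite is a pure re-indexing: both versions visit exactly the same max_lag-sized windows, the new form simply addresses each window by its end position, and l = len(data) + 1 keeps the window count unchanged. A quick equivalence check (not part of the diff), assuming max_lag = 2:

import numpy as np

data = [10, 20, 30, 40]
max_lag = 2

old = [data[k - (max_lag - 1): k + 1] for k in np.arange(max_lag - 1, len(data))]
new = [data[k - max_lag: k] for k in np.arange(max_lag, len(data) + 1)]
assert old == new   # both: [[10, 20], [20, 30], [30, 40]]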
@@ -360,9 +360,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             mp.append(norm * self.get_midpoint(flrg))
             norms.append(norm)

-        norm = sum(norms)
+        norm = np.nansum(norms)

-        final = sum(mp) / norm if norm != 0 else 0
+        final = np.nansum(mp) / norm if norm != 0 else 0

         if explain:
             print("Defuzzified value: {} \n".format(final))
@@ -431,12 +431,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             norms.append(norm)

         # build the interval
-        norm = sum(norms)
+        norm = np.nansum(norms)
         if norm == 0:
             return [0, 0]
         else:
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             return [lo_, up_]

     def forecast_distribution(self, ndata, **kwargs):
@@ -496,7 +496,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 else:
                     num.append(0.0)
                     den.append(0.000000001)
-            pf = sum(num) / sum(den)
+            pf = np.nansum(num) / np.nansum(den)

             dist.set(bin, pf)

View File

@@ -202,9 +202,9 @@ class Partitioner(object):
             raise Exception('Unknown defuzzification mode')

         if mode in ('both', 'vector'):
-            return np.sum(num) / np.sum(den)
+            return np.nansum(num) / np.nansum(den)
         else:
-            return np.mean(num)
+            return np.nanmean(num)

     def check_bounds(self, data):
         """
View File

@@ -209,11 +209,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                k = self.bin_index.find_ge(val)
-                ret.append(self.cdf[k])
+                try:
+                    k = self.bin_index.find_ge(val)
+                    ret.append(self.cdf[k])
+                except:
+                    ret.append(np.nan)
         else:
-            k = self.bin_index.find_ge(values)
-            return self.cdf[values]
+            try:
+                k = self.bin_index.find_ge(values)
+                return self.cdf[k]
+            except:
+                return np.nan

     def quantile(self, values):
         """
@@ -229,11 +235,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                k = self.quantile_index.find_ge(val)
-                ret.append(self.qtl[str(k)][0])
+                try:
+                    k = self.quantile_index.find_ge(val)
+                    ret.append(self.qtl[str(k)][0])
+                except:
+                    ret.append(np.nan)
         else:
-            k = self.quantile_index.find_ge(values)
-            ret = self.qtl[str(k)]
+            try:
+                k = self.quantile_index.find_ge(values)
+                ret = self.qtl[str(k)]
+            except:
+                return np.nan

         return ret
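With these guards, cumulative() and quantile() degrade to np.nan for probes outside the distribution's support instead of letting the underlying find_ge lookup raise. A usage sketch with illustrative values (dist stands for any fitted ProbabilityDistribution):

print(dist.quantile([0.05, 0.95]))   # e.g. [0.7, 9.1] -- normal in-range lookups
print(dist.quantile([1.5]))          # [nan] -- impossible probability, no exception
print(dist.cumulative(1e9))          # nan if the value lies beyond the last bin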
@@ -243,7 +255,7 @@ class ProbabilityDistribution(object):
         :return: the entropy of the probability distribution
         """
-        h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
-                  for k in self.bins])
+        h = -np.nansum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
+                        for k in self.bins])
         return h
@@ -255,7 +267,7 @@ class ProbabilityDistribution(object):
         :param q: a probabilistic.ProbabilityDistribution object
         :return: Cross entropy between this probability distribution and the given distribution
         """
-        h = -sum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
-                  for k in self.bins])
+        h = -np.nansum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
+                        for k in self.bins])
         return h
@@ -267,7 +279,7 @@ class ProbabilityDistribution(object):
         :param q: a probabilistic.ProbabilityDistribution object
        :return: Kullback-Leibler divergence
         """
-        h = sum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
-                 for k in self.bins])
+        h = np.nansum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
+                       for k in self.bins])
         return h
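entropy, crossentropy and kullbackleibler_divergence already skip p(k) = 0 terms via the conditional; np.nansum additionally absorbs NaN terms that can arise from degenerate bins in q. A numeric mini-check of the KL sum (not part of the diff):

import numpy as np

p = [0.5, 0.5, 0.0]
q = [0.25, 0.75, 0.0]
terms = [pk * np.log(pk / qk) if pk > 0 else 0 for pk, qk in zip(p, q)]
print(np.nansum(terms))   # ~0.1438, stays finite even if a term were nan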

View File

@@ -7,13 +7,12 @@ from pyFTS.partitioners import Grid, Simple, Entropy
 from pyFTS.common import Util
 from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
+from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.common import Util as cUtil
 from pyFTS.models import pwfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.partitioners import Grid
 from pyFTS.common import Membership
@@ -22,54 +21,45 @@ from pyFTS.common import Membership

 import os

-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_uv = dataset['glo_avg'].values[:24505]
-test_uv = dataset['glo_avg'].values[24505:]
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-sp = {'seasonality': DateTime.day_of_year, 'names': ['Jan','Feb','Mar','Apr','May',
-                                                     'Jun','Jul','Aug','Sep','Oct',
-                                                     'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
+from pyFTS.data import SONDA, Malaysia
+
+df = Malaysia.get_dataframe()
+df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')

 sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}

-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
+variables = {
+    "Hour": dict(data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                 partitioner_specific=sp, alpha_cut=.3),
+    "Temperature": dict(data_label="temperature", alias='temp',
+                        partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                        alpha_cut=.25),
+    "Load": dict(data_label="load", alias='load',
+                 partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                 alpha_cut=.25)
+}

-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-
-fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
-
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
-                              order=2, knn=1)
-
-#model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
+methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+#methods = [granular.GranularWMVFTS]

-model.fit(train_mv)
-
-generator = lambda x : x + pd.to_timedelta(1, unit='h')
-
-forecasts = model.predict(test_mv.iloc[:3], steps_ahead=48, generators={'data': generator})
-
-print(forecasts)
+parameters = [
+    {}, {},
+    dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
+         order=1, knn=1)
+]
+
+bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25,
+                                              methods=methods,
+                                              methods_parameters=parameters,
+                                              variables=variables,
+                                              target_variable='Load',
+                                              type='interval',
+                                              steps_ahead=[1],
+                                              distributed=False,
+                                              nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
+                                              file="experiments.db", dataset='Malaysia',
+                                              tag="experiments"
+                                              )

 '''

View File

@@ -14,71 +14,68 @@ from pyFTS.models import pwfts,hofts,ifts
 from pyFTS.models.multivariate import granular, grid
 from pyFTS.partitioners import Grid, Util as pUtil
-from pyFTS.models.multivariate import common, variable, mvfts
+from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 from pyFTS.common import Membership

-from pyFTS.data import SONDA, Malaysia
-
-df = Malaysia.get_dataframe()
-df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
-
-train_mv = df.iloc[:8000]
-test_mv = df.iloc[8000:10000]
-
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp, alpha_cut=.3)
-
-vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
-
-vload = variable.Variable("Load", data_label="load", alias='load',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
-
-order = 1
-knn = 1
-
-model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
+def sample_by_hour(data):
+    return [np.nanmean(data[k:k+60]) for k in np.arange(0, len(data), 60)]
+
+def sample_date_by_hour(data):
+    return [data[k] for k in np.arange(0, len(data), 60)]
+
+from pyFTS.data import SONDA
+
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
+
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+
+sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')
+
+var = {
+    'datahora': sample_date_by_hour(sonda['datahora'].values),
+    'glo_avg': sample_by_hour(sonda['glo_avg'].values),
+    'ws_10m': sample_by_hour(sonda['ws_10m'].values),
+}
+
+df = pd.DataFrame(var)
+
+train_mv = df.iloc[:9000]
+test_mv = df.iloc[9000:10000]
+
+fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3])
+
+sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']}
+
+vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp, alpha_cut=.3)
+
+vmonth.partitioner.plot(ax[0])
+
+vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind',
+                         partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
+                         data=train_mv, alpha_cut=.25)
+
+vwin.partitioner.plot(ax[1])
+
+plt.tight_layout()
+
+order = 3
+knn = 2
+
+model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin,
                                 fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
                                 order=order, knn=knn)

 model.fit(train_mv)

-temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=1)
-temp_generator.fit(train_mv['temperature'].values)
-
-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-#temp_generator = lambda x : x
-
-generators = {'time': time_generator, 'temperature': temp_generator}
-
-#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
-#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
-print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
-
-#forecasts1 = model.predict(test_mv, type='multivariate')
-#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
-#                           steps_ahead=200)
+fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
+
+ax.plot(test_mv['ws_10m'].values[:100], label='original')
+
+forecasts = model.predict(test_mv.iloc[:100], type='distribution')
+#print(model)
+Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax)

 '''
 from pyFTS.data import Enrollments

 train = Enrollments.get_data()

 fs = Grid.GridPartitioner(data=train, npart=10)  #, transformation=tdiff)

 model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)
 model.fit(train)
 print(model)
 print(model.predict(train))
 '''