Improvements for numerical stability
parent de82af1b9d
commit 35898f338a
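The changes below replace plain Python `sum`/`np.sum` (and `np.percentile`/`np.mean`) aggregations with their NaN-aware NumPy counterparts (`np.nansum`, `np.nanmean`, `np.nanpercentile`), so a single missing value no longer poisons the error metrics, interval bounds, or probability normalizations. A minimal illustration of the difference (not part of the commit, assumes only NumPy):

    import numpy as np

    errors = np.array([1.0, 2.0, np.nan, 4.0])

    print(np.sum(errors))      # nan  -> one missing value propagates through the total
    print(np.nansum(errors))   # 7.0  -> NaN entries are ignored
    print(np.nanmean(errors))  # 2.33 -> mean over the non-NaN entries only

The hunks below apply this change throughout the benchmark measures, the interval and probabilistic forecasters, the partitioners, and the probability distributions, and also add the multivariate sliding-window benchmark helpers.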
@@ -89,7 +89,7 @@ def smape(targets, forecasts, type=2):
     elif type == 2:
         return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
+        return np.nansum(np.abs(forecasts - targets)) / np.nansum(forecasts + targets)


 def mape_interval(targets, forecasts):
@@ -124,7 +124,7 @@ def UStatistic(targets, forecasts):
     for k in np.arange(0, l - 1):
         y.append(np.subtract(forecasts[k], targets[k]) ** 2)
         naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
-    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
+    return np.sqrt(np.divide(np.nansum(y), np.nansum(naive)))


 def TheilsInequality(targets, forecasts):
@@ -137,9 +137,9 @@ def TheilsInequality(targets, forecasts):
     """
     res = targets - forecasts
     t = len(res)
-    us = np.sqrt(sum([u ** 2 for u in res]))
-    ys = np.sqrt(sum([y ** 2 for y in targets]))
-    fs = np.sqrt(sum([f ** 2 for f in forecasts]))
+    us = np.sqrt(np.nansum([u ** 2 for u in res]))
+    ys = np.sqrt(np.nansum([y ** 2 for y in targets]))
+    fs = np.sqrt(np.nansum([f ** 2 for f in forecasts]))
     return us / (ys + fs)

@@ -255,12 +255,12 @@ def brier_score(targets, densities):
         try:
             v = d.bin_index.find_le(targets[ct])

-            score = sum([d.density(k) ** 2 for k in d.bins if k != v])
+            score = np.nansum([d.density(k) ** 2 for k in d.bins if k != v])
             score += (d.density(v) - 1) ** 2
             ret.append(score)
         except ValueError as ex:
-            ret.append(sum([d.density(k) ** 2 for k in d.bins]))
-    return sum(ret) / len(ret)
+            ret.append(np.nansum([d.density(k) ** 2 for k in d.bins]))
+    return np.nansum(ret) / len(ret)


 def logarithm_score(targets, densities):
@@ -301,7 +301,7 @@ def crps(targets, densities):

     n = len(densities)
     for ct, df in enumerate(densities):
-        _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
+        _crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])

     return _crps / n

@@ -419,6 +419,8 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = model.predict(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
@@ -436,6 +438,8 @@ def get_interval_statistics(data, model, **kwargs):
         start = model.max_lag + steps_ahead - 1
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
         ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
         ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
@@ -502,6 +506,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))
@@ -516,6 +522,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _e1 = time.time()

         start = model.max_lag + steps_ahead
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[start:-1:skip], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
@@ -81,6 +81,105 @@ def get_benchmark_probabilistic_methods():
     return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]


+def multivariate_sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
+    from pyFTS.models.multivariate import common, variable, mvfts
+
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
+    distributed = __pop('distributed', False, kwargs)
+
+    variables = __pop('variables', {}, kwargs)
+
+    target_variable = __pop('target_variable', '', kwargs)
+
+    type = kwargs.get("type", 'point')
+
+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+
+    steps_ahead = [k for k in steps_ahead]
+
+    fts_methods = __pop('methods', [], kwargs)
+
+    if fts_methods is not None:
+        methods_parameters = __pop('methods_parameters', None, kwargs)
+
+    if type == 'point':
+        experiment_method = mv_run_point2
+        synthesis_method = process_point_jobs2
+    elif type == 'interval':
+        experiment_method = mv_run_interval2
+        synthesis_method = process_interval_jobs2
+    elif type == 'distribution':
+        experiment_method = mv_run_probabilistic2
+        synthesis_method = process_probabilistic_jobs2
+    else:
+        raise ValueError("Type parameter has a unkown value!")
+
+    if distributed:
+        import pyFTS.distributed.dispy as dispy
+
+        nodes = kwargs.get("nodes", ['127.0.0.1'])
+        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)
+
+    inc = __pop("inc", 0.1, kwargs)
+
+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
+    jobs = []
+    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
+        for id, fts_method in enumerate(fts_methods):
+
+            kwargs['steps_ahead'] = max(steps_ahead)
+            parameters = {}
+            if methods_parameters is not None:
+                parameters = methods_parameters[id]
+
+            vars = []
+            tvar = None
+
+            for key, value in variables.items():
+                var = variable.Variable(key, data=train, **value)
+                vars.append(var)
+                if key == target_variable:
+                    tvar = var
+
+            model = fts_method(explanatory_variables=vars, target_variable=tvar,
+                               **parameters)
+
+            if not distributed:
+                try:
+                    job = experiment_method(model, train, test, ct, **kwargs)
+                    synthesis_method(dataset, tag, job, conn)
+                except Exception as ex:
+                    print('EXCEPTION! ', fts_method)
+                    traceback.print_exc()
+            else:
+                job = cluster.submit(model, train, test, ct, **kwargs)
+                job.id = id
+                jobs.append(job)
+
+    if distributed:
+        for job in jobs:
+            job()
+            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
+            else:
+                print("status", job.status)
+                print("result", job.result)
+                print("stdout", job.stdout)
+                print("stderr", job.exception)
+
+        cluster.wait()  # wait for all jobs to finish
+        dispy.stop_dispy_cluster(cluster, http_server)
+
+    conn.close()
+
+
 def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
     tag = __pop('tag', None, kwargs)
     dataset = __pop('dataset', None, kwargs)
@@ -630,6 +729,63 @@ def __build_model(fts_method, order, parameters, partitioner_method, partitions,
     return mfts, pttr


+def mv_run_point2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _start = time.time()
+        _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        forecasts = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_point_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], forecasts)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):

     import time
@@ -698,6 +854,67 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
     return ret


+def mv_run_interval2(mfts,train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts,ifts,pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS ]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters',{})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _start = time.time()
+        metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
+               'time': times,'pinball05': metrics[3], 'pinball25': metrics[4], 'pinball75': metrics[5], 'pinball95': metrics[6],
+               'winkler05': metrics[7], 'winkler25': metrics[8],
+               'window': window_key,'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        intervals = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_interval_ahead_statistics(test_data[mfts.order:mfts.order+steps_ahead], intervals)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     from pyFTS.models import hofts,ifts,pwfts
@@ -760,6 +977,63 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
     return ret


+def mv_run_probabilistic2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+        times += _t1
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'crps': _crps1, 'brier': _brier, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        distributions = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_distribution_ahead_statistics(test_data[mfts.order:mfts.order+steps_ahead], distributions)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret


 def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     import numpy as np
@@ -202,6 +202,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
     scalarMap = cm.ScalarMappable(norm=normal, cmap=cmap)

     for ct in np.arange(1, int(lq / 2) + 1):
+        try:
             y = [[data[start_at], data[start_at]]]
             for pd in probabilitydist:
                 qts = pd.quantile([qt[ct - 1], qt[-ct]])
@@ -209,6 +210,8 @@ def plot_distribution2(probabilitydist, data, **kwargs):

             ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
                             facecolor=scalarMap.to_rgba(ct / lq))
+        except:
+            pass

     if kwargs.get('median',True):
         y = [data[start_at]]
@@ -110,7 +110,7 @@ class EnsembleFTS(fts.FTS):
             ret = np.nanpercentile(forecasts, 50)
         elif self.point_method == 'quantile':
             alpha = kwargs.get("alpha",0.05)
-            ret = np.percentile(forecasts, alpha*100)
+            ret = np.nanpercentile(forecasts, alpha*100)
         elif self.point_method == 'exponential':
             l = len(self.models)
             if l == 1:
@@ -80,9 +80,9 @@ class IntervalFTS(hofts.HighOrderFTS):
                 affected_flrgs_memberships.append(mv)

             # generate the interval
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])

         return ret
@@ -165,9 +165,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
                 affected_flrgs_memberships.append(mv)

             # generate the interval
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])

         return ret
@@ -43,7 +43,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
                 for fset, mv in val:
                     num.append(self.sets[fset].centroid * mv)
                     den.append(mv)
-                ret.append(np.sum(num) / np.sum(den))
+                ret.append(np.nansum(num) / np.nansum(den))
             elif mode == 'both':
                 num = np.mean([self.sets[fset].centroid for fset in val])
                 ret.append(num)
@@ -53,7 +53,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
                 for fset, mv in enumerate(val):
                     num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
                     den.append(mv)
-                ret.append(np.sum(num) / np.sum(den))
+                ret.append(np.nansum(num) / np.nansum(den))
             else:
                 raise Exception('Unknown deffuzyfication mode')

@@ -152,7 +152,7 @@ class MVFTS(fts.FTS):
             mv = np.array(mvs)
             mp = np.array(mps)

-            ret.append(np.dot(mv,mp.T)/np.sum(mv))
+            ret.append(np.dot(mv,mp.T)/np.nansum(mv))

         ret = self.target_variable.apply_inverse_transformations(ret,
                                                                   params=data[self.target_variable.data_label].values)
@@ -73,21 +73,21 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def get_midpoint(self, sets):
         '''Return the expectation of the PWFLRG, the weighted sum'''
         if self.midpoint is None:
-            self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
+            self.midpoint = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
                                              for s in self.RHS.keys()]))

         return self.midpoint

     def get_upper(self, sets):
         if self.upper is None:
-            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
+            self.upper = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
                                           for s in self.RHS.keys()]))

         return self.upper

     def get_lower(self, sets):
         if self.lower is None:
-            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
+            self.lower = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
                                           for s in self.RHS.keys()]))

         return self.lower
@@ -159,7 +159,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                     mvs.append(mv)

-                tmp_fq = sum([lhs_mv * kk for kk in mvs if kk > 0])
+                tmp_fq = np.nansum([lhs_mv * kk for kk in mvs if kk > 0])

                 self.global_frequency_count += tmp_fq

@@ -226,7 +226,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                     mvs.append(mv)

-                tmp_fq = sum([lhs_mv*kk for kk in mvs if kk > 0])
+                tmp_fq = np.nansum([lhs_mv*kk for kk in mvs if kk > 0])

                 self.global_frequency_count += tmp_fq

@@ -268,7 +268,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         else:
             if len(flrg.LHS) > 0:
                 pi = 1 / len(flrg.LHS)
-                ret = sum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
+                ret = np.nansum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
             else:
                 ret = np.nan
         return ret
@@ -282,10 +282,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 _set = self.partitioner.sets[s]
                 tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
                 cond.append(tmp)
-            ret = sum(np.array(cond))
+            ret = np.nansum(np.array(cond))
         else:
             pi = 1 / len(flrg.LHS)
-            ret = sum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
+            ret = np.nansum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
         return ret

     def get_upper(self, flrg):
@@ -307,12 +307,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def forecast(self, data, **kwargs):
         method = kwargs.get('method','heuristic')

-        l = len(data)
+        l = len(data)+1

         ret = []

-        for k in np.arange(self.max_lag - 1, l):
-            sample = data[k - (self.max_lag - 1): k + 1]
+        for k in np.arange(self.max_lag, l):
+            sample = data[k - self.max_lag: k]

             if method == 'heuristic':
                 ret.append(self.point_heuristic(sample, **kwargs))
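The revised indexing above makes each forecast use the `max_lag` observations that immediately precede position `k`, and produces one forecast per complete window up to the end of the series. A small sanity check of the new slicing (illustrative only, plain Python/NumPy, not part of the commit):

    import numpy as np

    data = list(range(10))            # toy series of length 10
    max_lag = 3
    l = len(data) + 1

    for k in np.arange(max_lag, l):
        sample = data[k - max_lag: k]  # the max_lag values ending at index k-1
    # first sample: [0, 1, 2]; last sample: [7, 8, 9]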
@@ -360,9 +360,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             mp.append(norm * self.get_midpoint(flrg))
             norms.append(norm)

-        norm = sum(norms)
+        norm = np.nansum(norms)

-        final = sum(mp) / norm if norm != 0 else 0
+        final = np.nansum(mp) / norm if norm != 0 else 0

         if explain:
             print("Deffuzyfied value: {} \n".format(final))
@@ -431,12 +431,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             norms.append(norm)

         # generate the interval
-        norm = sum(norms)
+        norm = np.nansum(norms)
         if norm == 0:
             return [0, 0]
         else:
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             return [lo_, up_]

     def forecast_distribution(self, ndata, **kwargs):
@@ -496,7 +496,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 else:
                     num.append(0.0)
                     den.append(0.000000001)
-            pf = sum(num) / sum(den)
+            pf = np.nansum(num) / np.nansum(den)

             dist.set(bin, pf)

@@ -202,9 +202,9 @@ class Partitioner(object):
                 raise Exception('Unknown deffuzyfication mode')

         if mode in ('both','vector'):
-            return np.sum(num) / np.sum(den)
+            return np.nansum(num) / np.nansum(den)
         else:
-            return np.mean(num)
+            return np.nanmean(num)

     def check_bounds(self, data):
         """
@@ -209,11 +209,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
+                try:
                     k = self.bin_index.find_ge(val)
                     ret.append(self.cdf[k])
+                except:
+                    ret.append(np.nan)
         else:
+            try:
                 k = self.bin_index.find_ge(values)
                 return self.cdf[values]
+            except:
+                return np.nan

     def quantile(self, values):
         """
@ -229,11 +235,17 @@ class ProbabilityDistribution(object):
|
|||||||
if isinstance(values, list):
|
if isinstance(values, list):
|
||||||
ret = []
|
ret = []
|
||||||
for val in values:
|
for val in values:
|
||||||
|
try:
|
||||||
k = self.quantile_index.find_ge(val)
|
k = self.quantile_index.find_ge(val)
|
||||||
ret.append(self.qtl[str(k)][0])
|
ret.append(self.qtl[str(k)][0])
|
||||||
|
except:
|
||||||
|
ret.append(np.nan)
|
||||||
else:
|
else:
|
||||||
|
try:
|
||||||
k = self.quantile_index.find_ge(values)
|
k = self.quantile_index.find_ge(values)
|
||||||
ret = self.qtl[str(k)]
|
ret = self.qtl[str(k)]
|
||||||
|
except:
|
||||||
|
return np.nan
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
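With the guards above, out-of-range queries to `cumulative` and `quantile` degrade to `np.nan` instead of raising, and the NaN-aware aggregations introduced elsewhere in this commit absorb those values during scoring. A generic sketch of the pattern (hypothetical helper, not the pyFTS API):

    import numpy as np

    def guarded_lookup(find_ge, table, value):
        # Return NaN for a query outside the indexed range instead of raising.
        try:
            key = find_ge(value)   # assumed to raise ValueError when no key >= value exists
            return table[key]
        except ValueError:
            return np.nan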
@@ -243,7 +255,7 @@ class ProbabilityDistribution(object):

        :return:the entropy of the probability distribution
        """
-        h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
+        h = -np.nansum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
        return h

@@ -255,7 +267,7 @@ class ProbabilityDistribution(object):
        :param q: a probabilistic.ProbabilityDistribution object
        :return: Cross entropy between this probability distribution and the given distribution
        """
-        h = -sum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
+        h = -np.nansum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
        return h

@@ -267,7 +279,7 @@ class ProbabilityDistribution(object):
        :param q: a probabilistic.ProbabilityDistribution object
        :return: Kullback-Leibler divergence
        """
-        h = sum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
+        h = np.nansum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
                  for k in self.bins])
        return h

@@ -7,13 +7,12 @@ from pyFTS.partitioners import Grid, Simple, Entropy
 from pyFTS.common import Util

 from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
+from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.common import Util as cUtil
+from pyFTS.models import pwfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime


 from pyFTS.models.multivariate import common, variable, mvfts
 from pyFTS.partitioners import Grid
 from pyFTS.common import Membership
@@ -22,54 +21,45 @@ from pyFTS.common import Membership
 import os


-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
+from pyFTS.data import SONDA, Malaysia

-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_uv = dataset['glo_avg'].values[:24505]
-test_uv = dataset['glo_avg'].values[24505:]
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May',
-                                                      'Jun','Jul', 'Aug','Sep','Oct',
-                                                      'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
-
+df = Malaysia.get_dataframe()
+df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')

 sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}

-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
+variables = {
+    "Hour": dict(data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                 partitioner_specific=sp, alpha_cut=.3),
+    "Temperature": dict(data_label="temperature", alias='temp',
+                        partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                        alpha_cut=.25),
+    "Load": dict(data_label="load", alias='load',
+                 partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                 alpha_cut=.25)
+}

-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
-                              order=2, knn=1)
-#model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
+methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+#methods = [granular.GranularWMVFTS]

-model.fit(train_mv)
-generator = lambda x : x + pd.to_timedelta(1, unit='h')
-forecasts = model.predict(test_mv.iloc[:3], steps_ahead=48, generators={'data': generator} )
+parameters = [
+    {},{},
+    dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
+         order=1, knn=1)
+]

-print(forecasts)
+bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25,
+                                              methods=methods,
+                                              methods_parameters=parameters,
+                                              variables=variables,
+                                              target_variable='Load',
+                                              type='interval',
+                                              steps_ahead=[1],
+                                              distributed=False,
+                                              nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
+                                              file="experiments.db", dataset='Malaysia',
+                                              tag="experiments"
+                                              )


 '''
@@ -14,71 +14,68 @@ from pyFTS.models import pwfts,hofts,ifts
 from pyFTS.models.multivariate import granular, grid
 from pyFTS.partitioners import Grid, Util as pUtil

-from pyFTS.models.multivariate import common, variable, mvfts
+from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 from pyFTS.common import Membership

-from pyFTS.data import SONDA, Malaysia
+def sample_by_hour(data):
+    return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]

-df = Malaysia.get_dataframe()
-df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
+def sample_date_by_hour(data):
+    return [data[k] for k in np.arange(0,len(data),60)]

-train_mv = df.iloc[:8000]
-test_mv = df.iloc[8000:10000]
+from pyFTS.data import SONDA

-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]

-vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')


+var = {
+    'datahora': sample_date_by_hour(sonda['datahora'].values),
+    'glo_avg': sample_by_hour(sonda['glo_avg'].values),
+    'ws_10m': sample_by_hour(sonda['ws_10m'].values),
+}

+df = pd.DataFrame(var)

+train_mv = df.iloc[:9000]
+test_mv = df.iloc[9000:10000]

+fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3])

+sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']}

+vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
                            data=train_mv, partitioner_specific=sp, alpha_cut=.3)
-vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
-vload = variable.Variable("Load", data_label="load", alias='load',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)

-order = 1
-knn = 1
+vmonth.partitioner.plot(ax[0])

-model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
+vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind',
+                         partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
+                         data=train_mv, alpha_cut=.25)

+vwin.partitioner.plot(ax[1])

+plt.tight_layout()

+order = 3
+knn = 2

+model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin,
                                 fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
                                 order=order, knn=knn)

 model.fit(train_mv)

+fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
+ax.plot(test_mv['ws_10m'].values[:100], label='original')

-temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=1)
-temp_generator.fit(train_mv['temperature'].values)
+forecasts = model.predict(test_mv.iloc[:100], type='distribution')

-#print(model)
+Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax)

-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-#temp_generator = lambda x : x
-
-generators = {'time': time_generator, 'temperature': temp_generator}
-
-#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
-#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
-print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
-
-#
-
-#forecasts1 = model.predict(test_mv, type='multivariate')
-#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
-# steps_ahead=200)
-
-
-'''
-from pyFTS.data import Enrollments
-train = Enrollments.get_data()
-
-fs = Grid.GridPartitioner(data=train, npart=10) #, transformation=tdiff)
-
-model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)
-model.fit(train)
-print(model)
-
-print(model.predict(train))
-'''