Minor bugfixes on pwfts.models

parent f3c6eda2ec
commit 33dbeb8965
@@ -215,6 +215,17 @@ def pinball_mean(tau, targets, forecasts):
         print(ex)
 
 
+def brier_score(targets, densities):
+    '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3. '''
+    ret = []
+    for ct, d in enumerate(densities):
+        v = d.bin_index.find_ge(targets[ct])
+        score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
+        score += (d.distribution[v] - 1) ** 2
+        ret.append(score)
+    return sum(ret)/len(ret)
+
+
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
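For context, the new brier_score averages, over all observations, the squared distance between the forecast probability mass function and the one-hot indicator of the bin containing the observed value. A minimal self-contained sketch; plain dicts stand in here for the pyFTS distribution objects, which expose `bins`, `distribution` and `bin_index` in the real API:

```python
# Hedged sketch of the Brier score over discretized densities.
# Assumption: each density is a dict {bin_value: probability};
# real pyFTS distributions expose .bins, .distribution and .bin_index.

def brier_score_sketch(targets, densities):
    scores = []
    for target, density in zip(targets, densities):
        # smallest bin >= target, mirroring bin_index.find_ge()
        v = min(b for b in density if b >= target)
        # squared error against the one-hot "perfect" forecast
        score = sum(p ** 2 for b, p in density.items() if b != v)
        score += (density[v] - 1) ** 2
        scores.append(score)
    return sum(scores) / len(scores)

# A forecast that puts all mass on the correct bin scores 0 (best case).
print(brier_score_sketch([10], [{5: 0.0, 10: 1.0, 15: 0.0}]))  # 0.0
```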
@@ -236,7 +247,6 @@ def heavyside_cdf(bins, targets):
     df = pd.DataFrame(ret, columns=bins)
     return df
 
-
 def crps(targets, densities):
     '''
     Continuous Ranked Probability Score
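The CRPS compares the forecast CDF against the Heaviside step function at the observed value (cf. pmf_to_cdf and heavyside_cdf above). A rough sketch of one discrete form, under the simplifying assumption of uniform bin weights; the pyFTS implementation may weight and normalize differently:

```python
import numpy as np

# Sketch: discrete CRPS as the mean squared gap between the forecast CDF
# and the empirical (Heaviside) CDF, averaged over bins and observations.
def crps_sketch(targets, cdfs, bins):
    total = 0.0
    for target, cdf in zip(targets, cdfs):
        heaviside = np.array([1.0 if b >= target else 0.0 for b in bins])
        total += np.mean((np.array(cdf) - heaviside) ** 2)
    return total / len(targets)

bins = [0, 5, 10, 15]
# a perfectly sharp forecast at bin 10 scores 0
print(crps_sketch([10], [[0.0, 0.0, 1.0, 1.0]], bins))  # 0.0
```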
@@ -277,6 +287,7 @@ def get_point_statistics(data, model, **kwargs):
     '''
 
     steps_ahead = kwargs.get('steps_ahead',1)
+    kwargs['type'] = 'point'
 
     indexer = kwargs.get('indexer', None)
 
@@ -301,7 +312,7 @@ def get_point_statistics(data, model, **kwargs):
         nforecasts = []
         for k in np.arange(model.order, len(ndata)-steps_ahead,steps_ahead_sampler):
             sample = ndata[k - model.order: k]
-            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             nforecasts.append(tmp[-1])
 
         start = model.order + steps_ahead -1
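The pattern above is a rolling-origin evaluation: slide a window of `model.order` past observations, forecast `steps_ahead` points with predict() (the horizon now travels inside kwargs instead of a positional argument), and keep only the last forecast of each call. A standalone sketch with a hypothetical toy model:

```python
import numpy as np

class ToyModel:
    """Hypothetical stand-in for an FTS model: order-sized input, naive forecasts."""
    order = 2
    def predict(self, sample, **kwargs):
        steps_ahead = kwargs.get('steps_ahead', 1)
        return [sample[-1]] * steps_ahead  # repeat the last observed value

data = list(range(20))
model, steps_ahead = ToyModel(), 3
forecasts = []
for k in np.arange(model.order, len(data) - steps_ahead):
    sample = data[k - model.order: k]
    tmp = model.predict(sample, steps_ahead=steps_ahead)
    forecasts.append(tmp[-1])  # keep only the horizon-th forecast
print(forecasts[:5])
```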
@@ -323,6 +334,7 @@ def get_interval_statistics(data, model, **kwargs):
     '''
 
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'interval'
 
     ret = list()
 
@@ -339,7 +351,7 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.predict(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
 
         start = model.order + steps_ahead -1
@@ -362,12 +374,13 @@ def get_distribution_statistics(data, model, **kwargs):
     :return: a list with the CRPS and execution time
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'distribution'
 
     ret = list()
 
     if steps_ahead == 1:
         _s1 = time.time()
-        forecasts = model.forecast_distribution(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
         ret.append(round(crps(data, forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
@@ -377,7 +390,7 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         for k in np.arange(model.order, len(data) - steps_ahead, skip):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
         _e1 = time.time()
 
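After this change all three measurement helpers funnel through predict(), with `kwargs['type']` selecting the forecast kind. A hedged usage example, grounded in the calls visible elsewhere in this commit (dataset split and partition count are illustrative):

```python
from pyFTS.benchmarks import Measures
from pyFTS.models import pwfts
from pyFTS.partitioners import Grid
from pyFTS.data import TAIEX

dataset = TAIEX.get_data()
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=20)
model = pwfts.ProbabilisticWeightedFTS('', partitioner=partitioner)
model.fit(dataset[:800])

# returns [CRPS, execution time], per the docstring above
crps_value, elapsed = Measures.get_distribution_statistics(dataset[800:1000], model)
print(crps_value, elapsed)
```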
@@ -56,10 +56,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitions and partitioning method will be created a partitioner model. And for each partitioner, order,
     steps ahead and FTS method a forecasting model will be trained.
 
-    Then all trained models are benchmarked on the test data and the metrics are stored in a dataframe for
-    posterior analysis.
+    Then all trained models are benchmarked on the test data and the metrics are stored on a sqlite3 database
+    (identified by the 'file' parameter) for posterior analysis.
 
-    The number of experiments is determined by the windowsize and inc.
+    All this process can be distributed on a dispy cluster, by setting the attribute 'distributed' to True and
+    informing the list of dispy nodes in the 'nodes' parameter.
+
+    The number of experiments is determined by the 'windowsize' and 'inc' parameters.
 
     :param data: test data
     :param windowsize: size of sliding window
@@ -67,35 +70,31 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     :param kwargs: dict, optional arguments
 
     :keyword
+    benchmark_methods: a list with Non FTS models to benchmark. The default is None.
+    benchmark_methods_parameters: a list with Non FTS models parameters. The default is None.
+    dataset: the dataset name, to identify the current set of benchmark results on the database.
+    distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
+    file: file path to save the results. The default is benchmarks.db.
+    inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window.
-    models: a list with prebuilt FTS objects. The default is None.
     methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
+    models: a list with prebuilt FTS objects. The default is None.
+    nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
+    orders: a list with orders of the models (for high order models). The default is [1,2,3].
+    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
     partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
     partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
-    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
-    orders: a list with orders of the models (for high order models). The default is [1,2,3].
-    type: the forecasting type, one of these values: point (default), interval or distribution.
-    steps_ahead: a list with the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
-    start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
-    transformation: data transformation. The default is None.
-    indexer: seasonal indexer. The default is None.
     progress: if true a progress bar will be displayed during the benchmarks. The default is False.
-    distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
-    nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
-    benchmark_methods: a list with Non FTS models to benchmark. The default is None.
-    benchmark_methods_parameters: a list with Non FTS models parameters. The default is None.
-    save: save results. The default is False.
-    file: file path to save the results. The default is None.
-    sintetic: if true, only the average and standard deviation of the results are kept. The default is False.
-
-    :return: DataFrame with the benchmark results
+    start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
+    steps_ahead: a list with the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
+    tag: a name to identify the current set of benchmark results on the database.
+    type: the forecasting type, one of these values: point (default), interval or distribution.
+    transformations: a list with data transformations to apply. The default is [None].
     """
 
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
     distributed = __pop('distributed', False, kwargs)
     save = __pop('save', False, kwargs)
 
     transformations = kwargs.get('transformations', [None])
     progress = kwargs.get('progress', None)
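A hedged example of the reorganized entry point, using only keywords documented above and mirroring the test script at the end of this commit (values are illustrative):

```python
import numpy as np
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.data import TAIEX

dataset = TAIEX.get_data()

# Point-forecasting benchmark over a sliding window; the metrics land in
# the sqlite3 file named by 'file', keyed by 'dataset' and 'tag'.
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                orders=[1, 2, 3],
                                partitions=np.arange(10, 100, 5),
                                progress=True, type='point',
                                file="benchmarks.db",
                                dataset="TAIEX", tag="example")
```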
@@ -71,6 +71,7 @@ class Differential(Transformation):
     def inverse(self, data, param, **kwargs):
 
         type = kwargs.get("type","point")
+        steps_ahead = kwargs.get("steps_ahead", 1)
 
         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -83,14 +84,30 @@ class Differential(Transformation):
         # print(n)
         # print(len(param))
 
-        if type == "point":
-            inc = [data[t] + param[t] for t in np.arange(0, n)]
-        elif type == "interval":
-            inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
-        elif type == "distribution":
-            for t in np.arange(0, n):
-                data[t].differential_offset(param[t])
-            inc = data
+        if steps_ahead == 1:
+            if type == "point":
+                inc = [data[t] + param[t] for t in np.arange(0, n)]
+            elif type == "interval":
+                inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
+            elif type == "distribution":
+                for t in np.arange(0, n):
+                    data[t].differential_offset(param[t])
+                inc = data
+        else:
+            if type == "point":
+                inc = [data[0] + param[0]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append(data[t] + inc[t-1])
+            elif type == "interval":
+                inc = [[data[0][0] + param[0], data[0][1] + param[0]]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append([data[t][0] + np.nanmean(inc[t-1]), data[t][1] + np.nanmean(inc[t-1])])
+            elif type == "distribution":
+                data[0].differential_offset(param[0])
+                for t in np.arange(1, steps_ahead):
+                    ex = data[t-1].expected_value()
+                    data[t].differential_offset(ex)
+                inc = data
 
         if n == 1:
             return inc[0]
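The new else branch reconstructs a multi-step forecast from first differences recursively: the first forecast is offset by the last observed level, and each later forecast is offset by the previously reconstructed level (its midpoint or expected value for intervals and distributions). A sketch of the point case:

```python
# Sketch: undo first-order differencing for a multi-step point forecast.
# 'diffs' are forecast differences; 'last_level' is the last observed value.
def inverse_diff_multistep(diffs, last_level):
    levels = [diffs[0] + last_level]
    for t in range(1, len(diffs)):
        # each step is offset by the previously reconstructed level
        levels.append(diffs[t] + levels[t - 1])
    return levels

# series ... 98, 100; forecast differences [2, 3, -1]
print(inverse_diff_multistep([2, 3, -1], 100))  # [102, 105, 104]
```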
@@ -114,9 +114,9 @@ class FTS(object):
 
             ret = Util.distributed_predict(self, kwargs, nodes, ndata, num_batches)
 
-        if type != 'distribution' and not self.is_multivariate:
-            interval = True if type == 'interval' else False
-            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=interval)
+        if not self.is_multivariate:
+            kwargs['type'] = type
+            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], **kwargs)
 
         return ret
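With this change the forecast type travels to the inverse transformation through kwargs instead of a boolean `interval` flag, so transformations such as Differential above can branch on 'point', 'interval' or 'distribution' (and on steps_ahead) themselves. A rough sketch of the dispatch contract this assumes, with names taken from the hunks above:

```python
# Hedged sketch of the contract between FTS.predict and
# Transformation.inverse after this commit (not the actual pyFTS code).
def inverse(data, param, **kwargs):
    type = kwargs.get('type', 'point')          # set by FTS.predict
    steps_ahead = kwargs.get('steps_ahead', 1)  # set by the caller
    if type == 'point':
        return [d + p for d, p in zip(data, param)]
    elif type == 'interval':
        return [[lo + p, hi + p] for (lo, hi), p in zip(data, param)]
    raise ValueError('distribution handling omitted in this sketch')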
@@ -50,7 +50,10 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
-        tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
+        if kwargs.get('sets', None) is not None:
+            self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
+
+        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
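The same train() pattern recurs in several models below: take fuzzy sets from kwargs when given, otherwise fall back to the model's partitioner. In isolation, with a hypothetical partitioner class for illustration:

```python
# Recurring pattern from the train() hunks: explicit sets win,
# otherwise use the sets produced by the model's partitioner.
def resolve_sets(partitioner, **kwargs):
    if kwargs.get('sets', None) is not None:
        return kwargs.get('sets', None)
    return partitioner.sets

class FakePartitioner:
    sets = {'A1': object(), 'A2': object()}

print(resolve_sets(FakePartitioner()))                 # partitioner sets
print(resolve_sets(FakePartitioner(), sets={'B': 1}))  # explicit sets win
```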
@@ -97,6 +97,8 @@ class HighOrderFTS(fts.FTS):
 
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         self.generate_flrg(data)
@@ -63,8 +63,10 @@ class ImprovedWeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
-        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method="maximum")
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
@@ -69,6 +69,9 @@ class ExponentialyWeightedFTS(fts.FTS):
         self.c = kwargs.get('parameters', default_c)
+        if kwargs.get('sets', None) is not None:
+            self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs, self.c)
@@ -78,7 +81,7 @@ class ExponentialyWeightedFTS(fts.FTS):
 
         ordered_sets = FuzzySet.set_ordered(self.sets)
 
-        data = np.array(data)
+        data = np.array(ndata)
 
         l = len(ndata)
 
@@ -39,7 +39,7 @@ class ConventionalFTS(fts.FTS):
 
     def operation_matrix(self, flrs):
         l = len(self.sets)
-        if self.R is None:
+        if self.R is None or len(self.R) == 0:
            self.R = np.zeros((l, l))
            for k in flrs:
                mm = self.flr_membership_matrix(k)
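The extended guard also rebuilds the operation matrix when self.R exists but is empty, e.g. a zero-length array left over from a previous state, which `is None` alone would skip. Note that `len()` is used rather than truthiness, since the truth value of a multi-element numpy array is ambiguous:

```python
import numpy as np

R = np.zeros((0, 0))   # an existing but empty matrix
print(R is None)       # False -> the old guard would NOT rebuild
print(len(R) == 0)     # True  -> the new guard rebuilds it
# note: 'if R:' would raise ValueError for multi-element arrays, hence len()
```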
@@ -51,6 +51,8 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
@@ -60,8 +60,10 @@ class WeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
-        tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_FLRG(flrs)
@@ -15,33 +15,37 @@ from pyFTS.data import TAIEX
 
 dataset = TAIEX.get_data()
 
-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
 
 from pyFTS.models import pwfts
 
 '''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
 
 model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
-#model.append_transformation(tdiff)
+model.append_transformation(tdiff)
 model.fit(dataset[:800])
-print(model.predict(dataset[800:1000], type='interval'))
 
+print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
+#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
 #for tmp2 in tmp:
 #    print(tmp2)
 '''
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
 
+#'''
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False,
                                 #transformations=[tdiff],
-                                orders=[1, 2, 3],
-                                partitions=np.arange(10, 100, 5),
-                                progress=False, type='distribution',
+                                orders=[1], #[1, 2, 3],
+                                partitions=[20], #np.arange(10, 100, 5),
+                                progress=True, type='point',
+                                #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
+                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
                                 #save=True, file="tmp.db")
 
 
-'''
+#'''
+'''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))