Minor bugfixes on pwfts.models

Petrônio Cândido 2018-04-24 12:57:40 -03:00
parent f3c6eda2ec
commit 33dbeb8965
11 changed files with 102 additions and 55 deletions

View File

@@ -215,6 +215,17 @@ def pinball_mean(tau, targets, forecasts):
         print(ex)

+def brier_score(targets, densities):
+    '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 13. '''
+    ret = []
+    for ct, d in enumerate(densities):
+        v = d.bin_index.find_ge(targets[ct])
+        score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
+        score += (d.distribution[v] - 1) ** 2
+        ret.append(score)
+    return sum(ret) / len(ret)
+
+
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
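
The new brier_score follows Brier (1950): for each observation it sums the squared probability mass assigned to every bin except the observed one, adds the squared shortfall of the observed bin from probability 1, and averages over all observations. A minimal sketch of the same arithmetic, using a plain dict as a hypothetical stand-in for pyFTS's density objects:

# Sketch of the Brier score arithmetic; a plain dict {bin: probability}
# stands in for pyFTS's probabilistic density objects (hypothetical).
def brier_score_sketch(targets, densities):
    scores = []
    for target, density in zip(targets, densities):
        # squared mass on every bin except the observed one...
        score = sum(p ** 2 for b, p in density.items() if b != target)
        # ...plus the squared shortfall of the observed bin from certainty
        score += (density[target] - 1) ** 2
        scores.append(score)
    return sum(scores) / len(scores)

# A uniform forecast over 4 bins scores 3*(0.25**2) + (0.25-1)**2 = 0.75;
# putting all mass on the observed bin would score a perfect 0.
print(brier_score_sketch([2], [{1: 0.25, 2: 0.25, 3: 0.25, 4: 0.25}]))  # 0.75
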
@@ -236,7 +247,6 @@ def heavyside_cdf(bins, targets):
     df = pd.DataFrame(ret, columns=bins)
     return df

-
 def crps(targets, densities):
     '''
     Continuous Ranked Probability Score
@@ -277,6 +287,7 @@ def get_point_statistics(data, model, **kwargs):
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'point'

     indexer = kwargs.get('indexer', None)
@@ -301,7 +312,7 @@ def get_point_statistics(data, model, **kwargs):
         nforecasts = []
         for k in np.arange(model.order, len(ndata) - steps_ahead, steps_ahead_sampler):
             sample = ndata[k - model.order: k]
-            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             nforecasts.append(tmp[-1])

         start = model.order + steps_ahead - 1
@@ -323,6 +334,7 @@ def get_interval_statistics(data, model, **kwargs):
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'interval'

     ret = list()
@@ -339,7 +351,7 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.predict(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])

         start = model.order + steps_ahead - 1
@@ -362,12 +374,13 @@ def get_distribution_statistics(data, model, **kwargs):
     :return: a list with the CRPS and execution time
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'distribution'

     ret = list()

     if steps_ahead == 1:
         _s1 = time.time()
-        forecasts = model.forecast_distribution(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
         ret.append(round(crps(data, forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
@@ -377,7 +390,7 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         for k in np.arange(model.order, len(data) - steps_ahead, skip):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
         _e1 = time.time()
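
All the hunks above make the same API change: the statistics helpers now record the forecasting type in kwargs and call the single model.predict entry point instead of the type-specific forecast_ahead / forecast_distribution methods. A hedged sketch of the resulting calling pattern; rolling_predict is a hypothetical helper, and model/data are assumed to be a fitted pyFTS model and a sequence:

import numpy as np

# Sketch of the unified convention after this commit: the forecasting type
# travels through kwargs and model.predict dispatches internally.
def rolling_predict(model, data, **kwargs):
    kwargs.setdefault('type', 'point')      # point, interval or distribution
    steps_ahead = kwargs.get('steps_ahead', 1)
    forecasts = []
    for k in np.arange(model.order, len(data) - steps_ahead):
        sample = data[k - model.order: k]   # one window of length model.order
        tmp = model.predict(sample, **kwargs)
        forecasts.append(tmp[-1])           # keep only the final horizon
    return forecasts
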

View File

@@ -56,10 +56,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitions and partitioning method will be created a partitioner model. And for each partitioner, order,
     steps ahead and FTS method a forecasting model will be trained.

-    Then all trained models are benchmarked on the test data and the metrics are stored in a dataframe for
-    posterior analysis.
+    Then all trained models are benchmarked on the test data and the metrics are stored on a sqlite3 database
+    (identified by the 'file' parameter) for posterior analysis.

-    The number of experiments is determined by the windowsize and inc.
+    All this process can be distributed on a dispy cluster, by setting the attribute 'distributed' to true and
+    informing the list of dispy nodes in the 'nodes' parameter.
+
+    The number of experiments is determined by the 'windowsize' and 'inc' parameters.

     :param data: test data
     :param windowsize: size of sliding window
@@ -67,35 +70,31 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     :param kwargs: dict, optional arguments

     :keyword
+    benchmark_methods: a list with non-FTS models to benchmark. The default is None.
+    benchmark_methods_parameters: a list with non-FTS models parameters. The default is None.
+    dataset: the dataset name to identify the current set of benchmarks results on database.
+    distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
+    file: file path to save the results. The default is benchmarks.db.
     inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
-    models: a list with prebuilt FTS objects. The default is None.
     methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
+    models: a list with prebuilt FTS objects. The default is None.
+    nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
+    orders: a list with orders of the models (for high order models). The default is [1,2,3].
+    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
     partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
     partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
-    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
-    orders: a list with orders of the models (for high order models). The default is [1,2,3].
-    type: the forecasting type, one of these values: point (default), interval or distribution.
-    steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is 1.
-    start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
-    transformation: data transformation. The default is None.
-    indexer: seasonal indexer. The default is None.
     progress: if true a progress bar will be displayed during the benchmarks. The default is False.
-    distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
-    nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
-    benchmark_methods: a list with non-FTS models to benchmark. The default is None.
-    benchmark_methods_parameters: a list with non-FTS models parameters. The default is None.
-    save: save results. The default is False.
-    file: file path to save the results. The default is None.
-    sintetic: if true, only the average and standard deviation of the results are returned. The default is False.
+    start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
+    steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is 1.
+    tag: a name to identify the current set of benchmarks results on database.
+    type: the forecasting type, one of these values: point (default), interval or distribution.
+    transformations: a list with data transformations to apply. The default is [None].
-
-    :return: DataFrame with the benchmark results
     """
     tag = __pop('tag', None, kwargs)
     dataset = __pop('dataset', None, kwargs)

     distributed = __pop('distributed', False, kwargs)
-    save = __pop('save', False, kwargs)

     transformations = kwargs.get('transformations', [None])
     progress = kwargs.get('progress', None)
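
Taken together, the rewritten docstring and the dropped 'save' option describe the new workflow: results always go to the sqlite3 database named by 'file', keyed by 'dataset' and 'tag'. A minimal call matching the documented keywords (the dataset and window sizes are illustrative, mirroring the test script changed later in this commit):

from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.data import TAIEX

data = TAIEX.get_data()

# Point-forecasting benchmark over sliding windows of 1000 points, moving
# 20% of the window each step; metrics are persisted in benchmarks.db.
bchmk.sliding_window_benchmarks(data, 1000, train=0.8, inc=0.2,
                                orders=[1, 2, 3], partitions=[10, 20],
                                type='point', progress=False,
                                file='benchmarks.db', dataset='TAIEX',
                                tag='example')
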

View File

@@ -71,6 +71,7 @@ class Differential(Transformation):
     def inverse(self, data, param, **kwargs):

         type = kwargs.get("type", "point")
+        steps_ahead = kwargs.get("steps_ahead", 1)

         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -83,6 +84,7 @@ class Differential(Transformation):
         # print(n)
         # print(len(param))

+        if steps_ahead == 1:
             if type == "point":
                 inc = [data[t] + param[t] for t in np.arange(0, n)]
             elif type == "interval":
@@ -91,6 +93,21 @@ class Differential(Transformation):
                 for t in np.arange(0, n):
                     data[t].differential_offset(param[t])
                 inc = data
+        else:
+            if type == "point":
+                inc = [data[0] + param[0]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append(data[t] + inc[t-1])
+            elif type == "interval":
+                inc = [[data[0][0] + param[0], data[0][1] + param[0]]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append([data[t][0] + np.nanmean(inc[t-1]), data[t][1] + np.nanmean(inc[t-1])])
+            elif type == "distribution":
+                data[0].differential_offset(param[0])
+                for t in np.arange(1, steps_ahead):
+                    ex = data[t-1].expected_value()
+                    data[t].differential_offset(ex)
+                inc = data

         if n == 1:
             return inc[0]
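
The new else branch handles the multi-step case: only the first forecasted difference can be anchored on an observed level (param[0]); every later step must be stacked on the level reconstructed at the previous step (or, for distributions, on its expected value). A worked point-forecast example of the accumulation:

# Worked example: inverting first differences over a 3-step horizon.
# The last observed level is 100, so param[0] = 100; the model produced
# the differences +2, -1 and +4 for the next three steps.
diffs = [2, -1, 4]
inc = [diffs[0] + 100]                # step 1 anchored on the observed level: 102
for t in range(1, len(diffs)):
    inc.append(diffs[t] + inc[t-1])   # later steps stack on reconstructed levels
print(inc)                            # [102, 101, 105]
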

View File

@@ -114,9 +114,9 @@ class FTS(object):
             ret = Util.distributed_predict(self, kwargs, nodes, ndata, num_batches)

-        if type != 'distribution' and not self.is_multivariate:
-            interval = True if type == 'interval' else False
-            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=interval)
+        if not self.is_multivariate:
+            kwargs['type'] = type
+            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], **kwargs)

         return ret
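
The predict wrapper no longer special-cases distribution forecasts: it stores the forecasting type back into kwargs and forwards everything to apply_inverse_transformations, which is what lets Differential.inverse (above) branch on 'type' and 'steps_ahead'. A simplified, hypothetical sketch of that flow, not the actual pyFTS implementation (real signatures live in pyFTS.common.fts):

# Hypothetical sketch of the kwargs plumbing after this change.
def predict_sketch(model, data, **kwargs):
    type_ = kwargs.get('type', 'point')
    ret = model.forecast(data, **kwargs)      # type-specific work happens inside
    if not model.is_multivariate:
        kwargs['type'] = type_
        # 'type' and 'steps_ahead' now reach every Transformation.inverse
        ret = model.apply_inverse_transformations(
            ret, params=[data[model.order - 1:]], **kwargs)
    return ret
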

View File

@@ -50,7 +50,10 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
-        tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
+        else:
+            self.sets = self.partitioner.sets
+
+        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
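
This is the first of six identical fixes in this commit (chen, hofts, ismailefendi, sadaei, song and yu all gain the same else branch): when train() receives no explicit 'sets' keyword, the fuzzy sets now default to those of the model's partitioner. In practice a model can therefore be trained straight from its partitioner, a sketch following the pattern in the test script below:

from pyFTS.partitioners import Grid
from pyFTS.models import chen
from pyFTS.data import TAIEX

data = TAIEX.get_data()

# No 'sets' keyword needed: train() falls back to partitioner.sets.
partitioner = Grid.GridPartitioner(data=data[:800], npart=10)
model = chen.ConventionalFTS('', partitioner=partitioner)
model.fit(data[:800])
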

View File

@@ -97,6 +97,8 @@ class HighOrderFTS(fts.FTS):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
         self.generate_flrg(data)

View File

@@ -63,8 +63,10 @@ class ImprovedWeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets

-        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method="maximum")
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)

View File

@@ -69,6 +69,9 @@ class ExponentialyWeightedFTS(fts.FTS):
         self.c = kwargs.get('parameters', default_c)
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
+
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs, self.c)
@@ -78,7 +81,7 @@ class ExponentialyWeightedFTS(fts.FTS):
         ordered_sets = FuzzySet.set_ordered(self.sets)

-        data = np.array(data)
+        data = np.array(ndata)

         l = len(ndata)

View File

@@ -39,7 +39,7 @@ class ConventionalFTS(fts.FTS):
     def operation_matrix(self, flrs):
         l = len(self.sets)
-        if self.R is None:
+        if self.R is None or len(self.R) == 0:
             self.R = np.zeros((l, l))
         for k in flrs:
             mm = self.flr_membership_matrix(k)
@@ -51,6 +51,8 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)

View File

@@ -60,8 +60,10 @@ class WeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets

-        tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_FLRG(flrs)

View File

@@ -15,33 +15,37 @@ from pyFTS.data import TAIEX
 dataset = TAIEX.get_data()

-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
 from pyFTS.models import pwfts

-'''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)

 model = pwfts.ProbabilisticWeightedFTS('', partitioner=partitioner)
-#model.append_transformation(tdiff)
+model.append_transformation(tdiff)
 model.fit(dataset[:800])

-print(model.predict(dataset[800:1000], type='interval'))
+print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
+
+#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
+#for tmp2 in tmp:
+#    print(tmp2)

 '''
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
+#'''
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False,
                                 #transformations=[tdiff],
-                                orders=[1, 2, 3],
-                                partitions=np.arange(10, 100, 5),
-                                progress=False, type='distribution',
+                                orders=[1], #[1, 2, 3],
+                                partitions=[20], #np.arange(10, 100, 5),
+                                progress=True, type='point',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
+                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
                                 #save=True, file="tmp.db")
-'''
+#'''

 '''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))