PEP 256 documentation

This commit is contained in:
Petrônio Cândido 2018-04-14 10:22:02 -03:00
parent 67a9d7e324
commit ecfc9db862
35 changed files with 241 additions and 54 deletions

View File

@ -53,7 +53,6 @@ def rmse_interval(targets, forecasts):
return np.sqrt(np.nanmean((fmean - targets) ** 2))
def mape(targets, forecasts):
"""
Mean Absolute Percentage Error
@ -68,7 +67,6 @@ def mape(targets, forecasts):
return np.mean(np.abs(targets - forecasts) / targets) * 100
def smape(targets, forecasts, type=2):
"""
Symmetric Mean Absolute Percentage Error
@ -89,13 +87,11 @@ def smape(targets, forecasts, type=2):
return sum(np.abs(forecasts - targets)) / sum(forecasts + targets)
def mape_interval(targets, forecasts):
    """
    Mean absolute percentage error for interval forecasts.

    Each forecast interval is reduced to the mean of its values, and the absolute
    deviation of that mean from the corresponding target is expressed as a
    percentage of the interval mean.

    :param targets: sequence (list or numpy array) of target values
    :param forecasts: sequence of intervals, one per target value
    :return: float, the mean percentage error
    """
    # Work on arrays on both sides so that plain Python lists are accepted too;
    # a list of interval means minus a list of targets would raise TypeError.
    fmean = np.array([np.mean(i) for i in forecasts])
    targets = np.asarray(targets)
    # NOTE(review): the denominator is the interval mean, not the target as in mape();
    # kept as-is to preserve existing results -- confirm this is intended.
    return np.mean(np.abs(fmean - targets) / fmean) * 100
def UStatistic(targets, forecasts):
"""
Theil's U Statistic
@ -117,7 +113,6 @@ def UStatistic(targets, forecasts):
return np.sqrt(sum(y) / sum(naive))
def TheilsInequality(targets, forecasts):
"""
Theils Inequality Coefficient
@ -194,7 +189,7 @@ def pinball(tau, target, forecast):
:param tau: quantile value in the range (0,1)
:param target:
:param forecast:
:return: distance of forecast to the tau-quantile of the target
:return: float, distance of forecast to the tau-quantile of the target
"""
if target >= forecast:
return (target - forecast) * tau
@ -208,7 +203,7 @@ def pinball_mean(tau, targets, forecasts):
:param tau: quantile value in the range (0,1)
:param targets: list of target values
:param forecasts: list of prediction intervals
:return:
:return: float, the pinball loss mean for tau quantile
"""
try:
if tau <= 0.5:
@ -220,7 +215,6 @@ def pinball_mean(tau, targets, forecasts):
print(ex)
def pmf_to_cdf(density):
ret = []
for row in density.index:
@ -244,7 +238,12 @@ def heavyside_cdf(bins, targets):
def crps(targets, densities):
"""Continuous Ranked Probability Score"""
'''
Continuous Ranked Probability Score
:param targets: a list with the target values
:param densities: a list with pyFTS.probabilistic.ProbabilityDistribution objects
:return: float
'''
_crps = float(0.0)
if isinstance(densities, pd.DataFrame):
l = len(densities.columns)
@ -269,7 +268,13 @@ def crps(targets, densities):
def get_point_statistics(data, model, **kwargs):
"""Condensate all measures for point forecasters"""
'''
Condensate all measures for point forecasters
:param data: test data
:param model: FTS model with point forecasting capability
:param kwargs:
:return: a list with the RMSE, SMAPE and U Statistic
'''
steps_ahead = kwargs.get('steps_ahead',1)
@ -307,7 +312,14 @@ def get_point_statistics(data, model, **kwargs):
def get_interval_statistics(data, model, **kwargs):
"""Condensate all measures for point_to_interval forecasters"""
'''
Condensate all measures for point interval forecasters
:param data: test data
:param model: FTS model with interval forecasting capability
:param kwargs:
:return: a list with the sharpness, resolution, coverage, .05 pinball mean,
.25 pinball mean, .75 pinball mean and .95 pinball mean.
'''
steps_ahead = kwargs.get('steps_ahead', 1)
@ -341,6 +353,13 @@ def get_interval_statistics(data, model, **kwargs):
def get_distribution_statistics(data, model, **kwargs):
'''
Get CRPS statistic and time for a forecasting model
:param data: test data
:param model: FTS model with probabilistic forecasting capability
:param kwargs:
:return: a list with the CRPS and execution time
'''
steps_ahead = kwargs.get('steps_ahead', 1)
ret = list()

View File

@ -31,7 +31,7 @@ def chi_squared(q, h):
def compare_residuals(data, models):
"""
Compare residual's statistics of several models
:param data:
:param data: test data
:param models:
:return:
"""

View File

@ -1,5 +1,5 @@
"""
Benchmark utility functions
Facilities for pyFTS Benchmark module
"""
import matplotlib as plt

View File

@ -51,22 +51,31 @@ def __pop(key, default, kwargs):
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
"""
Sliding window benchmarks for FTS point forecasters
:param data:
:param data: test data
:param windowsize: size of sliding window
:param train: fraction (e.g. 0.8) of the sliding window data used to train the models
:param models: FTS point forecasters
:param partitioners: Universe of Discourse partitioner
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param transformation: data transformation
:param indexer: seasonal indexer
:param dump:
:param benchmark_methods: Non FTS models to benchmark
:param benchmark_methods_parameters: Non FTS models parameters
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
:param kwargs: dict, optional arguments
:keyword
models: FTS point forecasters
partitioners: Universe of Discourse partitioner
partitions: the max number of partitions on the Universe of Discourse
max_order: the max order of the models (for high order models)
type: the forecasting type, one of these values: point(default), interval or distribution.
steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
start: in the multi step forecasting, the index of the data where to start forecasting
transformation: data transformation
indexer: seasonal indexer
progress: If true a progress bar will be displayed during the benchmarks
distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
nodes: a list with the dispy cluster nodes addresses
benchmark_methods: Non FTS models to benchmark
benchmark_methods_parameters: Non FTS models parameters
save: save results
file: file path to save the results
sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the benchmark results
"""
distributed = __pop('distributed', False, kwargs)
save = __pop('save', False, kwargs)
@ -226,6 +235,12 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
if job.status == dispy.DispyJob.Finished and job is not None:
tmp = job()
jobs2.append(tmp)
print(tmp)
else:
print("status",job.status)
print("result",job.result)
print("stdout",job.stdout)
print("stderr",job.exception)
jobs = deepcopy(jobs2)
@ -234,6 +249,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
file = kwargs.get('file', None)
sintetic = kwargs.get('sintetic', False)
print(jobs)
return synthesis_method(jobs, experiments, save, file, sintetic)

View File

@ -1,12 +1,17 @@
import numpy as np
from pyFTS.common import FuzzySet
"""
This module implements functions for Fuzzy Logical Relationship generation
"""
import numpy as np
from pyFTS.common import FuzzySet
class FLR(object):
"""Fuzzy Logical Relationship"""
"""
Fuzzy Logical Relationship
Represents a temporal transition of the fuzzy set LHS on time t for the fuzzy set RHS on time t+1.
"""
def __init__(self, LHS, RHS):
"""
Creates a Fuzzy Logical Relationship

View File

@ -1,3 +1,7 @@
"""
Common data transformation used on pre and post processing of the FTS
"""
import numpy as np
import math
from pyFTS import *
@ -5,7 +9,7 @@ from pyFTS import *
class Transformation(object):
"""
Data transformation used to pre and post processing of the FTS
Data transformation used on pre and post processing of the FTS
"""
def __init__(self, **kwargs):
@ -13,9 +17,23 @@ class Transformation(object):
self.minimal_length = 1
def apply(self, data, param, **kwargs):
    """
    Apply the transformation on input data.

    The base class body is only ``pass``, so calling it directly returns None;
    presumably each concrete transformation overrides this method -- TODO confirm.

    :param data: input data
    :param param: transformation parameter (meaning not defined in the base class)
    :param kwargs: optional keyword arguments (unused in the base class)
    :return: numpy array with transformed data
    """
    # No-op placeholder in the base class.
    pass
def inverse(self,data, param, **kwargs):
    """
    Apply the inverse transformation on transformed data.

    The base class body is only ``pass``, so calling it directly returns None;
    presumably each concrete transformation overrides this method -- TODO confirm.

    :param data: transformed data
    :param param: transformation parameter (meaning not defined in the base class)
    :param kwargs: optional keyword arguments (unused in the base class)
    :return: numpy array with inverse transformed data
    """
    # No-op placeholder in the base class.
    pass
def __str__(self):
@ -73,6 +91,11 @@ class Differential(Transformation):
class Scale(Transformation):
"""
Scale data inside an interval [min, max]
"""
def __init__(self, min=0, max=1):
super(Scale, self).__init__()
self.data_max = None
@ -130,6 +153,9 @@ class AdaptiveExpectation(Transformation):
class BoxCox(Transformation):
"""
Box-Cox power transformation
"""
def __init__(self, plambda):
super(BoxCox, self).__init__()
self.plambda = plambda

View File

@ -1,3 +1,7 @@
"""
Common facilities for pyFTS
"""
import time
import matplotlib.pyplot as plt
import dill

View File

@ -2,6 +2,12 @@ import numpy as np
class FLRG(object):
"""
Fuzzy Logical Relationship Group
Group a set of FLR's with the same LHS. Represents the temporal patterns for time t+1 (the RHS fuzzy sets)
when the LHS pattern is identified on time t.
"""
def __init__(self, order, **kwargs):
self.LHS = None

View File

@ -120,7 +120,6 @@ class FTS(object):
return ret
def forecast(self, data, **kwargs):
"""
Point forecast one step ahead

View File

@ -1,9 +1,17 @@
"""
Tree data structure
"""
from pyFTS import *
from functools import reduce
import numpy as np
class FLRGTreeNode:
"""
Node of the FLRG tree data structure
"""
def __init__(self, value):
self.isRoot = False
self.children = []

View File

@ -4,11 +4,19 @@ import numpy as np
def get_data():
    """
    Return the "Passengers" column of the dataset as a univariate time series.

    :return: numpy array
    """
    return np.array(get_dataframe()["Passengers"])
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('AirPassengers.csv',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/AirPassengers.csv',
sep=",")

View File

@ -4,6 +4,10 @@ import numpy as np
def get_data():
    """
    Return the "Enrollments" column of the dataset as a univariate time series.

    :return: numpy array
    """
    return np.array(get_dataframe()["Enrollments"])

View File

@ -18,6 +18,10 @@ import pandas as pd
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('INMET.csv.bz2',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/INMET.csv.bz2',
sep=";", compression='bz2')

View File

@ -4,12 +4,21 @@ import numpy as np
def get_data(field):
    """
    Return one column of the dataset as a univariate time series.

    :param field: the dataset field name to extract
    :return: numpy array
    """
    return np.array(get_dataframe()[field])
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('NASDAQ.csv.bz2',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/NASDAQ.csv.bz2',
sep=";", compression='bz2')

View File

@ -4,12 +4,21 @@ import numpy as np
def get_data(field):
    """
    Return one column of the dataset as a univariate time series.

    :param field: the dataset field name to extract
    :return: numpy array
    """
    return np.array(get_dataframe()[field])
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('SONDA_BSB.csv.bz2',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/SONDA_BSB.csv.bz2',
sep=";", compression='bz2')

View File

@ -4,6 +4,10 @@ import numpy as np
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('SP500.csv.bz2',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/SP500.csv.bz2',
sep=",", compression='bz2')

View File

@ -4,12 +4,20 @@ import numpy as np
def get_data():
    """
    Return the "avg" column of the dataset as a univariate time series.

    :return: numpy array
    """
    return np.array(get_dataframe()["avg"])
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('TAIEX.csv.bz2',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/TAIEX.csv.bz2',
sep=",", compression='bz2')

View File

@ -0,0 +1,3 @@
"""
Module for pyFTS standard datasets facilities
"""

View File

@ -1,3 +1,7 @@
"""
Facilities to generate synthetic stochastic processes
"""
import numpy as np

View File

@ -1,3 +1,4 @@
import pandas as pd
import numpy as np
import os
@ -7,7 +8,16 @@ from urllib import request
def get_dataframe(filename, url, sep=";", compression='infer'):
#filename = pkg_resources.resource_filename('pyFTS', path)
"""
This method checks whether filename already exists; if so, it reads the file and returns its data.
If the file does not exist yet, it will be downloaded and decompressed.
:param filename: dataset local filename
:param url: dataset internet URL
:param sep: CSV field separator
:param compression: type of compression
:return: Pandas dataset
"""
tmp_file = Path(filename)
if tmp_file.is_file():

View File

@ -3,11 +3,19 @@ import pandas as pd
import numpy as np
def get_data():
    """
    Return the "SUNACTIVITY" column of the dataset as a univariate time series.

    :return: numpy array
    """
    return np.array(get_dataframe()["SUNACTIVITY"])
def get_dataframe():
"""
Get the complete multivariate time series data.
:return: Pandas DataFrame
"""
dat = common.get_dataframe('sunspots.csv',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/sunspots.csv',
sep=",")

View File

@ -0,0 +1,3 @@
"""
Fuzzy Time Series methods
"""

View File

@ -0,0 +1,3 @@
"""
Meta FTS that aggregates other FTS methods
"""

View File

@ -1,6 +1,13 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
"""
High Order Interval Fuzzy Time Series
SILVA, Petrônio CL; SADAEI, Hossein Javedani; GUIMARÃES, Frederico Gadelha. Interval Forecasting with Fuzzy Time Series.
In: Computational Intelligence (SSCI), 2016 IEEE Symposium Series on. IEEE, 2016. p. 1-8.
"""
import numpy as np
from pyFTS.common import FuzzySet, FLR, fts, tree
from pyFTS.models import hofts
@ -9,9 +16,6 @@ from pyFTS.models import hofts
class IntervalFTS(hofts.HighOrderFTS):
"""
High Order Interval Fuzzy Time Series
SILVA, Petrônio CL; SADAEI, Hossein Javedani; GUIMARÃES, Frederico Gadelha. Interval Forecasting with Fuzzy Time Series.
In: Computational Intelligence (SSCI), 2016 IEEE Symposium Series on. IEEE, 2016. p. 1-8.
"""
def __init__(self, name, **kwargs):
super(IntervalFTS, self).__init__(name="IFTS " + name, **kwargs)

View File

@ -0,0 +1,3 @@
"""
Multivariate Fuzzy Time Series methods
"""

View File

@ -0,0 +1,3 @@
"""
Fuzzy time series with nonstationary fuzzy sets, for heteroskedastic data
"""

View File

@ -0,0 +1,3 @@
"""
Jupyter notebooks with pyFTS usage examples
"""

View File

@ -1,3 +1,8 @@
"""
C. H. Cheng, R. J. Chang, and C. A. Yeh, Entropy-based and trapezoidal fuzzification-based fuzzy time series approach for forecasting IT project cost,
Technol. Forecast. Social Change, vol. 73, no. 5, pp. 524–542, Jun. 2006.
"""
import numpy as np
import math
import random as rnd
@ -5,9 +10,6 @@ import functools, operator
from pyFTS.common import FuzzySet, Membership
from pyFTS.partitioners import partitioner
# C. H. Cheng, R. J. Chang, and C. A. Yeh, “Entropy-based and trapezoidal fuzzification-based fuzzy time series approach for forecasting IT project cost,”
# Technol. Forecast. Social Change, vol. 73, no. 5, pp. 524542, Jun. 2006.
def splitBelow(data,threshold):
return [k for k in data if k <= threshold]

View File

@ -1,3 +1,7 @@
"""
S. T. Li, Y. C. Cheng, and S. Y. Lin, A FCM-based deterministic forecasting model for fuzzy time series,
Comput. Math. Appl., vol. 56, no. 12, pp. 3052–3063, Dec. 2008. DOI: 10.1016/j.camwa.2008.07.033.
"""
import numpy as np
import math
import random as rnd
@ -6,11 +10,6 @@ from pyFTS.common import FuzzySet, Membership
from pyFTS.partitioners import partitioner
# import CMeans
# S. T. Li, Y. C. Cheng, and S. Y. Lin, “A FCM-based deterministic forecasting model for fuzzy time series,”
# Comput. Math. Appl., vol. 56, no. 12, pp. 30523063, Dec. 2008. DOI: 10.1016/j.camwa.2008.07.033.
def fuzzy_distance(x, y):
if isinstance(x, list):
tmp = functools.reduce(operator.add, [(x[k] - y[k]) ** 2 for k in range(0, len(x))])

View File

@ -1,3 +1,5 @@
"""Even Length Grid Partitioner"""
import numpy as np
import math
import random as rnd

View File

@ -1,15 +1,16 @@
"""
K. H. Huarng, Effective lengths of intervals to improve forecasting in fuzzy time series,
Fuzzy Sets Syst., vol. 123, no. 3, pp. 387–394, Nov. 2001.
"""
import numpy as np
import math
import random as rnd
import functools, operator
from pyFTS.common import FuzzySet, Membership, Transformations
# K. H. Huarng, “Effective lengths of intervals to improve forecasting in fuzzy time series,”
# Fuzzy Sets Syst., vol. 123, no. 3, pp. 387394, Nov. 2001.
from pyFTS.partitioners import partitioner
class HuarngPartitioner(partitioner.Partitioner):
"""Huarng Empirical Partitioner"""
def __init__(self, **kwargs):

View File

@ -1,3 +1,7 @@
"""
Facility methods for pyFTS partitioners module
"""
import numpy as np
import pandas as pd
import matplotlib as plt

View File

@ -0,0 +1,3 @@
"""
Probability Distribution objects
"""

View File

@ -1,10 +1,12 @@
from pyFTS.common import Transformations
import numpy as np
# -*- coding: utf8 -*-
"""
Kernel Density Estimation
"""
from pyFTS.common import Transformations
import numpy as np
class KernelSmoothing(object):
"""Kernel Density Estimation"""

View File

@ -20,10 +20,10 @@ from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import pwfts
#'''
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False, orders=[1], partitions=[10], #np.arange(10,100,2),
bchmk.sliding_window_benchmarks(dataset[:2000], 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False, orders=[1,2,3], partitions=[30,50,70], #np.arange(10,100,2),
progress=False, type='distribution', steps_ahead=[1,4,7,10],
#distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'],
distributed=True, nodes=['192.168.0.102','192.168.0.106','192.168.0.110'],
save=True, file="pwfts_taiex_distribution.csv")
#'''