diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 3138232..7378702 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/modules.doctree b/docs/build/doctrees/modules.doctree index 3c1e189..42d411d 100644 Binary files a/docs/build/doctrees/modules.doctree and b/docs/build/doctrees/modules.doctree differ diff --git a/docs/build/doctrees/pyFTS.common.doctree b/docs/build/doctrees/pyFTS.common.doctree index deb5d40..dcefe67 100644 Binary files a/docs/build/doctrees/pyFTS.common.doctree and b/docs/build/doctrees/pyFTS.common.doctree differ diff --git a/docs/build/doctrees/pyFTS.data.doctree b/docs/build/doctrees/pyFTS.data.doctree index 1c3e3b1..3b0bcc4 100644 Binary files a/docs/build/doctrees/pyFTS.data.doctree and b/docs/build/doctrees/pyFTS.data.doctree differ diff --git a/docs/build/doctrees/pyFTS.distributed.doctree b/docs/build/doctrees/pyFTS.distributed.doctree new file mode 100644 index 0000000..5f48926 Binary files /dev/null and b/docs/build/doctrees/pyFTS.distributed.doctree differ diff --git a/docs/build/doctrees/pyFTS.doctree b/docs/build/doctrees/pyFTS.doctree index 532ad27..6602bdc 100644 Binary files a/docs/build/doctrees/pyFTS.doctree and b/docs/build/doctrees/pyFTS.doctree differ diff --git a/docs/build/doctrees/pyFTS.hyperparam.doctree b/docs/build/doctrees/pyFTS.hyperparam.doctree index 2878a1b..8a7a957 100644 Binary files a/docs/build/doctrees/pyFTS.hyperparam.doctree and b/docs/build/doctrees/pyFTS.hyperparam.doctree differ diff --git a/docs/build/doctrees/pyFTS.models.incremental.doctree b/docs/build/doctrees/pyFTS.models.incremental.doctree index 7c32ee0..124e213 100644 Binary files a/docs/build/doctrees/pyFTS.models.incremental.doctree and b/docs/build/doctrees/pyFTS.models.incremental.doctree differ diff --git a/docs/build/doctrees/pyFTS.models.multivariate.doctree b/docs/build/doctrees/pyFTS.models.multivariate.doctree index 0668e00..366c79c 100644 Binary files a/docs/build/doctrees/pyFTS.models.multivariate.doctree and b/docs/build/doctrees/pyFTS.models.multivariate.doctree differ diff --git a/docs/build/doctrees/pyFTS.models.seasonal.doctree b/docs/build/doctrees/pyFTS.models.seasonal.doctree index 5dc3cc7..f39997e 100644 Binary files a/docs/build/doctrees/pyFTS.models.seasonal.doctree and b/docs/build/doctrees/pyFTS.models.seasonal.doctree differ diff --git a/docs/build/html/_modules/index.html b/docs/build/html/_modules/index.html index 7daa466..de57c13 100644 --- a/docs/build/html/_modules/index.html +++ b/docs/build/html/_modules/index.html @@ -111,6 +111,7 @@
  • pyFTS.data.mackey_glass
  • pyFTS.data.rossler
  • pyFTS.data.sunspots
  • +
  • pyFTS.distributed.spark
  • pyFTS.hyperparam.GridSearch
  • pyFTS.hyperparam.Util
  • pyFTS.models.chen
  • @@ -120,7 +121,8 @@
  • pyFTS.models.hofts
  • pyFTS.models.hwang
  • pyFTS.models.ifts
  • -
  • pyFTS.models.incremental.Retrainer
  • +
  • pyFTS.models.incremental.IncrementalEnsemble
  • +
  • pyFTS.models.incremental.TimeVariant
  • pyFTS.models.ismailefendi
  • pyFTS.models.multivariate.FLR
  • pyFTS.models.multivariate.cmvfts
  • diff --git a/docs/build/html/_modules/pyFTS/benchmarks/Measures.html b/docs/build/html/_modules/pyFTS/benchmarks/Measures.html index 72c6486..79841b7 100644 --- a/docs/build/html/_modules/pyFTS/benchmarks/Measures.html +++ b/docs/build/html/_modules/pyFTS/benchmarks/Measures.html @@ -93,7 +93,7 @@ :param k: :return: """ - mu = np.mean(data) + mu = np.nanmean(data) sigma = np.var(data) n = len(data) s = 0 @@ -142,7 +142,7 @@ targets = np.array(targets) if isinstance(forecasts, list): forecasts = np.array(forecasts) - return np.mean(np.abs(np.divide((targets - forecasts), targets))) * 100 + return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
    [docs]def smape(targets, forecasts, type=2): @@ -159,11 +159,11 @@ if isinstance(forecasts, list): forecasts = np.array(forecasts) if type == 1: - return np.mean(np.abs(forecasts - targets) / ((forecasts + targets) / 2)) + return np.nanmean(np.abs(forecasts - targets) / ((forecasts + targets) / 2)) elif type == 2: - return np.mean(np.abs(forecasts - targets) / (abs(forecasts) + abs(targets))) * 100 + return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100 else: - return sum(np.abs(forecasts - targets)) / sum(forecasts + targets)
    + return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
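A quick sanity check of the two measures above (a minimal sketch; the toy arrays are illustrative, not from the library):

from pyFTS.benchmarks import Measures

targets   = [10., 20., 30.]
forecasts = [11., 18., 33.]

# MAPE: mean(|targets - forecasts| / targets) * 100
Measures.mape(targets, forecasts)    # 10.0

# sMAPE (type=2): mean(|f - t| / (|f| + |t|)) * 100
Measures.smape(targets, forecasts)   # approximately 4.93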
    [docs]def mape_interval(targets, forecasts): @@ -188,9 +188,9 @@ naive = [] y = [] for k in np.arange(0, l - 1): - y.append((forecasts[k] - targets[k]) ** 2) - naive.append((targets[k + 1] - targets[k]) ** 2) - return np.sqrt(sum(y) / sum(naive))
    + y.append(np.subtract(forecasts[k], targets[k]) ** 2) + naive.append(np.subtract(targets[k + 1], targets[k]) ** 2) + return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
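The U statistic above divides the forecast's squared errors by those of the naive random-walk forecast (which just repeats the last observed value), so U < 1 means the model beats the naive baseline, U = 1 means it is equivalent, and U > 1 means it is worse. A usage sketch with illustrative data:

from pyFTS.benchmarks import Measures

targets   = [1., 2., 4., 8., 16.]
forecasts = [1.1, 2.2, 3.9, 8.3, 15.5]   # hypothetical one-step-ahead forecasts

Measures.UStatistic(targets, forecasts)  # < 1 here: better than the naive random walk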
    [docs]def TheilsInequality(targets, forecasts): @@ -262,7 +262,7 @@ preds.append(1) else: preds.append(0) - return np.mean(preds)
    + return np.nanmean(preds)
    [docs]def pinball(tau, target, forecast): @@ -275,9 +275,9 @@ :return: float, distance of forecast to the tau-quantile of the target """ if target >= forecast: - return (target - forecast) * tau + return np.subtract(target, forecast) * tau else: - return (forecast - target) * (1 - tau)
    + return np.subtract(forecast, target) * (1 - tau)
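For intuition on the asymmetry of the pinball loss defined above, a small worked sketch (values are illustrative): at tau = 0.9 an under-forecast is penalized nine times more than an over-forecast of the same size, which pushes the optimal forecast towards the 90% quantile of the target.

from pyFTS.benchmarks import Measures

# under-forecast: (target - forecast) * tau
Measures.pinball(0.9, 10, 8)    # (10 - 8) * 0.9 = 1.8

# over-forecast: (forecast - target) * (1 - tau)
Measures.pinball(0.9, 10, 12)   # (12 - 10) * 0.1 = 0.2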
    [docs]def pinball_mean(tau, targets, forecasts): diff --git a/docs/build/html/_modules/pyFTS/common/Composite.html b/docs/build/html/_modules/pyFTS/common/Composite.html index 8ac600b..4d5b74c 100644 --- a/docs/build/html/_modules/pyFTS/common/Composite.html +++ b/docs/build/html/_modules/pyFTS/common/Composite.html @@ -148,7 +148,13 @@ self.upper = set.upper if self.centroid is None or self.centroid < set.centroid: - self.centroid = set.centroid
+ self.centroid = set.centroid + + + def __str__(self): + tmp = str([str(k) for k in self.sets]) + return "{}: {}".format(self.name, tmp) + diff --git a/docs/build/html/_modules/pyFTS/common/Util.html b/docs/build/html/_modules/pyFTS/common/Util.html index 97a5965..c506c4d 100644 --- a/docs/build/html/_modules/pyFTS/common/Util.html +++ b/docs/build/html/_modules/pyFTS/common/Util.html @@ -251,8 +251,11 @@ :param obj: object in memory :param file: file name to store the object """ - with open(file, 'wb') as _file: - dill.dump(obj, _file) + try: + with open(file, 'wb') as _file: + dill.dump(obj, _file) + except Exception as ex: + print("File {} could not be saved due to exception {}".format(file, ex))
[docs]def load_obj(file): diff --git a/docs/build/html/_modules/pyFTS/common/fts.html b/docs/build/html/_modules/pyFTS/common/fts.html index 3650ef7..ae79ca3 100644 --- a/docs/build/html/_modules/pyFTS/common/fts.html +++ b/docs/build/html/_modules/pyFTS/common/fts.html @@ -98,6 +98,8 @@ """A string with the model name""" self.detail = kwargs.get('name',"") """A string with the model detailed information""" + self.is_wrapper = False + """Indicates that this model is a wrapper for other method(s)""" self.is_high_order = False """A boolean value indicating if the model supports orders greater than 1, default: False""" self.min_order = 1 @@ -174,8 +176,9 @@ :keyword nodes: a list with the dispy cluster nodes addresses :keyword explain: try to explain, step by step, the one-step-ahead point forecasting result given the input data. :keyword generators: for multivariate methods on multi step ahead forecasting, generators is a dict where the keys - are the variables names (except the target_variable) and the values are lambda functions that - accept one value (the actual value of the variable) and return the next value. + are the dataframe column names (except the target_variable) and the values are lambda functions that + accept one value (the actual value of the variable) and return the next value, or trained FTS + models that accept the actual values and forecast new ones. :return: a numpy array with the forecasted data """ @@ -296,13 +299,11 @@ :return: a list with the forecasted values """ - - if isinstance(data, np.ndarray): data = data.tolist() ret = [] - for k in np.arange(0,steps): + for k in np.arange(0, steps): tmp = self.forecast(data[-self.max_lag:], **kwargs) if isinstance(tmp,(list, np.ndarray)): @@ -388,11 +389,12 @@ if 'partitioner' in kwargs: self.partitioner = kwargs.pop('partitioner') - if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only and not self.is_multivariate: - if self.partitioner is not None: - self.sets = self.partitioner.sets - else: - raise Exception("Fuzzy sets were not provided for the model. Use 'sets' parameter or 'partitioner'. ") + if not self.is_wrapper: + if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only and not self.is_multivariate: + if self.partitioner is not None: + self.sets = self.partitioner.sets + else: + raise Exception("Fuzzy sets were not provided for the model. Use 'sets' parameter or 'partitioner'. ") if 'order' in kwargs: self.order = kwargs.pop('order') @@ -600,7 +602,7 @@ for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)): tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r])) else: - for r in self.model.flrgs: + for r in self.flrgs: tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r])) return tmp diff --git a/docs/build/html/_modules/pyFTS/data/artificial.html b/docs/build/html/_modules/pyFTS/data/artificial.html index de1b772..9f8755f 100644 --- a/docs/build/html/_modules/pyFTS/data/artificial.html +++ b/docs/build/html/_modules/pyFTS/data/artificial.html @@ -79,6 +79,148 @@ import numpy as np +
    [docs]class SignalEmulator(object): + """ + Emulate a complex signal built from several additive and non-additive components + """ + + def __init__(self, **kwargs): + super(SignalEmulator, self).__init__() + + self.components = [] + """Components of the signal""" + +
[docs] def stationary_gaussian(self, mu, sigma, **kwargs): + """ + Creates a continuous Gaussian signal with mean mu and variance sigma. + + :param mu: mean + :param sigma: variance + :keyword additive: If False it cancels the previous signal and starts this one; if True + this signal is added to the previous one + :keyword start: lag index to start this signal, the default value is 0 + :keyword it: Number of iterations, the default value is 1 + :keyword length: Number of samples generated on each iteration, the default value is 100 + :keyword vmin: Lower bound value of generated data, the default value is None + :keyword vmax: Upper bound value of generated data, the default value is None + :return: the current SignalEmulator instance, for method chaining + """ + parameters = {'mu': mu, 'sigma': sigma} + self.components.append({'dist': 'gaussian', 'type': 'constant', + 'parameters': parameters, 'args': kwargs}) + return self
    + +
    [docs] def incremental_gaussian(self, mu, sigma, **kwargs): + """ + Creates an additive gaussian interference on a previous signal + + :param mu: increment on mean + :param sigma: increment on variance + :keyword start: lag index to start this signal, the default value is 0 + :keyword it: Number of iterations, the default value is 1 + :keyword length: Number of samples generated on each iteration, the default value is 100 + :keyword vmin: Lower bound value of generated data, the default value is None + :keyword vmax: Upper bound value of generated data, the default value is None + :return: the current SignalEmulator instance, for method chaining + """ + parameters = {'mu': mu, 'sigma': sigma} + self.components.append({'dist': 'gaussian', 'type': 'incremental', + 'parameters': parameters, 'args': kwargs}) + return self
    + +
[docs] def periodic_gaussian(self, type, period, mu_min, sigma_min, mu_max, sigma_max, **kwargs): + """ + Creates an additive periodic gaussian interference on a previous signal + + :param type: 'linear' or 'sinoidal' + :param period: the period of recurrence + :param mu_min: minimum mean of each period + :param sigma_min: minimum variance of each period + :param mu_max: maximum mean of each period + :param sigma_max: maximum variance of each period + :keyword start: lag index to start this signal, the default value is 0 + :keyword it: Number of iterations, the default value is 1 + :keyword length: Number of samples generated on each iteration, the default value is 100 + :keyword vmin: Lower bound value of generated data, the default value is None + :keyword vmax: Upper bound value of generated data, the default value is None + :return: the current SignalEmulator instance, for method chaining + """ + parameters = {'type':type, 'period':period, + 'mu_min': mu_min, 'sigma_min': sigma_min, 'mu_max': mu_max, 'sigma_max': sigma_max} + self.components.append({'dist': 'gaussian', 'type': 'periodic', + 'parameters': parameters, 'args': kwargs}) + return self
    + +
[docs] def blip(self, **kwargs): + """ + Creates an outlier greater than the maximum or lower than the minimum previous values of the signal, + and inserts it at a random location of the signal. + + :return: the current SignalEmulator instance, for method chaining + """ + parameters = {} + self.components.append({'dist': 'blip', 'type': 'blip', + 'parameters': parameters, 'args':kwargs}) + return self
    + +
    [docs] def run(self): + """ + Render the signal + + :return: a list of float values + """ + signal = [] + last_it = 10 + last_num = 10 + for ct, component in enumerate(self.components): + parameters = component['parameters'] + kwargs = component['args'] + additive = kwargs.get('additive', True) + start = kwargs.get('start', 0) + it = kwargs.get('it', last_it) + num = kwargs.get('length', last_num) + vmin = kwargs.get('vmin',None) + vmax = kwargs.get('vmax', None) + if component['type'] == 'constant': + tmp = generate_gaussian_linear(parameters['mu'], parameters['sigma'], 0, 0, + it=it, num=num, vmin=vmin, vmax=vmax) + elif component['type'] == 'incremental': + tmp = generate_gaussian_linear(0, 0, parameters['mu'], parameters['sigma'], + it=num, num=1, vmin=vmin, vmax=vmax) + elif component['type'] == 'periodic': + period = parameters['period'] + mu_min, sigma_min = parameters['mu_min'],parameters['sigma_min'] + mu_max, sigma_max = parameters['mu_max'],parameters['sigma_max'] + + if parameters['type'] == 'sinoidal': + tmp = generate_sinoidal_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max, + it=num, num=1, vmin=vmin, vmax=vmax) + else: + tmp = generate_linear_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max, + it=num, num=1, vmin=vmin, vmax=vmax) + elif component['type'] == 'blip': + _mx = np.nanmax(signal) + _mn = np.nanmin(signal) + + _mx += 2*_mx if _mx > 0 else -2*_mx + _mn += -2*_mn if _mn > 0 else 2*_mn + + if vmax is not None: + _mx = min(_mx, vmax) if vmax > 0 else max(_mx, vmax) + if vmin is not None: + _mn = max(_mn, vmin) if vmin > 0 else min(_mn, vmin) + + start = np.random.randint(0, len(signal)) + tmp = [_mx] if np.random.rand() >= .5 else [-_mn] + + last_num = num + last_it = it + + signal = _append(additive, start, signal, tmp) + + return signal
    + + + +
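A minimal usage sketch of the chaining API above (all parameter values are illustrative): a stationary baseline, followed by an additive incremental drift and a random outlier.

from pyFTS.data import artificial

signal = artificial.SignalEmulator() \
    .stationary_gaussian(10, 1.5, length=100, it=10) \
    .incremental_gaussian(0.05, 0, start=500, length=500) \
    .blip() \
    .run()    # a list of float values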
    [docs]def generate_gaussian_linear(mu_ini, sigma_ini, mu_inc, sigma_inc, it=100, num=10, vmin=None, vmax=None): """ Generate data sampled from Gaussian distribution, with constant or linear changing parameters @@ -108,6 +250,88 @@ return ret
    +
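A short usage sketch (illustrative values): zero increments give a stationary series, while nonzero increments make the mean and variance drift linearly at each iteration.

from pyFTS.data import artificial

stationary = artificial.generate_gaussian_linear(5, 1, 0, 0, it=10, num=50)   # 500 samples, fixed mu=5
trending   = artificial.generate_gaussian_linear(5, 1, 1, 0, it=10, num=50)   # mu grows by 1 per iteration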
[docs]def generate_linear_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max, it=100, num=10, vmin=None, vmax=None): + """ + Generates a periodic linear variation on mean and variance + + :param period: the period of recurrence + :param mu_min: initial (and minimum) mean of each period + :param sigma_min: initial (and minimum) variance of each period + :param mu_max: final (and maximum) mean of each period + :param sigma_max: final (and maximum) variance of each period + :param it: Number of iterations + :param num: Number of samples generated on each iteration + :param vmin: Lower bound value of generated data + :param vmax: Upper bound value of generated data + :return: A list of it*num float values + """ + + if period > it: + raise Exception("The 'period' parameter must be less than the 'it' parameter") + + mu_inc = (mu_max - mu_min)/period + sigma_inc = (sigma_max - sigma_min) / period + mu = mu_min + sigma = sigma_min + ret = [] + signal = True + + for k in np.arange(0, it): + tmp = np.random.normal(mu, sigma, num) + if vmin is not None: + tmp = np.maximum(np.full(num, vmin), tmp) + if vmax is not None: + tmp = np.minimum(np.full(num, vmax), tmp) + ret.extend(tmp) + + if k % period == 0: + signal = not signal + + mu += (mu_inc if signal else -mu_inc) + sigma += (sigma_inc if signal else -sigma_inc) + + sigma = max(sigma, 0.005) + + return ret
    + + +
[docs]def generate_sinoidal_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max, it=100, num=10, vmin=None, vmax=None): + """ + Generates a periodic sinusoidal variation on mean and variance + + :param period: the period of recurrence + :param mu_min: initial (and minimum) mean of each period + :param sigma_min: initial (and minimum) variance of each period + :param mu_max: final (and maximum) mean of each period + :param sigma_max: final (and maximum) variance of each period + :param it: Number of iterations + :param num: Number of samples generated on each iteration + :param vmin: Lower bound value of generated data + :param vmax: Upper bound value of generated data + :return: A list of it*num float values + """ + mu_range = mu_max - mu_min + sigma_range = sigma_max - sigma_min + mu = mu_min + sigma = sigma_min + ret = [] + + for k in np.arange(0, it): + tmp = np.random.normal(mu, sigma, num) + if vmin is not None: + tmp = np.maximum(np.full(num, vmin), tmp) + if vmax is not None: + tmp = np.minimum(np.full(num, vmax), tmp) + ret.extend(tmp) + + mu += mu_range * np.sin(period * k) + sigma += sigma_range * np.sin(period * k) + + sigma = max(sigma, 0.005) + + return ret
    + +
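A sketch contrasting the two periodic generators above (values are illustrative). Note that in the linear variant the period is counted in iterations, while in the sinusoidal variant the period argument acts as the frequency of the sin(period * k) term that drives the drift.

from pyFTS.data import artificial

linear   = artificial.generate_linear_periodic_gaussian(10, 5, 1, 20, 2, it=100, num=10)
sinoidal = artificial.generate_sinoidal_periodic_gaussian(0.5, 5, 1, 20, 2, it=100, num=10)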
    [docs]def generate_uniform_linear(min_ini, max_ini, min_inc, max_inc, it=100, num=10, vmin=None, vmax=None): """ Generate data sampled from Uniform distribution, with constant or linear changing bounds @@ -138,10 +362,22 @@
[docs]def white_noise(n=500): + """ + Simple Gaussian noise signal + :param n: number of samples + :return: an array with n samples drawn from a standard Gaussian distribution + """ return np.random.normal(0, 1, n)
[docs]def random_walk(n=500, type='gaussian'): + """ + Simple random walk + + :param n: number of samples + :param type: 'gaussian' or 'uniform' + :return: a list with n samples + """ if type == 'gaussian': tmp = generate_gaussian_linear(0, 1, 0, 0, it=1, num=n) else: @@ -152,6 +388,30 @@
    + +def _append(additive, start, before, new): + if not additive: + before.extend(new) + return before + else: + for k in range(start): + new.insert(0,0) + + l1 = len(before) + l2 = len(new) + + if l2 < l1: + new.extend(np.zeros(l1 - l2).tolist()) + elif 0 < l1 < l2: + new = new[:l1] + + if len(before) == 0: + tmp = np.array(new) + else: + tmp = np.array(before) + np.array(new) + return tmp.tolist() + +
diff --git a/docs/build/html/_modules/pyFTS/distributed/spark.html b/docs/build/html/_modules/pyFTS/distributed/spark.html new file mode 100644 index 0000000..c0075eb --- /dev/null +++ b/docs/build/html/_modules/pyFTS/distributed/spark.html @@ -0,0 +1,432 @@ + pyFTS.distributed.spark — pyFTS 1.4 documentation

    Source code for pyFTS.distributed.spark

    +import numpy as np
    +import pandas as pd
    +
    +from pyFTS.data import Enrollments, TAIEX
    +from pyFTS.partitioners import Grid, Simple
    +from pyFTS.models.multivariate import partitioner as mv_partitioner
    +from pyFTS.models import hofts
    +
    +from pyspark import SparkConf
    +from pyspark import SparkContext
    +
    +import os
    +# make sure pyspark tells workers to use python3 not 2 if both are installed
    +SPARK_ADDR = 'spark://192.168.0.110:7077'
    +
    +os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
    +os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
    +
    +
    [docs]def create_spark_conf(**kwargs): + spark_executor_memory = kwargs.get("spark_executor_memory", "2g") + spark_driver_memory = kwargs.get("spark_driver_memory", "2g") + url = kwargs.get("url", SPARK_ADDR) + app = kwargs.get("app", 'pyFTS') + + conf = SparkConf() + conf.setMaster(url) + conf.setAppName(app) + conf.set("spark.executor.memory", spark_executor_memory) + conf.set("spark.driver.memory", spark_driver_memory) + conf.set("spark.memory.offHeap.enabled",True) + conf.set("spark.memory.offHeap.size","16g") + + return conf
    + +
[docs]def get_partitioner(shared_partitioner, type='common', variables=[]): + """ + + :param shared_partitioner: a broadcast variable with the fuzzy sets of the original partitioner + :param type: the partitioner type, 'common' or 'multivariate' + :return: a SimplePartitioner rebuilt on the worker node + """ + fs_tmp = Simple.SimplePartitioner() + + for fset in shared_partitioner.value.keys(): + fz = shared_partitioner.value[fset] + if type=='common': + fs_tmp.append_complex(fz) + elif type == 'multivariate': + fs_tmp.append(fz) + + return fs_tmp
    + + +
    [docs]def get_clustered_partitioner(explanatory_variables, target_variable, **parameters): + from pyFTS.models.multivariate.common import MultivariateFuzzySet + fs_tmp = mv_partitioner.MultivariatePartitioner(explanatory_variables=explanatory_variables, + target_variable=target_variable) + for tmp in parameters['partitioner_names'].value: + fs = MultivariateFuzzySet(target_variable=target_variable) + for var, fset in parameters['partitioner_{}'.format(tmp)].value: + fs.append_set(var, fset) + fs_tmp.append(fs) + + fs_tmp.build_index() + + return fs_tmp
    + + +
    [docs]def get_variables(**parameters): + explanatory_variables = [] + target_variable = None + for name in parameters['variables'].value: + from pyFTS.models.multivariate import common, variable + var = variable.Variable(name, + type=parameters['{}_type'.format(name)].value, + data_label=parameters['{}_label'.format(name)].value, + alpha_cut=parameters['{}_alpha'.format(name)].value, + #data_type=parameters['{}_data_type'.format(name)].value, + #mask=parameters['{}_mask'.format(name)].value, + ) + var.partitioner = get_partitioner(parameters['{}_partitioner'.format(name)]) + var.partitioner.type = parameters['{}_partitioner_type'.format(name)].value + + explanatory_variables.append(var) + + if var.name == parameters['target'].value: + target_variable = var + + return (explanatory_variables, target_variable)
    + +
    [docs]def create_univariate_model(**parameters): + if parameters['order'].value > 1: + model = parameters['method'].value(partitioner=get_partitioner(parameters['partitioner']), + order=parameters['order'].value, alpha_cut=parameters['alpha_cut'].value, + lags=parameters['lags'].value) + else: + model = parameters['method'].value(partitioner=get_partitioner(parameters['partitioner']), + alpha_cut=parameters['alpha_cut'].value) + + return model
    + +
[docs]def slave_train_univariate(data, **parameters): + """ + + :param data: a partition of the training data + :return: a list of (rule key, FLRG) tuples with the rules learned on this partition + """ + + model = create_univariate_model(**parameters) + + ndata = [k for k in data] + + model.train(ndata) + + return [(k, model.flrgs[k]) for k in model.flrgs.keys()]
    + + +
[docs]def slave_forecast_univariate(data, **parameters): + """ + + :param data: a partition of the input data + :return: a list of tuples with the forecasted values + """ + + model = create_univariate_model(**parameters) + + ndata = [k for k in data] + + forecasts = model.predict(ndata) + + return [(k, k) for k in forecasts]
    + + +
    [docs]def create_multivariate_model(**parameters): + explanatory_variables, target_variable = get_variables(**parameters) + #vars = [(v.name, v.name) for v in explanatory_variables] + + #return [('vars', vars), ('target',[target_variable.name])] + + if parameters['type'].value == 'clustered': + fs = get_clustered_partitioner(explanatory_variables, target_variable, **parameters) + model = parameters['method'].value(explanatory_variables=explanatory_variables, + target_variable=target_variable, + partitioner=fs, + order=parameters['order'].value, + alpha_cut=parameters['alpha_cut'].value, + lags=parameters['lags'].value) + else: + + if parameters['order'].value > 1: + model = parameters['method'].value(explanatory_variables=explanatory_variables, + target_variable=target_variable, + order=parameters['order'].value, + alpha_cut=parameters['alpha_cut'].value, + lags=parameters['lags'].value) + else: + model = parameters['method'].value(explanatory_variables=explanatory_variables, + target_variable=target_variable, + alpha_cut=parameters['alpha_cut'].value) + + return model
    + + +
    [docs]def slave_train_multivariate(data, **parameters): + + model = create_multivariate_model(**parameters) + + rows = [k for k in data] + ndata = pd.DataFrame.from_records(rows, columns=parameters['columns'].value) + + model.train(ndata) + + if parameters['type'].value == 'clustered': + counts = [(fset, count) for fset,count in model.partitioner.count.items()] + flrgs = [(k, v) for k,v in model.flrgs.items()] + + return [('counts', counts), ('flrgs', flrgs)] + else: + return [(k, v) for k,v in model.flrgs.items()]
    + + +
    [docs]def slave_forecast_multivariate(data, **parameters): + + model = create_multivariate_model(**parameters) + + rows = [k for k in data] + ndata = pd.DataFrame.from_records(rows, columns=parameters['columns'].value) + + forecasts = model.predict(ndata) + + return [(k, k) for k in forecasts]
    + + +
    [docs]def share_parameters(model, context, data): + parameters = {} + if not model.is_multivariate: + parameters['type'] = context.broadcast('common') + parameters['partitioner'] = context.broadcast(model.partitioner.sets) + parameters['alpha_cut'] = context.broadcast(model.alpha_cut) + parameters['order'] = context.broadcast(model.order) + parameters['method'] = context.broadcast(type(model)) + parameters['lags'] = context.broadcast(model.lags) + parameters['max_lag'] = context.broadcast(model.max_lag) + else: + if model.is_clustered: + parameters['type'] = context.broadcast('clustered') + names = [] + for name, fset in model.partitioner.sets.items(): + names.append(name) + parameters['partitioner_{}'.format(name)] = context.broadcast([(k,v) for k,v in fset.sets.items()]) + + parameters['partitioner_names'] = context.broadcast(names) + + else: + parameters['type'] = context.broadcast('multivariate') + names = [] + for var in model.explanatory_variables: + #if var.data_type is None: + # raise Exception("It is mandatory to inform the data_type parameter for each variable when the training is distributed! ") + names.append(var.name) + parameters['{}_type'.format(var.name)] = context.broadcast(var.type) + #parameters['{}_data_type'.format(var.name)] = context.broadcast(var.data_type) + #parameters['{}_mask'.format(var.name)] = context.broadcast(var.mask) + parameters['{}_label'.format(var.name)] = context.broadcast(var.data_label) + parameters['{}_alpha'.format(var.name)] = context.broadcast(var.alpha_cut) + parameters['{}_partitioner'.format(var.name)] = context.broadcast(var.partitioner.sets) + parameters['{}_partitioner_type'.format(var.name)] = context.broadcast(var.partitioner.type) + + parameters['variables'] = context.broadcast(names) + parameters['target'] = context.broadcast(model.target_variable.name) + + parameters['columns'] = context.broadcast(data.columns.values) + + parameters['alpha_cut'] = context.broadcast(model.alpha_cut) + parameters['order'] = context.broadcast(model.order) + parameters['method'] = context.broadcast(type(model)) + parameters['lags'] = context.broadcast(model.lags) + parameters['max_lag'] = context.broadcast(model.max_lag) + + return parameters
    + + +
[docs]def distributed_train(model, data, **kwargs): + """ + + + :param model: the FTS model to be trained + :param data: the training data + :keyword url: the Spark master URL + :keyword app: the Spark application name + :keyword num_batches: the number of batches per worker node, default 4 + :return: the trained model + """ + + num_batches = kwargs.get("num_batches", 4) + + conf = create_spark_conf(**kwargs) + + with SparkContext(conf=conf) as context: + + nodes = context.defaultParallelism + + parameters = share_parameters(model, context, data) + + if not model.is_multivariate: + func = lambda x: slave_train_univariate(x, **parameters) + + flrgs = context.parallelize(data).repartition(nodes*num_batches).mapPartitions(func) + + for k in flrgs.collect(): + model.append_rule(k[1]) + + else: + + data = data.to_dict(orient='records') + + func = lambda x: slave_train_multivariate(x, **parameters) + + flrgs = context.parallelize(data).mapPartitions(func) + + for k in flrgs.collect(): + if parameters['type'].value == 'clustered': + if k[0] == 'counts': + for fset, count in k[1]: + model.partitioner.count[fset] = count + elif k[0] == 'flrgs': + model.append_rule(k[1]) + else: + model.append_rule(k[1]) + + return model
    + + +
[docs]def distributed_predict(data, model, **kwargs): + """ + + + :param data: the input data + :param model: the trained FTS model + :keyword url: the Spark master URL + :keyword app: the Spark application name + :keyword num_batches: the number of batches per worker node, default 4 + :return: a list with the forecasted values + """ + + num_batches = kwargs.get("num_batches", 4) + + conf = create_spark_conf(**kwargs) + + ret = [] + + with SparkContext(conf=conf) as context: + + nodes = context.defaultParallelism + + parameters = share_parameters(model, context, data) + + if not model.is_multivariate: + func = lambda x: slave_forecast_univariate(x, **parameters) + + forecasts = context.parallelize(data).repartition(nodes * num_batches).mapPartitions(func) + + else: + + data = data.to_dict(orient='records') + + func = lambda x: slave_forecast_multivariate(x, **parameters) + + forecasts = context.parallelize(data).repartition(nodes * num_batches).mapPartitions(func) + + for k in forecasts.collect(): + ret.extend(k) + + return ret
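A minimal end-to-end sketch of the distributed API above, assuming a reachable Spark master (the URL below is the module's default and purely illustrative):

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.distributed import spark

data = TAIEX.get_data()
fs = Grid.GridPartitioner(data=data, npart=35)
model = hofts.HighOrderFTS(partitioner=fs, order=2)

model = spark.distributed_train(model, data, url='spark://192.168.0.110:7077', app='pyFTS')
forecasts = spark.distributed_predict(data[-100:], model, url='spark://192.168.0.110:7077', app='pyFTS')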
    + + + + \ No newline at end of file diff --git a/docs/build/html/_modules/pyFTS/models/ensemble/ensemble.html b/docs/build/html/_modules/pyFTS/models/ensemble/ensemble.html index 973112c..ccd49ac 100644 --- a/docs/build/html/_modules/pyFTS/models/ensemble/ensemble.html +++ b/docs/build/html/_modules/pyFTS/models/ensemble/ensemble.html @@ -105,6 +105,7 @@ self.shortname = "EnsembleFTS" self.name = "Ensemble FTS" self.flrgs = {} + self.is_wrapper = True self.has_point_forecasting = True self.has_interval_forecasting = True self.has_probability_forecasting = True @@ -209,7 +210,7 @@ ret = [] for k in np.arange(self.order, l+1): - sample = data[k - self.order : k] + sample = data[k - self.max_lag : k] tmp = self.get_models_forecasts(sample) point = self.get_point(tmp) ret.append(point) diff --git a/docs/build/html/_modules/pyFTS/models/hofts.html b/docs/build/html/_modules/pyFTS/models/hofts.html index a64b177..cc14f55 100644 --- a/docs/build/html/_modules/pyFTS/models/hofts.html +++ b/docs/build/html/_modules/pyFTS/models/hofts.html @@ -83,6 +83,7 @@ from pyFTS.common import FuzzySet, FLR, fts, flrg from itertools import product +
    [docs]class HighOrderFLRG(flrg.FLRG): """Conventional High Order Fuzzy Logical Relationship Group""" def __init__(self, order, **kwargs): @@ -258,33 +259,44 @@ explain = kwargs.get('explain', False) + fuzzyfied = kwargs.get('fuzzyfied', False) + + mode = kwargs.get('mode', 'mean') + ret = [] l = len(ndata) if not explain else self.max_lag + 1 if l < self.max_lag: return ndata + elif l == self.max_lag: + l += 1 - for k in np.arange(self.max_lag, l+1): + for k in np.arange(self.max_lag, l): + + sample = ndata[k - self.max_lag: k] if explain: print("Fuzzyfication \n") - if not kwargs.get('fuzzyfied', False): - flrgs = self.generate_lhs_flrg(ndata[k - self.max_lag: k], explain) + if not fuzzyfied: + flrgs = self.generate_lhs_flrg(sample, explain) else: - flrgs = self.generate_lhs_flrg_fuzzyfied(ndata[k - self.max_lag: k], explain) + flrgs = self.generate_lhs_flrg_fuzzyfied(sample, explain) if explain: print("Rules:\n") - tmp = [] + midpoints = [] + memberships = [] for flrg in flrgs: if flrg.get_key() not in self.flrgs: if len(flrg.LHS) > 0: mp = self.partitioner.sets[flrg.LHS[-1]].centroid - tmp.append(mp) + mv = self.partitioner.sets[flrg.LHS[-1]].membership(sample[-1]) if not fuzzyfied else None + midpoints.append(mp) + memberships.append(mv) if explain: print("\t {} -> {} (Naïve)\t Midpoint: {}\n".format(str(flrg.LHS), flrg.LHS[-1], @@ -292,12 +304,19 @@ else: flrg = self.flrgs[flrg.get_key()] mp = flrg.get_midpoint(self.partitioner.sets) - tmp.append(mp) + mv = flrg.get_membership(sample, self.partitioner.sets) if not fuzzyfied else None + midpoints.append(mp) + memberships.append(mv) if explain: print("\t {} \t Midpoint: {}\n".format(str(flrg), mp)) + print("\t {} \t Membership: {}\n".format(str(flrg), mv)) + + if mode == "mean" or fuzzyfied: + final = np.nanmean(midpoints) + else: + final = np.dot(midpoints, memberships) - final = np.nanmean(tmp) ret.append(final) if explain: diff --git a/docs/build/html/_modules/pyFTS/models/incremental/IncrementalEnsemble.html b/docs/build/html/_modules/pyFTS/models/incremental/IncrementalEnsemble.html new file mode 100644 index 0000000..2d34de9 --- /dev/null +++ b/docs/build/html/_modules/pyFTS/models/incremental/IncrementalEnsemble.html @@ -0,0 +1,191 @@ + + + + + + + + + pyFTS.models.incremental.IncrementalEnsemble — pyFTS 1.4 documentation + + + + + + + + + + + + + + + + +

    Source code for pyFTS.models.incremental.IncrementalEnsemble

    +'''
    +Time Variant/Incremental Ensemble of FTS methods
    +'''
    +
    +
    +import numpy as np
    +import pandas as pd
    +from pyFTS.common import FuzzySet, FLR, fts, flrg
    +from pyFTS.partitioners import Grid
    +from pyFTS.models import hofts
    +from pyFTS.models.ensemble import ensemble
    +
    +
    +
[docs]class IncrementalEnsembleFTS(ensemble.EnsembleFTS): + """ + Time Variant/Incremental Ensemble of FTS methods + """ + def __init__(self, **kwargs): + super(IncrementalEnsembleFTS, self).__init__(**kwargs) + self.shortname = "IncrementalEnsembleFTS" + self.name = "Incremental Ensemble FTS" + + self.order = kwargs.get('order',1) + + self.partitioner_method = kwargs.get('partitioner_method', Grid.GridPartitioner) + """The partitioner method to be called when a new model is built""" + self.partitioner_params = kwargs.get('partitioner_params', {'npart': 10}) + """The partitioner method parameters""" + + self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS) + """The FTS method to be called when a new model is built""" + self.fts_params = kwargs.get('fts_params', {}) + """The FTS method specific parameters""" + + self.window_length = kwargs.get('window_length', 100) + """The memory window length""" + + self.batch_size = kwargs.get('batch_size', 10) + """The batch interval between each retraining""" + + self.is_high_order = True + self.uod_clip = False + #self.max_lag = self.window_length + self.max_lag +
    [docs] def train(self, data, **kwargs): + + partitioner = self.partitioner_method(data=data, **self.partitioner_params) + model = self.fts_method(partitioner=partitioner, **self.fts_params) + if model.is_high_order: + model = self.fts_method(partitioner=partitioner, order=self.order, **self.fts_params) + model.fit(data, **kwargs) + if len(self.models) > 0: + self.models.pop(0) + self.models.append(model)
    + + def _point_smoothing(self, forecasts): + l = len(self.models) + + ret = np.nansum([np.exp(-(l-k)) * forecasts[k] for k in range(l)]) + + return ret + +
    [docs] def forecast(self, data, **kwargs): + l = len(data) + + data_window = [] + + ret = [] + + for k in np.arange(self.max_lag, l): + + data_window.append(data[k - self.max_lag]) + + if k >= self.window_length: + data_window.pop(0) + + if k % self.batch_size == 0 and k >= self.window_length: + self.train(data_window, **kwargs) + + sample = data[k - self.max_lag: k] + tmp = self.get_models_forecasts(sample) + point = self._point_smoothing(tmp) + ret.append(point) + + return ret
    + + + + + + +
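A hedged usage sketch of the class above (all parameter values are illustrative): a new member model is trained every batch_size observations over a sliding window of window_length points, so forecast() both learns and predicts as it scans the input.

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.models.incremental import IncrementalEnsemble

data = TAIEX.get_data()

model = IncrementalEnsemble.IncrementalEnsembleFTS(
    partitioner_method=Grid.GridPartitioner, partitioner_params={'npart': 10},
    fts_method=hofts.WeightedHighOrderFTS, order=2,
    window_length=100, batch_size=10)

forecasts = model.forecast(data)   # the first window_length points are consumed before the first retraining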
    + + + + \ No newline at end of file diff --git a/docs/build/html/_modules/pyFTS/models/incremental/Retrainer.html b/docs/build/html/_modules/pyFTS/models/incremental/TimeVariant.html similarity index 94% rename from docs/build/html/_modules/pyFTS/models/incremental/Retrainer.html rename to docs/build/html/_modules/pyFTS/models/incremental/TimeVariant.html index 07d37d1..529e7d9 100644 --- a/docs/build/html/_modules/pyFTS/models/incremental/Retrainer.html +++ b/docs/build/html/_modules/pyFTS/models/incremental/TimeVariant.html @@ -17,7 +17,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - pyFTS.models.incremental.Retrainer — pyFTS 1.4 documentation + pyFTS.models.incremental.TimeVariant — pyFTS 1.4 documentation @@ -71,7 +71,7 @@
    -

    Source code for pyFTS.models.incremental.Retrainer

    +  

    Source code for pyFTS.models.incremental.TimeVariant

     """
Meta model that wraps another FTS method and continuously retrains it using a data window with the most recent data
     """
    @@ -81,7 +81,7 @@
     from pyFTS.partitioners import Grid
     
     
    -
    [docs]class Retrainer(fts.FTS): +
    [docs]class Retrainer(fts.FTS): """ Meta model for incremental/online learning """ @@ -112,7 +112,7 @@ self.uod_clip = False self.max_lag = self.window_length + self.order -
    [docs] def train(self, data, **kwargs): +
    [docs] def train(self, data, **kwargs): self.partitioner = self.partitioner_method(data=data, **self.partitioner_params) self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params) if self.model.is_high_order: @@ -121,7 +121,7 @@ self.model.fit(data, **kwargs) self.shortname = self.model.shortname
    -
    [docs] def forecast(self, data, **kwargs): +
    [docs] def forecast(self, data, **kwargs): l = len(data) horizon = self.window_length + self.order diff --git a/docs/build/html/_modules/pyFTS/models/multivariate/cmvfts.html b/docs/build/html/_modules/pyFTS/models/multivariate/cmvfts.html index 69bc780..88c65d6 100644 --- a/docs/build/html/_modules/pyFTS/models/multivariate/cmvfts.html +++ b/docs/build/html/_modules/pyFTS/models/multivariate/cmvfts.html @@ -101,7 +101,7 @@ self.order = kwargs.get("order", 2) self.lags = kwargs.get("lags", None) - self.alpha_cut = kwargs.get('alpha_cut', 0.25) + self.alpha_cut = kwargs.get('alpha_cut', 0.0) self.shortname = "ClusteredMVFTS" self.name = "Clustered Multivariate FTS" @@ -112,7 +112,8 @@ ndata = [] for index, row in data.iterrows(): data_point = self.format_data(row) - ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, alpha_cut=self.alpha_cut)) + ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, + alpha_cut=self.alpha_cut)) return ndata
    diff --git a/docs/build/html/_modules/pyFTS/models/multivariate/common.html b/docs/build/html/_modules/pyFTS/models/multivariate/common.html index 3a7acfd..bd6ae09 100644 --- a/docs/build/html/_modules/pyFTS/models/multivariate/common.html +++ b/docs/build/html/_modules/pyFTS/models/multivariate/common.html @@ -116,9 +116,12 @@ return np.nanmin(mv)
    -
    [docs]def fuzzyfy_instance(data_point, var): +
    [docs]def fuzzyfy_instance(data_point, var, tuples=True): fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut) - return [(var.name, fs) for fs in fsets]
    + if tuples: + return [(var.name, fs) for fs in fsets] + else: + return fsets
    [docs]def fuzzyfy_instance_clustered(data_point, cluster, **kwargs): diff --git a/docs/build/html/_modules/pyFTS/models/multivariate/mvfts.html b/docs/build/html/_modules/pyFTS/models/multivariate/mvfts.html index a974b0f..bb11981 100644 --- a/docs/build/html/_modules/pyFTS/models/multivariate/mvfts.html +++ b/docs/build/html/_modules/pyFTS/models/multivariate/mvfts.html @@ -72,14 +72,28 @@

    Source code for pyFTS.models.multivariate.mvfts

    -from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
    +from pyFTS.common import fts, FuzzySet, FLR, Membership
     from pyFTS.partitioners import Grid
     from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
    +from itertools import product
    +from types import LambdaType
     
     import numpy as np
     import pandas as pd
     
     
    +
    [docs]def product_dict(**kwargs): + ''' + Code by Seth Johnson + :param kwargs: + :return: + ''' + keys = kwargs.keys() + vals = kwargs.values() + for instance in product(*vals): + yield dict(zip(keys, instance))
    + +
    [docs]class MVFTS(fts.FTS): """ Multivariate extension of Chen's ConventionalFTS method @@ -113,11 +127,15 @@
    [docs] def apply_transformations(self, data, params=None, updateUoD=False, **kwargs): ndata = data.copy(deep=True) for var in self.explanatory_variables: - if self.uod_clip and var.partitioner.type == 'common': - ndata[var.data_label] = np.clip(ndata[var.data_label].values, - var.partitioner.min, var.partitioner.max) + try: + values = ndata[var.data_label].values #if isinstance(ndata, pd.DataFrame) else ndata[var.data_label] + if self.uod_clip and var.partitioner.type == 'common': + ndata[var.data_label] = np.clip(values, + var.partitioner.min, var.partitioner.max) - ndata[var.data_label] = var.apply_transformations(ndata[var.data_label].values) + ndata[var.data_label] = var.apply_transformations(values) + except: + pass return ndata
    @@ -125,23 +143,19 @@ flrs = [] lags = {} for vc, var in enumerate(self.explanatory_variables): - data_point = data[var.data_label] - lags[vc] = common.fuzzyfy_instance(data_point, var) - - root = tree.FLRGTreeNode(None) - - tree.build_tree_without_order(root, lags, 0) - - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) + data_point = data[var.name] + lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False) + for path in product_dict(**lags): flr = MVFLR.FLR() - for v, s in path: - flr.set_lhs(v, s) + for var, fset in path.items(): + flr.set_lhs(var, fset) if len(flr.LHS.keys()) == len(self.explanatory_variables): flrs.append(flr) + else: + print(flr) return flrs
    @@ -149,7 +163,7 @@ flrs = [] for ct in range(1, len(data.index)): ix = data.index[ct-1] - data_point = data.loc[ix] + data_point = self.format_data( data.loc[ix] ) tmp_flrs = self.generate_lhs_flrs(data_point) @@ -184,17 +198,28 @@
    [docs] def forecast(self, data, **kwargs): ret = [] ndata = self.apply_transformations(data) - for index, row in ndata.iterrows(): - flrs = self.generate_lhs_flrs(row) + c = 0 + for index, row in ndata.iterrows() if isinstance(ndata, pd.DataFrame) else enumerate(ndata): + data_point = self.format_data(row) + flrs = self.generate_lhs_flrs(data_point) mvs = [] mps = [] for flr in flrs: flrg = mvflrg.FLRG(lhs=flr.LHS) if flrg.get_key() not in self.flrgs: - mvs.append(0.) - mps.append(0.) + #Naïve approach is applied when no rules were found + if self.target_variable.name in flrg.LHS: + fs = flrg.LHS[self.target_variable.name] + fset = self.target_variable.partitioner.sets[fs] + mp = fset.centroid + mv = fset.membership(data_point[self.target_variable.name]) + mvs.append(mv) + mps.append(mp) + else: + mvs.append(0.) + mps.append(0.) else: - mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(row), self.explanatory_variables)) + mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) mv = np.array(mvs) @@ -211,9 +236,10 @@ if generators is None: raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' + - ' are the variables names (except the target_variable) and the values are ' + + ' are the dataframe column names (except the target_variable) and the values are ' + 'lambda functions that accept one value (the actual value of the variable) ' - ' and return the next value.') + ' and return the next value or trained FTS models that accept the actual values and ' + 'forecast new ones.') ndata = self.apply_transformations(data) @@ -228,13 +254,20 @@ ret.append(tmp) - last_data_point = sample.loc[sample.index[-1]] - new_data_point = {} - for var in self.explanatory_variables: - if var.name != self.target_variable.name: - new_data_point[var.data_label] = generators[var.name](last_data_point[var.data_label]) + for data_label in generators.keys(): + if data_label != self.target_variable.data_label: + if isinstance(generators[data_label], LambdaType): + last_data_point = ndata.loc[sample.index[-1]] + new_data_point[data_label] = generators[data_label](last_data_point[data_label]) + elif isinstance(generators[data_label], fts.FTS): + model = generators[data_label] + last_data_point = ndata.loc[[sample.index[-model.order]]] + if not model.is_multivariate: + last_data_point = last_data_point[data_label].values + + new_data_point[data_label] = model.forecast(last_data_point)[0] new_data_point[self.target_variable.data_label] = tmp diff --git a/docs/build/html/_modules/pyFTS/models/multivariate/variable.html b/docs/build/html/_modules/pyFTS/models/multivariate/variable.html index 6a58361..1a4f2c5 100644 --- a/docs/build/html/_modules/pyFTS/models/multivariate/variable.html +++ b/docs/build/html/_modules/pyFTS/models/multivariate/variable.html @@ -104,9 +104,13 @@ self.mask = kwargs.get('mask', None) """The mask for format the data column on Pandas Dataframe""" self.transformation = kwargs.get('transformation', None) + """Pre processing transformation for the variable""" self.transformation_params = kwargs.get('transformation_params', None) self.partitioner = None + """UoD partitioner for the variable data""" self.alpha_cut = kwargs.get('alpha_cut', 0.0) + """Minimal membership value to be considered on fuzzyfication process""" + if kwargs.get('data', None) is not None: self.build(**kwargs) diff --git 
a/docs/build/html/_modules/pyFTS/models/seasonal/common.html b/docs/build/html/_modules/pyFTS/models/seasonal/common.html index af960ab..13ba11d 100644 --- a/docs/build/html/_modules/pyFTS/models/seasonal/common.html +++ b/docs/build/html/_modules/pyFTS/models/seasonal/common.html @@ -77,12 +77,18 @@ from enum import Enum from pyFTS.common import FuzzySet, Membership from pyFTS.partitioners import partitioner, Grid -from datetime import date as dt - +from datetime import date as dt, datetime as dtm
[docs]class DateTime(Enum): + """ + Date and time granularities for seasonality identification + """ year = 1 + half = 2 # six months + third = 3 # four months + quarter = 4 # three months + sixth = 6 # two months month = 12 day_of_month = 30 day_of_year = 364 @@ -104,11 +110,15 @@ second_of_day = 86400
    -
    [docs]def strip_datepart(date, date_part): +
[docs]def strip_datepart(date, date_part, mask=''): + if isinstance(date, str): + date = dtm.strptime(date, mask) if date_part == DateTime.year: tmp = date.year elif date_part == DateTime.month: tmp = date.month + elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth): + tmp = ((date.month - 1) * date_part.value) // 12 + 1 elif date_part == DateTime.day_of_year: tmp = date.timetuple().tm_yday elif date_part == DateTime.day_of_month: diff --git a/docs/build/html/_modules/pyFTS/models/seasonal/partitioner.html b/docs/build/html/_modules/pyFTS/models/seasonal/partitioner.html index 701d6e1..afeaa93 100644 --- a/docs/build/html/_modules/pyFTS/models/seasonal/partitioner.html +++ b/docs/build/html/_modules/pyFTS/models/seasonal/partitioner.html @@ -94,6 +94,10 @@ super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs) self.season = kwargs.get('seasonality', DateTime.day_of_year) + '''Seasonality, a pyFTS.models.seasonal.common.DateTime object''' + self.mask = kwargs.get('mask', '%Y-%m-%d %H:%M:%S') + '''A string with datetime formatting mask''' + data = kwargs.get('data', None) if self.season == DateTime.year: ndata = [strip_datepart(k, self.season) for k in data] @@ -114,7 +118,7 @@ self.ordered_sets = FS.set_ordered(self.sets) if self.type == 'seasonal': - self.extractor = lambda x: strip_datepart(x, self.season) + self.extractor = lambda x: strip_datepart(x, self.season, self.mask)
    [docs] def build(self, data): sets = {} @@ -124,6 +128,14 @@ if self.season == DateTime.year: dlen = (self.max - self.min) partlen = dlen / self.partitions + elif self.season == DateTime.day_of_week: + self.min, self.max, partlen, pl2 = 0, 7, 1, 1 + elif self.season == DateTime.hour: + self.min, self.max, partlen, pl2 = 0, 24, 1, 1 + elif self.season == DateTime.month: + self.min, self.max, partlen, pl2 = 1, 13, 1, 1 + elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth): + self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1 else: partlen = self.season.value / self.partitions pl2 = partlen / 2 diff --git a/docs/build/html/_modules/pyFTS/partitioners/CMeans.html b/docs/build/html/_modules/pyFTS/partitioners/CMeans.html index 9a19ca2..865045a 100644 --- a/docs/build/html/_modules/pyFTS/partitioners/CMeans.html +++ b/docs/build/html/_modules/pyFTS/partitioners/CMeans.html @@ -157,6 +157,9 @@
    [docs] def build(self, data): sets = {} + + kwargs = {'type': self.type, 'variable': self.variable} + centroides = c_means(self.partitions, data, 1) centroides.append(self.max) centroides.append(self.min) @@ -166,7 +169,7 @@ _name = self.get_name(c) sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf, [round(centroides[c - 1], 3), round(centroides[c], 3), round(centroides[c + 1], 3)], - round(centroides[c], 3)) + round(centroides[c], 3), **kwargs) return sets
    diff --git a/docs/build/html/_modules/pyFTS/partitioners/Entropy.html b/docs/build/html/_modules/pyFTS/partitioners/Entropy.html index d685e5a..27e65f3 100644 --- a/docs/build/html/_modules/pyFTS/partitioners/Entropy.html +++ b/docs/build/html/_modules/pyFTS/partitioners/Entropy.html @@ -161,23 +161,25 @@
    [docs] def build(self, data): sets = {} + kwargs = {'type': self.type, 'variable': self.variable} + partitions = bestSplit(data, self.partitions) partitions.append(self.min) partitions.append(self.max) partitions = list(set(partitions)) partitions.sort() - for c in np.arange(1, len(partitions) - 1): - _name = self.get_name(c) + for c in np.arange(1, len(partitions)-1): + _name = self.get_name(c-1) if self.membership_function == Membership.trimf: sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf, - [partitions[c - 1], partitions[c], partitions[c + 1]],partitions[c]) + [partitions[c - 1], partitions[c], partitions[c + 1]],partitions[c], **kwargs) elif self.membership_function == Membership.trapmf: b1 = (partitions[c] - partitions[c - 1])/2 b2 = (partitions[c + 1] - partitions[c]) / 2 sets[_name] = FuzzySet.FuzzySet(_name, Membership.trapmf, [partitions[c - 1], partitions[c] - b1, partitions[c] + b2, partitions[c + 1]], - partitions[c]) + partitions[c], **kwargs) return sets
    diff --git a/docs/build/html/_modules/pyFTS/partitioners/FCM.html b/docs/build/html/_modules/pyFTS/partitioners/FCM.html index e92b525..2a531b1 100644 --- a/docs/build/html/_modules/pyFTS/partitioners/FCM.html +++ b/docs/build/html/_modules/pyFTS/partitioners/FCM.html @@ -187,6 +187,8 @@
    [docs] def build(self, data): sets = {} + kwargs = {'type': self.type, 'variable': self.variable} + centroids = fuzzy_cmeans(self.partitions, data, 1, 2) centroids.append(self.max) centroids.append(self.min) @@ -198,14 +200,14 @@ sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf, [round(centroids[c - 1], 3), round(centroids[c], 3), round(centroids[c + 1], 3)], - round(centroids[c], 3)) + round(centroids[c], 3), **kwargs) elif self.membership_function == Membership.trapmf: q1 = (round(centroids[c], 3) - round(centroids[c - 1], 3)) / 2 q2 = (round(centroids[c + 1], 3) - round(centroids[c], 3)) / 2 sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf, [round(centroids[c - 1], 3), round(centroids[c], 3) - q1, round(centroids[c], 3) + q2, round(centroids[c + 1], 3)], - round(centroids[c], 3)) + round(centroids[c], 3), **kwargs) return sets
    diff --git a/docs/build/html/_modules/pyFTS/partitioners/Huarng.html b/docs/build/html/_modules/pyFTS/partitioners/Huarng.html index 6000319..fd45d9e 100644 --- a/docs/build/html/_modules/pyFTS/partitioners/Huarng.html +++ b/docs/build/html/_modules/pyFTS/partitioners/Huarng.html @@ -106,6 +106,8 @@ sets = {} + kwargs = {'type': self.type, 'variable': self.variable} + dlen = self.max - self.min npart = math.ceil(dlen / base) partition = math.ceil(self.min) @@ -113,14 +115,14 @@ _name = self.get_name(c) if self.membership_function == Membership.trimf: sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf, - [partition - base, partition, partition + base], partition) + [partition - base, partition, partition + base], partition, **kwargs) elif self.membership_function == Membership.gaussmf: sets[_name] = FuzzySet.FuzzySet(_name, Membership.gaussmf, [partition, base/2], partition) elif self.membership_function == Membership.trapmf: sets[_name] = FuzzySet.FuzzySet(_name, Membership.trapmf, [partition - base, partition - (base/2), - partition + (base / 2), partition + base], partition) + partition + (base / 2), partition + base], partition, **kwargs) partition += base diff --git a/docs/build/html/_modules/pyFTS/partitioners/Singleton.html b/docs/build/html/_modules/pyFTS/partitioners/Singleton.html index 0d59d38..59c284f 100644 --- a/docs/build/html/_modules/pyFTS/partitioners/Singleton.html +++ b/docs/build/html/_modules/pyFTS/partitioners/Singleton.html @@ -94,11 +94,11 @@
    [docs] def build(self, data): sets = {} + kwargs = {'type': self.type, 'variable': self.variable} + for count, instance in enumerate(data): _name = self.get_name(count) - sets[_name] = FuzzySet.FuzzySet(_name, Membership.singleton, [instance], instance) - - kwargs = {'type': self.type, 'variable': self.variable} + sets[_name] = FuzzySet.FuzzySet(_name, Membership.singleton, [instance], instance, **kwargs) return sets
    diff --git a/docs/build/html/_modules/pyFTS/partitioners/partitioner.html b/docs/build/html/_modules/pyFTS/partitioners/partitioner.html index 1c61b54..3b9d77b 100644 --- a/docs/build/html/_modules/pyFTS/partitioners/partitioner.html +++ b/docs/build/html/_modules/pyFTS/partitioners/partitioner.html @@ -139,7 +139,7 @@ self.sets = self.build(ndata) if self.ordered_sets is None and self.setnames is not None: - self.ordered_sets = self.setnames + self.ordered_sets = self.setnames[:len(self.sets)] else: self.ordered_sets = FuzzySet.set_ordered(self.sets) diff --git a/docs/build/html/_sources/modules.rst.txt b/docs/build/html/_sources/modules.rst.txt index 9ae9899..23b48a8 100644 --- a/docs/build/html/_sources/modules.rst.txt +++ b/docs/build/html/_sources/modules.rst.txt @@ -2,6 +2,6 @@ pyFTS ===== .. toctree:: - :maxdepth: 4 + :maxdepth: 5 pyFTS diff --git a/docs/build/html/_sources/pyFTS.data.rst.txt b/docs/build/html/_sources/pyFTS.data.rst.txt index 554897c..467ae85 100644 --- a/docs/build/html/_sources/pyFTS.data.rst.txt +++ b/docs/build/html/_sources/pyFTS.data.rst.txt @@ -29,6 +29,14 @@ pyFTS.data.common module Datasets -------- +Artificial and synthetic data generators +---------------------------------------- + +.. automodule:: pyFTS.data.artificial + :members: + :undoc-members: + :show-inheritance: + AirPassengers dataset ------------------------------- @@ -143,14 +151,6 @@ TAIEX dataset :undoc-members: :show-inheritance: -pyFTS.data.artificial module ----------------------------- - -.. automodule:: pyFTS.data.artificial - :members: - :undoc-members: - :show-inheritance: - Henon chaotic time series ------------------------- diff --git a/docs/build/html/_sources/pyFTS.distributed.rst.txt b/docs/build/html/_sources/pyFTS.distributed.rst.txt new file mode 100644 index 0000000..548504b --- /dev/null +++ b/docs/build/html/_sources/pyFTS.distributed.rst.txt @@ -0,0 +1,32 @@ +pyFTS.distributed package +========================= + +Module contents +--------------- + +.. automodule:: pyFTS.distributed + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +pyFTS.distributed.dispy module +------------------------------ + +.. automodule:: pyFTS.distributed.dispy + :members: + :undoc-members: + :show-inheritance: + +pyFTS.distributed.spark module +---------------------------------- + +.. automodule:: pyFTS.distributed.spark + :members: + :undoc-members: + :show-inheritance: + + + diff --git a/docs/build/html/_sources/pyFTS.hyperparam.rst.txt b/docs/build/html/_sources/pyFTS.hyperparam.rst.txt index 14d6d15..54b05b8 100644 --- a/docs/build/html/_sources/pyFTS.hyperparam.rst.txt +++ b/docs/build/html/_sources/pyFTS.hyperparam.rst.txt @@ -27,5 +27,13 @@ pyFTS.hyperparam.GridSearch module :members: :undoc-members: :show-inheritance: + +pyFTS.hyperparam.Evolutionary module +------------------------------------ + +.. 
automodule:: pyFTS.hyperparam.Evolutionary + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_sources/pyFTS.models.incremental.rst.txt b/docs/build/html/_sources/pyFTS.models.incremental.rst.txt index ae8b9ba..50f750a 100644 --- a/docs/build/html/_sources/pyFTS.models.incremental.rst.txt +++ b/docs/build/html/_sources/pyFTS.models.incremental.rst.txt @@ -1,5 +1,5 @@ pyFTS.models.incremental package -============================= +================================ Module contents --------------- @@ -13,10 +13,19 @@ Module contents Submodules ---------- -pyFTS.models.incremental.Retrainer module -------------------------------------- +pyFTS.models.incremental.TimeVariant module +------------------------------------------- -.. automodule:: pyFTS.models.incremental.Retrainer +.. automodule:: pyFTS.models.incremental.TimeVariant + :members: + :undoc-members: + :show-inheritance: + + +pyFTS.models.incremental.IncrementalEnsemble module +--------------------------------------------------- + +.. automodule:: pyFTS.models.incremental.IncrementalEnsemble :members: :undoc-members: :show-inheritance: diff --git a/docs/build/html/_sources/pyFTS.rst.txt b/docs/build/html/_sources/pyFTS.rst.txt index c3fd9db..0e45061 100644 --- a/docs/build/html/_sources/pyFTS.rst.txt +++ b/docs/build/html/_sources/pyFTS.rst.txt @@ -9,6 +9,7 @@ Subpackages pyFTS.benchmarks pyFTS.common pyFTS.data + pyFTS.distributed pyFTS.hyperparam pyFTS.models pyFTS.partitioners diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index c84c0a8..4470ab1 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -123,7 +123,11 @@
[genindex.html hunks, markup stripped in extraction; the recoverable changes in the regenerated general index are summarized below.]
Added entries: generate_linear_periodic_gaussian() and generate_sinoidal_periodic_gaussian() (in module pyFTS.data.artificial); get_clustered_partitioner(), get_partitioner() and get_variables() (in module pyFTS.distributed.spark); apparently also mask and transformation (pyFTS.models.multivariate.variable.Variable attributes).
Renamed entry: window_length (pyFTS.models.incremental.Retrainer.Retrainer attribute) → window_length (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute).
Context and reordered entries (alpha_cut, analytic_tabular_dataframe(), auto_update, generate_lhs_flrs(), generate_non_recurrent_flrs(), generate_recurrent_flrs(), generate_uniform_linear(), get_benchmark_point_methods(), mape_interval(), max_lag, membership(), Transformation, winkler_mean(), winkler_score(), among many others) are omitted here.
diff --git a/docs/build/html/modules.html index e9dbb87..3b22708 100644 --- a/docs/build/html/modules.html +++ b/docs/build/html/modules.html @@ -130,6 +130,7 @@
  • Submodules
  • pyFTS.data.common module
  • Datasets
  • +
  • Artificial and synthetic data generators
  • AirPassengers dataset
  • Bitcoin dataset
  • DowJones dataset
  • @@ -144,7 +145,6 @@
  • SONDA dataset
  • S&P 500 dataset
  • TAIEX dataset
  • -
  • pyFTS.data.artificial module
  • Henon chaotic time series
  • Logistic_map chaotic time series
  • Lorentz chaotic time series
  • @@ -153,16 +153,31 @@
  • Sunspots dataset
  • +
  • pyFTS.distributed package +
  • pyFTS.hyperparam package
• pyFTS.models package
diff --git a/docs/build/html/objects.inv index 051947e..4b6dc12 100644 Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ
diff --git a/docs/build/html/py-modindex.html index 85942a4..d76a9c0 100644 --- a/docs/build/html/py-modindex.html +++ b/docs/build/html/py-modindex.html
@@ -310,6 +310,16 @@ pyFTS.data.TAIEX
+ pyFTS.distributed
+ pyFTS.distributed.spark
@@ -378,7 +388,12 @@
- pyFTS.models.incremental.Retrainer
+ pyFTS.models.incremental.IncrementalEnsemble
+ pyFTS.models.incremental.TimeVariant
diff --git a/docs/build/html/pyFTS.common.html index e2c7ebc..9f29b92 100644 --- a/docs/build/html/pyFTS.common.html +++ b/docs/build/html/pyFTS.common.html @@ -1811,6 +1811,12 @@ a monovariate method, default: False

A boolean value indicating if the model supports multivariate time series (Pandas DataFrame), default: False

+is_wrapper = None¶
+Indicates that this model is a wrapper for other method(s)
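Since is_wrapper is new here, a brief hedged illustration of how client code might consult it; only the attribute itself comes from this page, while the concrete model class and the shortname attribute are standard pyFTS names used as assumptions:

from pyFTS.models import chen

model = chen.ConventionalFTS()          # any FTS subclass inherits the flag
if not model.is_wrapper:
    # wrappers delegate to inner model(s); plain models carry their own rules
    print(model.shortname, 'is a standalone model, not a wrapper')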
    lags = None¶
    @@ -1907,8 +1913,9 @@ needed to forecast a single step ahead

• nodes – a list with the addresses of the dispy cluster nodes
  • explain – try to explain, step by step, the one-step-ahead point forecasting result given the input data.
• generators – for multivariate methods on multi step ahead forecasting, generators is a dict where the keys -are the variables names (except the target_variable) and the values are lambda functions that -accept one value (the actual value of the variable) and return the next value.
• +are the dataframe column names (except the target_variable) and the values are lambda functions that +accept one value (the actual value of the variable) and return the next value, or trained FTS +models that accept the actual values and forecast new ones.
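A minimal sketch of the generators dict described above; all names are hypothetical: mvfts_model is an already trained multivariate FTS, test_df its input dataframe with columns temperature and humidity plus the target load, humidity_model is a univariate FTS trained on the humidity column, and steps_ahead is the usual pyFTS multi-step keyword:

import numpy as np

generators = {
    # a lambda: receives the variable's current value, returns its next value
    'temperature': lambda x: x * (1.0 + np.random.normal(0, 0.01)),
    # or an already trained FTS model that forecasts the next values itself
    'humidity': humidity_model,
}

# the target variable ('load') needs no generator: it is what gets forecast
forecasts = mvfts_model.predict(test_df, steps_ahead=10, generators=generators)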
• diff --git a/docs/build/html/pyFTS.data.html index 59b25f2..c6fde7a 100644 --- a/docs/build/html/pyFTS.data.html +++ b/docs/build/html/pyFTS.data.html @@ -28,7 +28,7 @@ [one-line markup change; content stripped in extraction]
[the following block is the new generated page for the pyFTS.distributed package (evidently docs/build/html/pyFTS.distributed.html; the diff header was lost in extraction); every line is an addition]
+pyFTS.distributed package¶
+Module contents¶
+Submodules¶
+pyFTS.distributed.dispy module¶
+pyFTS.distributed.spark module¶
+pyFTS.distributed.spark.create_multivariate_model(**parameters)[source]¶
+pyFTS.distributed.spark.create_spark_conf(**kwargs)[source]¶
+pyFTS.distributed.spark.create_univariate_model(**parameters)[source]¶
+pyFTS.distributed.spark.distributed_predict(data, model, **kwargs)[source]¶
+    Parameters: model – ; data – ; url – ; app –
+    Returns:
+pyFTS.distributed.spark.distributed_train(model, data, **kwargs)[source]¶
+    Parameters: model – ; data – ; url – ; app –
+    Returns:
+pyFTS.distributed.spark.get_clustered_partitioner(explanatory_variables, target_variable, **parameters)[source]¶
+pyFTS.distributed.spark.get_partitioner(shared_partitioner, type='common', variables=[])[source]¶
+    Parameters: part –
+    Returns:
+pyFTS.distributed.spark.get_variables(**parameters)[source]¶
+pyFTS.distributed.spark.share_parameters(model, context, data)[source]¶
+pyFTS.distributed.spark.slave_forecast_multivariate(data, **parameters)[source]¶
+pyFTS.distributed.spark.slave_forecast_univariate(data, **parameters)[source]¶
+    Parameters: data –
+    Returns:
+pyFTS.distributed.spark.slave_train_multivariate(data, **parameters)[source]¶
+pyFTS.distributed.spark.slave_train_univariate(data, **parameters)[source]¶
+    Parameters: data –
+    Returns:
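A hedged usage sketch for distributed_train and distributed_predict above, based only on the signatures and the url/app keywords listed in their parameter tables; the Spark master URL, the app name, and the model configuration below are invented for illustration:

import pyFTS.distributed.spark as spark
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

data = TAIEX.get_data()                            # univariate series
part = Grid.GridPartitioner(data=data, npart=35)   # fuzzy sets over the UoD
model = hofts.HighOrderFTS(partitioner=part, order=2)

# url and app appear in the parameter tables above; these values are guesses
model = spark.distributed_train(model, data,
                                url='spark://127.0.0.1:7077', app='pyFTS')
forecasts = spark.distributed_predict(data[-10:], model,
                                      url='spark://127.0.0.1:7077', app='pyFTS')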
\ No newline at end of file
diff --git a/docs/build/html/pyFTS.html index 89ede86..162aee5 100644 --- a/docs/build/html/pyFTS.html +++ b/docs/build/html/pyFTS.html @@ -142,6 +142,7 @@
  • Submodules
  • pyFTS.data.common module
  • Datasets
  • +
  • Artificial and synthetic data generators
  • AirPassengers dataset
  • Bitcoin dataset
  • DowJones dataset
  • @@ -156,7 +157,6 @@
  • SONDA dataset
  • S&P 500 dataset
  • TAIEX dataset
  • -
  • pyFTS.data.artificial module
  • Henon chaotic time series
  • Logistic_map chaotic time series
  • Lorentz chaotic time series
  • @@ -165,11 +165,19 @@
  • Sunspots dataset
  • +
  • pyFTS.distributed package +
  • pyFTS.hyperparam package
  • pyFTS.models package