Refactoring to remove the FTS.sets attribute; refactoring to move Partitioner.extractor to a class method; bugfixes in distributed fit and predict
parent f2bf2b5d87
commit 354a3131c9
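In short: models no longer keep their own copy of the fuzzy sets; the partitioner owns them, and everything that used to read `self.sets` now reads `self.partitioner.sets`. A minimal before/after sketch of the caller-facing change (the model class, data values and partition count here are illustrative, not from the diff):

    from pyFTS.partitioners import Grid
    from pyFTS.models import hofts

    data = [10.0, 10.2, 9.8, 10.1, 10.4, 9.9] * 100  # illustrative series

    partitioner = Grid.GridPartitioner(data=data, npart=10)
    model = hofts.HighOrderFTS(partitioner=partitioner, order=2)
    model.fit(data)

    # before this commit: fuzzy sets were duplicated on the model as model.sets
    # after this commit:  the partitioner is the single owner of the sets
    sets = model.partitioner.sets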
@@ -824,6 +824,17 @@ def run_probabilistic2(fts_method, order, partitioner_method, partitions, transf
     return ret
 
 
+def common_process_time_jobs(conn, data, job):
+    dta = deepcopy(data)
+    dta.append(job['steps'])
+    dta.append(job['method'])
+    for key in ["time"]:
+        if key in job:
+            data2 = deepcopy(dta)
+            data2.extend([key, job[key]])
+            bUtil.insert_benchmark(data2, conn)
+
+
 def common_process_point_jobs(conn, data, job):
     dta = deepcopy(data)
     dta.append(job['steps'])
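The new helper mirrors `common_process_point_jobs` but persists only the `'time'` metric of a job. A hedged sketch of a call, assuming the `bUtil` alias and DB helpers already used in this module; the concrete values are illustrative:

    job = {'steps': 0, 'method': 'train', 'time': 0.42,
           'model': 'PWFTS', 'transformation': None,
           'order': 2, 'partitioner': 'Grid', 'partitions': 50, 'size': 120}

    conn = bUtil.open_benchmark_db('benchmarks.db')
    data = bUtil.process_common_data2('TAIEX', 'mytag', 'train', job)
    common_process_time_jobs(conn, data, job)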
@@ -1416,3 +1427,50 @@ def pftsExploreOrderAndPartitions(data,save=False, file=None):
     cUtil.show_and_save_image(fig, file, save)
 
 
+def train_test_time(data, windowsize, train=0.8, **kwargs):
+    import time
+
+    tag = __pop('tag', None, kwargs)
+    steps = __pop('steps', 0, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
+    partitions = __pop('partitions', 10, kwargs)
+
+    fts_methods = __pop('methods', [], kwargs)
+
+    file = kwargs.get('file', "benchmarks.db")
+
+    inc = __pop("inc", 0.1, kwargs)
+
+    conn = bUtil.open_benchmark_db(file)
+
+    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
+        partitioner = Grid.GridPartitioner(data=train, npart=partitions)
+        for id, fts_method in enumerate(fts_methods):
+            print(dataset, fts_method, ct)
+            times = []
+            model = fts_method(partitioner=partitioner, **kwargs)
+            _start = time.time()
+            model.fit(train, **kwargs)
+            _end = time.time()
+            times.append(_end - _start)
+            _start = time.time()
+            model.predict(train, **kwargs)
+            _end = time.time()
+            times.append(_end - _start)
+            for ct, method in enumerate(['train', 'test']):
+                job = {
+                    'steps': steps, 'method': method, 'time': times[ct],
+                    'model': model.shortname, 'transformation': None,
+                    'order': model.order, 'partitioner': partitioner.name,
+                    'partitions': partitions, 'size': len(model)
+                }
+
+                data = bUtil.process_common_data2(dataset, tag, 'train', job)
+                common_process_time_jobs(conn, data, job)
+
+    conn.close()
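A usage sketch for the new `train_test_time` benchmark, mirroring the experiment script updated later in this commit (window size, dataset and tag are illustrative):

    from pyFTS.benchmarks import benchmarks as bchmk
    from pyFTS.models import pwfts
    from pyFTS.data import TAIEX

    data = TAIEX.get_data()

    bchmk.train_test_time(data, windowsize=1000, train=0.9, inc=0.5,
                          methods=[pwfts.ProbabilisticWeightedFTS],
                          order=2, partitions=50,
                          file='benchmarks.db', dataset='TAIEX',
                          tag='speedup')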
@@ -13,8 +13,6 @@ from pyFTS.probabilistic import ProbabilityDistribution
 from pyFTS.common import Transformations
 
 
-
-
 def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True,
                                   save=False, file=None, tam=[20, 5], resolution=None,
                                   cmap='Blues', linewidth=1.5):

@@ -126,8 +124,9 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
     handles0, labels0 = ax.get_legend_handles_labels()
     ax.legend(handles0, labels0)
 
+
 def plot_distribution(ax, cmap, probabilitydist, fig, time_from, reference_data=None):
-    '''
+    """
     Plot forecasted ProbabilityDistribution objects on a matplotlib axis
 
     :param ax: matplotlib axis

@@ -137,7 +136,7 @@ def plot_distribution(ax, cmap, probabilitydist, fig, time_from, reference_data=
     :param time_from: starting time (on x axis) to begin the plots
     :param reference_data:
     :return:
-    '''
+    """
     from matplotlib.patches import Rectangle
     from matplotlib.collections import PatchCollection
     patches = []

@@ -163,7 +162,7 @@ def plot_distribution(ax, cmap, probabilitydist, fig, time_from, reference_data=
 
 
 def plot_distribution2(probabilitydist, data, **kwargs):
-    '''
+    """
     Plot distributions over the time (x-axis)
 
     :param probabilitydist: the forecasted probability distributions to plot

@@ -173,7 +172,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
     :keyword cmap: a matplotlib colormap name, the default value is 'Blues'
     :keyword quantiles: the list of quantiles intervals to plot, e. g. [.05, .25, .75, .95]
     :keyword median: a boolean value indicating if the median value will be plot.
-    '''
+    """
     import matplotlib.colorbar as cbar
     import matplotlib.cm as cm
 

@@ -225,7 +224,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
 
 
 def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1):
-    '''
+    """
     Plot forecasted intervals on matplotlib
 
     :param axis: matplotlib axis

@@ -237,7 +236,7 @@ def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False
     :param ls: matplotlib line style
     :param linewidth: matplotlib width
     :return:
-    '''
+    """
     lower = [kk[0] for kk in intervals]
     upper = [kk[1] for kk in intervals]
     mi = min(lower) * 0.95

@@ -252,7 +251,7 @@ def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False
 
 
 def plot_interval2(intervals, data, **kwargs):
-    '''
+    """
     Plot forecasted intervals on matplotlib
 
     :param intervals: list of forecasted intervals

@@ -263,7 +262,7 @@ def plot_interval2(intervals, data, **kwargs):
     :keyword typeonlegend:
     :keyword ls: matplotlib line style
     :keyword linewidth: matplotlib width
-    '''
+    """
 
     l = len(intervals)
 

@@ -296,7 +295,7 @@ def plot_interval2(intervals, data, **kwargs):
 
 
 def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
-    '''
+    """
     Plot the FLRG rules of a FTS model on a matplotlib axis
 
     :param model: FTS model

@@ -305,7 +304,7 @@ def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
     :param rules_by_axis: number of rules plotted by column
     :param columns: number of columns
     :return:
-    '''
+    """
     if axis is None and rules_by_axis is None:
         rows = 1
     elif axis is None and rules_by_axis is not None:

@@ -412,7 +411,6 @@ def uniquefilename(name):
     return name + str(current_milli_time())
 
 
-
 def show_and_save_image(fig, file, flag, lgd=None):
     """
     Show and image and save on file
@@ -11,9 +11,6 @@ class FTS(object):
         """
         Create a Fuzzy Time Series model
         """
-
-        self.sets = {}
-        """The list of fuzzy sets used on this model"""
         self.flrgs = {}
         """The list of Fuzzy Logical Relationship Groups - FLRG"""
         self.order = kwargs.get('order',1)

@@ -83,8 +80,8 @@ class FTS(object):
         """
         best = {"fuzzyset": "", "membership": 0.0}
 
-        for f in self.sets:
-            fset = self.sets[f]
+        for f in self.partitioner.sets:
+            fset = self.partitioner.sets[f]
             if best["membership"] <= fset.membership(data):
                 best["fuzzyset"] = fset.name
                 best["membership"] = fset.membership(data)

@@ -129,13 +126,13 @@ class FTS(object):
         else:
             distributed = False
 
-        if distributed is None or distributed == False:
-
         if 'type' in kwargs:
             type = kwargs.pop("type")
         else:
             type = 'point'
 
+        if distributed is None or distributed == False:
+
             steps_ahead = kwargs.get("steps_ahead", None)
 
             if steps_ahead == None or steps_ahead == 1:

@@ -321,25 +318,19 @@ class FTS(object):
         self.original_min = np.nanmin(data)
         self.original_max = np.nanmax(data)
 
-        if 'sets' in kwargs:
-            self.sets = kwargs.pop('sets')
-
         if 'partitioner' in kwargs:
             self.partitioner = kwargs.pop('partitioner')
 
-        if not self.is_wrapper:
-            if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only and not self.is_multivariate:
-                if self.partitioner is not None:
-                    self.sets = self.partitioner.sets
-                else:
-                    raise Exception("Fuzzy sets were not provided for the model. Use 'sets' parameter or 'partitioner'. ")
+        if not self.is_wrapper and not self.benchmark_only:
+            if self.partitioner is None:
+                raise Exception("Fuzzy sets were not provided for the model. Use 'partitioner' parameter. ")
 
         if 'order' in kwargs:
             self.order = kwargs.pop('order')
 
         dump = kwargs.get('dump', None)
 
-        num_batches = kwargs.get('num_batches', 1)
+        num_batches = kwargs.get('num_batches', 10)
 
         save = kwargs.get('save_model', False) # save model on disk

@@ -419,6 +410,8 @@ class FTS(object):
         """
 
         self.order = model.order
+        self.partitioner = model.partitioner
+        self.lags = model.lags
         self.shortname = model.shortname
         self.name = model.name
         self.detail = model.detail

@@ -434,8 +427,6 @@ class FTS(object):
         self.transformations_param = model.transformations_param
         self.original_max = model.original_max
         self.original_min = model.original_min
-        self.partitioner = model.partitioner
-        self.sets = model.sets
         self.auto_update = model.auto_update
         self.benchmark_only = model.benchmark_only
         self.indexer = model.indexer
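With `FTS.sets` gone, `fit` now fails fast when no partitioner is attached (unless the model is a wrapper or benchmark-only). A minimal sketch of the new contract (model class and data are illustrative):

    from pyFTS.partitioners import Grid
    from pyFTS.models import chen

    data = list(range(100))  # illustrative

    model = chen.ConventionalFTS()
    try:
        model.fit(data)  # no partitioner attached
    except Exception as ex:
        print(ex)  # "Fuzzy sets were not provided for the model. Use 'partitioner' parameter."

    model = chen.ConventionalFTS(partitioner=Grid.GridPartitioner(data=data, npart=10))
    model.fit(data)  # ok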
@@ -4,6 +4,13 @@ import numpy as np
 
 
 def start_dispy_cluster(method, nodes):
+    """
+    Start a new Dispy cluster on 'nodes' to execute the method 'method'
+
+    :param method: function to be executed on each cluster node
+    :param nodes: list of node names or IP's.
+    :return: the dispy cluster instance and the http_server for monitoring
+    """
 
     cluster = dispy.JobCluster(method, nodes=nodes, loglevel=logging.DEBUG, ping_interval=1000)
 

@@ -13,6 +20,13 @@ def start_dispy_cluster(method, nodes):
 
 
 def stop_dispy_cluster(cluster, http_server):
+    """
+    Stop a dispy cluster and http_server
+
+    :param cluster:
+    :param http_server:
+    :return:
+    """
     cluster.wait() # wait for all jobs to finish
 
     cluster.print_status()

@@ -21,7 +35,23 @@ def stop_dispy_cluster(cluster, http_server):
     cluster.close()
 
 
+def get_number_of_cpus(cluster):
+    cpus = 0
+    for dispy_node in cluster.status().nodes:
+        cpus += dispy_node.cpus
+
+    return cpus
+
+
 def simple_model_train(model, data, parameters):
+    """
+    Cluster function that receives a FTS instance 'model' and train using the 'data' and 'parameters'
+
+    :param model: a FTS instance
+    :param data: training dataset
+    :param parameters: parameters for the training process
+    :return: the trained model
+    """
     model.train(data, **parameters)
     return model
 
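A quick sketch of the new helper inside the usual cluster lifecycle (the payload function and node address are illustrative):

    def square(x):
        return x * x  # trivial payload, just for illustration

    cluster, http_server = start_dispy_cluster(square, nodes=['127.0.0.1'])
    print('cluster provides {} CPUs'.format(get_number_of_cpus(cluster)))
    stop_dispy_cluster(cluster, http_server)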
@@ -38,7 +68,8 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches=
 
     cluster, http_server = start_dispy_cluster(train_method, nodes)
 
-    print("[{0: %H:%M:%S}] Distrituted Train Started".format(datetime.datetime.now()))
+    print("[{0: %H:%M:%S}] Distrituted Train Started with {1} CPU's"
+          .format(datetime.datetime.now(), get_number_of_cpus(cluster)))
 
     jobs = []
     n = len(data)
@@ -10,8 +10,8 @@ import random
 from pyFTS.common import Util
 from pyFTS.benchmarks import Measures
 from pyFTS.partitioners import Grid, Entropy # , Huarng
-from pyFTS.models import hofts
 from pyFTS.common import Membership
+from pyFTS.models import hofts, ifts, pwfts
 from pyFTS.hyperparam import Util as hUtil
 from pyFTS.distributed import dispy as dUtil

@@ -71,7 +71,7 @@ def initial_population(n):
     return pop
 
 
-def phenotype(individual, train, fts_method=hofts.WeightedHighOrderFTS, parameters={}):
+def phenotype(individual, train, fts_method, parameters={}):
     """
     Instantiate the genotype, creating a fitted model with the genotype hyperparameters
 

@@ -81,6 +81,8 @@ def phenotype(individual, train, fts_method, parameters={}):
     :param parameters: dict with model specific arguments for fit method.
     :return: a fitted FTS model
     """
+    from pyFTS.models import hofts, ifts, pwfts
+
     if individual['mf'] == 1:
         mf = Membership.trimf
     elif individual['mf'] == 2:

@@ -117,6 +119,7 @@ def evaluate(dataset, individual, **kwargs):
     :param parameters: dict with model specific arguments for fit method.
     :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value
     """
+    from pyFTS.models import hofts, ifts, pwfts
     from pyFTS.common import Util
     from pyFTS.benchmarks import Measures
     from pyFTS.hyperparam.Evolutionary import phenotype, __measures

@@ -306,7 +309,7 @@ def mutation(individual, **kwargs):
     return individual
 
 
-def elitism(population, new_population):
+def elitism(population, new_population, **kwargs):
     """
     Elitism operation, always select the best individual of the population and discard the worst
 

@@ -418,12 +421,12 @@ def GeneticAlgorithm(dataset, **kwargs):
 
         # Selection
         for j in range(int(npop * psel)):
-            new_population.append(selection_operator(population))
+            new_population.append(selection_operator(population, **kwargs))
 
         # Crossover
         new = []
         for j in range(int(npop * pcross)):
-            new.append(crossover_operator(new_population))
+            new.append(crossover_operator(new_population, **kwargs))
 
         new_population.extend(new)
 

@@ -431,7 +434,7 @@ def GeneticAlgorithm(dataset, **kwargs):
         for ct, individual in enumerate(new_population):
             rnd = random.uniform(0, 1)
             if rnd < pmut:
-                new_population[ct] = mutation_operator(individual)
+                new_population[ct] = mutation_operator(individual, **kwargs)
 
         # Evaluation
         if collect_statistics:

@@ -472,7 +475,7 @@ def GeneticAlgorithm(dataset, **kwargs):
 
         # Elitism
         if _elitism:
-            population = elitism_operator(population, new_population)
+            population = elitism_operator(population, new_population, **kwargs)
 
         population = population[:npop]
 

@@ -503,22 +506,22 @@ def GeneticAlgorithm(dataset, **kwargs):
     return best, statistics
 
 
-def process_experiment(result, datasetname, conn):
-    log_result(conn, datasetname, result['individual'])
-    persist_statistics(result['statistics'])
+def process_experiment(fts_method, result, datasetname, conn):
+    log_result(conn, datasetname, fts_method, result['individual'])
+    persist_statistics(datasetname, result['statistics'])
     return result['individual']
 
 
-def persist_statistics(statistics):
+def persist_statistics(datasetname, statistics):
     import json
-    with open('statistics{}.json'.format(time.time()), 'w') as file:
+    with open('statistics_{}.json'.format(datasetname), 'w') as file:
         file.write(json.dumps(statistics))
 
 
-def log_result(conn, datasetname, result):
+def log_result(conn, datasetname, fts_method, result):
     metrics = ['rmse', 'size', 'time']
     for metric in metrics:
-        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
+        record = (datasetname, 'Evolutive', fts_method, None, result['mf'],
                   result['order'], result['partitioner'], result['npart'],
                   result['alpha'], str(result['lags']), metric, result[metric])
 

@@ -568,6 +571,9 @@ def execute(datasetname, dataset, **kwargs):
 
     distributed = kwargs.get('distributed', False)
 
+    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
+    shortname = str(fts_method.__module__).split('.')[-1]
+
     if distributed == 'dispy':
         nodes = kwargs.get('nodes', ['127.0.0.1'])
         cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)

@@ -583,7 +589,7 @@ def execute(datasetname, dataset, **kwargs):
     ret['time'] = end - start
     experiment = {'individual': ret, 'statistics': statistics}
 
-    ret = process_experiment(experiment, datasetname, conn)
+    ret = process_experiment(shortname, experiment, datasetname, conn)
 
     if distributed == 'dispy':
         dUtil.stop_dispy_cluster(cluster, http_server)
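Since `execute` now reads the optimization target from the `fts_method` kwarg and derives a short name from its module for logging, callers can pick the model explicitly. A hedged sketch mirroring the test script at the end of this commit (dataset and GA parameters are illustrative):

    from pyFTS.data import TAIEX
    from pyFTS.hyperparam import Evolutionary
    from pyFTS.models import pwfts

    dataset = TAIEX.get_data()

    ret = Evolutionary.execute('TAIEX', dataset,
                               ngen=30, npop=20, psel=0.6, pcross=.5, pmut=.3,
                               window_size=1000, train_rate=.9, increment_rate=.3,
                               fts_method=pwfts.ProbabilisticWeightedFTS,
                               database_file='experiments.db')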
@@ -64,7 +64,7 @@ class ConventionalFTS(fts.FTS):
 
         for k in np.arange(0, l):
 
-            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets)
+            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.partitioner.sets)
 
             if explain:
                 print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))

@@ -78,7 +78,7 @@ class ConventionalFTS(fts.FTS):
             else:
                 _flrg = self.flrgs[actual.name]
 
-                mp = _flrg.get_midpoint(self.sets)
+                mp = _flrg.get_midpoint(self.partitioner.sets)
 
                 ret.append(mp)
 
@@ -33,9 +33,9 @@ class IntervalFTS(hofts.HighOrderFTS):
         if len(flrg.LHS) > 0:
             if flrg.get_key() in self.flrgs:
                 tmp = self.flrgs[flrg.get_key()]
-                ret = tmp.get_upper(self.sets)
+                ret = tmp.get_upper(self.partitioner.sets)
             else:
-                ret = self.sets[flrg.LHS[-1]].upper
+                ret = self.partitioner.sets[flrg.LHS[-1]].upper
         return ret
 
     def get_lower(self, flrg):

@@ -74,7 +74,7 @@ class IntervalFTS(hofts.HighOrderFTS):
         for flrg in flrgs:
             if len(flrg.LHS) > 0:
 
-                mv = flrg.get_membership(sample, self.sets)
+                mv = flrg.get_membership(sample, self.partitioner.sets)
                 up.append(mv * self.get_upper(flrg))
                 lo.append(mv * self.get_lower(flrg))
                 affected_flrgs_memberships.append(mv)

@@ -119,9 +119,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
         if len(flrg.LHS) > 0:
             if flrg.get_key() in self.flrgs:
                 tmp = self.flrgs[flrg.get_key()]
-                ret = tmp.get_upper(self.sets)
+                ret = tmp.get_upper(self.partitioner.sets)
             else:
-                ret = self.sets[flrg.LHS[-1]].upper
+                ret = self.partitioner.sets[flrg.LHS[-1]].upper
         return ret
 
     def get_lower(self, flrg):

@@ -159,7 +159,7 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
         for flrg in flrgs:
             if len(flrg.LHS) > 0:
 
-                mv = flrg.get_membership(sample, self.sets)
+                mv = flrg.get_membership(sample, self.partitioner.sets)
                 up.append(mv * self.get_upper(flrg))
                 lo.append(mv * self.get_lower(flrg))
                 affected_flrgs_memberships.append(mv)
@@ -74,7 +74,7 @@ class ImprovedWeightedFTS(fts.FTS):
         if self.partitioner is not None:
             ordered_sets = self.partitioner.ordered_sets
         else:
-            ordered_sets = FuzzySet.set_ordered(self.sets)
+            ordered_sets = FuzzySet.set_ordered(self.partitioner.sets)
 
         ndata = np.array(ndata)
 

@@ -84,7 +84,7 @@ class ImprovedWeightedFTS(fts.FTS):
 
         for k in np.arange(0, l):
 
-            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
+            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.partitioner.sets, ordered_sets)
 
             if explain:
                 print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))

@@ -97,7 +97,7 @@ class ImprovedWeightedFTS(fts.FTS):
 
             else:
                 flrg = self.flrgs[actual.name]
-                mp = flrg.get_midpoints(self.sets)
+                mp = flrg.get_midpoints(self.partitioner.sets)
 
                 final = mp.dot(flrg.weights())
 
@@ -55,6 +55,7 @@ class ConditionalVarianceFTS(hofts.HighOrderFTS):
         self.variance_residual = 0.
         self.mean_residual = 0.
         self.memory_window = kwargs.get("memory_window",5)
+        self.sets = {}
 
     def train(self, ndata, **kwargs):
 
@@ -69,11 +69,11 @@ class NonStationaryFTS(fts.FTS):
 
         if self.method == 'unconditional':
             window_size = kwargs.get('parameters', 1)
-            tmpdata = common.fuzzySeries(data, self.sets,
+            tmpdata = common.fuzzySeries(data, self.partitioner.sets,
                                          self.partitioner.ordered_sets,
                                          window_size, method='fuzzy')
         else:
-            tmpdata = common.fuzzySeries(data, self.sets,
+            tmpdata = common.fuzzySeries(data, self.partitioner.sets,
                                          self.partitioner.ordered_sets,
                                          method='fuzzy', const_t=0)
 

@@ -190,9 +190,9 @@ class NonStationaryFTS(fts.FTS):
             ix = affected_sets[0][0]
             aset = self.partitioner.ordered_sets[ix]
             if aset in self.flrgs:
-                numerator.append(self.flrgs[aset].get_midpoint(self.sets, perturb[ix]))
+                numerator.append(self.flrgs[aset].get_midpoint(self.partitioner.sets, perturb[ix]))
             else:
-                fuzzy_set = self.sets[aset]
+                fuzzy_set = self.partitioner.sets[aset]
                 numerator.append(fuzzy_set.get_midpoint(perturb[ix]))
             denominator.append(1)
         else:

@@ -201,9 +201,9 @@ class NonStationaryFTS(fts.FTS):
                 fs = self.partitioner.ordered_sets[ix]
                 tdisp = perturb[ix]
                 if fs in self.flrgs:
-                    numerator.append(self.flrgs[fs].get_midpoint(self.sets, tdisp) * aset[1])
+                    numerator.append(self.flrgs[fs].get_midpoint(self.partitioner.sets, tdisp) * aset[1])
                 else:
-                    fuzzy_set = self.sets[fs]
+                    fuzzy_set = self.partitioner.sets[fs]
                     numerator.append(fuzzy_set.get_midpoint(tdisp) * aset[1])
                 denominator.append(aset[1])
 

@@ -241,9 +241,9 @@ class NonStationaryFTS(fts.FTS):
 
             tdisp = common.window_index(k + time_displacement, window_size)
 
-            affected_sets = [[self.sets[key], self.sets[key].membership(ndata[k], tdisp)]
+            affected_sets = [[self.partitioner.sets[key], self.partitioner.sets[key].membership(ndata[k], tdisp)]
                              for key in self.partitioner.ordered_sets
-                             if self.sets[key].membership(ndata[k], tdisp) > 0.0]
+                             if self.partitioner.sets[key].membership(ndata[k], tdisp) > 0.0]
 
             if len(affected_sets) == 0:
                 affected_sets.append([common.check_bounds(ndata[k], self.partitioner, tdisp), 1.0])
@@ -78,7 +78,7 @@ class ExponentialyWeightedFTS(fts.FTS):
         if self.partitioner is not None:
             ordered_sets = self.partitioner.ordered_sets
         else:
-            ordered_sets = FuzzySet.set_ordered(self.sets)
+            ordered_sets = FuzzySet.set_ordered(self.partitioner.sets)
 
         data = np.array(ndata)
 

@@ -88,7 +88,7 @@ class ExponentialyWeightedFTS(fts.FTS):
 
         for k in np.arange(0, l):
 
-            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
+            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.partitioner.sets, ordered_sets)
 
             if explain:
                 print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))

@@ -101,7 +101,7 @@ class ExponentialyWeightedFTS(fts.FTS):
 
             else:
                 flrg = self.flrgs[actual.name]
-                mp = flrg.get_midpoints(self.sets)
+                mp = flrg.get_midpoints(self.partitioner.sets)
 
                 final = mp.dot(flrg.weights())
 
@@ -44,8 +44,11 @@ class TimeGridPartitioner(partitioner.Partitioner):
         else:
             self.ordered_sets = FS.set_ordered(self.sets)
 
+    def extractor(self,x):
         if self.type == 'seasonal':
-            self.extractor = lambda x: strip_datepart(x, self.season, self.mask)
+            return strip_datepart(x, self.season, self.mask)
+        else:
+            return x
 
     def build(self, data):
         sets = {}
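Turning `extractor` from a lambda stored on the instance into an ordinary method is consistent with the distributed bugfixes elsewhere in this commit: instance attributes holding lambdas cannot be pickled for cluster workers, while plain methods can. Subclasses now simply override the hook; a sketch with an illustrative subclass and extraction rule (not part of the commit):

    from pyFTS.partitioners import Grid

    class RecordGridPartitioner(Grid.GridPartitioner):
        """Illustrative: partition dict records by their 'value' field."""

        def extractor(self, x):
            # pull a primitive value out of a structured record
            return x['value'] if isinstance(x, dict) else x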
@@ -79,7 +79,7 @@ class MarkovWeightedFTS(fts.FTS):
         if self.partitioner is not None:
             ordered_sets = self.partitioner.ordered_sets
         else:
-            ordered_sets = FuzzySet.set_ordered(self.sets)
+            ordered_sets = FuzzySet.set_ordered(self.partitioner.sets)
 
         data = np.array(ndata)
 

@@ -89,7 +89,7 @@ class MarkovWeightedFTS(fts.FTS):
 
         for k in np.arange(0, l):
 
-            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
+            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.partitioner.sets, ordered_sets)
 
             if explain:
                 print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))

@@ -102,7 +102,7 @@ class MarkovWeightedFTS(fts.FTS):
 
             else:
                 flrg = self.flrgs[actual.name]
-                mp = flrg.get_midpoints(self.sets)
+                mp = flrg.get_midpoints(self.partitioner.sets)
 
                 final = mp.dot(flrg.weights())
 
@@ -70,7 +70,7 @@ class WeightedFTS(fts.FTS):
         if self.partitioner is not None:
             ordered_sets = self.partitioner.ordered_sets
         else:
-            ordered_sets = FuzzySet.set_ordered(self.sets)
+            ordered_sets = FuzzySet.set_ordered(self.partitioner.sets)
 
         ndata = np.array(ndata)
 

@@ -80,7 +80,7 @@ class WeightedFTS(fts.FTS):
 
         for k in np.arange(0, l):
 
-            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
+            actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.partitioner.sets, ordered_sets)
 
             if explain:
                 print("Fuzzyfication:\n\n {} -> {} \n\n".format(ndata[k], actual.name))

@@ -93,9 +93,9 @@ class WeightedFTS(fts.FTS):
 
             else:
                 flrg = self.flrgs[actual.name]
-                mp = flrg.get_midpoints(self.sets)
+                mp = flrg.get_midpoints(self.partitioner.sets)
 
-                final = mp.dot(flrg.weights(self.sets))
+                final = mp.dot(flrg.weights(self.partitioner.sets))
 
                 ret.append(final)
 
@@ -31,8 +31,6 @@ class Partitioner(object):
         """In a multivariate context, the variable that contains this partitioner"""
         self.type = kwargs.get('type', 'common')
         """The type of fuzzy sets that are generated by this partitioner"""
-        self.extractor = kwargs.get('extractor', lambda x: x)
-        """Anonymous function used to extract a single primitive type from an object instance"""
         self.ordered_sets = None
         """A ordered list of the fuzzy sets names, sorted by their middle point"""
         self.kdtree = None

@@ -76,6 +74,10 @@ class Partitioner(object):
 
         del(ndata)
 
+    def extractor(self,x):
+        """Extract a single primitive type from an structured instance"""
+        return x
+
     def build(self, data):
         """
         Perform the partitioning of the Universe of Discourse
@@ -7,122 +7,39 @@ import numpy as np
 import os
 from pyFTS.common import Transformations
 from copy import deepcopy
-from pyFTS.nonstationary import flrg, util, honsfts, partitioners
-from pyFTS.models.nonstationary import nsfts
-
-bc = Transformations.BoxCox(0)
-
-import dispy
-import dispy.httpd
-
-os.chdir("/home/petronio/Dropbox/Doutorado/Codigos/")
-
-
-def evaluate_individual_model(model, partitioner, train, test, window_size, time_displacement):
-    import numpy as np
-    from pyFTS.partitioners import Grid
-    from pyFTS.benchmarks import Measures
-
-    try:
-        model.train(train, sets=partitioner.sets, order=model.order, parameters=window_size)
-        forecasts = model.forecast(test, time_displacement=time_displacement, window_size=window_size)
-        _rmse = Measures.rmse(test[model.order:], forecasts[:-1])
-        _mape = Measures.mape(test[model.order:], forecasts[:-1])
-        _u = Measures.UStatistic(test[model.order:], forecasts[:-1])
-    except Exception as e:
-        print(e)
-        _rmse = np.nan
-        _mape = np.nan
-        _u = np.nan
-
-    return {'model': model.shortname, 'partitions': partitioner.partitions, 'order': model.order,
-            'rmse': _rmse, 'mape': _mape, 'u': _u}
-
-
-data = pd.read_csv("DataSets/synthetic_nonstationary_dataset_A.csv", sep=";")
-data = np.array(data["0"][:])
-
-cluster = dispy.JobCluster(evaluate_individual_model, nodes=['192.168.0.108', '192.168.0.110'])
-http_server = dispy.httpd.DispyHTTPServer(cluster)
-
-jobs = []
-
-models = []
-
-for order in [1, 2, 3]:
-    if order == 1:
-        model = nsfts.NonStationaryFTS("")
-    else:
-        model = honsfts.HighOrderNonStationaryFTS("")
-
-    model.order = order
-
-    models.append(model)
-
-for ct, train, test in cUtil.sliding_window(data, 300):
-    for partition in np.arange(5, 100, 1):
-        tmp_partitioner = Grid.GridPartitioner(train, partition)
-        partitioner = partitioners.PolynomialNonStationaryPartitioner(train, tmp_partitioner,
-                                                                      window_size=35, degree=1)
-        for model in models:
-            # print(model.shortname, partition, model.order)
-            #job = evaluate_individual_model(model, train, test)
-            job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, 35, 240)
-            job.id = ct + model.order*100
-            jobs.append(job)
-
-results = {}
-
-for job in jobs:
-    tmp = job()
-    # print(tmp)
-    if job.status == dispy.DispyJob.Finished and tmp is not None:
-        _m = tmp['model']
-        _o = tmp['order']
-        _p = tmp['partitions']
-        if _m not in results:
-            results[_m] = {}
-        if _o not in results[_m]:
-            results[_m][_o] = {}
-        if _p not in results[_m][_o]:
-            results[_m][_o][_p] = {}
-            results[_m][_o][_p]['rmse'] = []
-            results[_m][_o][_p]['mape'] = []
-            results[_m][_o][_p]['u'] = []
-
-        results[_m][_o][_p]['rmse'].append_rhs(tmp['rmse'])
-        results[_m][_o][_p]['mape'].append_rhs(tmp['mape'])
-        results[_m][_o][_p]['u'].append_rhs(tmp['u'])
-
-cluster.wait() # wait for all jobs to finish
-
-cluster.print_status()
-
-http_server.shutdown() # this waits until browser gets all updates
-cluster.close()
-
-dados = []
-ncolunas = None
-
-for m in sorted(results.keys()):
-    for o in sorted(results[m].keys()):
-        for p in sorted(results[m][o].keys()):
-            for r in ['rmse', 'mape', 'u']:
-                tmp = []
-                tmp.append(m)
-                tmp.append(o)
-                tmp.append(p)
-                tmp.append(r)
-                tmp.extend(results[m][o][p][r])
-
-                dados.append(tmp)
-
-                if ncolunas is None:
-                    ncolunas = len(results[m][o][p][r])
-
-colunas = ["model", "order", "partitions", "metric"]
-for k in np.arange(0, ncolunas):
-    colunas.append(str(k))
-
-dat = pd.DataFrame(dados, columns=colunas)
-dat.to_csv("experiments/nsfts_partitioning_A.csv", sep=";")
+from pyFTS.models import pwfts
+from pyFTS.benchmarks import benchmarks as bchmk, Measures
+
+import time
+
+from pyFTS.data import SONDA, Malaysia
+
+datasets = {}
+
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
+
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+
+malaysia = Malaysia.get_dataframe()
+
+datasets['SONDA.ws_10m'] = sonda["ws_10m"].values
+datasets['SONDA.glo_avg'] = sonda["glo_avg"].values
+datasets['Malaysia.temperature'] = malaysia["temperature"].values
+datasets['Malaysia.load'] = malaysia["load"].values
+
+windows = [600000, 600000, 10000, 10000]
+
+cpus = 3
+
+for ct, (dataset_name, dataset) in enumerate(datasets.items()):
+    bchmk.train_test_time(dataset, windowsize=windows[ct], train=0.9, inc=.5,
+                          methods=[pwfts.ProbabilisticWeightedFTS],
+                          order=2,
+                          partitions=50,
+                          steps=cpus,
+                          num_batches=cpus,
+                          distributed='dispy', nodes=['192.168.0.110'], #, '192.168.0.107','192.168.0.106'],
+                          file="experiments.db", dataset=dataset_name,
+                          tag="speedup")
@@ -46,24 +46,71 @@ Util.plot_distribution2(distributions, test[:10], start_at=model.order, ax=ax, c
 print("")
 '''
 
+from pyFTS.data import SONDA, Malaysia
+
+
+def sample_by_hour(data):
+    return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
+
+
 datasets = {}
 
-datasets['TAIEX'] = TAIEX.get_data()[:5000]
-datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
-datasets['SP500'] = SP500.get_data()[10000:15000]
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
+
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+
+malaysia = Malaysia.get_dataframe()
+
+datasets['SONDA.ws_10m'] = sample_by_hour(sonda["ws_10m"].values)
+datasets['SONDA.glo_avg'] = sample_by_hour(sonda["glo_avg"].values)
+datasets['Malaysia.temperature'] = malaysia["temperature"].values
+datasets['Malaysia.load'] = malaysia["load"].values
+
 
 #'''
 for dataset_name, dataset in datasets.items():
-    bchmk.sliding_window_benchmarks2(dataset, 1000, train=0.8, inc=0.2,
-                                     methods=[pwfts.ProbabilisticWeightedFTS],
+    bchmk.sliding_window_benchmarks2(dataset, 10000, train=0.9, inc=0.25,
+                                     methods=[hofts.HighOrderFTS, hofts.WeightedHighOrderFTS, pwfts.ProbabilisticWeightedFTS],
                                      benchmark_models=False,
                                      transformations=[None],
-                                     orders=[1, 2, 3],
-                                     partitions=np.arange(10, 100, 5),
+                                     orders=[2],
+                                     partitions=[50],
                                      progress=False, type='point',
-                                     distributed=True, nodes=['192.168.254.113'],
+                                     distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
                                      file="experiments.db", dataset=dataset_name,
-                                     tag="gridsearch")
+                                     tag="experiments")
+
+for dataset_name, dataset in datasets.items():
+    bchmk.sliding_window_benchmarks2(dataset, 10000, train=0.9, inc=0.25,
+                                     methods=[ensemble.SimpleEnsembleFTS, ifts.IntervalFTS,
+                                              ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS],
+                                     methods_parameters=[{'partitions': [45, 50, 55], 'alpha':.05},
+                                                         {},{},{}],
+                                     benchmark_models=False,
+                                     transformations=[None],
+                                     orders=[2],
+                                     partitions=[50],
+                                     progress=False, type='interval',
+                                     distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                     file="experiments.db", dataset=dataset_name,
+                                     tag="experiments")
+
+for dataset_name, dataset in datasets.items():
+    bchmk.sliding_window_benchmarks2(dataset, 10000, train=0.9, inc=0.25,
+                                     methods=[ensemble.SimpleEnsembleFTS, pwfts.ProbabilisticWeightedFTS],
+                                     methods_parameters=[{'partitions':[45,50,55]}, {}],
+                                     benchmark_models=False,
+                                     transformations=[None],
+                                     orders=[2],
+                                     partitions=[50],
+                                     progress=False, type='distribution',
+                                     distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                     file="experiments.db", dataset=dataset_name,
+                                     tag="experiments")
+
 '''
 
@@ -3,18 +3,21 @@ import pandas as pd
 from pyFTS.hyperparam import GridSearch, Evolutionary
 from pyFTS.models import pwfts
 
 
 def get_dataset():
     from pyFTS.data import SONDA
     #from pyFTS.data import Malaysia
 
-    #data = SONDA.get_data('temperature')[:3000]
-    data = pd.read_csv('https://query.data.world/s/6xfb5useuotbbgpsnm5b2l3wzhvw2i', sep=';')
-    #data = Malaysia.get_data('temperature')[:1000]
+    data = [k for k in SONDA.get_data('ws_10m') if k > 0.1 and k != np.nan and k is not None]
+    data = [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
+    #data = pd.read_csv('https://query.data.world/s/6xfb5useuotbbgpsnm5b2l3wzhvw2i', sep=';')
+    #data = Malaysia.get_data('temperature')
 
-    return 'SONDA.glo_avg', data['glo_avg'].values #train, test
+    return 'SONDA.ws_10m', data
+    #return 'Malaysia.temperature', data #train, test
     #return 'Malaysia.temperature', data # train, test
 
+'''
 hyperparams = {
     'order':[3],
     'partitions': np.arange(10,100,3),

@@ -24,7 +27,6 @@ hyperparams = {
     'alpha': np.arange(.0, .5, .05)
 }
 
-'''
 hyperparams = {
     'order':[3], #[1, 2],
     'partitions': np.arange(10,100,10),

@@ -43,11 +45,11 @@ datsetname, dataset = get_dataset()
 
 ret = Evolutionary.execute(datsetname, dataset,
                            ngen=30, npop=20,psel=0.6, pcross=.5, pmut=.3,
-                           window_size=10000, train_rate=.9, increment_rate=1,
-                           experiments=1,
+                           window_size=10000, train_rate=.9, increment_rate=.3,
+                           experiments=2,
                            fts_method=pwfts.ProbabilisticWeightedFTS,
                            database_file='experiments.db',
-                           distributed=False, nodes=nodes)
+                           distributed='dispy', nodes=nodes)
 
 #res = GridSearch.cluster_method({'mf':1, 'partitioner': 1, 'npart': 10, 'lags':[1], 'alpha': 0.0, 'order': 1},
 #                                dataset, window_size = 10000, train_rate = .9, increment_rate = 1)