Spark Distributed; Hyperparameter optimization
This commit is contained in:
parent
87686e5ff0
commit
2e1d7fa11a
178
docs/build/html/_modules/pyFTS/partitioners/Simple.html
vendored
Normal file
178
docs/build/html/_modules/pyFTS/partitioners/Simple.html
vendored
Normal file
@ -0,0 +1,178 @@
|
||||
|
||||
|
||||
<!doctype html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><script type="text/javascript">
|
||||
|
||||
var _gaq = _gaq || [];
|
||||
_gaq.push(['_setAccount', 'UA-55120145-3']);
|
||||
_gaq.push(['_trackPageview']);
|
||||
|
||||
(function() {
|
||||
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||
})();
|
||||
</script>
|
||||
<title>pyFTS.partitioners.Simple — pyFTS 1.4 documentation</title>
|
||||
<link rel="stylesheet" href="../../../_static/bizstyle.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||||
<script type="text/javascript" src="../../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
|
||||
<script type="text/javascript" src="../../../_static/bizstyle.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1.0">
|
||||
<!--[if lt IE 9]>
|
||||
<script type="text/javascript" src="../../../_static/css3-mediaqueries.js"></script>
|
||||
<![endif]-->
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">pyFTS 1.4 documentation</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/logo_heading2.png" alt="Logo"/>
|
||||
</a></p>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3>Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" />
|
||||
<input type="submit" value="Go" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script type="text/javascript">$('#searchbox').show(0);</script>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for pyFTS.partitioners.Simple</h1><div class="highlight"><pre>
|
||||
<span></span><span class="sd">"""Simple Partitioner for manually informed fuzzy sets"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">math</span>
|
||||
<span class="kn">import</span> <span class="nn">random</span> <span class="k">as</span> <span class="nn">rnd</span>
|
||||
<span class="kn">import</span> <span class="nn">functools</span><span class="o">,</span> <span class="nn">operator</span>
|
||||
<span class="kn">from</span> <span class="nn">pyFTS.common</span> <span class="k">import</span> <span class="n">FuzzySet</span><span class="p">,</span> <span class="n">Membership</span>
|
||||
<span class="kn">from</span> <span class="nn">pyFTS.partitioners</span> <span class="k">import</span> <span class="n">partitioner</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="SimplePartitioner"><a class="viewcode-back" href="../../../pyFTS.partitioners.html#pyFTS.partitioners.Simple.SimplePartitioner">[docs]</a><span class="k">class</span> <span class="nc">SimplePartitioner</span><span class="p">(</span><span class="n">partitioner</span><span class="o">.</span><span class="n">Partitioner</span><span class="p">):</span>
|
||||
<span class="sd">"""Simple Partitioner for manually informed fuzzy sets"""</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Simple Partitioner - the fuzzy sets are informed manually</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">kwargs</span><span class="p">[</span><span class="s1">'preprocess'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">SimplePartitioner</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">"Simple"</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">partitions</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
|
||||
<div class="viewcode-block" id="SimplePartitioner.append"><a class="viewcode-back" href="../../../pyFTS.partitioners.html#pyFTS.partitioners.Simple.SimplePartitioner.append">[docs]</a> <span class="k">def</span> <span class="nf">append</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">mf</span><span class="p">,</span> <span class="n">parameters</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Append a new partition (fuzzy set) to the partitioner</span>
|
||||
|
||||
<span class="sd"> :param name: Fuzzy set name</span>
|
||||
<span class="sd"> :param mf: One of the pyFTS.common.Membership functions</span>
|
||||
<span class="sd"> :param parameters: A list with the parameters for the membership function</span>
|
||||
<span class="sd"> :param kwargs: Optional arguments for the fuzzy set</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The name of the fuzzy set cannot be empty"</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"This name has already been used"</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">mf</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">mf</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="n">Membership</span><span class="o">.</span><span class="n">trimf</span><span class="p">,</span> <span class="n">Membership</span><span class="o">.</span><span class="n">gaussmf</span><span class="p">,</span>
|
||||
<span class="n">Membership</span><span class="o">.</span><span class="n">trapmf</span><span class="p">,</span> <span class="n">Membership</span><span class="o">.</span><span class="n">singleton</span><span class="p">,</span>
|
||||
<span class="n">Membership</span><span class="o">.</span><span class="n">sigmf</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The mf parameter should be one of pyFTS.common.Membership functions"</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">trimf</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">3</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.trimf"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">gaussmf</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.gaussmf"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">trapmf</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">4</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.trapmf"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">centroid</span> <span class="o">=</span> <span class="p">(</span><span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">parameters</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">/</span><span class="mi">2</span>
|
||||
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">singleton</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.singleton"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">sigmf</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.sigmf"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="p">(</span><span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">/</span> <span class="p">(</span><span class="mi">2</span> <span class="o">*</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">FuzzySet</span><span class="o">.</span><span class="n">FuzzySet</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">mf</span><span class="p">,</span> <span class="n">parameters</span><span class="p">,</span> <span class="n">centroid</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">partitions</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ordered_sets</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="o">.</span><span class="n">keys</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">k</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="o">.</span><span class="n">centroid</span><span class="p">)]</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">min</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_sets</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">lower</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_sets</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]</span><span class="o">.</span><span class="n">upper</span></div></div>
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
>index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">pyFTS 1.4 documentation</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright 2018, Machine Intelligence and Data Science Laboratory - UFMG - Brazil.
|
||||
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.7.2.
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@ -26,6 +26,10 @@ class FuzzySet(FuzzySet.FuzzySet):
|
||||
self.mf = []
|
||||
self.parameters = []
|
||||
|
||||
self.lower = None
|
||||
self.upper = None
|
||||
self.centroid = None
|
||||
|
||||
|
||||
def membership(self, x):
|
||||
"""
|
||||
@ -61,4 +65,14 @@ class FuzzySet(FuzzySet.FuzzySet):
|
||||
:param parameters:
|
||||
:return:
|
||||
"""
|
||||
self.sets.append(set)
|
||||
self.sets.append(set)
|
||||
|
||||
if self.lower is None or self.lower > set.lower:
|
||||
self.lower = set.lower
|
||||
|
||||
if self.upper is None or self.upper < set.upper:
|
||||
self.upper = set.upper
|
||||
|
||||
if self.centroid is None or self.centroid < set.centroid:
|
||||
self.centroid = set.centroid
|
||||
|
||||
|
@ -125,6 +125,7 @@ def fuzzyfy(data, partitioner, **kwargs):
|
||||
:keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
|
||||
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
|
||||
values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
|
||||
|
||||
:returns a list with the fuzzyfied values, depending on the mode
|
||||
"""
|
||||
alpha_cut = kwargs.get('alpha_cut', 0.)
|
||||
|
@ -74,7 +74,7 @@ def sigmf(x, parameters):
|
||||
|
||||
:param x:
|
||||
:param parameters: a list with 2 real values (smoothness and midpoint)
|
||||
:return:
|
||||
:return
|
||||
"""
|
||||
return 1 / (1 + math.exp(-parameters[0] * (x - parameters[1])))
|
||||
|
||||
|
@ -38,6 +38,9 @@ class FTS(object):
|
||||
"""A boolean value indicating if the model support probabilistic forecasting, default: False"""
|
||||
self.is_multivariate = False
|
||||
"""A boolean value indicating if the model support multivariate time series (Pandas DataFrame), default: False"""
|
||||
self.is_clustered = False
|
||||
"""A boolean value indicating if the model support multivariate time series (Pandas DataFrame), but works like
|
||||
a monovariate method, default: False"""
|
||||
self.dump = False
|
||||
self.transformations = []
|
||||
"""A list with the data transformations (common.Transformations) applied on model pre and post processing, default: []"""
|
||||
@ -61,6 +64,8 @@ class FTS(object):
|
||||
"""Flag indicating if the test data will be clipped inside the training Universe of Discourse"""
|
||||
self.alpha_cut = kwargs.get("alpha_cut", 0.0)
|
||||
"""A float with the minimal membership to be considered on fuzzyfication process"""
|
||||
self.lags = kwargs.get("lags", None)
|
||||
"""The list of lag indexes for high order models"""
|
||||
self.max_lag = self.order
|
||||
"""A integer indicating the largest lag used by the model. This value also indicates the minimum number of past lags
|
||||
needed to forecast a single step ahead"""
|
||||
|
@ -3,6 +3,7 @@ import pandas as pd
|
||||
|
||||
from pyFTS.data import Enrollments, TAIEX
|
||||
from pyFTS.partitioners import Grid, Simple
|
||||
from pyFTS.models.multivariate import partitioner as mv_partitioner
|
||||
from pyFTS.models import hofts
|
||||
|
||||
from pyspark import SparkConf
|
||||
@ -10,44 +11,141 @@ from pyspark import SparkContext
|
||||
|
||||
import os
|
||||
# Default Spark master URL, used as the default value of the `url` argument
# of the distributed functions in this module.
SPARK_ADDR = 'spark://192.168.0.110:7077'

# Make sure pyspark tells workers to use python3, not python2, if both are installed.
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
||||
|
||||
|
||||
|
||||
def get_partitioner(shared_partitioner, type='common', variables=None):
    """
    Rebuild a partitioner from a shared (broadcast) collection of fuzzy sets.

    :param shared_partitioner: object whose ``value`` attribute is a dictionary of fuzzy sets
    :param type: kind of partitioner to rebuild; only 'common' can be built here
    :param variables: currently unused, kept for backward compatibility
    :return: a Simple.SimplePartitioner containing every fuzzy set of shared_partitioner.value
    :raises ValueError: if type is not 'common'
    """
    if type != 'common':
        # No container partitioner is created for any other type, so the
        # function could only fail later on an unbound local variable.
        # Fail early with an explicit message instead.
        raise ValueError("Unsupported partitioner type: {}".format(type))

    fs_tmp = Simple.SimplePartitioner()

    for fz in shared_partitioner.value.values():
        fs_tmp.append_complex(fz)

    return fs_tmp
|
||||
|
||||
|
||||
def slave_train(data, shared_method, shared_partitioner, shared_order):
|
||||
def get_clustered_partitioner(explanatory_variables, target_variable, **parameters):
    """
    Assemble a MultivariatePartitioner from the multivariate fuzzy sets carried in ``parameters``.

    :param explanatory_variables: forwarded to MultivariatePartitioner
    :param target_variable: forwarded to MultivariatePartitioner and to every MultivariateFuzzySet
    :param parameters: shared objects read through ``.value``: 'partitioner_names' lists the
                       set names and each 'partitioner_<name>' entry holds (variable, fuzzy set) pairs
    :return: the populated partitioner, after build_index() has been called on it
    """
    from pyFTS.models.multivariate.common import MultivariateFuzzySet

    clustered = mv_partitioner.MultivariatePartitioner(
        explanatory_variables=explanatory_variables,
        target_variable=target_variable)

    for set_name in parameters['partitioner_names'].value:
        mv_set = MultivariateFuzzySet(target_variable=target_variable)
        members = parameters['partitioner_{}'.format(set_name)].value
        for variable_key, fuzzy_set in members:
            mv_set.append_set(variable_key, fuzzy_set)
        clustered.append(mv_set)

    clustered.build_index()

    return clustered
|
||||
|
||||
|
||||
def get_variables(**parameters):
    """
    Rebuild the model variables from the shared ``parameters``.

    For each name in parameters['variables'] a Variable is created from the
    '<name>_type', '<name>_label' and '<name>_alpha' entries. Its partitioner is
    rebuilt from '<name>_partitioner' and tagged with '<name>_partitioner_type'.

    :param parameters: shared objects read through ``.value``, keyed as described above,
                       plus 'target' holding the name of the target variable
    :return: a tuple (explanatory_variables, target_variable); target_variable is None
             when no variable name matches parameters['target']
    """
    # Local import, hoisted out of the loop because it is loop-invariant.
    from pyFTS.models.multivariate import common, variable

    explanatory_variables = []
    target_variable = None

    for name in parameters['variables'].value:
        # NOTE: the data_type and mask arguments of Variable are not forwarded
        # (they were disabled in the original code).
        var = variable.Variable(name,
                                type=parameters['{}_type'.format(name)].value,
                                data_label=parameters['{}_label'.format(name)].value,
                                alpha_cut=parameters['{}_alpha'.format(name)].value)
        var.partitioner = get_partitioner(parameters['{}_partitioner'.format(name)])
        var.partitioner.type = parameters['{}_partitioner_type'.format(name)].value

        explanatory_variables.append(var)

        if var.name == parameters['target'].value:
            target_variable = var

    return (explanatory_variables, target_variable)
|
||||
|
||||
|
||||
def slave_train_univariate(data, **parameters):
    """
    Train a univariate model on one partition of the data and return its rules.

    :param data: iterable with the samples of this partition
    :param parameters: shared objects read through ``.value``: 'type', 'method' (the model
                       class), 'partitioner', 'alpha_cut', 'order' and 'lags'
    :return: a list with the (key, value) pairs of the trained model's ``flrgs``
    :raises ValueError: if parameters['type'] is not 'common'
    """
    if parameters['type'].value != 'common':
        # Any other type used to fall through with `model` and `ndata` unbound
        # and crash on the training call; report the real problem instead.
        raise ValueError("Unsupported model type: {}".format(parameters['type'].value))

    model_args = dict(partitioner=get_partitioner(parameters['partitioner']),
                      alpha_cut=parameters['alpha_cut'].value)

    if parameters['order'].value > 1:
        # order and lags are only forwarded to models of order greater than one
        model_args['order'] = parameters['order'].value
        model_args['lags'] = parameters['lags'].value

    model = parameters['method'].value(**model_args)

    model.train(list(data))

    return list(model.flrgs.items())
|
||||
|
||||
|
||||
def distributed_train(model, data, url='spark://192.168.0.110:7077', app='pyFTS'):
|
||||
def slave_train_multivariate(data, **parameters):
    """
    Train a multivariate model on one partition of records and return what it learned.

    :param data: iterable with the records of this partition
    :param parameters: shared objects read through ``.value``; besides the entries consumed
                       by get_variables, reads 'type', 'method' (the model class), 'order',
                       'alpha_cut', 'lags' and 'columns'
    :return: for 'clustered' models the list [('counts', counts), ('flrgs', flrgs)], where both
             items are lists of pairs; otherwise the list of (key, value) pairs of the model's flrgs
    """
    explanatory_variables, target_variable = get_variables(**parameters)

    is_clustered = parameters['type'].value == 'clustered'

    model_args = dict(explanatory_variables=explanatory_variables,
                      target_variable=target_variable)

    if is_clustered:
        model_args['partitioner'] = get_clustered_partitioner(
            explanatory_variables, target_variable, **parameters)

    # Clustered models always receive order and lags; the others only when order > 1.
    if is_clustered or parameters['order'].value > 1:
        model_args['order'] = parameters['order'].value
        model_args['alpha_cut'] = parameters['alpha_cut'].value
        model_args['lags'] = parameters['lags'].value
    else:
        model_args['alpha_cut'] = parameters['alpha_cut'].value

    model = parameters['method'].value(**model_args)

    # Rebuild a DataFrame from the raw records of the partition before training.
    frame = pd.DataFrame.from_records(list(data), columns=parameters['columns'].value)

    model.train(frame)

    if is_clustered:
        counts = list(model.partitioner.count.items())
        flrgs = list(model.flrgs.items())
        return [('counts', counts), ('flrgs', flrgs)]

    return list(model.flrgs.items())
|
||||
|
||||
|
||||
def distributed_train(model, data, url=SPARK_ADDR, app='pyFTS'):
|
||||
"""
|
||||
|
||||
|
||||
@ -61,22 +159,92 @@ def distributed_train(model, data, url='spark://192.168.0.110:7077', app='pyFTS'
|
||||
conf = SparkConf()
|
||||
conf.setMaster(url)
|
||||
conf.setAppName(app)
|
||||
conf.set("spark.executor.memory", "2g")
|
||||
conf.set("spark.driver.memory", "2g")
|
||||
conf.set("spark.memory.offHeap.enabled",True)
|
||||
conf.set("spark.memory.offHeap.size","16g")
|
||||
parameters = {}
|
||||
|
||||
with SparkContext(conf=conf) as context:
|
||||
shared_partitioner = context.broadcast(model.partitioner.sets)
|
||||
shared_order = context.broadcast(model.order)
|
||||
shared_method = context.broadcast(type(model))
|
||||
|
||||
func = lambda x: slave_train(x, shared_method, shared_partitioner, shared_order)
|
||||
nodes = context.defaultParallelism
|
||||
|
||||
flrgs = context.parallelize(data).mapPartitions(func)
|
||||
if not model.is_multivariate:
|
||||
parameters['type'] = context.broadcast('common')
|
||||
parameters['partitioner'] = context.broadcast(model.partitioner.sets)
|
||||
parameters['alpha_cut'] = context.broadcast(model.alpha_cut)
|
||||
parameters['order'] = context.broadcast(model.order)
|
||||
parameters['method'] = context.broadcast(type(model))
|
||||
parameters['lags'] = context.broadcast(model.lags)
|
||||
parameters['max_lag'] = context.broadcast(model.max_lag)
|
||||
|
||||
for k in flrgs.collect():
|
||||
model.append_rule(k[1])
|
||||
func = lambda x: slave_train_univariate(x, **parameters)
|
||||
|
||||
return model
|
||||
flrgs = context.parallelize(data).repartition(nodes*2).mapPartitions(func)
|
||||
|
||||
for k in flrgs.collect():
|
||||
model.append_rule(k[1])
|
||||
|
||||
return model
|
||||
else:
|
||||
if model.is_clustered:
|
||||
parameters['type'] = context.broadcast('clustered')
|
||||
names = []
|
||||
for name, fset in model.partitioner.sets.items():
|
||||
names.append(name)
|
||||
parameters['partitioner_{}'.format(name)] = context.broadcast([(k,v) for k,v in fset.sets.items()])
|
||||
|
||||
parameters['partitioner_names'] = context.broadcast(names)
|
||||
|
||||
else:
|
||||
parameters['type'] = context.broadcast('multivariate')
|
||||
names = []
|
||||
for var in model.explanatory_variables:
|
||||
#if var.data_type is None:
|
||||
# raise Exception("It is mandatory to inform the data_type parameter for each variable when the training is distributed! ")
|
||||
names.append(var.name)
|
||||
parameters['{}_type'.format(var.name)] = context.broadcast(var.type)
|
||||
#parameters['{}_data_type'.format(var.name)] = context.broadcast(var.data_type)
|
||||
#parameters['{}_mask'.format(var.name)] = context.broadcast(var.mask)
|
||||
parameters['{}_label'.format(var.name)] = context.broadcast(var.data_label)
|
||||
parameters['{}_alpha'.format(var.name)] = context.broadcast(var.alpha_cut)
|
||||
parameters['{}_partitioner'.format(var.name)] = context.broadcast(var.partitioner.sets)
|
||||
parameters['{}_partitioner_type'.format(var.name)] = context.broadcast(var.partitioner.type)
|
||||
|
||||
parameters['variables'] = context.broadcast(names)
|
||||
parameters['target'] = context.broadcast(model.target_variable.name)
|
||||
|
||||
parameters['columns'] = context.broadcast(data.columns.values)
|
||||
|
||||
data = data.to_dict(orient='records')
|
||||
|
||||
parameters['alpha_cut'] = context.broadcast(model.alpha_cut)
|
||||
parameters['order'] = context.broadcast(model.order)
|
||||
parameters['method'] = context.broadcast(type(model))
|
||||
parameters['lags'] = context.broadcast(model.lags)
|
||||
parameters['max_lag'] = context.broadcast(model.max_lag)
|
||||
|
||||
func = lambda x: slave_train_multivariate(x, **parameters)
|
||||
|
||||
flrgs = context.parallelize(data).mapPartitions(func)
|
||||
|
||||
for k in flrgs.collect():
|
||||
print(k)
|
||||
#for g in k:
|
||||
# print(g)
|
||||
|
||||
#return
|
||||
if parameters['type'].value == 'clustered':
|
||||
if k[0] == 'counts':
|
||||
for fset, count in k[1]:
|
||||
model.partitioner.count[fset] = count
|
||||
elif k[0] == 'flrgs':
|
||||
model.append_rule(k[1])
|
||||
else:
|
||||
model.append_rule(k[1])
|
||||
|
||||
return model
|
||||
|
||||
|
||||
|
||||
def distributed_predict(data, model, url='spark://192.168.0.110:7077', app='pyFTS'):
|
||||
def distributed_predict(data, model, url=SPARK_ADDR, app='pyFTS'):
|
||||
return None
|
||||
|
@ -15,15 +15,32 @@ from pyFTS.common import Membership
|
||||
from pyFTS.hyperparam import Util as hUtil
|
||||
|
||||
|
||||
# Gera indivíduos após operadores
|
||||
#
|
||||
def genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse):
    '''
    Create the individual genotype

    :param mf: membership function
    :param npart: number of partitions
    :param partitioner: partitioner method
    :param order: model order
    :param alpha: alpha-cut
    :param lags: array with lag indexes
    :param len_lags: parsimony fitness value
    :param rmse: accuracy fitness value
    :return: the genotype, a dictionary with all hyperparameters
    '''
    # The dictionary is built once, and the docstring is the first statement of
    # the function so that it is exposed as genotype.__doc__.
    return dict(mf=mf, npart=npart, partitioner=partitioner, order=order,
                alpha=alpha, lags=lags, len_lags=len_lags, rmse=rmse)
|
||||
|
||||
|
||||
# Gera indivíduos
|
||||
def random_genotype():
|
||||
'''
|
||||
Create random genotype
|
||||
|
||||
:return: the genotype, a dictionary with all hyperparameters
|
||||
'''
|
||||
order = random.randint(1, 3)
|
||||
return genotype(
|
||||
random.randint(1, 4),
|
||||
@ -32,21 +49,34 @@ def random_genotype():
|
||||
order,
|
||||
random.uniform(0, .5),
|
||||
sorted(random.sample(range(1, 50), order)),
|
||||
[],
|
||||
[]
|
||||
None,
|
||||
None
|
||||
)
|
||||
|
||||
|
||||
# Gera uma população de tamanho n
|
||||
#
|
||||
def initial_population(n):
    '''
    Build the starting population for the genetic algorithm.

    :param n: the size of the population
    :return: a list with n individuals, each produced by random_genotype()
    '''
    return [random_genotype() for _ in range(n)]
|
||||
|
||||
|
||||
# Função de avaliação
|
||||
def phenotype(individual, train):
|
||||
def phenotype(individual, train, parameters={}):
|
||||
'''
|
||||
Instantiate the genotype, creating a fitted model with the genotype hyperparameters
|
||||
|
||||
:param individual: a genotype
|
||||
:param train: the training dataset
|
||||
:param parameters: dict with model specific arguments for fit method.
|
||||
:return: a fitted FTS model
|
||||
'''
|
||||
try:
|
||||
if individual['mf'] == 1:
|
||||
mf = Membership.trimf
|
||||
@ -67,28 +97,48 @@ def phenotype(individual, train):
|
||||
alpha_cut=individual['alpha'],
|
||||
order=individual['order'])
|
||||
|
||||
model.fit(train)
|
||||
model.fit(train, **parameters)
|
||||
|
||||
return model
|
||||
|
||||
except Exception as ex:
|
||||
print("EXCEPTION!", str(ex), str(individual))
|
||||
print("PHENOTYPE EXCEPTION!", str(ex), str(individual))
|
||||
return None
|
||||
|
||||
|
||||
def evaluation1(dataset, individual):
|
||||
def evaluate(dataset, individual, **kwargs):
|
||||
'''
|
||||
Evaluate an individual using a sliding window cross validation over the dataset.
|
||||
|
||||
:param dataset: Evaluation dataset
|
||||
:param individual: genotype to be tested
|
||||
:param window_size: The length of scrolling window for train/test on dataset
|
||||
:param train_rate: The train/test split ([0,1])
|
||||
:param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
|
||||
:param parameters: dict with model specific arguments for fit method.
|
||||
:return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value
|
||||
'''
|
||||
from pyFTS.common import Util
|
||||
from pyFTS.benchmarks import Measures
|
||||
|
||||
window_size = kwargs.get('window_size', 800)
|
||||
train_rate = kwargs.get('train_rate', .8)
|
||||
increment_rate = kwargs.get('increment_rate', .2)
|
||||
parameters = kwargs.get('parameters',{})
|
||||
|
||||
if individual['rmse'] is not None and individual['len_lags'] is not None:
|
||||
return individual['len_lags'], individual['rmse']
|
||||
|
||||
try:
|
||||
results = []
|
||||
lengths = []
|
||||
|
||||
for count, train, test in Util.sliding_window(dataset, 800, train=.8, inc=.25):
|
||||
model = phenotype(individual, train)
|
||||
for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
|
||||
|
||||
model = phenotype(individual, train, parameters=parameters)
|
||||
|
||||
if model is None:
|
||||
return (None)
|
||||
raise Exception("Phenotype returned None")
|
||||
|
||||
rmse, _, _ = Measures.get_point_statistics(test, model)
|
||||
lengths.append(len(model))
|
||||
@ -100,36 +150,59 @@ def evaluation1(dataset, individual):
|
||||
rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
|
||||
len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])
|
||||
|
||||
#print("EVALUATION {}".format(individual))
|
||||
return len_lags, rmse
|
||||
|
||||
except Exception as ex:
|
||||
print("EXCEPTION!", str(ex), str(individual))
|
||||
return np.inf
|
||||
print("EVALUATION EXCEPTION!", str(ex), str(individual))
|
||||
return np.inf, np.inf
|
||||
|
||||
|
||||
def tournament(population, objective):
    '''
    Simple tournament selection strategy.

    Two competitors are picked and the one with the smaller value of
    ``objective`` wins; on a tie the second competitor wins.

    :param population: the population, a non-empty list of genotypes
    :param objective: the objective (genotype key) to be considered on tournament
    :return: the winning genotype
    :raises IndexError: if the population is empty
    '''
    size = len(population)

    if size == 0:
        raise IndexError("tournament() requires a non-empty population")

    if size == 1:
        # Nothing to compete against.
        return population[0]

    if size == 2:
        # Used by double_tournament for the final: compare exactly the two finalists.
        r1, r2 = 0, 1
    else:
        # Sample over the WHOLE population (a 3-element population used to ignore index 2).
        r1 = random.randint(0, size - 1)
        r2 = random.randint(0, size - 1)

    try:
        ix = r1 if population[r1][objective] < population[r2][objective] else r2
    except Exception:
        # Dump the competitors that could not be compared, then propagate.
        print(r1, population[r1])
        print(r2, population[r2])
        raise

    return population[ix]
|
||||
|
||||
|
||||
def double_tournament(population):
    '''
    Double tournament selection strategy.

    Two tournaments on the accuracy objective ('rmse') choose two finalists,
    then a tournament on the parsimony objective ('len_lags') picks the winner.

    :param population: the population, a list of evaluated genotypes
    :return: the selected genotype
    '''
    ancestor1 = tournament(population, 'rmse')
    ancestor2 = tournament(population, 'rmse')

    selected = tournament([ancestor1, ancestor2], 'len_lags')

    return selected
|
||||
|
||||
|
||||
def lag_crossover2(best, worst):
|
||||
'''
|
||||
Cross over two lag genes
|
||||
|
||||
:param best: best genotype
|
||||
:param worst: worst genotype
|
||||
:return: a tuple (order, lags)
|
||||
'''
|
||||
order = int(round(.7 * best['order'] + .3 * worst['order']))
|
||||
lags = []
|
||||
|
||||
@ -151,15 +224,26 @@ def lag_crossover2(best, worst):
|
||||
|
||||
|
||||
# Cruzamento
|
||||
def crossover(pais):
|
||||
def crossover(parents):
|
||||
'''
|
||||
Crossover operation between two parents
|
||||
|
||||
:param parents: a list with two genotypes
|
||||
:return: a genotype
|
||||
'''
|
||||
import random
|
||||
|
||||
if pais[0]['rmse'] < pais[1]['rmse']:
|
||||
best = pais[0]
|
||||
worst = pais[1]
|
||||
n = len(parents) - 1
|
||||
|
||||
r1 = random.randint(0, n)
|
||||
r2 = random.randint(0, n)
|
||||
|
||||
if parents[r1]['rmse'] < parents[r2]['rmse']:
|
||||
best = parents[r1]
|
||||
worst = parents[r2]
|
||||
else:
|
||||
best = pais[1]
|
||||
worst = pais[0]
|
||||
best = parents[r2]
|
||||
worst = parents[r1]
|
||||
|
||||
npart = int(round(.7 * best['npart'] + .3 * worst['npart']))
|
||||
alpha = float(.7 * best['alpha'] + .3 * worst['alpha'])
|
||||
@ -172,119 +256,197 @@ def crossover(pais):
|
||||
|
||||
order, lags = lag_crossover2(best, worst)
|
||||
|
||||
rmse = []
|
||||
len_lags = []
|
||||
descendent = genotype(mf, npart, partitioner, order, alpha, lags, None, None)
|
||||
|
||||
filho = genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse)
|
||||
return descendent
|
||||
|
||||
return filho
|
||||
|
||||
|
||||
# Mutação | p é a probabilidade de mutação
|
||||
|
||||
def mutation_lags(lags, order):
    '''
    Mutation operation for lags gene

    Each existing lag is shifted by a random integer in [-5, 5) and clipped
    to [1, 50]; if ``order`` is larger than the number of lags, new lags are
    appended after the last one. The result is then forced to be strictly
    increasing.

    :param lags: list with the current lag indexes
    :param order: the (possibly mutated) model order, i.e. the number of lags wanted
    :return: a list with ``order`` strictly increasing lag indexes
    '''
    # Bound before the try block so the diagnostic print below can never fail.
    new = []
    lag = None
    try:
        known = len(lags)
        for lag in range(order):
            if lag < known:
                new.append(min(50, max(1, int(lags[lag] + np.random.randint(-5, 5)))))
            elif new:
                new.append(int(new[-1] + np.random.randint(1, 5)))
            else:
                # No previous lag to build on (empty ``lags``): start a fresh sequence.
                new.append(int(np.random.randint(1, 5)))

        # Repair ordering: every lag must be greater than its predecessor.
        for k in range(1, order):
            while new[k] <= new[k - 1]:
                new[k] = int(new[k] + np.random.randint(1, 5))

        return new
    except Exception:
        # Print the state for debugging, then propagate as tournament() does,
        # instead of silently returning None.
        print(lags, order, new, lag)
        raise
|
||||
|
||||
|
||||
def mutation(individual, pmut):
    '''
    Mutation operator

    With probability ``pmut`` every gene of the individual is perturbed in
    place and its fitness values are reset to None so that it is evaluated
    again; otherwise the individual is returned untouched.

    :param individual: the genotype to mutate (modified in place)
    :param pmut: probability of mutation ([0,1])
    :return: the same genotype object
    '''
    rnd = random.uniform(0, 1)

    if rnd < pmut:

        print('mutation')

        individual['npart'] = min(50, max(3, int(individual['npart'] + np.random.normal(0, 4))))
        individual['alpha'] = min(.5, max(0, individual['alpha'] + np.random.normal(0, .5)))
        individual['mf'] = random.randint(1, 2)
        individual['partitioner'] = random.randint(1, 2)
        individual['order'] = min(5, max(1, int(individual['order'] + np.random.normal(0, 1))))
        # The lags gene has its own operator and must follow the new order.
        individual['lags'] = mutation_lags(individual['lags'], individual['order'])

        # The cached fitness no longer describes this genotype.
        individual['rmse'] = None
        individual['len_lags'] = None

    return individual
|
||||
|
||||
|
||||
# Elitismo
|
||||
def elitism(population, new_population):
    '''
    Elitism operation: keep the best individual of the current population.

    The new population is sorted by 'rmse'; if its best member is worse than
    the best member of the current population, the latter is inserted at the
    front. The result may therefore hold one more individual than
    ``new_population``.

    :param population: the current (already evaluated) population
    :param new_population: the offspring (already evaluated) population
    :return: the new population sorted by rmse, best individual first
    '''
    population = sorted(population, key=itemgetter('rmse'))
    best = population[0]

    new_population = sorted(new_population, key=itemgetter('rmse'))

    # An empty offspring list used to raise IndexError; the elite alone survives.
    if not new_population or new_population[0]["rmse"] > best["rmse"]:
        new_population.insert(0, best)

    return new_population
|
||||
|
||||
|
||||
def GeneticAlgorithm(dataset, **kwargs):
    '''
    Genetic algorithm for hyperparameter optimization

    :param dataset: the data used to evaluate the individuals
    :param ngen: Max number of generations
    :param mgen: Max number of generations without improvement
    :param npop: Population size
    :param pcruz: Probability of crossover
    :param pmut: Probability of mutation
    :param collect_statistics: if True, fitness statistics are gathered per generation
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: the best genotype; when collect_statistics is True, a tuple with
             the best genotype and the statistics of the last generation
    '''

    statistics = []

    ngen = kwargs.get('ngen', 30)
    mgen = kwargs.get('mgen', 7)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .5)
    pmut = kwargs.get('pmut', .3)

    collect_statistics = kwargs.get('collect_statistics', False)

    no_improvement_count = 0

    new_population = []

    # Bound up front so the final return cannot fail when ngen == 0.
    generation_statistics = {}

    population = initial_population(npop)

    last_best = population[0]
    best = population[1]

    for individual in population:
        individual['len_lags'], individual['rmse'] = evaluate(dataset, individual, **kwargs)

    for i in range(ngen):
        print("GENERATION {}".format(i))

        generation_statistics = {}

        # Selection
        for j in range(int(npop / 2)):
            new_population.append(double_tournament(population))
            new_population.append(double_tournament(population))

        # Crossover: offspring are produced from the selected parents only,
        # then added to them.
        new = [crossover(new_population) for j in range(int(npop * pcruz))]
        new_population.extend(new)

        # Mutation
        for ct, individual in enumerate(new_population):
            new_population[ct] = mutation(individual, pmut)

        # Evaluation
        # Two independent lists: `_f1 = _f2 = []` made both names share one list,
        # mixing the parsimony and accuracy values in the medians below.
        _f1 = []
        _f2 = []
        for individual in new_population:
            f1, f2 = evaluate(dataset, individual, **kwargs)
            individual['len_lags'], individual['rmse'] = f1, f2
            if collect_statistics:
                _f1.append(f1)
                _f2.append(f2)

        if collect_statistics:
            generation_statistics['population'] = {'f1': np.nanmedian(_f1), 'f2': np.nanmedian(_f2)}

        # Elitism
        population = elitism(population, new_population)

        population = population[:npop]

        new_population = []

        last_best = best

        best = population[0]

        if collect_statistics:
            generation_statistics['best'] = {'f1': best["len_lags"], 'f2': best["rmse"]}

            statistics.append(generation_statistics)

        if last_best['rmse'] <= best['rmse'] and last_best['len_lags'] <= best['len_lags']:
            no_improvement_count += 1
            # Stagnation: raise the mutation pressure.
            pmut += .05
        else:
            no_improvement_count = 0
            pcruz = kwargs.get('pcruz', .5)
            pmut = kwargs.get('pmut', .3)

        if no_improvement_count == mgen:
            break

    if collect_statistics:
        # NOTE(review): only the last generation's statistics are returned, although
        # `statistics` accumulates every generation - confirm which one callers expect.
        return best, generation_statistics
    else:
        return best
|
||||
|
||||
|
||||
def cluster_method(dataset, ngen, npop, pcruz, pmut, option=1):
|
||||
print(ngen, npop, pcruz, pmut, option)
|
||||
|
||||
from pyFTS.hyperparam.Evolutionary import genetico
|
||||
def cluster_method(dataset, **kwargs):
|
||||
from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm
|
||||
|
||||
inicio = time.time()
|
||||
ret = genetico(dataset, ngen, npop, pcruz, pmut, option)
|
||||
ret = GeneticAlgorithm(dataset, **kwargs)
|
||||
fim = time.time()
|
||||
ret['time'] = fim - inicio
|
||||
ret['size'] = ret['len_lags']
|
||||
@ -297,16 +459,7 @@ def process_jobs(jobs, datasetname, conn):
|
||||
if job.status == dispy.DispyJob.Finished and result is not None:
|
||||
print("Processing result of {}".format(result))
|
||||
|
||||
metrics = ['rmse', 'size', 'time']
|
||||
|
||||
for metric in metrics:
|
||||
record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
|
||||
result['order'], result['partitioner'], result['npart'],
|
||||
result['alpha'], str(result['lags']), metric, result[metric])
|
||||
|
||||
print(record)
|
||||
|
||||
hUtil.insert_hyperparam(record, conn)
|
||||
log_result(conn, datasetname, result)
|
||||
|
||||
|
||||
else:
|
||||
@ -314,25 +467,47 @@ def process_jobs(jobs, datasetname, conn):
|
||||
print(job.stdout)
|
||||
|
||||
|
||||
def execute(datasetname, dataset, **kwargs):
|
||||
nodes = kwargs.get('nodes', ['127.0.0.1'])
|
||||
def log_result(conn, datasetname, result):
    '''
    Store the outcome of one optimization run in the hyperparameter database.

    One record is printed and inserted for each of the metrics
    'rmse', 'size' and 'time'.

    :param conn: the connection handed to hUtil.insert_hyperparam
    :param datasetname: name of the dataset the run was executed on
    :param result: the best genotype, extended with the 'size' and 'time' keys
    '''
    metrics = ['rmse', 'size', 'time']
    for metric in metrics:
        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])

        print(record)

        hUtil.insert_hyperparam(record, conn)
|
||||
|
||||
|
||||
def execute(datasetname, dataset, **kwargs):
    '''
    Run the evolutionary hyperparameter optimization several times and log
    every result in the 'hyperparam.db' database.

    :param datasetname: name of the dataset, used as key in the database
    :param dataset: the data to be used in the optimization
    :param distributed: False (default) to run locally, 'dispy' to run on a dispy cluster
    :param experiments: number of independent runs (default 30)
    :param nodes: list of cluster nodes, only used when distributed == 'dispy'
    :param kwargs: remaining arguments are forwarded to cluster_method
    :return: the result of the last local run (None if no run was executed);
             nothing is returned in distributed mode
    '''
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    distributed = kwargs.get('distributed', False)

    experiments = kwargs.get('experiments', 30)

    if not distributed:
        # Bound up front so that experiments == 0 no longer fails on the return.
        result = None

        for i in range(experiments):
            result = cluster_method(dataset, **kwargs)
            log_result(conn, datasetname, result)

        # NOTE(review): only the last run is returned - confirm whether callers
        # want the whole list of results instead.
        return result

    elif distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])

        cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)

        try:
            jobs = []

            for i in range(experiments):
                print("Experiment {}".format(i))
                job = cluster.submit(dataset, **kwargs)
                jobs.append(job)

            process_jobs(jobs, datasetname, conn)
        finally:
            # Always release the cluster, even when a job fails.
            Util.stop_dispy_cluster(cluster, http_server)
|
||||
|
@ -90,7 +90,6 @@ class HighOrderFTS(fts.FTS):
|
||||
self.is_high_order = True
|
||||
self.min_order = 1
|
||||
self.order= kwargs.get("order", self.min_order)
|
||||
self.lags = kwargs.get("lags", None)
|
||||
self.configure_lags(**kwargs)
|
||||
|
||||
def configure_lags(self, **kwargs):
|
||||
|
@ -19,7 +19,7 @@ class FLR(object):
|
||||
self.RHS = set
|
||||
|
||||
def __str__(self):
    """
    Render the rule as "<list of LHS sets> -> <RHS set>".

    str.format is used so that a right hand side that is not a string does
    not raise TypeError, as string concatenation would.
    """
    lhs = [self.LHS[k] for k in self.LHS.keys()]
    return "{} -> {}".format(lhs, self.RHS)
|
||||
|
||||
|
||||
|
||||
|
@ -13,13 +13,6 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
def __init__(self, **kwargs):
|
||||
super(ClusteredMVFTS, self).__init__(**kwargs)
|
||||
|
||||
self.cluster_method = kwargs.get('cluster_method', grid.GridCluster)
|
||||
"""The cluster method to be called when a new model is build"""
|
||||
self.cluster_params = kwargs.get('cluster_params', {})
|
||||
"""The cluster method parameters"""
|
||||
self.cluster = kwargs.get('cluster', None)
|
||||
"""The trained clusterer"""
|
||||
|
||||
self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
|
||||
"""The FTS method to be called when a new model is build"""
|
||||
self.fts_params = kwargs.get('fts_params', {})
|
||||
@ -30,6 +23,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
self.is_high_order = True
|
||||
|
||||
self.is_clustered = True
|
||||
|
||||
self.order = kwargs.get("order", 2)
|
||||
self.lags = kwargs.get("lags", None)
|
||||
self.alpha_cut = kwargs.get('alpha_cut', 0.25)
|
||||
@ -43,16 +38,13 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
ndata = []
|
||||
for index, row in data.iterrows():
|
||||
data_point = self.format_data(row)
|
||||
ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, alpha_cut=self.alpha_cut))
|
||||
ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, alpha_cut=self.alpha_cut))
|
||||
|
||||
return ndata
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
if self.cluster is None:
|
||||
self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn, **self.cluster_params)
|
||||
|
||||
self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
|
||||
self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params)
|
||||
if self.model.is_high_order:
|
||||
self.model.order = self.order
|
||||
|
||||
@ -60,7 +52,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
||||
|
||||
self.cluster.prune()
|
||||
self.partitioner.prune()
|
||||
|
||||
def check_data(self, data):
|
||||
if self.pre_fuzzyfy:
|
||||
@ -84,8 +76,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
for var in self.explanatory_variables:
|
||||
if self.target_variable.name != var.name:
|
||||
self.target_variable = var
|
||||
self.cluster.change_target_variable(var)
|
||||
self.model.partitioner = self.cluster
|
||||
self.partitioner.change_target_variable(var)
|
||||
self.model.partitioner = self.partitioner
|
||||
self.model.reset_calculated_values()
|
||||
|
||||
ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
|
||||
|
@ -7,12 +7,12 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
||||
"""
|
||||
Multivariate Composite Fuzzy Set
|
||||
"""
|
||||
def __init__(self, name, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
Create an empty composite fuzzy set
|
||||
:param name: fuzzy set name
|
||||
"""
|
||||
super(MultivariateFuzzySet, self).__init__(name)
|
||||
super(MultivariateFuzzySet, self).__init__("")
|
||||
self.sets = {}
|
||||
self.target_variable = kwargs.get('target_variable',None)
|
||||
|
||||
@ -28,10 +28,10 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
||||
if variable == self.target_variable.name:
|
||||
self.centroid = set.centroid
|
||||
|
||||
self.name += set.name
|
||||
|
||||
def set_target_variable(self, variable):
    """
    Change the target variable of this composite set.

    The centroid is replaced by the centroid of the member set that belongs
    to the new target variable.

    :param variable: the new target variable; its name must be a key of self.sets
    """
    self.target_variable = variable
    member = self.sets[variable.name]
    self.centroid = member.centroid
|
||||
|
||||
def membership(self, x):
|
||||
@ -42,7 +42,6 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
||||
return np.nanmin(mv)
|
||||
|
||||
|
||||
|
||||
def fuzzyfy_instance(data_point, var):
    """
    Fuzzyfy a single value with the partitioner of one variable.

    :param data_point: the value to fuzzyfy
    :param var: the variable; its partitioner, alpha_cut and name are used
    :return: a list of (variable name, fuzzy set) tuples, one for each fuzzy
             set returned by FuzzySet.fuzzyfy
    """
    matched = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets',
                               method='fuzzy', alpha_cut=var.alpha_cut)
    pairs = []
    for fs in matched:
        pairs.append((var.name, fs))
    return pairs
|
||||
|
@ -1,4 +1,4 @@
|
||||
from pyFTS.partitioners import partitioner
|
||||
from pyFTS.models.multivariate import partitioner
|
||||
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
|
||||
from itertools import product
|
||||
from scipy.spatial import KDTree
|
||||
@ -6,106 +6,28 @@ import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class GridCluster(partitioner.MultivariatePartitioner):
    """
    A cartesian product of all fuzzy sets of all variables
    """

    def __init__(self, **kwargs):
        """
        Create the grid of multivariate fuzzy sets.

        All keyword arguments are forwarded to MultivariatePartitioner.
        """
        super(GridCluster, self).__init__(**kwargs)
        self.name="GridCluster"
        # NOTE(review): the base constructor shown in this commit also calls
        # build(); confirm whether this second call is still required.
        self.build(None)

    def build(self, data):
        """
        Build one MultivariateFuzzySet for every combination of the fuzzy sets
        of the explanatory variables, then build the search index.

        :param data: not used by this partitioner
        """
        fsets = [[x for x in k.partitioner.sets.values()]
                 for k in self.explanatory_variables]

        for k in product(*fsets):
            mvfset = MultivariateFuzzySet(target_variable=self.target_variable)
            for fset in k:
                mvfset.append_set(fset.variable, fset)

            self.sets[mvfset.name] = mvfset

        self.build_index()
|
@ -12,8 +12,8 @@ class MVFTS(fts.FTS):
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
super(MVFTS, self).__init__(**kwargs)
|
||||
self.explanatory_variables = []
|
||||
self.target_variable = None
|
||||
self.explanatory_variables = kwargs.get('explanatory_variables',[])
|
||||
self.target_variable = kwargs.get('target_variable',None)
|
||||
self.flrgs = {}
|
||||
self.is_multivariate = True
|
||||
self.shortname = "MVFTS"
|
||||
|
90
pyFTS/models/multivariate/partitioner.py
Normal file
90
pyFTS/models/multivariate/partitioner.py
Normal file
@ -0,0 +1,90 @@
|
||||
from pyFTS.partitioners import partitioner
|
||||
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
|
||||
from itertools import product
|
||||
from scipy.spatial import KDTree
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class MultivariatePartitioner(partitioner.Partitioner):
    """
    Base class for partitioners which use the MultivariateFuzzySet
    """

    def __init__(self, **kwargs):
        """
        :param explanatory_variables: list of the explanatory variables
        :param target_variable: the target variable
        :param neighbors: number of nearest sets returned by knn (default 2)
        :param optimize: if True (default), the sets returned by knn are recorded
                         and prune() discards every set that was never returned
        :param data: forwarded to build()
        """
        # Merge the defaults into kwargs instead of passing them next to **kwargs,
        # which raised TypeError when a caller supplied 'name' or 'preprocess'.
        base_args = dict(kwargs)
        base_args.setdefault('name', "MultivariatePartitioner")
        base_args['preprocess'] = False
        super(MultivariatePartitioner, self).__init__(**base_args)

        self.type = 'multivariate'
        self.sets = {}
        self.kdtree = None
        self.index = {}
        self.explanatory_variables = kwargs.get('explanatory_variables', [])
        self.target_variable = kwargs.get('target_variable', None)
        self.neighbors = kwargs.get('neighbors', 2)
        self.optimize = kwargs.get('optimize', True)
        if self.optimize:
            # Names of the sets that were returned by knn at least once.
            self.count = {}
        data = kwargs.get('data', None)
        self.build(data)

    def build(self, data):
        """Create the fuzzy sets; does nothing here, subclasses override it."""
        pass

    def append(self, fset):
        """Register a multivariate fuzzy set under its name."""
        self.sets[fset.name] = fset

    def prune(self):
        """
        Remove every set that was never returned by knn and rebuild the index.
        Does nothing when optimize is False.
        """
        if not self.optimize:
            return

        # Iterate over a copy of the keys, the dictionary shrinks in the loop.
        for fset in list(self.sets.keys()):
            if fset not in self.count:
                del self.sets[fset]

        self.build_index()

    def knn(self, data):
        """
        Find the sets whose midpoints are nearest to a data point.

        :param data: a mapping from variable name to value
        :return: list with the names of at most self.neighbors sets
        """
        point = [data[k.name]
                 for k in self.explanatory_variables]
        _, ix = self.kdtree.query(point, self.neighbors)

        if not isinstance(ix, (list, np.ndarray)):
            ix = [ix]

        # KDTree.query pads the answer with an out-of-range index when fewer
        # than self.neighbors points exist; skip those instead of raising KeyError.
        found = [self.index[k] for k in ix if k in self.index]

        if self.optimize:
            for name in found:
                self.count[name] = 1

        return found

    def fuzzyfy(self, data, **kwargs):
        """Fuzzyfy a data point using fuzzyfy_instance_clustered and this partitioner."""
        return fuzzyfy_instance_clustered(data, self, **kwargs)

    def change_target_variable(self, variable):
        """Set a new target variable on every fuzzy set."""
        for fset in self.sets.values():
            fset.set_target_variable(variable)

    def build_index(self):
        """
        Build the KD-tree over the midpoints (centroids in each explanatory
        variable) of all sets, and the mapping from tree position to set name.
        """
        midpoints = []

        self.index = {}

        for ct, fset in enumerate(self.sets.values()):
            mp = []
            for vr in self.explanatory_variables:
                mp.append(fset.sets[vr.name].centroid)
            midpoints.append(mp)
            self.index[ct] = fset.name

        import sys

        # Raise the recursion limit for the tree construction, then restore the
        # caller's limit (it used to be reset to a hard-coded 1000).
        previous_limit = sys.getrecursionlimit()
        sys.setrecursionlimit(max(previous_limit, 100000))
        try:
            self.kdtree = KDTree(midpoints)
        finally:
            sys.setrecursionlimit(previous_limit)
|
@ -1,3 +1,4 @@
|
||||
import pandas as pd
|
||||
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
|
||||
from pyFTS.partitioners import Grid
|
||||
from pyFTS.models.multivariate import FLR as MVFLR
|
||||
@ -24,6 +25,10 @@ class Variable:
|
||||
self.data_label = kwargs.get('data_label', self.name)
|
||||
"""A string with the column name on DataFrame"""
|
||||
self.type = kwargs.get('type', 'common')
|
||||
self.data_type = kwargs.get('data_type', None)
|
||||
"""The type of the data column on Pandas Dataframe"""
|
||||
self.mask = kwargs.get('mask', None)
|
||||
"""The mask for format the data column on Pandas Dataframe"""
|
||||
self.transformation = kwargs.get('transformation', None)
|
||||
self.transformation_params = kwargs.get('transformation_params', None)
|
||||
self.partitioner = None
|
||||
|
@ -20,11 +20,12 @@ class WeightedFLRG(mvflrg.FLRG):
|
||||
self.w = None
|
||||
|
||||
def append_rhs(self, fset, **kwargs):
    """
    Add an occurrence of a fuzzy set to the right hand side of the rule.

    :param fset: the fuzzy set (key of self.RHS)
    :param count: keyword argument, the weight of this occurrence (default 1.0)
    """
    count = kwargs.get('count', 1.0)
    if fset not in self.RHS:
        self.RHS[fset] = count
    else:
        self.RHS[fset] += count
    self.count += count
    # weights() caches its result in self.w; drop it, the counts have changed.
    self.w = None
|
||||
|
||||
def weights(self):
|
||||
if self.w is None:
|
||||
@ -51,10 +52,6 @@ class WeightedMVFTS(mvfts.MVFTS):
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
super(WeightedMVFTS, self).__init__(order=1, **kwargs)
|
||||
self.explanatory_variables = []
|
||||
self.target_variable = None
|
||||
self.flrgs = {}
|
||||
self.is_multivariate = True
|
||||
self.shortname = "WeightedMVFTS"
|
||||
self.name = "Weighted Multivariate FTS"
|
||||
|
||||
|
@ -21,6 +21,15 @@ class SimplePartitioner(partitioner.Partitioner):
|
||||
|
||||
self.partitions = 0
|
||||
|
||||
def append_complex(self, fs):
    """
    Append an already built fuzzy set to the partitioner.

    The sets are kept ordered by centroid, and the bounds of the partitioner
    are taken from the first and the last set in that order.

    :param fs: the fuzzy set; its name, centroid, lower and upper attributes are used
    """
    # Replacing a set that has the same name must not inflate the partition count.
    if fs.name not in self.sets:
        self.partitions += 1
    self.sets[fs.name] = fs

    self.ordered_sets = sorted(self.sets.keys(), key=lambda k: self.sets[k].centroid)

    self.min = self.sets[self.ordered_sets[0]].lower
    self.max = self.sets[self.ordered_sets[-1]].upper
|
||||
|
||||
def append(self, name, mf, parameters, **kwargs):
|
||||
"""
|
||||
Append a new partition (fuzzy set) to the partitioner
|
||||
@ -39,7 +48,7 @@ class SimplePartitioner(partitioner.Partitioner):
|
||||
if mf is None or mf not in (Membership.trimf, Membership.gaussmf,
|
||||
Membership.trapmf, Membership.singleton,
|
||||
Membership.sigmf):
|
||||
raise ValueError("The mf parameter should be one of pyFTS.common.Membership functions")
|
||||
raise ValueError("The mf parameter should be one of pyFTS.common.Membership functions, not {}".format(mf))
|
||||
|
||||
if mf == Membership.trimf:
|
||||
if len(parameters) != 3:
|
||||
|
@ -2,14 +2,14 @@ import numpy as np
|
||||
from pyFTS.hyperparam import GridSearch, Evolutionary
|
||||
|
||||
def get_dataset():
|
||||
#from pyFTS.data import SONDA
|
||||
from pyFTS.data import Malaysia
|
||||
|
||||
ds = Malaysia.get_data('temperature')[:1000]
|
||||
# ds = pd.read_csv('Malaysia.csv',delimiter=',' )[['temperature']].values[:2000].flatten().tolist()
|
||||
#train = ds[:800]
|
||||
#test = ds[800:]
|
||||
#data = SONDA.get_data('temperature')[:1000]
|
||||
data = Malaysia.get_data('temperature')[:1000]
|
||||
|
||||
return 'Malaysia.temperature', ds #train, test
|
||||
#return 'SONDA.glo_avg', data #train, test
|
||||
return 'Malaysia.temperature', data #train, test
|
||||
|
||||
"""
|
||||
hyperparams = {
|
||||
@ -39,4 +39,28 @@ datsetname, dataset = get_dataset()
|
||||
|
||||
#Evolutionary.cluster_method(dataset, 70, 20, .8, .3, 1)
|
||||
|
||||
Evolutionary.execute(datsetname, dataset, nodes=nodes, ngen=50, npop=30, )
|
||||
'''
|
||||
from pyFTS.models import hofts
|
||||
from pyFTS.partitioners import Grid
|
||||
from pyFTS.benchmarks import Measures
|
||||
|
||||
fs = Grid.GridPartitioner(data=dataset[:800], npart=30)
|
||||
|
||||
model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)
|
||||
|
||||
model.fit(dataset[:800])
|
||||
|
||||
model.predict(dataset[800:1000])
|
||||
|
||||
Measures.get_point_statistics(dataset[800:1000], model)
|
||||
|
||||
print(model)
|
||||
|
||||
'''
|
||||
ret = Evolutionary.execute(datsetname, dataset,
|
||||
ngen=30, npop=20, pcruz=.5, pmut=.3,
|
||||
window_size=800, experiments=30)
|
||||
#parameters={'distributed': 'spark', 'url': 'spark://192.168.0.106:7077'})
|
||||
|
||||
print(ret)
|
||||
#'''
|
@ -28,8 +28,6 @@ test_uv = dataset['value'].values[24505:]
|
||||
train_mv = dataset.iloc[:24505]
|
||||
test_mv = dataset.iloc[24505:]
|
||||
|
||||
print(train_mv)
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
@ -46,21 +44,16 @@ parameters = [
|
||||
{'order':2, 'knn': 3},
|
||||
]
|
||||
|
||||
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
|
||||
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
|
||||
print(method)
|
||||
model = method(**parameters[ct])
|
||||
model.shortname += str(ct)
|
||||
model.append_variable(vhour)
|
||||
model.append_variable(vvalue)
|
||||
model.target_variable = vvalue
|
||||
model.fit(train_mv)
|
||||
#for ct, method in enumerate([, wmvfts.WeightedMVFTS,
|
||||
# cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
|
||||
model = mvfts.MVFTS()
|
||||
|
||||
Util.persist_obj(model, model.shortname)
|
||||
model.append_variable(vhour)
|
||||
model.append_variable(vvalue)
|
||||
model.target_variable = vvalue
|
||||
model.fit(train_mv)
|
||||
|
||||
forecasts = model.predict(test_mv.iloc[:100])
|
||||
|
||||
print(model)
|
||||
print(model)
|
||||
|
||||
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import time
|
||||
|
||||
from pyFTS.data import Enrollments, TAIEX
|
||||
from pyFTS.data import Enrollments, TAIEX, SONDA
|
||||
from pyFTS.partitioners import Grid, Simple
|
||||
from pyFTS.models import hofts
|
||||
|
||||
@ -12,20 +13,51 @@ import os
|
||||
# make sure pyspark tells workers to use python3 not 2 if both are installed
|
||||
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
|
||||
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
||||
|
||||
data = TAIEX.get_data()
|
||||
#'''
|
||||
data = SONDA.get_data('glo_avg')
|
||||
|
||||
fs = Grid.GridPartitioner(data=data, npart=50)
|
||||
|
||||
model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)
|
||||
|
||||
model.fit(data, distributed='spark', url='spark://192.168.0.110:7077')
|
||||
_s1 = time.time()
|
||||
model.fit(data, distributed='spark', url='spark://192.168.0.106:7077')
|
||||
_s2 = time.time()
|
||||
|
||||
print(_s2-_s1)
|
||||
|
||||
#model.fit(data, distributed='dispy', nodes=['192.168.0.110'])
|
||||
'''
|
||||
|
||||
from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, grid
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
|
||||
dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
|
||||
|
||||
dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
train_mv = dataset.iloc[:24505]
|
||||
test_mv = dataset.iloc[24505:]
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
vvalue = variable.Variable("Pollution", data_label="value", alias='value',
|
||||
partitioner=Grid.GridPartitioner, npart=35, data_type=np.float64,
|
||||
data=train_mv)
|
||||
|
||||
fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
|
||||
#model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
|
||||
model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
|
||||
partitioner=fs)
|
||||
|
||||
model.fit(train_mv, distributed='spark', url='spark://192.168.0.106:7077')
|
||||
#'''
|
||||
print(model)
|
||||
|
||||
|
||||
|
||||
'''
|
||||
def fun(x):
|
||||
return (x, x % 2)
|
||||
|
Loading…
Reference in New Issue
Block a user