Spark Distributed; Hyperparameter optimization
This commit is contained in:
parent
87686e5ff0
commit
2e1d7fa11a
178
docs/build/html/_modules/pyFTS/partitioners/Simple.html
vendored
Normal file
178
docs/build/html/_modules/pyFTS/partitioners/Simple.html
vendored
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><script type="text/javascript">
|
||||||
|
|
||||||
|
var _gaq = _gaq || [];
|
||||||
|
_gaq.push(['_setAccount', 'UA-55120145-3']);
|
||||||
|
_gaq.push(['_trackPageview']);
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||||
|
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||||
|
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
<title>pyFTS.partitioners.Simple — pyFTS 1.4 documentation</title>
|
||||||
|
<link rel="stylesheet" href="../../../_static/bizstyle.css" type="text/css" />
|
||||||
|
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||||||
|
<script type="text/javascript" src="../../../_static/documentation_options.js"></script>
|
||||||
|
<script type="text/javascript" src="../../../_static/jquery.js"></script>
|
||||||
|
<script type="text/javascript" src="../../../_static/underscore.js"></script>
|
||||||
|
<script type="text/javascript" src="../../../_static/doctools.js"></script>
|
||||||
|
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
|
||||||
|
<script type="text/javascript" src="../../../_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="../../../search.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0">
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script type="text/javascript" src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="../../../genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="../../../index.html">pyFTS 1.4 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> »</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<p class="logo"><a href="../../../index.html">
|
||||||
|
<img class="logo" src="../../../_static/logo_heading2.png" alt="Logo"/>
|
||||||
|
</a></p>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3>Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="../../../search.html" method="get">
|
||||||
|
<input type="text" name="q" />
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
<input type="hidden" name="check_keywords" value="yes" />
|
||||||
|
<input type="hidden" name="area" value="default" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script type="text/javascript">$('#searchbox').show(0);</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<h1>Source code for pyFTS.partitioners.Simple</h1><div class="highlight"><pre>
|
||||||
|
<span></span><span class="sd">"""Simple Partitioner for manually informed fuzzy sets"""</span>
|
||||||
|
|
||||||
|
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">math</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">random</span> <span class="k">as</span> <span class="nn">rnd</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">functools</span><span class="o">,</span> <span class="nn">operator</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">pyFTS.common</span> <span class="k">import</span> <span class="n">FuzzySet</span><span class="p">,</span> <span class="n">Membership</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">pyFTS.partitioners</span> <span class="k">import</span> <span class="n">partitioner</span>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="viewcode-block" id="SimplePartitioner"><a class="viewcode-back" href="../../../pyFTS.partitioners.html#pyFTS.partitioners.Simple.SimplePartitioner">[docs]</a><span class="k">class</span> <span class="nc">SimplePartitioner</span><span class="p">(</span><span class="n">partitioner</span><span class="o">.</span><span class="n">Partitioner</span><span class="p">):</span>
|
||||||
|
<span class="sd">"""Simple Partitioner for manually informed fuzzy sets"""</span>
|
||||||
|
|
||||||
|
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||||
|
<span class="sd">"""</span>
|
||||||
|
<span class="sd"> Simple Partitioner - the fuzzy sets are informed manually</span>
|
||||||
|
<span class="sd"> """</span>
|
||||||
|
<span class="n">kwargs</span><span class="p">[</span><span class="s1">'preprocess'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
|
||||||
|
|
||||||
|
<span class="nb">super</span><span class="p">(</span><span class="n">SimplePartitioner</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">"Simple"</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="bp">self</span><span class="o">.</span><span class="n">partitions</span> <span class="o">=</span> <span class="mi">0</span>
|
||||||
|
|
||||||
|
<div class="viewcode-block" id="SimplePartitioner.append"><a class="viewcode-back" href="../../../pyFTS.partitioners.html#pyFTS.partitioners.Simple.SimplePartitioner.append">[docs]</a> <span class="k">def</span> <span class="nf">append</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">mf</span><span class="p">,</span> <span class="n">parameters</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||||
|
<span class="sd">"""</span>
|
||||||
|
<span class="sd"> Append a new partition (fuzzy set) to the partitioner</span>
|
||||||
|
|
||||||
|
<span class="sd"> :param name: Fuzzy set name</span>
|
||||||
|
<span class="sd"> :param mf: One of the pyFTS.common.Membership functions</span>
|
||||||
|
<span class="sd"> :param parameters: A list with the parameters for the membership function</span>
|
||||||
|
<span class="sd"> :param kwargs: Optional arguments for the fuzzy set</span>
|
||||||
|
<span class="sd"> """</span>
|
||||||
|
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The name of the fuzzy set cannot be empty"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="k">if</span> <span class="n">name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"This name has already been used"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="k">if</span> <span class="n">mf</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">mf</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="n">Membership</span><span class="o">.</span><span class="n">trimf</span><span class="p">,</span> <span class="n">Membership</span><span class="o">.</span><span class="n">gaussmf</span><span class="p">,</span>
|
||||||
|
<span class="n">Membership</span><span class="o">.</span><span class="n">trapmf</span><span class="p">,</span> <span class="n">Membership</span><span class="o">.</span><span class="n">singleton</span><span class="p">,</span>
|
||||||
|
<span class="n">Membership</span><span class="o">.</span><span class="n">sigmf</span><span class="p">):</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The mf parameter should be one of pyFTS.common.Membership functions"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="k">if</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">trimf</span><span class="p">:</span>
|
||||||
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">3</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.trimf"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||||
|
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">gaussmf</span><span class="p">:</span>
|
||||||
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.gaussmf"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||||
|
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">trapmf</span><span class="p">:</span>
|
||||||
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">4</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.trapmf"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">centroid</span> <span class="o">=</span> <span class="p">(</span><span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">parameters</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">/</span><span class="mi">2</span>
|
||||||
|
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">singleton</span><span class="p">:</span>
|
||||||
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.singleton"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||||
|
<span class="k">elif</span> <span class="n">mf</span> <span class="o">==</span> <span class="n">Membership</span><span class="o">.</span><span class="n">sigmf</span><span class="p">:</span>
|
||||||
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">parameters</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
|
||||||
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Incorrect number of parameters for the Membership.sigmf"</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">centroid</span> <span class="o">=</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="p">(</span><span class="n">parameters</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">/</span> <span class="p">(</span><span class="mi">2</span> <span class="o">*</span> <span class="n">parameters</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
|
||||||
|
|
||||||
|
<span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">FuzzySet</span><span class="o">.</span><span class="n">FuzzySet</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">mf</span><span class="p">,</span> <span class="n">parameters</span><span class="p">,</span> <span class="n">centroid</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||||
|
<span class="bp">self</span><span class="o">.</span><span class="n">partitions</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||||
|
|
||||||
|
<span class="bp">self</span><span class="o">.</span><span class="n">ordered_sets</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="o">.</span><span class="n">keys</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">k</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="o">.</span><span class="n">centroid</span><span class="p">)]</span>
|
||||||
|
|
||||||
|
<span class="bp">self</span><span class="o">.</span><span class="n">min</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_sets</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">lower</span>
|
||||||
|
<span class="bp">self</span><span class="o">.</span><span class="n">max</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sets</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">ordered_sets</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]</span><span class="o">.</span><span class="n">upper</span></div></div>
|
||||||
|
|
||||||
|
|
||||||
|
</pre></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="../../../genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="../../../index.html">pyFTS 1.4 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2018, Machine Intelligence and Data Science Laboratory - UFMG - Brazil.
|
||||||
|
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.7.2.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -26,6 +26,10 @@ class FuzzySet(FuzzySet.FuzzySet):
|
|||||||
self.mf = []
|
self.mf = []
|
||||||
self.parameters = []
|
self.parameters = []
|
||||||
|
|
||||||
|
self.lower = None
|
||||||
|
self.upper = None
|
||||||
|
self.centroid = None
|
||||||
|
|
||||||
|
|
||||||
def membership(self, x):
|
def membership(self, x):
|
||||||
"""
|
"""
|
||||||
@ -62,3 +66,13 @@ class FuzzySet(FuzzySet.FuzzySet):
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
self.sets.append(set)
|
self.sets.append(set)
|
||||||
|
|
||||||
|
if self.lower is None or self.lower > set.lower:
|
||||||
|
self.lower = set.lower
|
||||||
|
|
||||||
|
if self.upper is None or self.upper < set.upper:
|
||||||
|
self.upper = set.upper
|
||||||
|
|
||||||
|
if self.centroid is None or self.centroid < set.centroid:
|
||||||
|
self.centroid = set.centroid
|
||||||
|
|
||||||
|
@ -125,6 +125,7 @@ def fuzzyfy(data, partitioner, **kwargs):
|
|||||||
:keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
|
:keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
|
||||||
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
|
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
|
||||||
values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
|
values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
|
||||||
|
|
||||||
:returns a list with the fuzzyfied values, depending on the mode
|
:returns a list with the fuzzyfied values, depending on the mode
|
||||||
"""
|
"""
|
||||||
alpha_cut = kwargs.get('alpha_cut', 0.)
|
alpha_cut = kwargs.get('alpha_cut', 0.)
|
||||||
|
@ -74,7 +74,7 @@ def sigmf(x, parameters):
|
|||||||
|
|
||||||
:param x:
|
:param x:
|
||||||
:param parameters: an list with 2 real values (smoothness and midpoint)
|
:param parameters: an list with 2 real values (smoothness and midpoint)
|
||||||
:return:
|
:return
|
||||||
"""
|
"""
|
||||||
return 1 / (1 + math.exp(-parameters[0] * (x - parameters[1])))
|
return 1 / (1 + math.exp(-parameters[0] * (x - parameters[1])))
|
||||||
|
|
||||||
|
@ -38,6 +38,9 @@ class FTS(object):
|
|||||||
"""A boolean value indicating if the model support probabilistic forecasting, default: False"""
|
"""A boolean value indicating if the model support probabilistic forecasting, default: False"""
|
||||||
self.is_multivariate = False
|
self.is_multivariate = False
|
||||||
"""A boolean value indicating if the model support multivariate time series (Pandas DataFrame), default: False"""
|
"""A boolean value indicating if the model support multivariate time series (Pandas DataFrame), default: False"""
|
||||||
|
self.is_clustered = False
|
||||||
|
"""A boolean value indicating if the model support multivariate time series (Pandas DataFrame), but works like
|
||||||
|
a monovariate method, default: False"""
|
||||||
self.dump = False
|
self.dump = False
|
||||||
self.transformations = []
|
self.transformations = []
|
||||||
"""A list with the data transformations (common.Transformations) applied on model pre and post processing, default: []"""
|
"""A list with the data transformations (common.Transformations) applied on model pre and post processing, default: []"""
|
||||||
@ -61,6 +64,8 @@ class FTS(object):
|
|||||||
"""Flag indicating if the test data will be clipped inside the training Universe of Discourse"""
|
"""Flag indicating if the test data will be clipped inside the training Universe of Discourse"""
|
||||||
self.alpha_cut = kwargs.get("alpha_cut", 0.0)
|
self.alpha_cut = kwargs.get("alpha_cut", 0.0)
|
||||||
"""A float with the minimal membership to be considered on fuzzyfication process"""
|
"""A float with the minimal membership to be considered on fuzzyfication process"""
|
||||||
|
self.lags = kwargs.get("lags", None)
|
||||||
|
"""The list of lag indexes for high order models"""
|
||||||
self.max_lag = self.order
|
self.max_lag = self.order
|
||||||
"""A integer indicating the largest lag used by the model. This value also indicates the minimum number of past lags
|
"""A integer indicating the largest lag used by the model. This value also indicates the minimum number of past lags
|
||||||
needed to forecast a single step ahead"""
|
needed to forecast a single step ahead"""
|
||||||
|
@ -3,6 +3,7 @@ import pandas as pd
|
|||||||
|
|
||||||
from pyFTS.data import Enrollments, TAIEX
|
from pyFTS.data import Enrollments, TAIEX
|
||||||
from pyFTS.partitioners import Grid, Simple
|
from pyFTS.partitioners import Grid, Simple
|
||||||
|
from pyFTS.models.multivariate import partitioner as mv_partitioner
|
||||||
from pyFTS.models import hofts
|
from pyFTS.models import hofts
|
||||||
|
|
||||||
from pyspark import SparkConf
|
from pyspark import SparkConf
|
||||||
@ -10,44 +11,141 @@ from pyspark import SparkContext
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
# make sure pyspark tells workers to use python3 not 2 if both are installed
|
# make sure pyspark tells workers to use python3 not 2 if both are installed
|
||||||
|
SPARK_ADDR = 'spark://192.168.0.110:7077'
|
||||||
|
|
||||||
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
|
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
|
||||||
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_partitioner(shared_partitioner):
|
def get_partitioner(shared_partitioner, type='common', variables=[]):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
:param part:
|
:param part:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
fs_tmp = Simple.SimplePartitioner()
|
if type=='common':
|
||||||
|
fs_tmp = Simple.SimplePartitioner()
|
||||||
|
|
||||||
for fset in shared_partitioner.value.keys():
|
for fset in shared_partitioner.value.keys():
|
||||||
fz = shared_partitioner.value[fset]
|
fz = shared_partitioner.value[fset]
|
||||||
fs_tmp.append(fset, fz.mf, fz.parameters)
|
if type=='common':
|
||||||
|
fs_tmp.append_complex(fz)
|
||||||
|
elif type == 'multivariate':
|
||||||
|
fs_tmp.append(fz)
|
||||||
|
|
||||||
return fs_tmp
|
return fs_tmp
|
||||||
|
|
||||||
|
|
||||||
def slave_train(data, shared_method, shared_partitioner, shared_order):
|
def get_clustered_partitioner(explanatory_variables, target_variable, **parameters):
|
||||||
|
from pyFTS.models.multivariate.common import MultivariateFuzzySet
|
||||||
|
fs_tmp = mv_partitioner.MultivariatePartitioner(explanatory_variables=explanatory_variables,
|
||||||
|
target_variable=target_variable)
|
||||||
|
for tmp in parameters['partitioner_names'].value:
|
||||||
|
fs = MultivariateFuzzySet(target_variable=target_variable)
|
||||||
|
for var, fset in parameters['partitioner_{}'.format(tmp)].value:
|
||||||
|
fs.append_set(var, fset)
|
||||||
|
fs_tmp.append(fs)
|
||||||
|
|
||||||
|
fs_tmp.build_index()
|
||||||
|
|
||||||
|
return fs_tmp
|
||||||
|
|
||||||
|
|
||||||
|
def get_variables(**parameters):
|
||||||
|
explanatory_variables = []
|
||||||
|
target_variable = None
|
||||||
|
for name in parameters['variables'].value:
|
||||||
|
from pyFTS.models.multivariate import common, variable
|
||||||
|
var = variable.Variable(name,
|
||||||
|
type=parameters['{}_type'.format(name)].value,
|
||||||
|
data_label=parameters['{}_label'.format(name)].value,
|
||||||
|
alpha_cut=parameters['{}_alpha'.format(name)].value,
|
||||||
|
#data_type=parameters['{}_data_type'.format(name)].value,
|
||||||
|
#mask=parameters['{}_mask'.format(name)].value,
|
||||||
|
)
|
||||||
|
var.partitioner = get_partitioner(parameters['{}_partitioner'.format(name)])
|
||||||
|
var.partitioner.type = parameters['{}_partitioner_type'.format(name)].value
|
||||||
|
|
||||||
|
explanatory_variables.append(var)
|
||||||
|
|
||||||
|
if var.name == parameters['target'].value:
|
||||||
|
target_variable = var
|
||||||
|
|
||||||
|
return (explanatory_variables, target_variable)
|
||||||
|
|
||||||
|
|
||||||
|
def slave_train_univariate(data, **parameters):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
:param data:
|
:param data:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model = shared_method.value(partitioner=get_partitioner(shared_partitioner),
|
if parameters['type'].value == 'common':
|
||||||
order=shared_order.value)
|
|
||||||
|
|
||||||
ndata = [k for k in data]
|
if parameters['order'].value > 1:
|
||||||
|
model = parameters['method'].value(partitioner=get_partitioner(parameters['partitioner']),
|
||||||
|
order=parameters['order'].value, alpha_cut=parameters['alpha_cut'].value,
|
||||||
|
lags=parameters['lags'].value)
|
||||||
|
else:
|
||||||
|
model = parameters['method'].value(partitioner=get_partitioner(parameters['partitioner']),
|
||||||
|
alpha_cut=parameters['alpha_cut'].value)
|
||||||
|
|
||||||
|
ndata = [k for k in data]
|
||||||
|
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
model.train(ndata)
|
model.train(ndata)
|
||||||
|
|
||||||
return [(k, model.flrgs[k]) for k in model.flrgs]
|
return [(k, model.flrgs[k]) for k in model.flrgs.keys()]
|
||||||
|
|
||||||
|
|
||||||
def distributed_train(model, data, url='spark://192.168.0.110:7077', app='pyFTS'):
|
def slave_train_multivariate(data, **parameters):
|
||||||
|
explanatory_variables, target_variable = get_variables(**parameters)
|
||||||
|
#vars = [(v.name, v.name) for v in explanatory_variables]
|
||||||
|
|
||||||
|
#return [('vars', vars), ('target',[target_variable.name])]
|
||||||
|
|
||||||
|
if parameters['type'].value == 'clustered':
|
||||||
|
fs = get_clustered_partitioner(explanatory_variables, target_variable, **parameters)
|
||||||
|
model = parameters['method'].value(explanatory_variables=explanatory_variables,
|
||||||
|
target_variable=target_variable,
|
||||||
|
partitioner=fs,
|
||||||
|
order=parameters['order'].value,
|
||||||
|
alpha_cut=parameters['alpha_cut'].value,
|
||||||
|
lags=parameters['lags'].value)
|
||||||
|
else:
|
||||||
|
|
||||||
|
if parameters['order'].value > 1:
|
||||||
|
model = parameters['method'].value(explanatory_variables=explanatory_variables,
|
||||||
|
target_variable=target_variable,
|
||||||
|
order=parameters['order'].value,
|
||||||
|
alpha_cut=parameters['alpha_cut'].value,
|
||||||
|
lags=parameters['lags'].value)
|
||||||
|
else:
|
||||||
|
model = parameters['method'].value(explanatory_variables=explanatory_variables,
|
||||||
|
target_variable=target_variable,
|
||||||
|
alpha_cut=parameters['alpha_cut'].value)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
rows = [k for k in data]
|
||||||
|
ndata = pd.DataFrame.from_records(rows, columns=parameters['columns'].value)
|
||||||
|
|
||||||
|
model.train(ndata)
|
||||||
|
|
||||||
|
if parameters['type'].value == 'clustered':
|
||||||
|
counts = [(fset, count) for fset,count in model.partitioner.count.items()]
|
||||||
|
flrgs = [(k, v) for k,v in model.flrgs.items()]
|
||||||
|
|
||||||
|
return [('counts', counts), ('flrgs', flrgs)]
|
||||||
|
else:
|
||||||
|
return [(k, v) for k,v in model.flrgs.items()]
|
||||||
|
|
||||||
|
|
||||||
|
def distributed_train(model, data, url=SPARK_ADDR, app='pyFTS'):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@ -61,22 +159,92 @@ def distributed_train(model, data, url='spark://192.168.0.110:7077', app='pyFTS'
|
|||||||
conf = SparkConf()
|
conf = SparkConf()
|
||||||
conf.setMaster(url)
|
conf.setMaster(url)
|
||||||
conf.setAppName(app)
|
conf.setAppName(app)
|
||||||
|
conf.set("spark.executor.memory", "2g")
|
||||||
|
conf.set("spark.driver.memory", "2g")
|
||||||
|
conf.set("spark.memory.offHeap.enabled",True)
|
||||||
|
conf.set("spark.memory.offHeap.size","16g")
|
||||||
|
parameters = {}
|
||||||
|
|
||||||
with SparkContext(conf=conf) as context:
|
with SparkContext(conf=conf) as context:
|
||||||
shared_partitioner = context.broadcast(model.partitioner.sets)
|
|
||||||
shared_order = context.broadcast(model.order)
|
|
||||||
shared_method = context.broadcast(type(model))
|
|
||||||
|
|
||||||
func = lambda x: slave_train(x, shared_method, shared_partitioner, shared_order)
|
nodes = context.defaultParallelism
|
||||||
|
|
||||||
flrgs = context.parallelize(data).mapPartitions(func)
|
if not model.is_multivariate:
|
||||||
|
parameters['type'] = context.broadcast('common')
|
||||||
|
parameters['partitioner'] = context.broadcast(model.partitioner.sets)
|
||||||
|
parameters['alpha_cut'] = context.broadcast(model.alpha_cut)
|
||||||
|
parameters['order'] = context.broadcast(model.order)
|
||||||
|
parameters['method'] = context.broadcast(type(model))
|
||||||
|
parameters['lags'] = context.broadcast(model.lags)
|
||||||
|
parameters['max_lag'] = context.broadcast(model.max_lag)
|
||||||
|
|
||||||
for k in flrgs.collect():
|
func = lambda x: slave_train_univariate(x, **parameters)
|
||||||
model.append_rule(k[1])
|
|
||||||
|
|
||||||
return model
|
flrgs = context.parallelize(data).repartition(nodes*2).mapPartitions(func)
|
||||||
|
|
||||||
|
for k in flrgs.collect():
|
||||||
|
model.append_rule(k[1])
|
||||||
|
|
||||||
|
return model
|
||||||
|
else:
|
||||||
|
if model.is_clustered:
|
||||||
|
parameters['type'] = context.broadcast('clustered')
|
||||||
|
names = []
|
||||||
|
for name, fset in model.partitioner.sets.items():
|
||||||
|
names.append(name)
|
||||||
|
parameters['partitioner_{}'.format(name)] = context.broadcast([(k,v) for k,v in fset.sets.items()])
|
||||||
|
|
||||||
|
parameters['partitioner_names'] = context.broadcast(names)
|
||||||
|
|
||||||
|
else:
|
||||||
|
parameters['type'] = context.broadcast('multivariate')
|
||||||
|
names = []
|
||||||
|
for var in model.explanatory_variables:
|
||||||
|
#if var.data_type is None:
|
||||||
|
# raise Exception("It is mandatory to inform the data_type parameter for each variable when the training is distributed! ")
|
||||||
|
names.append(var.name)
|
||||||
|
parameters['{}_type'.format(var.name)] = context.broadcast(var.type)
|
||||||
|
#parameters['{}_data_type'.format(var.name)] = context.broadcast(var.data_type)
|
||||||
|
#parameters['{}_mask'.format(var.name)] = context.broadcast(var.mask)
|
||||||
|
parameters['{}_label'.format(var.name)] = context.broadcast(var.data_label)
|
||||||
|
parameters['{}_alpha'.format(var.name)] = context.broadcast(var.alpha_cut)
|
||||||
|
parameters['{}_partitioner'.format(var.name)] = context.broadcast(var.partitioner.sets)
|
||||||
|
parameters['{}_partitioner_type'.format(var.name)] = context.broadcast(var.partitioner.type)
|
||||||
|
|
||||||
|
parameters['variables'] = context.broadcast(names)
|
||||||
|
parameters['target'] = context.broadcast(model.target_variable.name)
|
||||||
|
|
||||||
|
parameters['columns'] = context.broadcast(data.columns.values)
|
||||||
|
|
||||||
|
data = data.to_dict(orient='records')
|
||||||
|
|
||||||
|
parameters['alpha_cut'] = context.broadcast(model.alpha_cut)
|
||||||
|
parameters['order'] = context.broadcast(model.order)
|
||||||
|
parameters['method'] = context.broadcast(type(model))
|
||||||
|
parameters['lags'] = context.broadcast(model.lags)
|
||||||
|
parameters['max_lag'] = context.broadcast(model.max_lag)
|
||||||
|
|
||||||
|
func = lambda x: slave_train_multivariate(x, **parameters)
|
||||||
|
|
||||||
|
flrgs = context.parallelize(data).mapPartitions(func)
|
||||||
|
|
||||||
|
for k in flrgs.collect():
|
||||||
|
print(k)
|
||||||
|
#for g in k:
|
||||||
|
# print(g)
|
||||||
|
|
||||||
|
#return
|
||||||
|
if parameters['type'].value == 'clustered':
|
||||||
|
if k[0] == 'counts':
|
||||||
|
for fset, count in k[1]:
|
||||||
|
model.partitioner.count[fset] = count
|
||||||
|
elif k[0] == 'flrgs':
|
||||||
|
model.append_rule(k[1])
|
||||||
|
else:
|
||||||
|
model.append_rule(k[1])
|
||||||
|
|
||||||
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
def distributed_predict(data, model, url='spark://192.168.0.110:7077', app='pyFTS'):
    '''
    Placeholder for distributed forecasting; not implemented yet.

    :param data: input data (currently unused)
    :param model: FTS model (currently unused)
    :param url: address of the Spark master (currently unused)
    :param app: Spark application name (currently unused)
    :return: always None
    '''
    return None
|
||||||
|
@ -15,15 +15,32 @@ from pyFTS.common import Membership
|
|||||||
from pyFTS.hyperparam import Util as hUtil
|
from pyFTS.hyperparam import Util as hUtil
|
||||||
|
|
||||||
|
|
||||||
# Gera indivíduos após operadores
|
#
|
||||||
def genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse):
    '''
    Create the individual genotype

    :param mf: membership function
    :param npart: number of partitions
    :param partitioner: partitioner method
    :param order: model order
    :param alpha: alpha-cut
    :param lags: array with lag indexes
    :param len_lags: parsimony fitness value
    :param rmse: accuracy fitness value
    :return: the genotype, a dictionary with all hyperparameters
    '''
    return dict(mf=mf, npart=npart, partitioner=partitioner, order=order,
                alpha=alpha, lags=lags, len_lags=len_lags, rmse=rmse)
|
||||||
|
|
||||||
|
|
||||||
# Gera indivíduos
|
|
||||||
def random_genotype():
|
def random_genotype():
|
||||||
|
'''
|
||||||
|
Create random genotype
|
||||||
|
|
||||||
|
:return: the genotype, a dictionary with all hyperparameters
|
||||||
|
'''
|
||||||
order = random.randint(1, 3)
|
order = random.randint(1, 3)
|
||||||
return genotype(
|
return genotype(
|
||||||
random.randint(1, 4),
|
random.randint(1, 4),
|
||||||
@ -32,21 +49,34 @@ def random_genotype():
|
|||||||
order,
|
order,
|
||||||
random.uniform(0, .5),
|
random.uniform(0, .5),
|
||||||
sorted(random.sample(range(1, 50), order)),
|
sorted(random.sample(range(1, 50), order)),
|
||||||
[],
|
None,
|
||||||
[]
|
None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Gera uma população de tamanho n
|
#
|
||||||
def initial_population(n):
    '''
    Create a random population of size n

    :param n: the size of the population
    :return: a list with n random individuals
    '''
    return [random_genotype() for _ in range(n)]
|
||||||
|
|
||||||
|
|
||||||
# Função de avaliação
|
def phenotype(individual, train, parameters={}):
|
||||||
def phenotype(individual, train):
|
'''
|
||||||
|
Instantiate the genotype, creating a fitted model with the genotype hyperparameters
|
||||||
|
|
||||||
|
:param individual: a genotype
|
||||||
|
:param train: the training dataset
|
||||||
|
:param parameters: dict with model specific arguments for fit method.
|
||||||
|
:return: a fitted FTS model
|
||||||
|
'''
|
||||||
try:
|
try:
|
||||||
if individual['mf'] == 1:
|
if individual['mf'] == 1:
|
||||||
mf = Membership.trimf
|
mf = Membership.trimf
|
||||||
@ -67,28 +97,48 @@ def phenotype(individual, train):
|
|||||||
alpha_cut=individual['alpha'],
|
alpha_cut=individual['alpha'],
|
||||||
order=individual['order'])
|
order=individual['order'])
|
||||||
|
|
||||||
model.fit(train)
|
model.fit(train, **parameters)
|
||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("EXCEPTION!", str(ex), str(individual))
|
print("PHENOTYPE EXCEPTION!", str(ex), str(individual))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def evaluation1(dataset, individual):
|
def evaluate(dataset, individual, **kwargs):
|
||||||
|
'''
|
||||||
|
Evaluate an individual using a sliding window cross validation over the dataset.
|
||||||
|
|
||||||
|
:param dataset: Evaluation dataset
|
||||||
|
:param individual: genotype to be tested
|
||||||
|
:param window_size: The length of scrolling window for train/test on dataset
|
||||||
|
:param train_rate: The train/test split ([0,1])
|
||||||
|
:param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
|
||||||
|
:param parameters: dict with model specific arguments for fit method.
|
||||||
|
:return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value
|
||||||
|
'''
|
||||||
from pyFTS.common import Util
|
from pyFTS.common import Util
|
||||||
from pyFTS.benchmarks import Measures
|
from pyFTS.benchmarks import Measures
|
||||||
|
|
||||||
|
window_size = kwargs.get('window_size', 800)
|
||||||
|
train_rate = kwargs.get('train_rate', .8)
|
||||||
|
increment_rate = kwargs.get('increment_rate', .2)
|
||||||
|
parameters = kwargs.get('parameters',{})
|
||||||
|
|
||||||
|
if individual['rmse'] is not None and individual['len_lags'] is not None:
|
||||||
|
return individual['len_lags'], individual['rmse']
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results = []
|
results = []
|
||||||
lengths = []
|
lengths = []
|
||||||
|
|
||||||
for count, train, test in Util.sliding_window(dataset, 800, train=.8, inc=.25):
|
for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
|
||||||
model = phenotype(individual, train)
|
|
||||||
|
model = phenotype(individual, train, parameters=parameters)
|
||||||
|
|
||||||
if model is None:
|
if model is None:
|
||||||
return (None)
|
raise Exception("Phenotype returned None")
|
||||||
|
|
||||||
rmse, _, _ = Measures.get_point_statistics(test, model)
|
rmse, _, _ = Measures.get_point_statistics(test, model)
|
||||||
lengths.append(len(model))
|
lengths.append(len(model))
|
||||||
@ -100,36 +150,59 @@ def evaluation1(dataset, individual):
|
|||||||
rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
|
rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
|
||||||
len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])
|
len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])
|
||||||
|
|
||||||
|
#print("EVALUATION {}".format(individual))
|
||||||
return len_lags, rmse
|
return len_lags, rmse
|
||||||
|
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("EXCEPTION!", str(ex), str(individual))
|
print("EVALUATION EXCEPTION!", str(ex), str(individual))
|
||||||
return np.inf
|
return np.inf, np.inf
|
||||||
|
|
||||||
|
|
||||||
def tournament(population, objective):
    '''
    Simple tournament selection strategy.

    Two contenders are compared on a single objective and the one with the
    smaller value wins; on a tie the second contender is returned.

    :param population: the population, a sequence of genotypes (dicts)
    :param objective: the genotype key to be considered on tournament
    :return: the winning genotype (the same object held by the population)
    '''
    size = len(population)

    if size == 0:
        raise IndexError("tournament requires a non-empty population")

    if size == 1:
        # Nothing to compare against: the only individual wins by default.
        return population[0]

    if size > 2:
        # Every index must be eligible; the previous `len - 1 > 2` test left
        # 3-element populations always comparing indexes 0 and 1.
        r1 = random.randint(0, size - 1)
        r2 = random.randint(0, size - 1)
    else:
        # With exactly two individuals the tournament is a direct duel.
        r1, r2 = 0, 1

    first, second = population[r1], population[r2]

    return first if first[objective] < second[objective] else second
|
||||||
|
|
||||||
|
|
||||||
def double_tournament(population):
    '''
    Double tournament selection strategy.

    Two tournaments on the 'rmse' objective pick two finalists, then a last
    tournament between them on the 'len_lags' objective picks the winner.

    :param population: the population, a sequence of genotypes
    :return: the selected genotype
    '''
    ancestor1 = tournament(population, 'rmse')
    ancestor2 = tournament(population, 'rmse')

    return tournament([ancestor1, ancestor2], 'len_lags')
|
||||||
|
|
||||||
|
|
||||||
def lag_crossover2(best, worst):
|
def lag_crossover2(best, worst):
|
||||||
|
'''
|
||||||
|
Cross over two lag genes
|
||||||
|
|
||||||
|
:param best: best genotype
|
||||||
|
:param worst: worst genotype
|
||||||
|
:return: a tuple (order, lags)
|
||||||
|
'''
|
||||||
order = int(round(.7 * best['order'] + .3 * worst['order']))
|
order = int(round(.7 * best['order'] + .3 * worst['order']))
|
||||||
lags = []
|
lags = []
|
||||||
|
|
||||||
@ -151,15 +224,26 @@ def lag_crossover2(best, worst):
|
|||||||
|
|
||||||
|
|
||||||
# Cruzamento
|
# Cruzamento
|
||||||
def crossover(pais):
|
def crossover(parents):
|
||||||
|
'''
|
||||||
|
Crossover operation between two parents
|
||||||
|
|
||||||
|
:param parents: a list with two genotypes
|
||||||
|
:return: a genotype
|
||||||
|
'''
|
||||||
import random
|
import random
|
||||||
|
|
||||||
if pais[0]['rmse'] < pais[1]['rmse']:
|
n = len(parents) - 1
|
||||||
best = pais[0]
|
|
||||||
worst = pais[1]
|
r1 = random.randint(0, n)
|
||||||
|
r2 = random.randint(0, n)
|
||||||
|
|
||||||
|
if parents[r1]['rmse'] < parents[r2]['rmse']:
|
||||||
|
best = parents[r1]
|
||||||
|
worst = parents[r2]
|
||||||
else:
|
else:
|
||||||
best = pais[1]
|
best = parents[r2]
|
||||||
worst = pais[0]
|
worst = parents[r1]
|
||||||
|
|
||||||
npart = int(round(.7 * best['npart'] + .3 * worst['npart']))
|
npart = int(round(.7 * best['npart'] + .3 * worst['npart']))
|
||||||
alpha = float(.7 * best['alpha'] + .3 * worst['alpha'])
|
alpha = float(.7 * best['alpha'] + .3 * worst['alpha'])
|
||||||
@ -172,119 +256,197 @@ def crossover(pais):
|
|||||||
|
|
||||||
order, lags = lag_crossover2(best, worst)
|
order, lags = lag_crossover2(best, worst)
|
||||||
|
|
||||||
rmse = []
|
descendent = genotype(mf, npart, partitioner, order, alpha, lags, None, None)
|
||||||
len_lags = []
|
|
||||||
|
|
||||||
filho = genotype(mf, npart, partitioner, order, alpha, lags, len_lags, rmse)
|
return descendent
|
||||||
|
|
||||||
return filho
|
|
||||||
|
|
||||||
|
|
||||||
# Mutação | p é a probabilidade de mutação
|
|
||||||
|
|
||||||
def mutation_lags(lags, order):
    '''
    Mutation operation for lags gene

    Each existing lag is shifted by a small random integer and clamped to
    [1, 50]; when ``order`` is larger than the number of lags, new lags are
    appended after the last one. The result is then repaired so that it is
    strictly increasing.

    :param lags: current list of lag indexes (may be empty or None)
    :param order: the number of lags the mutated gene must have
    :return: a new list with ``order`` strictly increasing integer lags
    '''
    current = list(lags) if lags is not None else []
    known = len(current)
    mutated = []

    for position in range(order):
        if position < known:
            # NOTE: numpy's randint upper bound is exclusive, so the shift is in -5..4.
            shifted = int(current[position] + np.random.randint(-5, 5))
            mutated.append(min(50, max(1, shifted)))
        else:
            # Grow the gene after the last lag; start from 0 when there is no lag yet,
            # which previously failed on new[-1] and silently returned None.
            base = mutated[-1] if mutated else 0
            mutated.append(int(base + np.random.randint(1, 5)))

    # Repair ordering: every lag must be greater than its predecessor.
    # NOTE(review): this repair can push a lag above 50 - confirm whether 50 is a hard limit.
    for k in range(1, order):
        while mutated[k] <= mutated[k - 1]:
            mutated[k] += int(np.random.randint(1, 5))

    return mutated
|
||||||
|
|
||||||
|
|
||||||
def mutation(individual, pmut):
    '''
    Mutation operator

    With probability ``pmut`` every gene of the individual is perturbed and
    its cached fitness values are invalidated. The individual is changed in
    place.

    :param individual: the genotype to be mutated
    :param pmut: probability of mutation
    :return: the same individual object, mutated or untouched
    '''
    if random.uniform(0, 1) >= pmut:
        return individual

    individual['npart'] = min(50, max(3, int(individual['npart'] + np.random.normal(0, 4))))
    individual['alpha'] = min(.5, max(0, individual['alpha'] + np.random.normal(0, .5)))
    individual['mf'] = random.randint(1, 2)
    individual['partitioner'] = random.randint(1, 2)
    individual['order'] = min(5, max(1, int(individual['order'] + np.random.normal(0, 1))))

    # The lags gene depends on the (possibly new) order, so it is mutated last.
    individual['lags'] = mutation_lags(individual['lags'], individual['order'])

    # Fitness is stale after a mutation: clear it so the individual gets re-evaluated.
    individual['rmse'] = None
    individual['len_lags'] = None

    return individual
|
||||||
|
|
||||||
|
|
||||||
# Elitismo
|
|
||||||
def elitism(population, new_population):
    '''
    Elitism operation, keeps the best individual of the current population alive

    The new population is sorted by 'rmse'; if its best individual is worse
    than the best of the current population, the latter is inserted at the
    front. Nothing is discarded here, so the result may be one element
    longer than ``new_population``.

    :param population: the current population
    :param new_population: the candidate population for the next generation
    :return: a new list with the new population sorted by ascending 'rmse'
    '''
    ranked = sorted(new_population, key=itemgetter('rmse'))

    if not population:
        # No elite to preserve.
        return ranked

    best = min(population, key=itemgetter('rmse'))

    if not ranked or ranked[0]["rmse"] > best["rmse"]:
        ranked.insert(0, best)

    return ranked
|
||||||
|
|
||||||
|
|
||||||
def GeneticAlgorithm(dataset, **kwargs):
    '''
    Genetic algorithm for hyperparameter optimization

    :param dataset: the dataset used to evaluate the individuals
    :param ngen: Max number of generations
    :param mgen: Max number of generations without improvement
    :param npop: Population size
    :param pcruz: Probability of crossover
    :param pmut: Probability of mutation
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :param collect_statistics: if True, per-generation statistics are also returned
    :return: the best genotype, or a tuple (best genotype, list with one
             statistics dict per generation) when collect_statistics is True
    '''
    statistics = []

    ngen = kwargs.get('ngen', 30)
    mgen = kwargs.get('mgen', 7)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .5)
    pmut = kwargs.get('pmut', .3)

    collect_statistics = kwargs.get('collect_statistics', False)

    no_improvement_count = 0

    population = initial_population(npop)

    if not population:
        raise ValueError("npop must be at least 1")

    for individual in population:
        individual['len_lags'], individual['rmse'] = evaluate(dataset, individual, **kwargs)

    # Start from the real best of the evaluated initial population instead of
    # arbitrary positions (indexing population[1] also failed when npop == 1).
    best = min(population, key=lambda item: item['rmse'])
    last_best = best

    for i in range(ngen):
        print("GENERATION {}".format(i))

        generation_statistics = {}

        # Selection
        # Winners are copied: mutation works in place and would otherwise
        # alter the individuals still held by the current population,
        # including the elite.
        new_population = []
        for _ in range(int(npop / 2)):
            new_population.append(dict(double_tournament(population)))
            new_population.append(dict(double_tournament(population)))

        # Crossover
        offspring = [crossover(new_population) for _ in range(int(npop * pcruz))]
        new_population.extend(offspring)

        # Mutation
        for ct, individual in enumerate(new_population):
            new_population[ct] = mutation(individual, pmut)

        # Evaluation
        # Two independent lists; `_f1 = _f2 = []` made both names share one list.
        _f1 = []
        _f2 = []
        for individual in new_population:
            f1, f2 = evaluate(dataset, individual, **kwargs)
            individual['len_lags'], individual['rmse'] = f1, f2
            if collect_statistics:
                _f1.append(f1)
                _f2.append(f2)

        if collect_statistics:
            generation_statistics['population'] = {'f1': np.nanmedian(_f1), 'f2': np.nanmedian(_f2)}

        # Elitism
        population = elitism(population, new_population)

        population = population[:npop]

        last_best = best

        best = population[0]

        if collect_statistics:
            generation_statistics['best'] = {'f1': best["len_lags"], 'f2': best["rmse"]}

            statistics.append(generation_statistics)

        if last_best['rmse'] <= best['rmse'] and last_best['len_lags'] <= best['len_lags']:
            no_improvement_count += 1
            # Stagnation: mutate more aggressively in the next generation.
            pmut += .05
        else:
            no_improvement_count = 0
            pcruz = kwargs.get('pcruz', .5)
            pmut = kwargs.get('pmut', .3)

        if no_improvement_count == mgen:
            break

    if collect_statistics:
        # The accumulated history, not only the last generation's entry.
        return best, statistics
    else:
        return best
|
||||||
|
|
||||||
|
|
||||||
def cluster_method(dataset, ngen, npop, pcruz, pmut, option=1):
|
def cluster_method(dataset, **kwargs):
|
||||||
print(ngen, npop, pcruz, pmut, option)
|
from pyFTS.hyperparam.Evolutionary import GeneticAlgorithm
|
||||||
|
|
||||||
from pyFTS.hyperparam.Evolutionary import genetico
|
|
||||||
|
|
||||||
inicio = time.time()
|
inicio = time.time()
|
||||||
ret = genetico(dataset, ngen, npop, pcruz, pmut, option)
|
ret = GeneticAlgorithm(dataset, **kwargs)
|
||||||
fim = time.time()
|
fim = time.time()
|
||||||
ret['time'] = fim - inicio
|
ret['time'] = fim - inicio
|
||||||
ret['size'] = ret['len_lags']
|
ret['size'] = ret['len_lags']
|
||||||
@ -297,16 +459,7 @@ def process_jobs(jobs, datasetname, conn):
|
|||||||
if job.status == dispy.DispyJob.Finished and result is not None:
|
if job.status == dispy.DispyJob.Finished and result is not None:
|
||||||
print("Processing result of {}".format(result))
|
print("Processing result of {}".format(result))
|
||||||
|
|
||||||
metrics = ['rmse', 'size', 'time']
|
log_result(conn, datasetname, result)
|
||||||
|
|
||||||
for metric in metrics:
|
|
||||||
record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
|
|
||||||
result['order'], result['partitioner'], result['npart'],
|
|
||||||
result['alpha'], str(result['lags']), metric, result[metric])
|
|
||||||
|
|
||||||
print(record)
|
|
||||||
|
|
||||||
hUtil.insert_hyperparam(record, conn)
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -314,25 +467,47 @@ def process_jobs(jobs, datasetname, conn):
|
|||||||
print(job.stdout)
|
print(job.stdout)
|
||||||
|
|
||||||
|
|
||||||
def log_result(conn, datasetname, result):
    '''
    Store the 'rmse', 'size' and 'time' metrics of a result in the hyperparameter database

    :param conn: connection returned by hUtil.open_hyperparam_db
    :param datasetname: name of the dataset the result refers to
    :param result: genotype dict that also carries the 'size' and 'time' keys
    '''
    metrics = ['rmse', 'size', 'time']

    for metric in metrics:
        record = (datasetname, 'Evolutive', 'WHOFTS', None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])

        print(record)

        hUtil.insert_hyperparam(record, conn)


def execute(datasetname, dataset, **kwargs):
    '''
    Run the hyperparameter search several times and log every result

    :param datasetname: name of the dataset, used when logging results
    :param dataset: the dataset handed to cluster_method
    :param distributed: False to run locally, 'dispy' to run on a dispy
           cluster; any other value performs no experiment
    :param experiments: number of independent runs (default 30)
    :param nodes: list of dispy node addresses (only for distributed='dispy')
    :param kwargs: all keyword arguments are forwarded to cluster_method
    :return: the result of the last local experiment, or None when no local
             experiment was run
    '''
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    distributed = kwargs.get('distributed', False)

    experiments = kwargs.get('experiments', 30)

    if not distributed:
        # Stays None when experiments == 0 instead of failing on an unbound name.
        result = None
        for i in range(experiments):
            result = cluster_method(dataset, **kwargs)
            log_result(conn, datasetname, result)

        return result

    elif distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])

        cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)

        try:
            jobs = []

            for i in range(experiments):
                print("Experiment {}".format(i))
                job = cluster.submit(dataset, **kwargs)
                jobs.append(job)

            process_jobs(jobs, datasetname, conn)
        finally:
            # Always release the cluster, even if a job submission or its processing fails.
            Util.stop_dispy_cluster(cluster, http_server)
|
||||||
|
@ -90,7 +90,6 @@ class HighOrderFTS(fts.FTS):
|
|||||||
self.is_high_order = True
|
self.is_high_order = True
|
||||||
self.min_order = 1
|
self.min_order = 1
|
||||||
self.order= kwargs.get("order", self.min_order)
|
self.order= kwargs.get("order", self.min_order)
|
||||||
self.lags = kwargs.get("lags", None)
|
|
||||||
self.configure_lags(**kwargs)
|
self.configure_lags(**kwargs)
|
||||||
|
|
||||||
def configure_lags(self, **kwargs):
|
def configure_lags(self, **kwargs):
|
||||||
|
@ -19,7 +19,7 @@ class FLR(object):
|
|||||||
self.RHS = set
|
self.RHS = set
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return str([self.LHS[k] for k in self.LHS.keys()]) + " -> " + self.RHS
|
return "{} -> {}".format([self.LHS[k] for k in self.LHS.keys()], self.RHS)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,13 +13,6 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super(ClusteredMVFTS, self).__init__(**kwargs)
|
super(ClusteredMVFTS, self).__init__(**kwargs)
|
||||||
|
|
||||||
self.cluster_method = kwargs.get('cluster_method', grid.GridCluster)
|
|
||||||
"""The cluster method to be called when a new model is build"""
|
|
||||||
self.cluster_params = kwargs.get('cluster_params', {})
|
|
||||||
"""The cluster method parameters"""
|
|
||||||
self.cluster = kwargs.get('cluster', None)
|
|
||||||
"""The trained clusterer"""
|
|
||||||
|
|
||||||
self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
|
self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
|
||||||
"""The FTS method to be called when a new model is build"""
|
"""The FTS method to be called when a new model is build"""
|
||||||
self.fts_params = kwargs.get('fts_params', {})
|
self.fts_params = kwargs.get('fts_params', {})
|
||||||
@ -30,6 +23,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
|
|
||||||
self.is_high_order = True
|
self.is_high_order = True
|
||||||
|
|
||||||
|
self.is_clustered = True
|
||||||
|
|
||||||
self.order = kwargs.get("order", 2)
|
self.order = kwargs.get("order", 2)
|
||||||
self.lags = kwargs.get("lags", None)
|
self.lags = kwargs.get("lags", None)
|
||||||
self.alpha_cut = kwargs.get('alpha_cut', 0.25)
|
self.alpha_cut = kwargs.get('alpha_cut', 0.25)
|
||||||
@ -43,16 +38,13 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
ndata = []
|
ndata = []
|
||||||
for index, row in data.iterrows():
|
for index, row in data.iterrows():
|
||||||
data_point = self.format_data(row)
|
data_point = self.format_data(row)
|
||||||
ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, alpha_cut=self.alpha_cut))
|
ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner, alpha_cut=self.alpha_cut))
|
||||||
|
|
||||||
return ndata
|
return ndata
|
||||||
|
|
||||||
def train(self, data, **kwargs):
|
def train(self, data, **kwargs):
|
||||||
|
|
||||||
if self.cluster is None:
|
self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params)
|
||||||
self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn, **self.cluster_params)
|
|
||||||
|
|
||||||
self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
|
|
||||||
if self.model.is_high_order:
|
if self.model.is_high_order:
|
||||||
self.model.order = self.order
|
self.model.order = self.order
|
||||||
|
|
||||||
@ -60,7 +52,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
|
|
||||||
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
|
||||||
|
|
||||||
self.cluster.prune()
|
self.partitioner.prune()
|
||||||
|
|
||||||
def check_data(self, data):
|
def check_data(self, data):
|
||||||
if self.pre_fuzzyfy:
|
if self.pre_fuzzyfy:
|
||||||
@ -84,8 +76,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
for var in self.explanatory_variables:
|
for var in self.explanatory_variables:
|
||||||
if self.target_variable.name != var.name:
|
if self.target_variable.name != var.name:
|
||||||
self.target_variable = var
|
self.target_variable = var
|
||||||
self.cluster.change_target_variable(var)
|
self.partitioner.change_target_variable(var)
|
||||||
self.model.partitioner = self.cluster
|
self.model.partitioner = self.partitioner
|
||||||
self.model.reset_calculated_values()
|
self.model.reset_calculated_values()
|
||||||
|
|
||||||
ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
|
ret[var.name] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
|
||||||
|
@ -7,12 +7,12 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
|||||||
"""
|
"""
|
||||||
Multivariate Composite Fuzzy Set
|
Multivariate Composite Fuzzy Set
|
||||||
"""
|
"""
|
||||||
def __init__(self, name, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
"""
|
"""
|
||||||
Create an empty composite fuzzy set
|
Create an empty composite fuzzy set
|
||||||
:param name: fuzzy set name
|
:param name: fuzzy set name
|
||||||
"""
|
"""
|
||||||
super(MultivariateFuzzySet, self).__init__(name)
|
super(MultivariateFuzzySet, self).__init__("")
|
||||||
self.sets = {}
|
self.sets = {}
|
||||||
self.target_variable = kwargs.get('target_variable',None)
|
self.target_variable = kwargs.get('target_variable',None)
|
||||||
|
|
||||||
@ -28,10 +28,10 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
|||||||
if variable == self.target_variable.name:
|
if variable == self.target_variable.name:
|
||||||
self.centroid = set.centroid
|
self.centroid = set.centroid
|
||||||
|
|
||||||
|
self.name += set.name
|
||||||
|
|
||||||
def set_target_variable(self, variable):
|
def set_target_variable(self, variable):
|
||||||
#print(self.target_variable, variable)
|
|
||||||
self.target_variable = variable
|
self.target_variable = variable
|
||||||
#print(self.centroid,self.sets[variable.name].centroid)
|
|
||||||
self.centroid = self.sets[variable.name].centroid
|
self.centroid = self.sets[variable.name].centroid
|
||||||
|
|
||||||
def membership(self, x):
|
def membership(self, x):
|
||||||
@ -42,7 +42,6 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
|||||||
return np.nanmin(mv)
|
return np.nanmin(mv)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fuzzyfy_instance(data_point, var):
|
def fuzzyfy_instance(data_point, var):
|
||||||
fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
|
fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
|
||||||
return [(var.name, fs) for fs in fsets]
|
return [(var.name, fs) for fs in fsets]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from pyFTS.partitioners import partitioner
|
from pyFTS.models.multivariate import partitioner
|
||||||
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
|
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
|
||||||
from itertools import product
|
from itertools import product
|
||||||
from scipy.spatial import KDTree
|
from scipy.spatial import KDTree
|
||||||
@ -6,106 +6,28 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
class GridCluster(partitioner.Partitioner):
|
class GridCluster(partitioner.MultivariatePartitioner):
|
||||||
"""
|
"""
|
||||||
A cartesian product of all fuzzy sets of all variables
|
A cartesian product of all fuzzy sets of all variables
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super(GridCluster, self).__init__(name="GridCluster", preprocess=False, **kwargs)
|
super(GridCluster, self).__init__(**kwargs)
|
||||||
|
self.name="GridCluster"
|
||||||
self.mvfts = kwargs.get('mvfts', None)
|
self.build(None)
|
||||||
self.sets = {}
|
|
||||||
self.kdtree = None
|
|
||||||
self.index = {}
|
|
||||||
self.neighbors = kwargs.get('neighbors', 2)
|
|
||||||
self.optmize = kwargs.get('optmize', False)
|
|
||||||
if self.optmize:
|
|
||||||
self.count = {}
|
|
||||||
data = kwargs.get('data', [None])
|
|
||||||
self.build(data)
|
|
||||||
|
|
||||||
def build(self, data):
|
def build(self, data):
|
||||||
|
|
||||||
fsets = [[x for x in k.partitioner.sets.values()]
|
fsets = [[x for x in k.partitioner.sets.values()]
|
||||||
for k in self.mvfts.explanatory_variables]
|
for k in self.explanatory_variables]
|
||||||
|
|
||||||
midpoints = []
|
|
||||||
|
|
||||||
c = 0
|
c = 0
|
||||||
for k in product(*fsets):
|
for k in product(*fsets):
|
||||||
#key = self.prefix+str(c)
|
mvfset = MultivariateFuzzySet(target_variable=self.target_variable)
|
||||||
mvfset = MultivariateFuzzySet(name="", target_variable=self.mvfts.target_variable)
|
|
||||||
mp = []
|
|
||||||
_key = ""
|
|
||||||
for fset in k:
|
for fset in k:
|
||||||
mvfset.append_set(fset.variable, fset)
|
mvfset.append_set(fset.variable, fset)
|
||||||
mp.append(fset.centroid)
|
|
||||||
_key += fset.name
|
self.sets[mvfset.name] = mvfset
|
||||||
mvfset.name = _key
|
|
||||||
self.sets[_key] = mvfset
|
|
||||||
midpoints.append(mp)
|
|
||||||
self.index[c] = _key
|
|
||||||
c += 1
|
c += 1
|
||||||
|
|
||||||
import sys
|
self.build_index()
|
||||||
sys.setrecursionlimit(100000)
|
|
||||||
|
|
||||||
self.kdtree = KDTree(midpoints)
|
|
||||||
|
|
||||||
sys.setrecursionlimit(1000)
|
|
||||||
|
|
||||||
def prune(self):
|
|
||||||
|
|
||||||
if not self.optmize:
|
|
||||||
return
|
|
||||||
|
|
||||||
for fset in [fs for fs in self.sets.keys()]:
|
|
||||||
if fset not in self.count:
|
|
||||||
fs = self.sets.pop(fset)
|
|
||||||
del (fs)
|
|
||||||
|
|
||||||
|
|
||||||
vars = [k.name for k in self.mvfts.explanatory_variables]
|
|
||||||
|
|
||||||
midpoints = []
|
|
||||||
|
|
||||||
self.index = {}
|
|
||||||
|
|
||||||
for ct, fset in enumerate(self.sets.values()):
|
|
||||||
mp = []
|
|
||||||
for vr in vars:
|
|
||||||
mp.append(fset.sets[vr].centroid)
|
|
||||||
midpoints.append(mp)
|
|
||||||
self.index[ct] = fset.name
|
|
||||||
|
|
||||||
import sys
|
|
||||||
sys.setrecursionlimit(100000)
|
|
||||||
|
|
||||||
self.kdtree = KDTree(midpoints)
|
|
||||||
|
|
||||||
sys.setrecursionlimit(1000)
|
|
||||||
|
|
||||||
def knn(self, data):
|
|
||||||
tmp = [data[k.name]
|
|
||||||
for k in self.mvfts.explanatory_variables]
|
|
||||||
tmp, ix = self.kdtree.query(tmp, self.neighbors)
|
|
||||||
|
|
||||||
if not isinstance(ix, (list, np.ndarray)):
|
|
||||||
ix = [ix]
|
|
||||||
|
|
||||||
if self.optmize:
|
|
||||||
tmp = []
|
|
||||||
for k in ix:
|
|
||||||
tmp.append(self.index[k])
|
|
||||||
self.count[self.index[k]] = 1
|
|
||||||
return tmp
|
|
||||||
else:
|
|
||||||
return [self.index[k] for k in ix]
|
|
||||||
|
|
||||||
def fuzzyfy(self, data, **kwargs):
|
|
||||||
return fuzzyfy_instance_clustered(data, self, **kwargs)
|
|
||||||
|
|
||||||
def change_target_variable(self, variable):
|
|
||||||
for fset in self.sets:
|
|
||||||
self.sets[fset].set_target_variable(variable)
|
|
||||||
|
@ -12,8 +12,8 @@ class MVFTS(fts.FTS):
|
|||||||
"""
|
"""
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super(MVFTS, self).__init__(**kwargs)
|
super(MVFTS, self).__init__(**kwargs)
|
||||||
self.explanatory_variables = []
|
self.explanatory_variables = kwargs.get('explanatory_variables',[])
|
||||||
self.target_variable = None
|
self.target_variable = kwargs.get('target_variable',None)
|
||||||
self.flrgs = {}
|
self.flrgs = {}
|
||||||
self.is_multivariate = True
|
self.is_multivariate = True
|
||||||
self.shortname = "MVFTS"
|
self.shortname = "MVFTS"
|
||||||
|
90
pyFTS/models/multivariate/partitioner.py
Normal file
90
pyFTS/models/multivariate/partitioner.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
from pyFTS.partitioners import partitioner
|
||||||
|
from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
|
||||||
|
from itertools import product
|
||||||
|
from scipy.spatial import KDTree
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
class MultivariatePartitioner(partitioner.Partitioner):
|
||||||
|
"""
|
||||||
|
Base class for partitioners which use the MultivariateFuzzySet
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super(MultivariatePartitioner, self).__init__(name="MultivariatePartitioner", preprocess=False, **kwargs)
|
||||||
|
|
||||||
|
self.type = 'multivariate'
|
||||||
|
self.sets = {}
|
||||||
|
self.kdtree = None
|
||||||
|
self.index = {}
|
||||||
|
self.explanatory_variables = kwargs.get('explanatory_variables', [])
|
||||||
|
self.target_variable = kwargs.get('target_variable', None)
|
||||||
|
self.neighbors = kwargs.get('neighbors', 2)
|
||||||
|
self.optimize = kwargs.get('optimize', True)
|
||||||
|
if self.optimize:
|
||||||
|
self.count = {}
|
||||||
|
data = kwargs.get('data', None)
|
||||||
|
self.build(data)
|
||||||
|
|
||||||
|
def build(self, data):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def append(self, fset):
|
||||||
|
self.sets[fset.name] = fset
|
||||||
|
|
||||||
|
def prune(self):
|
||||||
|
|
||||||
|
if not self.optimize:
|
||||||
|
return
|
||||||
|
|
||||||
|
for fset in [fs for fs in self.sets.keys()]:
|
||||||
|
if fset not in self.count:
|
||||||
|
fs = self.sets.pop(fset)
|
||||||
|
del (fs)
|
||||||
|
|
||||||
|
self.build_index()
|
||||||
|
|
||||||
|
def knn(self, data):
|
||||||
|
tmp = [data[k.name]
|
||||||
|
for k in self.explanatory_variables]
|
||||||
|
tmp, ix = self.kdtree.query(tmp, self.neighbors)
|
||||||
|
|
||||||
|
if not isinstance(ix, (list, np.ndarray)):
|
||||||
|
ix = [ix]
|
||||||
|
|
||||||
|
if self.optimize:
|
||||||
|
tmp = []
|
||||||
|
for k in ix:
|
||||||
|
tmp.append(self.index[k])
|
||||||
|
self.count[self.index[k]] = 1
|
||||||
|
return tmp
|
||||||
|
else:
|
||||||
|
return [self.index[k] for k in ix]
|
||||||
|
|
||||||
|
def fuzzyfy(self, data, **kwargs):
|
||||||
|
return fuzzyfy_instance_clustered(data, self, **kwargs)
|
||||||
|
|
||||||
|
def change_target_variable(self, variable):
|
||||||
|
for fset in self.sets.values():
|
||||||
|
fset.set_target_variable(variable)
|
||||||
|
|
||||||
|
def build_index(self):
|
||||||
|
|
||||||
|
midpoints = []
|
||||||
|
|
||||||
|
self.index = {}
|
||||||
|
|
||||||
|
for ct, fset in enumerate(self.sets.values()):
|
||||||
|
mp = []
|
||||||
|
for vr in self.explanatory_variables:
|
||||||
|
mp.append(fset.sets[vr.name].centroid)
|
||||||
|
midpoints.append(mp)
|
||||||
|
self.index[ct] = fset.name
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.setrecursionlimit(100000)
|
||||||
|
|
||||||
|
self.kdtree = KDTree(midpoints)
|
||||||
|
|
||||||
|
sys.setrecursionlimit(1000)
|
@ -1,3 +1,4 @@
|
|||||||
|
import pandas as pd
|
||||||
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
|
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
|
||||||
from pyFTS.partitioners import Grid
|
from pyFTS.partitioners import Grid
|
||||||
from pyFTS.models.multivariate import FLR as MVFLR
|
from pyFTS.models.multivariate import FLR as MVFLR
|
||||||
@ -24,6 +25,10 @@ class Variable:
|
|||||||
self.data_label = kwargs.get('data_label', self.name)
|
self.data_label = kwargs.get('data_label', self.name)
|
||||||
"""A string with the column name on DataFrame"""
|
"""A string with the column name on DataFrame"""
|
||||||
self.type = kwargs.get('type', 'common')
|
self.type = kwargs.get('type', 'common')
|
||||||
|
self.data_type = kwargs.get('data_type', None)
|
||||||
|
"""The type of the data column on Pandas Dataframe"""
|
||||||
|
self.mask = kwargs.get('mask', None)
|
||||||
|
"""The mask for format the data column on Pandas Dataframe"""
|
||||||
self.transformation = kwargs.get('transformation', None)
|
self.transformation = kwargs.get('transformation', None)
|
||||||
self.transformation_params = kwargs.get('transformation_params', None)
|
self.transformation_params = kwargs.get('transformation_params', None)
|
||||||
self.partitioner = None
|
self.partitioner = None
|
||||||
|
@ -20,11 +20,12 @@ class WeightedFLRG(mvflrg.FLRG):
|
|||||||
self.w = None
|
self.w = None
|
||||||
|
|
||||||
def append_rhs(self, fset, **kwargs):
|
def append_rhs(self, fset, **kwargs):
|
||||||
|
count = kwargs.get('count', 1.0)
|
||||||
if fset not in self.RHS:
|
if fset not in self.RHS:
|
||||||
self.RHS[fset] = 1.0
|
self.RHS[fset] = count
|
||||||
else:
|
else:
|
||||||
self.RHS[fset] += 1.0
|
self.RHS[fset] += count
|
||||||
self.count += 1.0
|
self.count += count
|
||||||
|
|
||||||
def weights(self):
|
def weights(self):
|
||||||
if self.w is None:
|
if self.w is None:
|
||||||
@ -51,10 +52,6 @@ class WeightedMVFTS(mvfts.MVFTS):
|
|||||||
"""
|
"""
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super(WeightedMVFTS, self).__init__(order=1, **kwargs)
|
super(WeightedMVFTS, self).__init__(order=1, **kwargs)
|
||||||
self.explanatory_variables = []
|
|
||||||
self.target_variable = None
|
|
||||||
self.flrgs = {}
|
|
||||||
self.is_multivariate = True
|
|
||||||
self.shortname = "WeightedMVFTS"
|
self.shortname = "WeightedMVFTS"
|
||||||
self.name = "Weighted Multivariate FTS"
|
self.name = "Weighted Multivariate FTS"
|
||||||
|
|
||||||
|
@ -21,6 +21,15 @@ class SimplePartitioner(partitioner.Partitioner):
|
|||||||
|
|
||||||
self.partitions = 0
|
self.partitions = 0
|
||||||
|
|
||||||
|
def append_complex(self, fs):
|
||||||
|
self.sets[fs.name] = fs
|
||||||
|
self.partitions += 1
|
||||||
|
|
||||||
|
self.ordered_sets = [key for key in sorted(self.sets.keys(), key=lambda k: self.sets[k].centroid)]
|
||||||
|
|
||||||
|
self.min = self.sets[self.ordered_sets[0]].lower
|
||||||
|
self.max = self.sets[self.ordered_sets[-1]].upper
|
||||||
|
|
||||||
def append(self, name, mf, parameters, **kwargs):
|
def append(self, name, mf, parameters, **kwargs):
|
||||||
"""
|
"""
|
||||||
Append a new partition (fuzzy set) to the partitioner
|
Append a new partition (fuzzy set) to the partitioner
|
||||||
@ -39,7 +48,7 @@ class SimplePartitioner(partitioner.Partitioner):
|
|||||||
if mf is None or mf not in (Membership.trimf, Membership.gaussmf,
|
if mf is None or mf not in (Membership.trimf, Membership.gaussmf,
|
||||||
Membership.trapmf, Membership.singleton,
|
Membership.trapmf, Membership.singleton,
|
||||||
Membership.sigmf):
|
Membership.sigmf):
|
||||||
raise ValueError("The mf parameter should be one of pyFTS.common.Membership functions")
|
raise ValueError("The mf parameter should be one of pyFTS.common.Membership functions, not {}".format(mf))
|
||||||
|
|
||||||
if mf == Membership.trimf:
|
if mf == Membership.trimf:
|
||||||
if len(parameters) != 3:
|
if len(parameters) != 3:
|
||||||
|
@ -2,14 +2,14 @@ import numpy as np
|
|||||||
from pyFTS.hyperparam import GridSearch, Evolutionary
|
from pyFTS.hyperparam import GridSearch, Evolutionary
|
||||||
|
|
||||||
def get_dataset():
|
def get_dataset():
|
||||||
|
#from pyFTS.data import SONDA
|
||||||
from pyFTS.data import Malaysia
|
from pyFTS.data import Malaysia
|
||||||
|
|
||||||
ds = Malaysia.get_data('temperature')[:1000]
|
#data = SONDA.get_data('temperature')[:1000]
|
||||||
# ds = pd.read_csv('Malaysia.csv',delimiter=',' )[['temperature']].values[:2000].flatten().tolist()
|
data = Malaysia.get_data('temperature')[:1000]
|
||||||
#train = ds[:800]
|
|
||||||
#test = ds[800:]
|
|
||||||
|
|
||||||
return 'Malaysia.temperature', ds #train, test
|
#return 'SONDA.glo_avg', data #train, test
|
||||||
|
return 'Malaysia.temperature', data #train, test
|
||||||
|
|
||||||
"""
|
"""
|
||||||
hyperparams = {
|
hyperparams = {
|
||||||
@ -39,4 +39,28 @@ datsetname, dataset = get_dataset()
|
|||||||
|
|
||||||
#Evolutionary.cluster_method(dataset, 70, 20, .8, .3, 1)
|
#Evolutionary.cluster_method(dataset, 70, 20, .8, .3, 1)
|
||||||
|
|
||||||
Evolutionary.execute(datsetname, dataset, nodes=nodes, ngen=50, npop=30, )
|
'''
|
||||||
|
from pyFTS.models import hofts
|
||||||
|
from pyFTS.partitioners import Grid
|
||||||
|
from pyFTS.benchmarks import Measures
|
||||||
|
|
||||||
|
fs = Grid.GridPartitioner(data=dataset[:800], npart=30)
|
||||||
|
|
||||||
|
model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)
|
||||||
|
|
||||||
|
model.fit(dataset[:800])
|
||||||
|
|
||||||
|
model.predict(dataset[800:1000])
|
||||||
|
|
||||||
|
Measures.get_point_statistics(dataset[800:1000], model)
|
||||||
|
|
||||||
|
print(model)
|
||||||
|
|
||||||
|
'''
|
||||||
|
ret = Evolutionary.execute(datsetname, dataset,
|
||||||
|
ngen=30, npop=20, pcruz=.5, pmut=.3,
|
||||||
|
window_size=800, experiments=30)
|
||||||
|
#parameters={'distributed': 'spark', 'url': 'spark://192.168.0.106:7077'})
|
||||||
|
|
||||||
|
print(ret)
|
||||||
|
#'''
|
@ -28,8 +28,6 @@ test_uv = dataset['value'].values[24505:]
|
|||||||
train_mv = dataset.iloc[:24505]
|
train_mv = dataset.iloc[:24505]
|
||||||
test_mv = dataset.iloc[24505:]
|
test_mv = dataset.iloc[24505:]
|
||||||
|
|
||||||
print(train_mv)
|
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
||||||
|
|
||||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||||
@ -46,21 +44,16 @@ parameters = [
|
|||||||
{'order':2, 'knn': 3},
|
{'order':2, 'knn': 3},
|
||||||
]
|
]
|
||||||
|
|
||||||
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
|
#for ct, method in enumerate([, wmvfts.WeightedMVFTS,
|
||||||
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
|
# cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
|
||||||
print(method)
|
model = mvfts.MVFTS()
|
||||||
model = method(**parameters[ct])
|
|
||||||
model.shortname += str(ct)
|
|
||||||
model.append_variable(vhour)
|
|
||||||
model.append_variable(vvalue)
|
|
||||||
model.target_variable = vvalue
|
|
||||||
model.fit(train_mv)
|
|
||||||
|
|
||||||
Util.persist_obj(model, model.shortname)
|
model.append_variable(vhour)
|
||||||
|
model.append_variable(vvalue)
|
||||||
|
model.target_variable = vvalue
|
||||||
|
model.fit(train_mv)
|
||||||
|
|
||||||
forecasts = model.predict(test_mv.iloc[:100])
|
print(model)
|
||||||
|
|
||||||
print(model)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
|
||||||
from pyFTS.data import Enrollments, TAIEX
|
from pyFTS.data import Enrollments, TAIEX, SONDA
|
||||||
from pyFTS.partitioners import Grid, Simple
|
from pyFTS.partitioners import Grid, Simple
|
||||||
from pyFTS.models import hofts
|
from pyFTS.models import hofts
|
||||||
|
|
||||||
@ -12,20 +13,51 @@ import os
|
|||||||
# make sure pyspark tells workers to use python3 not 2 if both are installed
|
# make sure pyspark tells workers to use python3 not 2 if both are installed
|
||||||
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
|
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
|
||||||
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
||||||
|
#'''
|
||||||
data = TAIEX.get_data()
|
data = SONDA.get_data('glo_avg')
|
||||||
|
|
||||||
fs = Grid.GridPartitioner(data=data, npart=50)
|
fs = Grid.GridPartitioner(data=data, npart=50)
|
||||||
|
|
||||||
model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)
|
model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)
|
||||||
|
|
||||||
model.fit(data, distributed='spark', url='spark://192.168.0.110:7077')
|
_s1 = time.time()
|
||||||
|
model.fit(data, distributed='spark', url='spark://192.168.0.106:7077')
|
||||||
|
_s2 = time.time()
|
||||||
|
|
||||||
|
print(_s2-_s1)
|
||||||
|
|
||||||
#model.fit(data, distributed='dispy', nodes=['192.168.0.110'])
|
#model.fit(data, distributed='dispy', nodes=['192.168.0.110'])
|
||||||
|
'''
|
||||||
|
|
||||||
|
from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, grid
|
||||||
|
from pyFTS.models.seasonal import partitioner as seasonal
|
||||||
|
from pyFTS.models.seasonal.common import DateTime
|
||||||
|
|
||||||
|
dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
|
||||||
|
|
||||||
|
dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
|
||||||
|
|
||||||
|
train_mv = dataset.iloc[:24505]
|
||||||
|
test_mv = dataset.iloc[24505:]
|
||||||
|
|
||||||
|
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
||||||
|
|
||||||
|
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||||
|
data=train_mv, partitioner_specific=sp, data_type=pd.datetime, mask='%Y-%m-%d %H:%M:%S')
|
||||||
|
|
||||||
|
vvalue = variable.Variable("Pollution", data_label="value", alias='value',
|
||||||
|
partitioner=Grid.GridPartitioner, npart=35, data_type=np.float64,
|
||||||
|
data=train_mv)
|
||||||
|
|
||||||
|
fs = grid.GridCluster(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
|
||||||
|
#model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
|
||||||
|
model = cmvfts.ClusteredMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue,
|
||||||
|
partitioner=fs)
|
||||||
|
|
||||||
|
model.fit(train_mv, distributed='spark', url='spark://192.168.0.106:7077')
|
||||||
|
#'''
|
||||||
print(model)
|
print(model)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
def fun(x):
|
def fun(x):
|
||||||
return (x, x % 2)
|
return (x, x % 2)
|
||||||
|
Loading…
Reference in New Issue
Block a user