460 lines
228 KiB
Plaintext
460 lines
228 KiB
Plaintext
|
{
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 0,
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"name": "Benchmarks.ipynb",
|
||
|
"version": "0.3.2",
|
||
|
"provenance": [],
|
||
|
"toc_visible": true
|
||
|
},
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
}
|
||
|
},
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "view-in-github",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"source": [
|
||
|
"[View in Colaboratory](https://colab.research.google.com/github/petroniocandido/pyFTS/blob/master/pyFTS/notebooks/GOOGLE%20COLAB%20-%20Benchmarks.ipynb)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "WHN-tefYEJsD",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"# Example of Computational Experiments"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "msOApqBqEJsG",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"## For running on Colab"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "tJ_He1u8EJsI",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 315
|
||
|
},
|
||
|
"outputId": "f0c2390a-61e4-4dcf-d412-de6f907472bd"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"!pip3 install -U git+https://github.com/petroniocandido/pyFTS\n",
|
||
|
"!git clone https://github.com/petroniocandido/stac"
|
||
|
],
|
||
|
"execution_count": 1,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Collecting git+https://github.com/petroniocandido/pyFTS\r\n",
|
||
|
" Cloning https://github.com/petroniocandido/pyFTS to /tmp/pip-req-build-qz5k03k2\n",
|
||
|
"Building wheels for collected packages: pyFTS\n",
|
||
|
" Running setup.py bdist_wheel for pyFTS ... \u001b[?25l-\b \b\\\b \bdone\n",
|
||
|
"\u001b[?25h Stored in directory: /tmp/pip-ephem-wheel-cache-6e0g6d7u/wheels/84/d7/1e/a333c7128f25b347640740859808db094c4478e98663cd2297\n",
|
||
|
"Successfully built pyFTS\n",
|
||
|
"Installing collected packages: pyFTS\n",
|
||
|
" Found existing installation: pyFTS 1.2.2\n",
|
||
|
" Uninstalling pyFTS-1.2.2:\n",
|
||
|
" Successfully uninstalled pyFTS-1.2.2\n",
|
||
|
"Successfully installed pyFTS-1.2.2\n",
|
||
|
"Cloning into 'stac'...\n",
|
||
|
"remote: Counting objects: 2238, done.\u001b[K\n",
|
||
|
"remote: Total 2238 (delta 0), reused 0 (delta 0), pack-reused 2238\u001b[K\n",
|
||
|
"Receiving objects: 100% (2238/2238), 23.62 MiB | 33.27 MiB/s, done.\n",
|
||
|
"Resolving deltas: 100% (1147/1147), done.\n"
|
||
|
],
|
||
|
"name": "stdout"
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "07JlvCoVEJsT",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"## Common Imports"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "nNQThkh6EJsW",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 34
|
||
|
},
|
||
|
"outputId": "1102cb5f-8bbf-4b39-bc03-d507a65d5b4e"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"import warnings\n",
|
||
|
"warnings.filterwarnings('ignore')\n",
|
||
|
"\n",
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import matplotlib.pylab as plt\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"\n",
|
||
|
"%pylab inline"
|
||
|
],
|
||
|
"execution_count": 2,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Populating the interactive namespace from numpy and matplotlib\n"
|
||
|
],
|
||
|
"name": "stdout"
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "-gVJot92EJsj",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"## Common data transformations"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "eVYEnD-sEJsl",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 17
|
||
|
},
|
||
|
"outputId": "d90cb43a-7de9-4e4f-ea3c-9772d5f49a40"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"from pyFTS.common import Transformations\n",
|
||
|
"\n",
|
||
|
"tdiff = Transformations.Differential(1)\n",
|
||
|
"\n",
|
||
|
"boxcox = Transformations.BoxCox(0)"
|
||
|
],
|
||
|
"execution_count": 3,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "AXXcR7rtEJsq",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"## Import Datasets"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "4AdDygckEJss",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 17
|
||
|
},
|
||
|
"outputId": "3a5853c8-3b60-47ef-bc8f-eaafec1b9dbc"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"from pyFTS.data import TAIEX, NASDAQ, SP500\n",
|
||
|
"\n",
|
||
|
"dataset_names = [\"TAIEX\", \"SP500\",\"NASDAQ\"]\n",
|
||
|
"\n",
|
||
|
"def get_dataset(name):\n",
|
||
|
"    \"\"\"Return the series for the dataset `name` ('TAIEX', 'SP500' or 'NASDAQ'); raise ValueError for any other name.\"\"\"\n",
|
||
|
"    if name == \"TAIEX\":\n",
|
||
|
"        return TAIEX.get_data()\n",
|
||
|
"    elif name == \"SP500\":\n",
|
||
|
"        return SP500.get_data()[11500:16000]\n",
|
||
|
"    elif name == \"NASDAQ\":\n",
|
||
|
"        return NASDAQ.get_data()\n",
|
||
|
"    # Fail loudly instead of silently returning None for an unknown dataset name\n",
|
||
|
"    raise ValueError(\"Unknown dataset: {}\".format(name))\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"train_split = 2000\n",
|
||
|
"test_length = 200"
|
||
|
],
|
||
|
"execution_count": 4,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "QkMC3pyQEJs0",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 463
|
||
|
},
|
||
|
"outputId": "0eda18dc-fbd3-407f-ab9f-56a30cc6fd40"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"fig, ax = plt.subplots(nrows=3, ncols=3, figsize=[15,7])\n",
|
||
|
"\n",
|
||
|
"for count,dataset_name in enumerate(dataset_names):\n",
|
||
|
" dataset = get_dataset(dataset_name)\n",
|
||
|
" dataset_diff = tdiff.apply(dataset)\n",
|
||
|
" dataset_boxcox = boxcox.apply(dataset)\n",
|
||
|
"\n",
|
||
|
" ax[0][count].plot(dataset)\n",
|
||
|
" ax[1][count].plot(dataset_diff)\n",
|
||
|
" ax[2][count].plot(dataset_boxcox)\n",
|
||
|
" ax[0][count].set_title(dataset_name)"
|
||
|
],
|
||
|
"execution_count": 5,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3oAAAGqCAYAAABd8jZ+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsnXd4HNXVh19Vy5JtWbJluXf7ulew\nMc2F3kLooRs7tEACAQImBQJ8CRBCCSG00CH03ot7wR33cl1wk5tkW9WqW74/ZmY1uzvbi3al+z6P\nH+/euTNzZ3c1c8895/xOitPpRKFQKBQKhUKhUCgUzYfUph6AQqFQKBQKhUKhUCiiizL0FAqFQqFQ\nKBQKhaKZoQw9hUKhUCgUCoVCoWhmKENPoVAoFAqFQqFQKJoZytBTKBQKhUKhUCgUimaGMvQUCoVC\noVAoFAqFopmR3tQDUDQvhBDPAZP1t/2AfUCN/v5YKWWlEOJW4P+Ac6SUi0z7/hXoLqX8tRBiEvA9\n8LPnOaSUg4QQNwI3AuOklDZ9/7OA54FhUsrKWFyfQqFIXoQQY4F/AN3QFjoPA38AioAdgNS7pgIH\ngNuklKv0+9HXwG7T4T6RUt4rhEgBHgYuAJxGu36+9sArwDCgHnhQSvl+TC9SoVAkJEIIJ/CKlHK6\nqW0S8Fcp5SRTWz6wCfhcSnm9xzEuR7tnZQMZwDrgN1LKfUKIqcB/gD36NoDPgQeklGUex/knMA0Y\nKaXc47HtdOB+oABIQbsv/klKuSaS61c0DcrQU0QVKeXNxmshxE7gKinlQo9uVwN/Bq4BFuGb3VLK\nQT7O84IQ4mK0G97DQogc4FngemXkKRQKT3SD7Au0e8RXetuFwGfASYDdfL8RQlwGfCqEGKA3LTNP\nxkxcBkwCRqAZevOEEBdLKT8EHkG7j10ohOgO/CSEWCSl3BuTi1QoFInORCHEaCnlKj99rgCeBqYL\nIbKklLUAQoghwFPAeCnlTiFEGvAY2mLSmfq+i6WUp+r9c9HuQXOFEMeZjpMOnKvvexXaQhX6ttOB\n14BLjbmbfi/8QQhxspRyc1Q+BUXcUKGbirgihBiK5uF7CThDCNEqgsP9Gvi9EEIADwKzpJTfR2GY\nCoWi+dER6AIsMRqklB8DI4Fqz85SyveA1oDlYpOJS4DXpJR1Usp64E29zdj2vH68ImAu8IuIrkKh\nUCQz96IZa/64Bngb+AE439Q+FDgopdwJIKW0A39CMwy9kFKW64vvlfoxDc4AlgJvAFd67PYQcJ95\ngV6/F76BtkCvSDKUoaeIN1OBt/SVpVlEMOmRUu5Cu/F8DFwE3BmNASoUimbJIWA5MEcIMV0I0Qdc\nBpgv0oE6/XVPIcR3QggphPhQCNFNbx8IbDftsx0YJIToAORbbYvCtSgUiiRESvkBkKJHJHmhL4bX\nSyl3AG/hbqAtQrsPfS6EuEAIkS+lrJFSHglw2i9oTKkBbR72ph5ZcFAIcax+7hzgWOBLH8eYFPAC\nFQmHMvQUcUMPM7gY+EBv8ryJedJTCLHZ49/jHn0+APoA86SU5dEftUKhaA5IKZ3AacAnwG3Az0KI\nDXr4phtCiBQhxA1ouXtbgf1oC0pXoeXb7UXz3IGWK1Nr2r0GyNHbHVLKBottCoWi5XI78KgQIsti\n27VocyOAhcBAIUQhgJRyHzAO7X70NFAihJgphBgR4HwVQC6AECIPGAvM1reZ52G5aDl5hyyOcRDI\nC3xpikRD5egp4skZaCIIu7RoSwBaCyE6SSmLLfr7zNEz8S+03LxLhRAnmMVdFAqFwoy+GHQ/cL8+\neZoKvIsWvpkmhDDyT1KAjcD5UkoHmhjBXcZxhBAPAIf0FfCjgHnClg1U6e2pQohMPaTTvE2hULRQ\npJQ/CSHmA3cAPxrt+mL4lUAbIcQjenOW3vaE
vu8WNCE6hBCDgRnAN0KIHn5O2Rsw5liXA12BI/o8\nLAWoE0LcgWbM2dBC3Pd4HKPQok2RBCiPniKeXAtcI6Vsb/xDy1+xjC8PhBDiF8AY4I/A74CXI8z5\nUygUzRQhRHchxInGeynlQSnlo2iqdeegi7Ho/4SU8gI9fAohRKEpVBO0RVIn2qRoM9DftG0AsFEP\npypBUx922xaL61MoFEnFH4Fb0Ywqg9OBdVLKXNMcaQK6x00IMVqYVsmllJv0Y3RFCxP3Qjcef4mm\nYg7aPGySaR6WCyxGU0G3A8vQFIQ9OQ+YH/bVKpoMZegp4oIuM34mmkS5mU/xH77p63j5wHPAr6WU\n9VLKT9HkiO+PdKwKhaJZ0gNNRXOs0aDnpvREy93zx/nAx0KINvr729DEn+qA94EbhBA5+vYbgHf0\nfu+jhWkZinkT0VQ+FQpFC0ZKuR+tFMJfTc1T0eZE5n6rgPZCiOFohuAbRiinriR8FdrCkle4pR5x\n8CJQCryvewB7oAmxmDHPwx4A/iKEONV0nEvQFuT/Ec61KpoWFbqpiBe/QpP9rfBon4+WizfMYp+e\nplAqM9egTZ4+klIuMbXfCqwVQnwQQLpYoVC0MKSUi/W8u+d02fE0tFp5lwG7Auz+EproymohhB3N\nK3edftwPdeNxNZqX720p5Rf6fn8EXhNCbEPL45supTwY5UtTKBTJyePA9eBaDD8PfWHIg0/RPHF/\nQLtvzdE9dRnAT/p+BhP0eVMammrwZ8AZUkqbEOJatNp8To/jfwE8o4u7fC+EuAp4UgjRFs1O2Aqc\nJaXcFpWrVsSVFKfT8/tWKBQKhUKhUCgULRUhxLnA41JKEbCzImFRoZsKhUKhUCgUCoXCzCy0sNGz\nmnogivBRHj2FQqFQKBQKhULhhhDibDQ9hAop5fCmHo8idJShp1Aomh16zudnwJNSymeEEB8ABfrm\nfGAJ8Hc0xcWVenuJlPISPX/rbbSaQlXAFUEUpFUoFAqFQqFIKJQYi0KhaFboSmP/Rgs7AUBKeYlp\n+yto4hr6JjnJ4xC3A3OllI/p4h336P8UCoVCoVAokoakMfRKSipDcj3m5WVTWlodq+FEjWQZJyTP\nWJNlnJA8Y43lOAsK2qZE+ZB1wNlYGGd6DaL2UsplQojePvY/BZimv/4C+DLQCUO5PyXLdx6I5nAd\nzeEaQF1HrIjBvSnutMR7kz/UNTYPWvo1hnJvShpDL1TS09OaeghBkSzjhOQZa7KME5JnrMkyTgAp\npQ2wmerKmrkNzdtn0FkI8SFawdn/SCn/B3RGK3QNUIx7QVtL8vKyQ/qMCgraBt03kWkO19EcrgHU\ndSgiJ5nu8+GirrF5oK4xhONE5SgKhUKR4AghMoETpZS/0ZsOA38B3kLLx1smhJjtsVtQq2ahrCwW\nFLSlpKQy6P6JSnO4juZwDaCuI1Yoo1OhUCQ7ytBTKBQthYnAMuONlLISeFV/e0gIsQIYBOxD8+qV\nA9309wqFQqFQKBRJhaqjl6SUVdUxd/VeHEo1VaEIlmOBNcYbIcRkIcQT+uscYBSwBfgeMMRbLgK+\njfM4FQpFDFi4dj+HymqaehgKhaIZcqSilpo6W1j77jpQyaadsRH3VoZelHE6nezYX4HN7ojpee54\nZhFvfCv5aO526hrsMT2XQpFMCCHGCiHmAlOB24QQc4UQ+Wi5dsWmrguAfCHEYmAO8LCUci/wNHCM\nEGIBMBl4LJ7jVygU0WdbUTmvfL2JmSuLmnooCoWimWGzO7j7ucU8+PqKsPZ/4LXlPPbu6iiPSiOo\n0E2LmlQ9gDeBNGA/cLWUsk4IcSWaNLkDeFFK+bIQIgN4DegF2IHrpJQ/CyFGohVhdAJrpZQ3R/na\nmoT7Xl7G3kNHOXlkV6aeNSjm5/tm6W6+WbqbV2ZMifm5FIpkQEq5Ephksem3Hv1saMag5/5VwC9j\nMTaFQtE0
/P0trVzm4fLaJh6JQqFoTjidTt6btQ2H08nBI9UcLq+lQ24WRypq2by7lBRSmDCss8/9\nDx6JrXpoQI+eV
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x7ff86cecce10>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {
|
||
|
"tags": []
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "KEv4ilhoEJs-",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"## Batch computational experiments with sliding_window_benchmarks\n",
|
||
|
"\n",
|
||
|
"**benchmarks.sliding_window_benchmarks** is a method that runs sliding window experiments in batch and stores the results in a SQLite3 database for later analysis. \n",
|
||
|
"\n",
|
||
|
"For each data window, the data is split into a train set and a test set. For each train split, number of partitions and partitioning method, a partitioner model is created. Then, for each partitioner, order, steps ahead and FTS method, a forecasting model is trained. The number of experiments is determined by the 'windowsize' and 'inc' parameters.\n",
|
||
|
" \n",
|
||
|
" Then all trained models are benchmarked on the test data and the metrics are stored in a SQLite3 database (identified by the 'file' parameter) for later analysis.\n",
|
||
|
" \n",
|
||
|
" This whole process can be distributed on a dispy cluster by setting the 'distributed' parameter to True and passing the list of dispy nodes in the 'nodes' parameter. \n",
|
||
|
" \n",
|
||
|
"**Mandatory Parameters**\n",
|
||
|
"\n",
|
||
|
" - **data**: test data\n",
|
||
|
" - **windowsize**: size of sliding window\n",
|
||
|
" - **train**: percentage of the sliding window data used to train the models\n",
|
||
|
"\n",
|
||
|
"**kwargs optional arguments**\n",
|
||
|
" \n",
|
||
|
" - **benchmark_methods**: a list with Non FTS models to benchmark. The default is None.\n",
|
||
|
" - **benchmark_methods_parameters**: a list with Non FTS models parameters. The default is None.\n",
|
||
|
" - **dataset**: the dataset name to identify the current set of benchmarks results on database.\n",
|
||
|
" - **distributed**: A boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.\n",
|
||
|
" - **file**: file path to save the results. The default is benchmarks.db.\n",
|
||
|
" - **inc**: a float on interval [0,1] indicating the percentage of the windowsize to move the window\n",
|
||
|
" - **methods**: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.\n",
|
||
|
" - **models**: a list with prebuilt FTS objects. The default is None.\n",
|
||
|
" - **nodes**: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].\n",
|
||
|
" - **orders**: a list with orders of the models (for high order models). The default is [1,2,3].\n",
|
||
|
" - **partitions**: a list with the numbers of partitions on the Universe of Discourse. The default is [10].\n",
|
||
|
" - **partitioners_models**: a list with prebuilt Universe of Discourse partitioners objects. The default is None.\n",
|
||
|
" - **partitioners_methods**: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].\n",
|
||
|
" - **progress**: If true a progress bar will be displayed during the benchmarks. The default is False.\n",
|
||
|
" - **start**: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.\n",
|
||
|
" - **steps_ahead**: a list with the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.\n",
|
||
|
" - **tag**: a name to identify the current set of benchmarks results on database.\n",
|
||
|
" - **type**: the forecasting type, one of these values: point(default), interval or distribution. The default is point.\n",
|
||
|
" - **transformations**: a list with the data transformations to apply. The default is [None].\n",
|
||
|
"\n",
|
||
|
"The default file database name is 'benchmarks.db', and it contains a table named 'benchmarks' with the below schema:\n",
|
||
|
"\n",
|
||
|
"|Field|Type|Description|\n",
|
||
|
"|-------|--------|-----------------|\n",
|
||
|
"|ID|integer|incremental primary key|\n",
|
||
|
"|Date| datetime |Date/hour of benchmark execution|\n",
|
||
|
"|Dataset|text| Identifies the dataset on which the benchmark was performed|\n",
|
||
|
"|Tag|text| a user-defined word that identifies a benchmark set|\n",
|
||
|
"|Type|varchar| forecasting type (point, interval, distribution)|\n",
|
||
|
"|Model|varchar| FTS model|\n",
|
||
|
"|Transformation|varchar| The name of data transformation, if one was used|\n",
|
||
|
"|Order|integer| the order of the FTS method |\n",
|
||
|
"|Scheme|text|UoD partitioner|\n",
|
||
|
"|Partitions|integer| Number of partitions|\n",
|
||
|
"|Size|integer| Number of rules of the FTS model |\n",
|
||
|
"|Steps|integer| prediction horizon, i. e., the number of steps ahead|\n",
|
||
|
"|Measure|varchar| accuracy measure|\n",
|
||
|
"|Value|real| the measure value|\n",
|
||
|
"\n",
|
||
|
"Know the sliding_window_benchmarks options:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "-I4BAgQ3EJtB",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 17
|
||
|
},
|
||
|
"outputId": "9ea5fd94-112d-4fef-b284-26eccb964b58"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"from pyFTS.benchmarks import benchmarks as bchmk\n"
|
||
|
],
|
||
|
"execution_count": 6,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "cmRHpToLEJtJ",
|
||
|
"colab_type": "text"
|
||
|
},
|
||
|
"cell_type": "markdown",
|
||
|
"source": [
|
||
|
"## Partitioning optimization by dataset\n",
|
||
|
"\n",
|
||
|
"**CAUTION**: This task is computationally expensive and takes several hours to run. We strongly recommend using the distributed version with a dispy cluster."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "UiM-chLmEJtL",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 17
|
||
|
},
|
||
|
"outputId": "2bec6dc3-ad64-4b35-f398-925496271d3c"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"from pyFTS.partitioners import Grid, Util as pUtil\n",
|
||
|
"from pyFTS.benchmarks import benchmarks as bchmk\n",
|
||
|
"from pyFTS.models import chen,yu\n",
|
||
|
"\n",
|
||
|
"tag = 'partitioning'\n",
|
||
|
"_type = 'point'\n",
|
||
|
"\n",
|
||
|
"for dataset_name in dataset_names:\n",
|
||
|
" dataset = get_dataset(dataset_name)\n",
|
||
|
"\n",
|
||
|
" bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,\n",
|
||
|
" methods=[chen.ConventionalFTS, yu.WeightedFTS],\n",
|
||
|
" benchmark_models=False,\n",
|
||
|
" transformations=[None],\n",
|
||
|
" partitions=np.arange(10,100,2), \n",
|
||
|
" progress=False, type=_type,\n",
|
||
|
" #distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],\n",
|
||
|
" file=\"benchmarks.db\", dataset=dataset_name, tag=tag)\n",
|
||
|
"\n",
|
||
|
" bchmk.sliding_window_benchmarks(dataset[:2000], 1000, train=0.8, inc=0.2,\n",
|
||
|
" methods=[chen.ConventionalFTS, yu.WeightedFTS],\n",
|
||
|
" benchmark_models=False,\n",
|
||
|
" transformations=[tdiff],\n",
|
||
|
" partitions=np.arange(3,30,1), \n",
|
||
|
" progress=False, type=_type,\n",
|
||
|
" #distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],\n",
|
||
|
" file=\"benchmarks.db\", dataset=dataset_name, tag=tag)\n"
|
||
|
],
|
||
|
"execution_count": 7,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "Ye-pCHnHEJtX",
|
||
|
"colab_type": "code",
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 749
|
||
|
},
|
||
|
"outputId": "bf7fd0b7-07a2-46ab-96f9-50a95cce7628"
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"from pyFTS.benchmarks import Util as bUtil\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"fig, ax = plt.subplots(nrows=3, ncols=2, figsize=[20,10])\n",
|
||
|
"df1 = bUtil.get_dataframe_from_bd(\"benchmarks.db\",\n",
|
||
|
" \"tag = 'partitioning' and measure = 'rmse'and transformation is null\")\n",
|
||
|
"\n",
|
||
|
"df2 = bUtil.get_dataframe_from_bd(\"benchmarks.db\",\n",
|
||
|
" \"tag = 'partitioning' and measure = 'rmse' and transformation is not null\")\n",
|
||
|
"\n",
|
||
|
"for count,dataset_name in enumerate(dataset_names):\n",
|
||
|
" \n",
|
||
|
" tmp1 = df1[(df1.Dataset == dataset_name)]\n",
|
||
|
" tmp2 = df2[(df2.Dataset == dataset_name)]\n",
|
||
|
" \n",
|
||
|
" axis = ax[count][0]\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" g1 = sns.boxplot(x='Partitions', y='Value', hue='Model', data=tmp1, showfliers=False, ax=axis, \n",
|
||
|
" palette=\"Set3\")\n",
|
||
|
" axis.set_title(\"Original data\")\n",
|
||
|
" axis.set_ylabel(dataset_name)\n",
|
||
|
" axis.set_xlabel(\"\")\n",
|
||
|
" \n",
|
||
|
" axis = ax[count][1]\n",
|
||
|
"\n",
|
||
|
" g2 = sns.boxplot(x='Partitions', y='Value', hue='Model', data=tmp2, showfliers=False, ax=axis, \n",
|
||
|
" palette=\"Set3\")\n",
|
||
|
" axis.set_title(\"Differentiated data\")\n",
|
||
|
" axis.set_ylabel(\"RMSE\")\n",
|
||
|
" axis.set_xlabel(\"Number of partitions of the UoD\")\n",
|
||
|
" \n",
|
||
|
"plt.tight_layout()"
|
||
|
],
|
||
|
"execution_count": 12,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABZgAAALICAYAAADyhJW9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3XmYXGWZ9/FvdydNdyedRQgCAQSU\nuWcEgRiWEQziQiNBnHkVxRE3iIosIosgEEARArwgRhFFg5HNkYGgjKIBosyrjiAICAaCPuo4iChI\n0KS6k+6kO939/nGqmupKLaeqzl6/z3XlStepszzn1Kmqp+5zn/tpGx8fR0RERERERERERESkXu1x\nN0BERERERERERERE0kkBZhERERERERERERFpiALMIiIiIiIiIiIiItIQBZhFREREREREREREpCEK\nMIuIiIiIiIiIiIhIQxRgFhEREREREREREZGGTIm7ASIiWWZmbcBpwIeBqXgX9v4fcIFzbm2FZe4D\nznbO/bLKei8H/uic+2qD7foMsLNz7sM15rsAeJVz7kM15jsWuNs5199Ie0REREQkfcxsHPgfYBSY\nBjwOLHHO/Tz//ESf1cxOBi4CvgTcCqwCNjjn9ougnRN9VTO7GVjhnLurxjIfcc5dX+d23gd82Dl3\nWEDzHQH82jn3TD3tEBGJmjKYRUTCtQQ4DjjSOfePwKuB9cCPzay73ALOuTdXCy7n5zmv0eBySC4G\nZsTdCBERERGJ3GHOOQN2AW4Cvmtmh8JWfdZ3Aoudc0uAQ4Dnoggu5030VZ1zH/ARXN4BOCeKhtVw\nBrBr3I0QEalFGcwiIiExs5cBpwP7OeeeBXDObQE+ZWZvBt4PLDOzp4Fv4AWiDwd+CrzPOfczMzs/\nv44/AjcA5zjndjOzG4HfO+cuzS9/ObAIr2P/LefcWfk2fBg4C+/z/jng/c65P1ZpczdwI/DPwNPA\nb4qeM2A5sC1eNvaFzrlbzewbgOEFzT8E/A7vx8VuwDbAl5xzn2/gEIqIiIhISjjnxoEVZjYTuAI4\nuNBnxQvuvg74JzN7N7APMMPMfuWc29fM/gW4FC8L+vfAe51zL+bvupsL7At8C/gicCFev7kL+E/g\nTOfcqJn9GPge8A5gd7w+9Xvx+q/FfdVLga87575pZm/HSwjpBDYAi5xzjwMPADub2W/ybX0VcB2w\nI7AZON4594iZtQPXAG8Hngd+Uu7YVJvPzF5Omb6zmV0CvDl/zM4B7sL7PbBfvr3fds590t+rIyIS\nLmUwi4iE55+BZ5xzvy3z3F3AG4oe7+ycs+Lb38xsL7zMiX2BBcC7q2zrULxO+3zg42a2s5ltD1wL\nHO6c2xOvs35hjTYfD+wAvBKvc95X9NzngO875/4JOAFYbmZTnXMn5J8/zDn3M+AC4H/zGdtvBi43\ns11qbFdEREREsuF7wEHFd+s5584BfoGXLHEEcB7w83xweQ/gFuDfnHN74JWTK75TbyGw0Dn3BeB9\neH3iA/H6q68ETiqa92i8hI1/AN4EHFymrwqAmU3BC+x+JJ+B/V28/i54fd1n8v3ZLXiB7Judc/8A\nfAwvS3sK8Fa8/vKr8fr2h1Y4JtXmK9t3ds5dCPwZOM45d1t+P3uBfwReC3zIzF5fYXsiIpFSgFlE\nJDwvA8rWWQb+mn++4Ptl5jkU+LFz7jnn3Ca8LOdKvuWcG3XO/SW/7l2ccy8AMwrZ08B/A3vUaPOh\nwHecc1ucc38rade/AFfl//4ZXtbIjmXWcRrwcQDn3B/wsjR2r7FdEREREcmGfrxYQ6/P+d+K1+d9\nMv/4q8Dbzawj//gh59yL+b+PBr7hnMvl7wz8Ol5SRMEdzrkh59xG4LdUKS+RX35759yD+UmV+sr/\nCGxPvi/unLsfr49/MF7f+QfOuQ3OuSHg9gqb
qzafr76zc+5q4F+cc+POuXXAmgrtFRGJnEpkiIiE\n50VgpwrPvRx4oejx38vMM7tk+p+rbCtX9Pco0JHvlH82f+tfB14nv1w2dbGXlaxrHS/9ODgCuMDM\n5gBjQBvlL1QegJd5sWu+LTtWmE9EREREsmc3YARv3BE/ZgGH5stRFOTwyrLB5P7wLOCTZvbR/OMp\nTE7o2KpPXGPbp5nZB/FKU3QB4xXa1wP82qsYB3glP7bF6zv/pWjedRW2U20+X31nM9sT+LyZ/WN+\nvl3wSmaIiMROAWYRkfD8HHiZme3rnPtVyXNvwxtBu5p+YHrR43LZwtUci1fn7dB8DbuP4NWrq2Yd\nMLPo8RwAM5sKrADe7ZxbaWbbAEMV1vFNYCnwVefcuJlVC4yLiIiISLYcg5eRPFwUkK3mL8CPnHPH\nlD5RZvm/AN9zzl3bbCPN7GDgU8CBzrmnzexw4PoK7evPl7AoXcc/U6bvXEbZPnae377zl4FHgX/N\n15y+v8J8IiKRU0aZiEhInHM5vEFDbjGz3cGr9WZml+NlU/xHjVX8AnijmW2XD+h+sM4mbA88nQ8u\nb4tXr256jWV+Tv6WRDPbDq/mHXgDrkwDHsk//gQwXLS+LXjZHYXtPprvIH8wv1yt7YqIiIhIiplZ\nm5kdgzdA9fl1LHovsCBfixkzO9DMvlhh3u8C7zeznvy8J+b7m7UU91ULtse7o/CZ/Po+CEwzsza8\nDOzp+TrLfwSeze8b+b75rWY2Da/vfISZ9eTX8a4K2682X7W+8wiT+9iP5YPLhwN7oj62iCSEAswi\nIiFyzn0OWAbclb/t7ym8W+Te4pwbrrHsL/AGHnkM+C+8gQHL3bZXya3Atmb2+/zfFwC7mNnVVZa5\nHu/Wwj8A3wHuzLdlPXAl8JiZPQb8D95gJ9/Pd65vBx7Ijwp+IXCnma3G6/R+DbjezF5ZR9tFRERE\nJB1+nO/n/gVvILqjnHOP1FhmgnPuOeAjeP3HX+MNUn1bhdn/E69P/Mv8Nt+OF6CupbivWnBPvs3/\nA6wCvoDXD74DWI1XmuN5vFIU7wFOzW/zp8B9+TrPdwH3Aw74CbCywvarzVet73wH8B9mdiZwKXC1\nmT2JN1DgxcDFZnaIj/0XEQlV2/h4PbEKERGJkpm1OefG838fBVzqnJsXc7NERERERERERADVYBYR\nSaz8YHq/MbPXAs/glbj4ebytEhERERERERF5iUpkiIgklHNuLbAYuA/4LV5pjc/E2SYRERERERER\nkWIqkSEiIiIiIiIiIiIiDVEGs4iIiIiIiIiIiIg0JJU1mNeuHVDatYiIiIgEbs6c3ra429As9ZVF\nREREJAyV+sqhBZjNrAe4EXg50AVcAhwDzAf+lp/tKufcD8zsOOB0YAxY5pxbHla7RERERERERERE\nRCQYYWYwHw084py70sxeAfwQeAA4zzn3/cJMZjYNuAg4EBgGHjazO51zfw+xbSIiIiIiIiIiIiLS\npNACzM6524oe7gI8W2HWg4CHnXM5ADO7HzgEuCustomIiIiIiIiIiIhI80KvwWxmDwA7A28DzgRO\nNbMzgReAU4EdgLVFi7wA7FhtnbNn9zBlSkc4DRYRERERERERERERX0IPMDvnDjaz/YBvAmcAf3PO\nPW5m5wKfwSubUazmwCrr1g0G3k4RERERkTlzeuNugoiIiIhIqrSHtWIzm29muwA45x7HC2Y/kf8b\n4HvAa4C/4GUxF8zNTxMRERERERERERGRBAstwAwcCpwFYGYvB6YDXzOzPfLPHwY8CTwEHGBms8xs\nOl795f8OsV0iIiIiIiIiIiIiEoAwS2R8FVhuZv8NdAOnABuA28xsMP/38c65oXy5jHuBceDiwoB/\nYVq1aiWPPvoQAF1d3ey11z709S0Me7MiIiIiIpFatWola9asZtOmIUB9XxEREREJVmgBZufcEPDe\nMk8dUGbe
O4A7wmpLJcPDw4DXyRYRERERyTL1fUVEREQkDGGWyEi0vr6FzJgxkxkzZnLGGecqg0NE\nREQkRitX3sWNN
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x7ff865c84550>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {
|
||
|
"tags": []
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"metadata": {
|
||
|
"id": "5ZjskQyuYAwz",
|
||
|
"colab_type": "code",
|
||
|
"colab": {}
|
||
|
},
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
""
|
||
|
],
|
||
|
"execution_count": 0,
|
||
|
"outputs": []
|
||
|
}
|
||
|
]
|
||
|
}
|