pyFTSex/tutorial/pyFTS/developer/Severiano et al - HighOrderFTS.ipynb

1141 lines
725 KiB
Plaintext
Raw Permalink Normal View History

2024-08-15 12:15:32 +04:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
},
"colab": {
"name": "Severiano et al - HighOrderFTS.ipynb",
"provenance": [],
"include_colab_link": true
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/PYFTS/notebooks/blob/master/Severiano%20et%20al%20-%20HighOrderFTS.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "uYqGG55CAjBk",
"colab_type": "text"
},
"source": [
"# High Order Fuzzy Time Series \n",
"\n",
"Severiano, S. A. Jr; Silva, P. C. L.; Sadaei, H. J.; Guimarães, F. G. Very Short-term Solar Forecasting using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DOI10.1109/FUZZ-IEEE.2017.8015732"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M3ZFi-suAjBp",
"colab_type": "text"
},
"source": [
"## Environment Setup"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jLZuySHQAjBr",
"colab_type": "text"
},
"source": [
"### Library install/update"
]
},
{
"cell_type": "code",
"metadata": {
"id": "CQtIJ-tfAjBu",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 211
},
"outputId": "84dc9861-c6c2-4fb9-e854-1958f121f51f"
},
"source": [
"!pip3 install -U git+https://github.com/PYFTS/pyFTS"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting git+https://github.com/PYFTS/pyFTS\n",
" Cloning https://github.com/PYFTS/pyFTS to /tmp/pip-req-build-lh8_3clr\n",
" Running command git clone -q https://github.com/PYFTS/pyFTS /tmp/pip-req-build-lh8_3clr\n",
"Building wheels for collected packages: pyFTS\n",
" Building wheel for pyFTS (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pyFTS: filename=pyFTS-1.6-cp36-none-any.whl size=197025 sha256=0d23c79607b2a2624a44c8801dafb25269c97c4e5a3b1a45582f3dd2191c14e2\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-ucpjm000/wheels/e7/32/a9/230470113df5a73242a5a6d05671cb646db97abf14bbce2644\n",
"Successfully built pyFTS\n",
"Installing collected packages: pyFTS\n",
"Successfully installed pyFTS-1.6\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tYY62froAjB4",
"colab_type": "text"
},
"source": [
"### External libraries import"
]
},
{
"cell_type": "code",
"metadata": {
"id": "upZQ4nUgAjB7",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "5a8e81d7-b638-40c9-bcca-55573551564c"
},
"source": [
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pylab as plt\n",
"import seaborn as sns\n",
"\n",
"%pylab inline"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rihOPXpmAjCD",
"colab_type": "text"
},
"source": [
"### Common pyFTS imports "
]
},
{
"cell_type": "code",
"metadata": {
"id": "82gFjYG6AjCE",
"colab_type": "code",
"colab": {}
},
"source": [
"from pyFTS.common import Util as cUtil\n",
"from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil\n",
"from pyFTS.partitioners import Util as pUtil\n",
"\n",
"from pyFTS.models import hofts\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "cxb1DSuYAjCJ",
"colab_type": "text"
},
"source": [
"## Common data transformations"
]
},
{
"cell_type": "code",
"metadata": {
"id": "vW3yCAviAjCL",
"colab_type": "code",
"colab": {}
},
"source": [
"from pyFTS.common import Transformations\n",
"\n",
"tdiff = Transformations.Differential(1)\n",
"\n",
"boxcox = Transformations.BoxCox(0)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "On8v-92bAjCR",
"colab_type": "text"
},
"source": [
"## Datasets"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "DTt3J8Y1AjCU",
"colab_type": "text"
},
"source": [
"### Data Loading"
]
},
{
"cell_type": "code",
"metadata": {
"id": "4bmsWM40AjCV",
"colab_type": "code",
"colab": {}
},
"source": [
"from pyFTS.data import TAIEX, NASDAQ, SP500\n",
"\n",
"dataset_names = [\"TAIEX\", \"SP500\",\"NASDAQ\"]\n",
"\n",
"def get_dataset(name):\n",
" if dataset_name == \"TAIEX\":\n",
" return TAIEX.get_data()\n",
" elif dataset_name == \"SP500\":\n",
" return SP500.get_data()[11500:16000]\n",
" elif dataset_name == \"NASDAQ\":\n",
" return NASDAQ.get_data()\n",
"\n",
"\n",
"train_split = 2000\n",
"test_length = 200"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "W6txJnZ_AjCa",
"colab_type": "text"
},
"source": [
"### Visualization"
]
},
{
"cell_type": "code",
"metadata": {
"id": "wZO0b2NwAjCc",
"colab_type": "code",
"colab": {},
"outputId": "ab30611d-0dc4-4cc0-9e85-70a355543ee6"
},
"source": [
"fig, ax = plt.subplots(nrows=2, ncols=3, figsize=[10,5])\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
" dataset_diff = tdiff.apply(dataset)\n",
"\n",
" ax[0][count].plot(dataset)\n",
" ax[1][count].plot(dataset_diff)\n",
" ax[0][count].set_title(dataset_name)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmoAAAE/CAYAAAD2ee+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsnXd4FOX2x79n0yD0EjoYCAEEpIug\nFBGkesUuVvSiXK+9XQV7F7tiwWvB8ruKoGIF6V167yWE0EsIEEhC6p7fH/PO7ruzs32TnV3ez/Pk\nYeadsrMhM3PeU76HmBkKhUKhUCgUCuthi/QFKBQKhUKhUCjMUYaaQqFQKBQKhUVRhppCoVAoFAqF\nRVGGmkKhUCgUCoVFUYaaQqFQKBQKhUVRhppCoVAoFAqFRVGGmkKhUCgUCoVFUYZalEJEedKPnYjO\nSuu3SPvdQURMRDcajr+UiA5I6wuIqNBw3j/Etn8Q0REiqi3tP5yIDhJRjYr4vgqFPxBRLyJaSkS5\nRHSCiP4mogvFfVAm/q5PE9F6IrpCHJMq7hH5b/9Z6ZxJRDRRHHeEiB41fGZ/ItpORAVENJ+Izqvo\n761QKGIXZahFKcxcVf8BsA/AP6Sx76RdRwI4AeB2P057v3xeZv6H+Kw/AMwD8B4AEFFNABMA/JuZ\nc8P5vRSKYCGi6gD+BPAhgNoAGgN4EUCR2GWZuF9qAvgSwBQiqiWdoqb0t/+yNP4CgHQA5wHoB+AJ\nIhosPrMugKkAnhWfuRrA5PL5hgqFO0SURUTHiKiKNHYXES2Q1omIMoloq8nx7YholpjYnCKiNUQ0\nVGy7VDgC9AnMASKaQkQXmpzH42eI7VcQ0UoiyieiHCL6HxE1DssvIcZRhloMI2b2fQGMBjCIiBqE\ncLoHAQwhokHQDLaFzPx7GC5ToQgXrQCAmScxcxkzn2XmWcy8Ud6Jme0AJgKoDCDNj/OOBPAyM59k\n5m0APgdwh9h2DYAtzPwjMxdCM+o6ElGbsHwjhcI/4gA85GV7HwD1ALQwMbL+ADAbQAOxz4MATkvb\nD4kJTjUAPQBsB7CYiPr7+xlEdB2A7wG8D6AugHYAisV5avr7Jc9VlKEW29wOYDUz/wxgG4BbfOzv\nEWY+Du1B8B2AK6DdzAqFldgJoIyIviGiIQZvmQMiigdwF4A8ALukTXuFx+Ar4SmDOEdDABuk/TZA\ne9FA/OvYxsz5AHZL2xWKiuAtAI97MXpGAvgNwHSxDMDhEW4O4HNmLhY/fzPzEuMJWOMAMz8H4AsA\nb/j5GQTgHQCvMPP3YgJ1BNo9WADvBqYCylCLdW6HNouB+NdX+HO8cH3rPy8bti8HUAPALGbODvO1\nKhQhwcynAfQCwNC8XtlE9DsR1Re79CCiUwCOALgJwNUidH8cwIXQQptdoXkO9PSBquJfOcSfK/bR\ntxvD//J2haIiWA1gAYDHjRuIKBnAddD+pr8DMIKIEsXmHAAZAP5HRFdJ94ovpgLooodbfXxGawDN\nAPwon0B4tn8GMNDPzzxnUYZajEJEl0CbKf0ghr4HcAERdfJy2IPMXFP6edaw/TMA3wIYSkQ9w3/V\nCkVoMPM2Zr6DmZsAaA+gEbRwCwAsF3/XdZm5BzPPEcfkMfNqZi5l5qMA7gcwkIiqQfO6AUB16WOq\nAzgjlvMM24zbFYqK4jkADxBRimH8Gmh5mrMATAOQAGAYoHnJoOVdZkHzeh0mokVElO7jsw4BIGj5\nnl4/A1qoEwAOm5znMADj9SoMKEMtdhkJ7UZaT0RHAKyQxgOGiEYBaArgXgBPAfhCmjEpFJaDmbcD\n+BqawRbQoeJfGzOfhPYy6Sht7whgi1jeIm8THoY0abtCUSEw82ZoxTRjDJtGApgiJiKF0LxYI6Xj\nDjDz/cycBs2rnA9tQu6NxtDuk1N+fMZx8W9Dk/M0lLYrPKAMtRiEiCoBuAFaEUEn6ecBADeLHJ1A\nztcIWg7E3cxcBOBTaC7zp8N53QpFKBBRGyJ6jIiaiPWm0EKcy30cdxERtSYiGxHVATAewAKpovlb\nAM8QUS1RJHA3NAMQAH4B0J6IrhX33XMANgojUaGoaJ6H9vfZGADEvXAZgFuFtMwRaCHKoXoepgwz\n7wfwMXxPbq4GsJaZ8/34jB0ADgC4Xj4BEdkAXAstZKvwgjLUYpOrAJwF8C0zH9F/oFW6xQMY7OG4\njwxaUmvE+CcAfmDmxYDDXX43gIeJSCVNK6zCGQAXAVhBRPnQDLTNAB7zcVwLADPE8ZuhhXBukrY/\nD61AYC+AhQDeYuYZACByNa8F8CqAk+LzR4Tp+ygUAcHMGdDkYfRir9ugFdm0hnPC3gqa4XSTmHy8\nSEQtxUSlLoB/wmRyI+Q3GhPR89AKAZ7y5zPE++JxaJOdm4moklAg+AJaWPTDsP8iYgzSfocKhUKh\nUCiiDSLKAnCXnnMpPMm7oBlbDQB8zMwfGo55AlrUpS+0iXhfaEZTHoD5AB5n5oNEdCk0Dc0CaKk0\nuQCWAnibmZeLc2339hnM3E2sDwfwDLSK6MoANgG4npl3hPP3EYsoQ02hUCgUCkWFQUQDoRW4DWDm\n9ZG+HqujDDWFQqFQKBQVChH9A0BjZv400tdidZShplAoFAqFQmFRVDGBQhEmSGvcfYyINktjk0lr\nAL6etJ5868V4KhGdlbZ9Kh3TlYg2EVEGEY0Xyt4KhUKhOAcJSKZBoVB45WsAH0HSIGLmG/VlInoH\nrir2u5nZTIB4ArSq2hXQ2rEMBvBXOVyvQqFQKCxO1BpqdevW5dTU1EhfhkIBAFizZs1xZk4holSz\n7cIrdgM0vSGPEFFDANWliqpvocmt+DTU1D2hsBL6PRGpz1f3g8JqBHtPRK2hlpqaitWrV0f6MhQK\nAAAR7fWxS28AR5lZbgLenIjWATgN4BmhU9cYmv6QzgEx5ulzR0MTNkazZs3UPaGwDH7cE+WKekco\nrEaw94TKUVMoKoabAEyS1g8DaMbMnQE8CuB7IjL2jPQJM3/GzN2YuVtKimqZp1AoFLFG1HrUFIpo\nQbTsugZAV31MtOIqEstriGg3NDXvgwCaSIc3EWMKhUKhOAdRHrUKpLCkLNKXoIgMAwBsZ2ZHSJOI\nUogoTiy3AJAOIJOZDwM4TUQ9RF7b7QB+i8RFKxSeyD5ThIOnzkb6MhQKy1BaZkdxqb1czq0MtQoi\nMzsPbZ6dgV/WHfC9syIqIaJJAJYBaE1EB4holNg0Aq5hTwDoA2CjkOv4CcA9zHxCbLsXWh+8DGg9\nJlXFp8JSfDRvF4Z+sDjSl6FQlDtvztiOzxbt9rnfNROWotUz5fOo9mmoedCGqk1Es4lol/i3lhgn\nofuUQUQbiaiLdMxIsf8uIhopjVteM2rzwVzc8OkyFBSXBn2OiX/vAQA8MnkDjp0pDNelKSwEM9/E\nzA2ZOYGZmzDzl2L8DqP6NjP/zMztmLkTM3dh5j+kbauZuT0zpzHz/axUqRUW45tle5F7tiTSl6FQ\nlCvMjE8W7MZr07f73HfjgVyf+wSLPx61r6HpOMmMATCXmdMBzBXrADAEWggnHVol2gRAM+wAPA/g\nIgDdATyvG3dwakbpxxk/K+Jc8eESrMw6gfnbs4M+x9S1zjSj/1sW0WIohUKhCBo1b1CcK7zw+xbH\n8sFTZ3HLF8uROmYadh0947Jfed8TPg01Zl4E4IRheDiAb8TyN9B0nvTxb1ljOYCaQhdqEIDZzHyC\nmU8CmA1gsKwZJbwG30rnshxxtuCdfXbpP9JyLkOFQqHwA2bGij3a66Bzs5p+HSM6cmwSHThWi7Gw\nRWUUivJg7raj+EZyqmw+mIu/M3IAAJe/t8hl31MFmnd5YNv65XItweao1RdJzwBwBIB+dY0B7Jf2\n0zWgvI0HpBlFRKuJaHV2dvDerWAJwU6DbHCr+ai1eX36NkxcsifSl6FQWI4pq/djxGfLAQA3Xdgs\nkEP7iTB/N7Eez
"text/plain": [
"<matplotlib.figure.Figure at 0x7f5bdcad4b38>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ofSiWr_RAjCh",
"colab_type": "text"
},
"source": [
"### Statistics"
]
},
{
"cell_type": "code",
"metadata": {
"id": "V6hrTChXAjCi",
"colab_type": "code",
"colab": {},
"outputId": "88bc4dc3-2e59-45a3-8bf9-44408bd3628f"
},
"source": [
"from statsmodels.tsa.stattools import adfuller\n",
"\n",
"rows =[]\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" row = [dataset_name]\n",
" dataset = get_dataset(dataset_name)\n",
" result = adfuller(dataset)\n",
" row.extend([result[0],result[1]])\n",
" row.extend([value for key, value in result[4].items()])\n",
" rows.append(row)\n",
" \n",
"pd.DataFrame(rows,columns=['Dataset','ADF Statistic','p-value','Cr. Val. 1%','Cr. Val. 5%','Cr. Val. 10%'])\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Dataset</th>\n",
" <th>ADF Statistic</th>\n",
" <th>p-value</th>\n",
" <th>Cr. Val. 1%</th>\n",
" <th>Cr. Val. 5%</th>\n",
" <th>Cr. Val. 10%</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>TAIEX</td>\n",
" <td>-2.656728</td>\n",
" <td>0.081830</td>\n",
" <td>-3.431601</td>\n",
" <td>-2.862093</td>\n",
" <td>-2.567064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SP500</td>\n",
" <td>-1.747171</td>\n",
" <td>0.406987</td>\n",
" <td>-3.431811</td>\n",
" <td>-2.862186</td>\n",
" <td>-2.567114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NASDAQ</td>\n",
" <td>0.476224</td>\n",
" <td>0.984132</td>\n",
" <td>-3.432022</td>\n",
" <td>-2.862279</td>\n",
" <td>-2.567163</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Dataset ADF Statistic p-value Cr. Val. 1% Cr. Val. 5% Cr. Val. 10%\n",
"0 TAIEX -2.656728 0.081830 -3.431601 -2.862093 -2.567064\n",
"1 SP500 -1.747171 0.406987 -3.431811 -2.862186 -2.567114\n",
"2 NASDAQ 0.476224 0.984132 -3.432022 -2.862279 -2.567163"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QM-OvDtnAjCn",
"colab_type": "text"
},
"source": [
"## Partitioning\n",
"\n",
"The best number of partitions of the Universe of Discourse is an optimization problem. The know more about partitioning schemes please look on the Partitioners notebook. To know more about benchmarking look on the Benchmarks notebook."
]
},
{
"cell_type": "code",
"metadata": {
"id": "e-f6xBhjAjCp",
"colab_type": "code",
"colab": {}
},
"source": [
"from pyFTS.partitioners import Grid, Util as pUtil\n",
"from pyFTS.benchmarks import benchmarks as bchmk\n",
"from pyFTS.models import chen\n",
"\n",
"tag = 'chen_partitioning'\n",
"_type = 'point'\n",
"\n",
"for dataset_name in dataset_names:\n",
" dataset = get_dataset(dataset_name)\n",
"\n",
" bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,\n",
" methods=[hofts.HighOrderFTS],\n",
" benchmark_models=False,\n",
" transformations=[None],\n",
" order=[1,2,3],\n",
" partitions=np.arange(10,100,2), \n",
" progress=False, type=_type,\n",
" file=\"benchmarks.db\", dataset=dataset_name, tag=tag)\n",
"\n",
" bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,\n",
" methods=[hofts.HighOrderFTS],\n",
" benchmark_models=False,\n",
" transformations=[tdiff],\n",
" order=[1,2,3],\n",
" partitions=np.arange(3,30,1), \n",
" progress=False, type=_type,\n",
" file=\"benchmarks.db\", dataset=dataset_name, tag=tag)\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3XAksq47AjCu",
"colab_type": "code",
"colab": {}
},
"source": [
"from pyFTS.benchmarks import Util as bUtil\n",
"\n",
"df1 = bUtil.get_dataframe_from_bd(\"benchmarks.db\",\n",
" \"tag = 'chen_partitioning' and measure = 'rmse'and transformation is null\")\n",
"\n",
"df2 = bUtil.get_dataframe_from_bd(\"benchmarks.db\",\n",
" \"tag = 'chen_partitioning' and measure = 'rmse' and transformation is not null\")\n",
"\n",
"fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[15,7])\n",
"\n",
"g1 = sns.boxplot(x='Partitions', y='Value', hue='Dataset', data=df1, showfliers=False, ax=ax[0], \n",
" palette=\"Set3\")\n",
"box = g1.get_position()\n",
"g1.set_position([box.x0, box.y0, box.width * 0.85, box.height]) \n",
"g1.legend(loc='right', bbox_to_anchor=(1.15, 0.5), ncol=1)\n",
"ax[0].set_title(\"Original data\")\n",
"ax[0].set_ylabel(\"RMSE\")\n",
"ax[0].set_xlabel(\"\")\n",
"\n",
"g2 = sns.boxplot(x='Partitions', y='Value', hue='Dataset', data=df2, showfliers=False, ax=ax[1], \n",
" palette=\"Set3\")\n",
"box = g2.get_position()\n",
"g2.set_position([box.x0, box.y0, box.width * 0.85, box.height]) \n",
"g2.legend(loc='right', bbox_to_anchor=(1.15, 0.5), ncol=1)\n",
"ax[1].set_title(\"Differentiated data\")\n",
"ax[1].set_ylabel(\"RMSE\")\n",
"ax[1].set_xlabel(\"Number of partitions of the UoD\")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "8t9ZNYeXAjCy",
"colab_type": "text"
},
"source": [
"### Comparing the partitioning schemas"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Yyoj8h1EAjC0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 367
},
"outputId": "0c59b426-9f66-4536-8ccf-ad2ebe23ac05"
},
"source": [
"from pyFTS.partitioners import Grid, Util as pUtil\n",
"\n",
"fig, ax = plt.subplots(nrows=2, ncols=3, figsize=[20,5])\n",
"\n",
"\n",
"partitioners = {}\n",
"partitioners_diff = {}\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
"\n",
" partitioner = Grid.GridPartitioner(data=dataset, npart=30)\n",
" partitioners[dataset_name] = partitioner\n",
" partitioner_diff = Grid.GridPartitioner(data=dataset, npart=10, transformation=tdiff)\n",
" partitioners_diff[dataset_name] = partitioner_diff\n",
"\n",
" pUtil.plot_sets(dataset, [partitioner.sets], titles=[dataset_name], axis=ax[0][count])\n",
" pUtil.plot_sets(dataset, [partitioner_diff.sets], titles=[''], axis=ax[1][count])"
],
"execution_count": 6,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAFKCAYAAABsCIRVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzsvXm8ZMlV3/mLzLw317dlvqrqXa3e\nJJYZYRAgf8wYDDYIjIxtwIOwR2Is4MN4WGZAH7wAkllsJLCFEYNhBAIhkFiEkIyAQWKR1EILqAUS\nWnqpltTqtbrrZeZbcr1bzB9x4964cWN73U9Vr7ri9/nUJ7dbuZx3b5wT55xvBKGUwsvLy8vLy8vL\ny8vLy8vLy8vr6lHjcn8BLy8vLy8vLy8vLy8vLy8vL69LK58Q8vLy8vLy8vLy8vLy8vLy8rrK5BNC\nXl5eXl5eXl5eXl5eXl5eXleZfELIy8vLy8vLy8vLy8vLy8vL6yqTTwh5eXl5eXl5eXl5eXl5eXl5\nXWXyCSEvLy8vLy8vLy8vLy8vLy+vq0w+IeTl5eXl5eXl5eXl5eXl5eV1lcknhLyueBFCZsK/jBCy\nFB7/S+G4byOEUELI/yr9/68ghDwsPH4XIWQlve/b8tdeQAi5QAgZCsd/AyHkEULI1qX4vV5eXl5e\nxxMh5MsIIe8jhBwQQiaEkPcSQr449wtpPs4fEkI+TAj5+vz/3Jz7DNEX/Ijwnm1CyK/k/+8CIeT7\npc/8KkLIPYSQBSHknYSQZ1zq3+3l5eXl5eXlZZJPCHld8aKUDvg/AA8CeIHw3BuEQ18MYALgRQ5v\n+93i+1JKX5B/1tsA/DmAnwEAQsg2gF8A8H9QSg9O8nd5eXl5eT11EUI2AfwBgJ8DMARwPYAfBbDO\nD3l/7j+2AbwWwO8QQnaEt9gWfMGPC8//RwC3A3gGgH8A4AcJIc/PP3MXwO8B+JH8M+8C8NufnV/o\n5eXl5WUTIeQBQsgThJC+8Ny3E0LeJTwmhJBPEUI+ofj/n0cIeUdeVNgnhHyIEPJ1+WtfkRelefHg\nYULI7xBCvljxPtrPyF//ekLIXxFC5oSQMSHkNwgh15+IEby8FPIJIa+rQnll9ssBfCeAryGEXPMU\n3u57AXwtIeRrwBJD76aU/v4JfE0vLy8vr5PXHQBAKf1NSmlKKV1SSt9BKf1b8SBKaQbgVwB0Adzq\n8L4vBvDjlNIppfRuAL8E4Nvy1/45gI9TSt9EKV2BJY+eQwh59on8Ii8vLy+vJ6MmgO8zvP73AZwF\ncIsimfM2AH8C4Jr8mO8FcCi8/mheXNgA8DwA9wB4DyHkq1w/gxDyTQDeCOC/AdgF8HkAovx9tl1/\npJfXceQTQl5Xi14E4C5K6ZsB3A3gX1qO14pSugfmTN4A4OvBHIKXl5eX1+nUfQBSQsivEUK+Vur+\nKUQIaQH4dgAzAOeFlz6TV3t/Ne/8Qf4e1wL4iHDcR8CCd+S3xWuU0jmATwqve3l5eXldev00gJca\nkisvBvA/APxRfh9A0fX5TAC/RCmN8n/vpZT+hfwGlOlhSunLAPwygFc6fgYB8F8B/ASl9I158eIC\nmF9awJzI8vJ60vIJIa+rRS8Cy7gjv7VhY6/O20H5vx+XXv8AgC0A76CUXjzh7+rl5eXldUKilB4C\n+DIAFKyL5yIh5PcJIefyQ55HCNkHcAHACwH8sxwB3gPwxWBI2BeBVX05hjzIb0VU+CA/hr8uY8Ti\n615eXl5el153AXgXgJfKLxBCegC+CWycfwOAbyGEhPnLYwD3A/gNQsg/FfyHTb8H4As5pmb5jGcB\nuAnAm8Q3yLtX3wzgqx0/08vrWPIJIa+nvQghfw8sq/9b+VNvBPA/EUK+wPDfvpdSui38+xHp9dcA\neD2AryOE/N2T/9ZeXl5eXiclSundlNJvo5TeAODzAVwH1pIPAB/Ix/ldSunzKKV/mv+fGaX0Lkpp\nQil9HMB3A/hqQsgGWBcRAGwKH7MJ4Ci/P5Nek1/38vLy8ro8ehmA7yGEnJGe/+dga8u9A8AfAggA\n/GOAdf2ArRX3AFgXz2OEkDsJIbdbPutRAARsjTrjZ4AhYgDwmOJ9HgMgf18vrxORTwh5XQ16Mdhg\n/GFCyAUAfyk8f2wRQl4C4EYA/wbAfwDwy0J238vLy8vrFItSeg+A14Elho71X/PbBqV0ChagP0d4\n/TkAPp7f/7j4Wl4dvlV43cvLy8vrMohS+jGwjQb+nfTSiwH8Tl4EWIF15bxY+H8PU0q/m1J6K1jn\n6BysOGzS9WC+Y9/hM/by22sV73Ot8LqX14nKJ4S8ntYihHQA/AuwxaS/QPj3PQC+NV8z4jjvdx0Y\nf/wdlNI1gF8EayP9oZP83l5eXl5eJyNCyLMJIT9ACLkhf3wjGBr2Acv/+1JCyLMIIQ1CyAjAqwG8\nS9hR8vUAfpgQspMvFv0dYIkmAHgLgM8nhHxj7odeBuBv82SUl5eXl9fl1cvBxuzrASD3D18J4F8R\nQi7kBeRvAiMBduX/TCl9CMDPw15Y+GcA/ppSOnf4jHsBPAzgm8U3IIQ0AHwjGOrm5XXi8gkhr6e7\n/imAJYDXU0ov8H9gO8m0ADxf8//+H2HryBkh5EP58/8dwG9RSt8DFC2k3wHg/yKE+MVCvby8vE6f\njgB8KYC/JITMwRJBHwPwA5b/dwuAP87//8fA2vxfKLz+crCFoj8D4N0AfppS+scAkK8t940A/hOA\naf7533JCv8fLy8vL6ymIUno/gN9GuTHM/wa2AcGzUBaP7wBL0LwwT/z/KCHktrxIsAvgX0NRWMi3\nlb+eEPJysAWh/4PLZ+RzipeCFRq+lRDSyXdF/mUwnOznTtwQXl4ACDv3vLy8vLy8vLy8vLy8vLye\nfiKEPADg2/k6cXm36HmwpM41AH6eUvpz0v/5QTDS4MvBisJfDpacmQF4J4CXUkofIYR8BYA/B9sN\njIBtIvA+AP+FUvqB/L3uMX0GpfS5+eNvAPDDYLtSdgF8FMA3U0rvPUl7eHlx+YSQl5eXl5eXl5eX\nl5eXl9cpEiHkq8E2w/mHlNIPX+7v4/X0lE8IeXl5eXl5eXl5eXl5eXmdMhFCXgDgekrpL17u7+L1\n9JRPCHl5eXl5eXl5eXl5eXl5eXldZfKLSnt5eXl5eXl5eXl5eXl5eXldZTrWltsnqd3dXXrzzTdf\nro/38vLyOrX60Ic+tEcpPXO5v8fllvcTXl5eXmp5P8Hk/YSXl5eXWq5+4rIlhG6++Wbcddddl+vj\nvby8vE6tCCGfudzf4TTI+wkvLy8vtbyfYPJ+wsvLy0stVz/hkTEvLy8vLy8vLy8vLy8vLy+vq0w+\nIeTl5eXl5eXl5eXl5eXl5eV1lcknhLy8vLy8vLy8vLy8vLy8vLyuMvmEkJeXl5eXl5eXl5eXl5eX\nl9dVJp8Q8vLy8vLy8vLy8vLy8vLy8rrK5BNCXl5eXl5eXl5eXl5eXl5eXleZrAkhQsivEEKeIIR8\nTPM6IYS8mhByPyHkbwkhX3jyX9PLy8vL67TK+wkvLy8vL5O8n/Dy8vI6nXLpEHodgOcbXv9aALfn\n/74TwC889a/l5eXl5XUF6XXwfsLLy8vLS6/XwfsJLy8vr1Mna0KIUnongInhkG8A8HrK9AEA24SQ\na0/qCz4ZUUoRp5n9wCQCKDUekmYp0iy1fh6NY+vHpYn9O2VZiszl8xx+X5IkDp+XgFLze1FKEWdm\nOwFA5PD7kCZAZj4uoxnizG5PGkXWY7I0A7V8d5plyFKzzQGAOvy+NE2RWX4fpRmyzP63iSzvAwBJ\nmiGz/W2yjNndojh1sHkcg9p+X0aROZyfaeLweWkGarlGsyxD6vD3yzL7+RJn1Pp5aUaRuIwvV5Gu\nRD8BOI5ZrtdPFlvPHcBx3Mqo/boGu4Zsn+nqL1zGLvbdHL4/pUgcvn/scH0DYL7aIhdfDTD7u9gs\ndbBZlqbW8RBw8
"text/plain": [
"<Figure size 1440x360 with 6 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cDONJFYEAjC5",
"colab_type": "text"
},
"source": [
"## Fitting models"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "y_C377BCAjC6",
"colab_type": "text"
},
"source": [
"### With original data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "NZdAGDrnAjC8",
"colab_type": "code",
"colab": {}
},
"source": [
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
" \n",
" for order in [2,3]:\n",
"\n",
" model1 = hofts.HighOrderFTS(partitioner=partitioners[dataset_name], order=order)\n",
" model1.name=dataset_name +str(order)\n",
" model1.fit(dataset[:train_split], save_model=True, file_path='model1'+dataset_name+str(order))\n",
"\n",
" #print(model1)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "8nq5I8CzAjDB",
"colab_type": "text"
},
"source": [
"### With transformed data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "2GC-ORciAjDC",
"colab_type": "code",
"colab": {}
},
"source": [
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
"\n",
" for order in [2,3]:\n",
"\n",
" model2 = hofts.HighOrderFTS(partitioner=partitioners_diff[dataset_name], order=order)\n",
" model2.name=dataset_name +str(order)\n",
" model2.append_transformation(tdiff)\n",
" model2.fit(dataset[:train_split], save_model=True, file_path='model2'+dataset_name+str(order))\n",
"\n",
" #print(model2)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "-WP3KKc0BnPx",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 86
},
"outputId": "b473d4a9-7171-434a-a7bd-7e2f370f0da5"
},
"source": [
"!ls"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"text": [
"model1NASDAQ2 model1TAIEX2 model2SP5002 NASDAQ.csv.bz2\n",
"model1NASDAQ3 model1TAIEX3 model2SP5003 sample_data\n",
"model1SP5002 model2NASDAQ2 model2TAIEX2 SP500.csv.bz2\n",
"model1SP5003 model2NASDAQ3 model2TAIEX3 TAIEX.csv.bz2\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dSYwpPM7AjDH",
"colab_type": "text"
},
"source": [
"## Predicting with the models"
]
},
{
"cell_type": "code",
"metadata": {
"id": "fclsaJUnAjDI",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 749
},
"outputId": "eecdf2cb-11ae-43f7-b113-f3e9c5e43d0e"
},
"source": [
"fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[20,10])\n",
"\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
" \n",
" for order in [2,3]:\n",
" \n",
" ax[count].plot(dataset[train_split:train_split+200])\n",
"\n",
" model1 = cUtil.load_obj('model1'+dataset_name+str(order))\n",
"\n",
" forecasts = model1.predict(dataset[train_split:train_split+200])\n",
"\n",
" ax[count].plot(forecasts)\n",
"\n",
" ax[count].set_title(dataset_name)\n",
" \n",
"plt.tight_layout()"
],
"execution_count": 16,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABZgAAALICAYAAADyhJW9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xd0lEXbx/Hv7G56hdB774giTbDR\nm1Q7qCCKFbvYe3kU0ccugq9YQVEhgNJBmgWkI6BI74RASCN1d+f9YwNJSIAQQoIPv885e5LMzD1z\n3RFz7lyZvcZYaxEREREREREREREROV2Okg5ARERERERERERERP6dlGAWERERERERERERkUJRgllE\nRERERERERERECkUJZhEREREREREREREpFCWYRURERERERERERKRQlGAWERERERERERERkUJRgllE\nRERERERERERECkUJZhERERGRfBhjknO8vMaY1BxfD8wxbrAxxhpjrj/u+iuNMbtzfL3AGJN23Lw/\nZvX1MsbsN8aUzjG+jzFmjzEmojjuV0RERESkMJRgFhERERHJh7U29OgL2An0ytE2LsfQQUAccEsB\nph2Wc15rba+stX4EfgbeBjDGRAKjgLuttQlFeV8iIiIiIkXJVdIBiIiIiIj8WxljqgNXANcCE4wx\nFay1+ws53f3ABmNMV+AGYKG1dmoRhSoiIiIiclYowSwiIiIiUni3AMuttRONMX8BA4G3CjORtfag\nMeYBYBxggUZFF6aIiIiIyNmhEhkiIiIiIoV3CzA+6/PxnLpMxnvGmPgcr5eP618CRACzrbWxRRyr\niIiIiEiRU4JZRERERKQQjDHtgJrAt1lN44GmxpgLT3LZ/dbayByvZ4/rHwN8CfQwxlxS9FGLiIiI\niBQtlcgQERERESmcQYABVhtjjm9ffbqTGWNuA6oCVwHLgf8zxlxkrc0oglhFRERERM4K7WAWERER\nETlNxphA4DrgDuDCHK/7gAHGmNPayGGMqQSMBIZaa9OBj4FDwNNFGbeIiIiISFFTgllERERE5PT1\nBVKBL621+4++gLH43iXY7QTXfWCMSc7xWpHV/hHwrbV2MYC11gJDgQeNMY3P7q2IiIiIiBSe8T27\nioiIiIiIiIiIiIicHu1gFhEREREREREREZFCUYJZRERERERERERERApFCWYRERERERERERERKRQl\nmEVERERERERERESkUFwlHcCplClTxtaoUaOkwxARERERERERERE5b6xYseKgtbbsqcad8wnmGjVq\nsHz58pIOQ0REREREREREROS8YYzZUZBxKpEhIiIiIiIiIiIiIoWiBLOIiIiIiIiIiIiIFIoSzCIi\nIiIiIiIiIiJSKEowi4iIiIiIiIiIiEihKMEsIiIiIiIiIiIiIoWiBLOIiIiIiIiIiIiIFIqrpAMQ\nERERERERERGRfw9PcjJHFswhbdaXRF1WCWeQX95BFw6Aqq0KvYbNzCRl5SqOfPc+nkP78x0TVC2S\nyJaV83YEhkP7p8EVUOj1peAKlGA2xmwHkgAP4LbWtsjR9wjwJlDWWnvQGGOAd4EeQAow2Fq7Mmvs\nIOCZrEtfsdZ+UVQ3IiIiIiIiIiIiImdHxvbtJC1YQPLChaQsWw5uNwDOhPVENbW5B6fFw+FtcMuU\n01rDffgwRxYtInnhQpIX/4I3KQmMxRmYd6x1Q8IfuwjzX40zZx7ZeiDlEFRpBQ2vOs27lMI4nR3M\n7a21B3M2GGOqAl2AnTmauwN1s16tgVFAa2NMaeB5oAVggRXGmKnW2sNnEL+IiIiIiIiIiIicJYnT\npxP77ntk7NgBgH/t2pS+tCphjmXEbG1CYmpZooZ/l/uiWU/DH2MgPRkCQk+5RvLiXzg4ahSpq1aB\ntTjLlCGsZT1C0+cQ0qE7zgGfgTG5rkldvZrtN9xIUuORRPbrm93hzoA3asLmuUowF5MzrcH8NvAY\nvoTxUX2AL63PEiDSGFMR6ArMsdbGZSWV5wDdznB9ERERERERERERKWLWWg5+8gl7Hn4ER2go5Z95\nhtpz51D72Z6Ur/Arwf3uJvyagaT9+ScZu3fnvrhuZ/BkwPZfTrpGxu7d7Lp3GLuGDsUdG0uZe+6h\nxvffUXf8f6lUaQ7hbZvhvO7jPMllgMBmzXBVqkjizBm5O1z+UOtK2DwPrM1znRS9giaYLTDbGLPC\nGHMHgDGmD7DHWrvmuLGVgV05vt6d1Xai9jyMMXcYY5YbY5bHxsYWMEQRERERERERERE5U9bjIebl\nV4h967+E9+hB9W/GU/qmgfinrIdZT0GDq6DjC4R16w5A4ozjkrzVLgG/YN8u4nx4U1OJfe89tvbo\nyZHff6fsww9Ta9pPlL1vGEFVIzETBkBoObh+HPjlUx8DMMYQ3q07R379DU98/LH2yRuW8FSmg+1H\n9sLBf4rmGyInVdASGZdaa/cYY8oBc4wxfwNP4SuPUeSstWOAMQAtWrTQnxpERERERERERESKgTct\njb3Dh5M0Zy6lhwyh3KOPYBwO2L8OfhgCFZpC/zHgcOBfpTKBzS4gccYMygwdmj2JKwBqXg6b5/h2\nEWftQLbWkjR7DjEjXse9dx/hPXtSbvij+FWo4LsuPQnGXw/uNBj0I4SWPWms4d27Ezd2LAlz5jCx\nRjhj131KsuMvAH6qUpGGU4ZxY9uX6duozVn5XolPgRLM1to9WR8PGGOigSuAmsAa35l+VAFWGmNa\nAXuAqjkur5LVtge48rj2BWcWvoiIiIiIiIiIiBQF9+HD7L77HlLXrKH8U09R+pabfR1J+32J34Bw\nuHEC+IcA8OuOvzjQsCoNvp3GsI8f4p/QNJIyD5HqPcxFJoi3E3YSGbcVomoDcPD9Dzj40UcE1K9P\n5a9GENyyZfbiXg/8cBvE/g0Dv4dyDU4Zr6NBfY6UKcXcT1/jvRszwYbTttQgbmnWk0U/Xcek4P08\nu2woI5Y2ZEiT27jt4s44HGdaMViOd8rvqDEmxBgTdvRzfLuWl1lry1lra1hra+Ard9HcWrsfmArc\nYnzaAAnW2n3ALKCLMaaUMaZU1jyzzs5tiYiIiIiIiIiISEFl7NrFjhsHkLZhA5XfeSc7uZyRAt/c\nCKlxMOBbCK8IwIdLfuTO+dfzTtRMAEqvmMP+9HW4bTqhzrIsd2ynd5WKfDD7DbxeL+7Dhzn02WeE\ndelCzYk/5E4uA8x+BjbNgh5vQJ2OJ43V7fHw/LwvaPVlV2bXjafRjlT6hd7ObwPnMrr3o7Sr3pAn\na/di3p4DtIu4kWS7m/c2PErLz3sxbePyIv/ene8KkrIvD/xijFkD/AFMs9bOPMn46cBWYDPwCXAP\ngLU2DngZWJb1eimrTUREREREREREREqItZZdd92N+/Bhqn3+GeFdc1TFXfwW7F0FV38KFZsBkJKZ\nzifr38HlKcv9HcZA08YM2l+TtbctZtmQKfwyeDwj235OBbeD0Rm/0faL61j97lvYtDTK3n8fxnVc\nUYXN82DJR9DmHmh5+0lj/XbtItp80YdJu9/EZQKJ7HQHTgsPZVYgLCAoe2CdjoS70/i4UQt+GTCX\n7hXuI4N4nvzlIXbG68y3onTKBLO1dqu1tlnWq7G19tV8xtSw1h7M+txaa++11ta21ja11i7PMW6s\ntbZO1uuzor0VEREREREREREROV2pK1eSsWUL5R9/nODmzbM7rIU/v/PtKG7Q41jz47NH43EdYEjD\n+7m6cVvK9+5LxqbNpG/efGxMt3rN+aZyd545lIhN3Y6ZNJG/GlchrUqlvAEsGQWh5aHTiyeMccWe\nLXT8aiivrrqXDOK5uupwlg76kbsHPIB/jRp5Dxqs3g5cQbB5LhGBwbzR9Q5ebvMWXmcSAyc/hNfr\nLfT3S3JT0REREREREREREZHzWHx0NCY4OPfOZYA9KyF+JzTuf6xpe9wBFsR8TZi3EcPa9AIgrGsX\nMIbEGbmLHjjrd
"text/plain": [
"<Figure size 1440x720 with 3 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "QqqUcEpyAjDM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 237
},
"outputId": "91218f66-f749-4b0d-9ef1-949345f0f7db"
},
"source": [
"from pyFTS.benchmarks import Measures\n",
"\n",
"rows = []\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" \n",
" dataset = get_dataset(dataset_name)\n",
" \n",
" for order in [2,3]:\n",
" \n",
" row = [order, dataset_name]\n",
" \n",
" test = dataset[train_split:train_split+200]\n",
"\n",
" model1 = cUtil.load_obj('model1'+dataset_name+str(order))\n",
"\n",
" row.extend(Measures.get_point_statistics(test, model1))\n",
"\n",
" rows.append(row)\n",
"\n",
" \n",
"pd.DataFrame(rows,columns=[\"Order\",\"Dataset\",\"RMSE\",\"SMAPE\",\"Theil's U\"])\n",
" \n",
" \n",
" "
],
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Order</th>\n",
" <th>Dataset</th>\n",
" <th>RMSE</th>\n",
" <th>SMAPE</th>\n",
" <th>Theil's U</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>TAIEX</td>\n",
" <td>113.68</td>\n",
" <td>1.99</td>\n",
" <td>1.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>TAIEX</td>\n",
" <td>114.77</td>\n",
" <td>2.02</td>\n",
" <td>1.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>SP500</td>\n",
" <td>13.26</td>\n",
" <td>1.01</td>\n",
" <td>2.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>SP500</td>\n",
" <td>13.65</td>\n",
" <td>1.03</td>\n",
" <td>2.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>NASDAQ</td>\n",
" <td>50.55</td>\n",
" <td>2.49</td>\n",
" <td>2.11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>3</td>\n",
" <td>NASDAQ</td>\n",
" <td>47.91</td>\n",
" <td>2.32</td>\n",
" <td>1.99</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Order Dataset RMSE SMAPE Theil's U\n",
"0 2 TAIEX 113.68 1.99 1.72\n",
"1 3 TAIEX 114.77 2.02 1.75\n",
"2 2 SP500 13.26 1.01 2.42\n",
"3 3 SP500 13.65 1.03 2.48\n",
"4 2 NASDAQ 50.55 2.49 2.11\n",
"5 3 NASDAQ 47.91 2.32 1.99"
]
},
"metadata": {
"tags": []
},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "pfAIMY8NAjDR",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 749
},
"outputId": "c73ad657-4c10-4c5f-d798-b26d1712668b"
},
"source": [
"fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[20,10])\n",
"\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
" \n",
" for order in [2,3]:\n",
" \n",
" ax[count].plot(dataset[train_split:train_split+200])\n",
"\n",
" model1 = cUtil.load_obj('model2'+dataset_name+str(order))\n",
"\n",
" forecasts = model1.predict(dataset[train_split:train_split+200])\n",
"\n",
" ax[count].plot(forecasts)\n",
"\n",
" ax[count].set_title(dataset_name)\n",
" \n",
"plt.tight_layout()"
],
"execution_count": 18,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABZgAAALICAYAAADyhJW9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xd01MXXx/H3bHpIgUCooUsRUUER\nFRVBkV5EpYM0QcXHXsH2U7CioChVegcR6YgUKYIIoQhIkd5LQhKSkL47zx8bgRhKCIGgfF7n5GQz\n7XtnPZyTXGfvGGstIiIiIiIiIiIiIiKXy5HbAYiIiIiIiIiIiIjIv5MSzCIiIiIiIiIiIiKSLUow\ni4iIiIiIiIiIiEi2KMEsIiIiIiIiIiIiItmiBLOIiIiIiIiIiIiIZIsSzCIiIiIiIiIiIiKSLUow\ni4iIiIiIiIiIiEi2KMEsIiIiInIexpj4c75cxpjEc35ud864TsYYa4xp9Y/5tYwxh875eakxJukf\n685O72tijDlmjAk5Z3wzY8xhY0zwtdiviIiIiEh2KMEsIiIiInIe1tqAv7+AA0CTc9omnDO0IxAF\nPJmFZf/v3HWttU3SnzUbWAL0BzDG5AUGA89aa0/l5L5ERERERHKSZ24HICIiIiLyb2WMKQk8CLQA\nphhjCltrj2VzuReArcaYekBrYJm1dlYOhSoiIiIiclUowSwiIiIikn1PAuHW2h+MMduAdsCX2VnI\nWhtpjHkRmABYoFLOhSkiIiIicnWoRIaIiIiISPY9CUxMfz2RS5fJGGCMiTnnq/c/+lcDwcDP1tqI\nHI5VRERERCTHKcEsIiIiIpINxpj7gNLA5PSmicCtxpgqF5n2grU27zlf7/6jfxgwFmhojLk356MW\nEREREclZKpEhIiIiIpI9HQEDbDTG/LN94+UuZozpChQHGgPhwHBjTFVrbUoOxCoiIiIiclXoBLOI\niIiIyGUyxvgCLYHuQJVzvp4H2hpjLusghzGmKNAX6GatTQaGACeBt3MybhERERGRnKYEs4iIiIjI\n5XsUSATGWmuP/f0FjMT9KcH6F5j3rTEm/pyvdentg4DJ1toVANZaC3QDXjLG3HJ1tyIiIiIikn3G\n/buriIiIiIiIiIiIiMjl0QlmEREREREREREREckWJZhFREREREREREREJFuUYBYRERERERERERGR\nbFGCWURERERERERERESyxTO3A7iUAgUK2FKlSuV2GCIiIiIiIiIiIiI3jHXr1kVaa0MvNe66TzCX\nKlWK8PDw3A5DRERERERERERE5IZhjNmflXEqkSEiIiIiIiIiIiIi2aIEs4iIiIiIiIiIiIhkixLM\nIiIiIiIiIiIiIpItWUowG2P2GWM2G2M2GmPC/9H3qjHGGmMKpP9sjDEDjDG7jDGbjDF3nDO2ozFm\nZ/pXx5zdioiIiIiIiIiIiIhcS5dzyV9ta23kuQ3GmOJAXeDAOc0NgHLpX3cDg4G7jTEhwPtANcAC\n64wxs6y10VcQv4iIiIiIiIiIiIjkkistkdEfeAN3wvhvzYCx1m01kNcYUwSoByy01kalJ5UXAvWv\n8PkiIiIiIiIiIiIikkuymmC2wM/GmHXGmO4AxphmwGFr7R//GFsMOHjOz4fS2y7UnokxprsxJtwY\nEx4REZHFEEVERERERERERORGNumPZXT44UMW7tyY26HcMLJaIuN+a+1hY0xBYKExZjvQC3d5jBxn\nrR0GDAOoVq2avcRwERERERERERERuUG5XC5GrV/IsM3DSHD8BcDGVd8TsuoOXrmrB80q3Z3LEf63\nZSnBbK09nP79hDHmR+BBoDTwhzEGIAxYb4ypDhwGip8zPSy97TBQ6x/tS68sfBEREREREREREckN\nG47sZe3h7eyLOcLhuGNEJB7jVGokCc4oSvhXZmjjdygcmO+qPd/lcvHt6tmM3T6CZI+9GBtMzfxd\n6Xbno3y5aiwbT83hnbVP8fmaW3n+jmdofVvNqxbLjcxYe/EDwsaYPIDDWhuX/noh8KG19qdzxuwD\nqllrI40xjYD/AxrivuRvgLW2evolf+uAO9KnrQfutNZGXez51apVs+Hh4dnbnYiIiIiIiIiIiOS4\nkeE/02/L6xjjOtvoDMCbELxNHuLMdhyuILpUfI2Xajya7ee4kpIwPj6kH3J1t7lcDFg9i7Hbh5Lq\ncQiTFkLdYm14v3ZHAn38zow7EhvFu0u+Y03Uj+BxmjyuCnz64P+oVaZytuO5kRhj1llrq11yXBYS\nzGWAH9N/9AQmWms/+seYfZxNMBvgW9wX+CUAna214enjuuAurQHwkbV21KUCVIJZRERERERERETk\n+pGSlsY9Y5uQZhN4tep7VCxQgooFixHs639mzA9/rqLP6g9I8zxCqKnO4Aa9qRBa9LKekxYdzd6m\nzQisW5fC774DwIq9W3lrWR9izWY80grSpGQHej7YBn8vnwuuczIhjveWjGD5iUl42AAWt5lBfv/A\n7G3+BpJjCebcpgSziIiIiIiIiIjI9eOdRaOYebgfLUq8yXu1219w3OnkZJ6f34810VMw1pumxZ+h\n98OdcDgcWXrOsQ97Ez1xIhhD3nFjeGnvTP48PQdjvXigQFu+qN/joonlfxq1biFfbn6V4l4PMr/d\nN1med6PKaoI5a/81RURERERERERE5IaTvHMnUeMn4Iw/DUBM4mlmHRiJt7MkvWq2uejcPD4+jHy0\nJ98+OB4/ijHrSH8eGtcVl8t10XkASX/9RfTkyQQ1aUJyHj9+e6sLW0/PoJhnDb5v/CODmrx8Wcll\ngM53PsJtAY9xKG0pHy2deFlz5cKUYBYREREREREREZFMEjZsYF/bdhzv04fdDz9M5NBhvDWrP9Yj\nhheqvIynh0eW1qlVpjKrOk7j1jzNOUk4Ezctu+h4ay0nPv0UR0AAXcscYMz9Sdx8KI2+KU+xoP0g\nKoaGZXtPw5v1xNdZhsl7+7P20K5sryNnKcEsIiIiIiIiIiIiGZz+7TcOdH0Kj5B8hA0ehG+V24no\n35/On06g1W+F6FDhnstaz9PDg4EN3wCnP9/9MfqiY+N/+YXTq35ja6MHOez/JzEPtMa7fHlumjwb\nV1KSe9DmaTDrBTix/bLi8PfyYXDdLwF49udXSEhNvqz5kpkSzCIiIiIiIiIiInJG3JJfOPj0M3gX\nK0ap8eMJrF2bEkOHMrBrDXYVMTy+9DC76jzCyREjsVkod/G3fP4B3BpUj5N2A6sP7DjvGFdKCsc/\n+wzvMmX4tOg2TFoIA5u+QeFevUg9coSo0aMh5gDMeh7Wj4FB98CUDnD0jyzHUS3sJlqXfplkj708\nNfOTLM+T81OCWURERERERERERAA4NXcuh154AZ8KFSg5biyeoaEArNi7laWh4UxqX5dSkyfhW6kS\nJ/r2JXbOnMta/+0HugEOPl01/Lz90ePGkbr/AJta1CfBex91irbG38uHPPfcTeAjjxA5dBipk14E\nDHRfBjVfgz1LYWhNmNASDq7NWhy12hLm+SCb4qczat3Cy9qDZKQEs4iIiIiIiIiIiBD9/fccee11\n/KtUocSokXjkzQunT8Kepbyz/DOwXnzxyGv4ValC8eHf4Vm0CLFz513WM24pVJyiHvewK3EJh05F\nZehLi4wkctBgAh58kL78Cs5A3q/d8Ux/wTdeh9QUImauh9q9oGgVeOgdeGmz+/uhtTCiDoxpCofC\nLxnL+Oaf4OkMpf8fH7An6vhl7UPO8sztAERERERERERERCR3RU2cyPEPe5PngQcIG/A1Dj8/d8ec\nF1m/52eiihbiruBWZy7YM8YQVL8BUePG4Tx1Co/g4Cw/6/lqXem1ZiV9lo9iSJNXz7RHfP01ruRk\n/niiHnFH3qNGvo4E+/qf6fcODSLkljRO/uFPPr8a+P3d4ZcXar4Odz8L60bBr1/B8IehUjN46D0o\ncNN548jvH0jvG
"text/plain": [
"<Figure size 1440x720 with 3 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "EzXq3Dq8AjDU",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 237
},
"outputId": "d36c04d0-4f4d-45aa-aeef-b0b683389a04"
},
"source": [
"from pyFTS.benchmarks import Measures\n",
"\n",
"rows = []\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" \n",
" dataset = get_dataset(dataset_name)\n",
" \n",
" for order in [2,3]:\n",
" \n",
" row = [order, dataset_name]\n",
" \n",
" test = dataset[train_split:train_split+200]\n",
"\n",
" model1 = cUtil.load_obj('model2'+dataset_name+str(order))\n",
"\n",
" row.extend(Measures.get_point_statistics(test, model1))\n",
"\n",
" rows.append(row)\n",
"\n",
" \n",
"pd.DataFrame(rows,columns=[\"Order\",\"Dataset\",\"RMSE\",\"SMAPE\",\"Theil's U\"])\n",
" \n",
" \n",
" "
],
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Order</th>\n",
" <th>Dataset</th>\n",
" <th>RMSE</th>\n",
" <th>SMAPE</th>\n",
" <th>Theil's U</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>TAIEX</td>\n",
" <td>105.91</td>\n",
" <td>1.87</td>\n",
" <td>1.61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>TAIEX</td>\n",
" <td>114.56</td>\n",
" <td>2.04</td>\n",
" <td>1.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>SP500</td>\n",
" <td>7.60</td>\n",
" <td>0.57</td>\n",
" <td>1.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>SP500</td>\n",
" <td>5.99</td>\n",
" <td>0.43</td>\n",
" <td>1.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>NASDAQ</td>\n",
" <td>26.90</td>\n",
" <td>1.31</td>\n",
" <td>1.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>3</td>\n",
" <td>NASDAQ</td>\n",
" <td>26.22</td>\n",
" <td>1.27</td>\n",
" <td>1.09</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Order Dataset RMSE SMAPE Theil's U\n",
"0 2 TAIEX 105.91 1.87 1.61\n",
"1 3 TAIEX 114.56 2.04 1.75\n",
"2 2 SP500 7.60 0.57 1.38\n",
"3 3 SP500 5.99 0.43 1.09\n",
"4 2 NASDAQ 26.90 1.31 1.12\n",
"5 3 NASDAQ 26.22 1.27 1.09"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hm04eYYZAjDZ",
"colab_type": "text"
},
"source": [
"## Residual Analysis"
]
},
{
"cell_type": "code",
"metadata": {
"id": "FjEI3ZAfAjDa",
"colab_type": "code",
"colab": {}
},
"source": [
"from pyFTS.benchmarks import ResidualAnalysis as ra\n",
"\n",
"for count,dataset_name in enumerate(dataset_names):\n",
" dataset = get_dataset(dataset_name)\n",
"\n",
" for order in [2,3]:\n",
" \n",
" model1 = cUtil.load_obj('model1'+dataset_name+str(order))\n",
" model2 = cUtil.load_obj('model2'+dataset_name+str(order))\n",
"\n",
" ra.plot_residuals_by_model(dataset, [model1, model2],order=order)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "iy_Rb0xrAjDf",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "A7ZRiD2jAjDl",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}