This commit is contained in:
Aleksey Filippov 2024-11-01 11:04:05 +04:00
commit 1710432d38
29 changed files with 10210 additions and 0 deletions

2
.flake8 Normal file
View File

@ -0,0 +1,2 @@
[flake8]
max-line-length = 120

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
# `text` only accepts set / unset / auto; any other value (such as `crlf`) makes
# git treat the attribute as unspecified, so the previous rule had no effect.
# Auto-detect text files (binary files are left untouched) and check them out with CRLF.
* text=auto eol=crlf

278
.gitignore vendored Normal file
View File

@ -0,0 +1,278 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm+all
# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm+all
### PyCharm+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### PyCharm+all Patch ###
# Ignores the whole .idea folder and all .iml files
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
.idea/*
# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
*.iml
modules.xml
.idea/misc.xml
*.ipr
# Sonarlint plugin
.idea/sonarlint
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
# End of https://www.toptal.com/developers/gitignore/api/python,pycharm+all
# JS
node_modules/
test.csv

Binary file not shown.

Binary file not shown.

Binary file not shown.

18
data/density_test.csv Normal file
View File

@ -0,0 +1,18 @@
T;Al2O3;TiO2;Density
30;0;0;1,05696
55;0;0;1,04158
25;0,05;0;1,08438
30;0,05;0;1,08112
35;0,05;0;1,07781
40;0,05;0;1,07446
60;0,05;0;1,06053
35;0,3;0;1,17459
65;0,3;0;1,14812
45;0;0,05;1,07424
50;0;0,05;1,07075
55;0;0,05;1,06721
20;0;0,3;1,22417
30;0;0,3;1,2131
40;0;0,3;1,20265
60;0;0,3;1,18265
70;0;0,3;1,17261
1 T Al2O3 TiO2 Density
2 30 0 0 1,05696
3 55 0 0 1,04158
4 25 0,05 0 1,08438
5 30 0,05 0 1,08112
6 35 0,05 0 1,07781
7 40 0,05 0 1,07446
8 60 0,05 0 1,06053
9 35 0,3 0 1,17459
10 65 0,3 0 1,14812
11 45 0 0,05 1,07424
12 50 0 0,05 1,07075
13 55 0 0,05 1,06721
14 20 0 0,3 1,22417
15 30 0 0,3 1,2131
16 40 0 0,3 1,20265
17 60 0 0,3 1,18265
18 70 0 0,3 1,17261

39
data/density_train.csv Normal file
View File

@ -0,0 +1,39 @@
T;Al2O3;TiO2;Density
20;0;0;1,0625
25;0;0;1,05979
35;0;0;1,05404
40;0;0;1,05103
45;0;0;1,04794
50;0;0;1,04477
60;0;0;1,03826
65;0;0;1,03484
70;0;0;1,03182
20;0,05;0;1,08755
45;0,05;0;1,07105
50;0,05;0;1,0676
55;0,05;0;1,06409
65;0,05;0;1,05691
70;0,05;0;1,05291
20;0,3;0;1,18861
25;0,3;0;1,18389
30;0,3;0;1,1792
40;0,3;0;1,17017
45;0,3;0;1,16572
50;0,3;0;1,16138
55;0,3;0;1,15668
60;0,3;0;1,15233
70;0,3;0;1,14414
20;0;0,05;1,09098
25;0;0,05;1,08775
30;0;0,05;1,08443
35;0;0,05;1,08108
40;0;0,05;1,07768
60;0;0,05;1,06362
65;0;0,05;1,05999
70;0;0,05;1,05601
25;0;0,3;1,2186
35;0;0,3;1,20776
45;0;0,3;1,19759
50;0;0,3;1,19268
55;0;0,3;1,18746
65;0;0,3;1,178
1 T Al2O3 TiO2 Density
2 20 0 0 1,0625
3 25 0 0 1,05979
4 35 0 0 1,05404
5 40 0 0 1,05103
6 45 0 0 1,04794
7 50 0 0 1,04477
8 60 0 0 1,03826
9 65 0 0 1,03484
10 70 0 0 1,03182
11 20 0,05 0 1,08755
12 45 0,05 0 1,07105
13 50 0,05 0 1,0676
14 55 0,05 0 1,06409
15 65 0,05 0 1,05691
16 70 0,05 0 1,05291
17 20 0,3 0 1,18861
18 25 0,3 0 1,18389
19 30 0,3 0 1,1792
20 40 0,3 0 1,17017
21 45 0,3 0 1,16572
22 50 0,3 0 1,16138
23 55 0,3 0 1,15668
24 60 0,3 0 1,15233
25 70 0,3 0 1,14414
26 20 0 0,05 1,09098
27 25 0 0,05 1,08775
28 30 0 0,05 1,08443
29 35 0 0,05 1,08108
30 40 0 0,05 1,07768
31 60 0 0,05 1,06362
32 65 0 0,05 1,05999
33 70 0 0,05 1,05601
34 25 0 0,3 1,2186
35 35 0 0,3 1,20776
36 45 0 0,3 1,19759
37 50 0 0,3 1,19268
38 55 0 0,3 1,18746
39 65 0 0,3 1,178

BIN
data/dtree.model.sav Normal file

Binary file not shown.

3
data/readme.md Normal file
View File

@ -0,0 +1,3 @@
Dataset on fuzzy logic based-modelling and optimization of thermophysical properties of nanofluid mixture
https://www.sciencedirect.com/science/article/pii/S2352340919309023

18
data/viscosity_test.csv Normal file
View File

@ -0,0 +1,18 @@
T;Al2O3;TiO2;Viscosity
30;0;0;2,716
40;0;0;2,073
60;0;0;1,329
65;0;0;1,211
25;0,05;0;4,12
45;0,05;0;2,217
65;0,05;0;1,315
70;0,05;0;1,105
45;0,3;0;3,111
50;0,3;0;2,735
65;0,3;0;1,936
30;0;0,05;3,587
55;0;0,05;1,953
65;0;0,05;1,443
40;0;0,3;3,99
50;0;0,3;3,189
65;0;0,3;2,287
1 T Al2O3 TiO2 Viscosity
2 30 0 0 2,716
3 40 0 0 2,073
4 60 0 0 1,329
5 65 0 0 1,211
6 25 0,05 0 4,12
7 45 0,05 0 2,217
8 65 0,05 0 1,315
9 70 0,05 0 1,105
10 45 0,3 0 3,111
11 50 0,3 0 2,735
12 65 0,3 0 1,936
13 30 0 0,05 3,587
14 55 0 0,05 1,953
15 65 0 0,05 1,443
16 40 0 0,3 3,99
17 50 0 0,3 3,189
18 65 0 0,3 2,287

39
data/viscosity_train.csv Normal file
View File

@ -0,0 +1,39 @@
T;Al2O3;TiO2;Viscosity
20;0;0;3,707
25;0;0;3,18
35;0;0;2,361
45;0;0;1,832
50;0;0;1,629
55;0;0;1,465
70;0;0;1,194
20;0,05;0;4,66
30;0,05;0;3,38
35;0,05;0;2,874
40;0,05;0;2,489
50;0,05;0;1,897
55;0,05;0;1,709
60;0,05;0;1,47
20;0,3;0;6,67
25;0,3;0;5,594
30;0,3;0;4,731
35;0,3;0;4,118
40;0,3;0;3,565
55;0,3;0;2,426
60;0,3;0;2,16
70;0,3;0;1,728
20;0;0,05;4,885
25;0;0,05;4,236
35;0;0,05;3,121
40;0;0,05;2,655
45;0;0,05;2,402
50;0;0,05;2,109
60;0;0,05;1,662
70;0;0,05;1,289
20;0;0,3;7,132
25;0;0,3;5,865
30;0;0,3;4,944
35;0;0,3;4,354
45;0;0,3;3,561
55;0;0,3;2,838
60;0;0,3;2,538
70;0;0,3;1,9097
1 T Al2O3 TiO2 Viscosity
2 20 0 0 3,707
3 25 0 0 3,18
4 35 0 0 2,361
5 45 0 0 1,832
6 50 0 0 1,629
7 55 0 0 1,465
8 70 0 0 1,194
9 20 0,05 0 4,66
10 30 0,05 0 3,38
11 35 0,05 0 2,874
12 40 0,05 0 2,489
13 50 0,05 0 1,897
14 55 0,05 0 1,709
15 60 0,05 0 1,47
16 20 0,3 0 6,67
17 25 0,3 0 5,594
18 30 0,3 0 4,731
19 35 0,3 0 4,118
20 40 0,3 0 3,565
21 55 0,3 0 2,426
22 60 0,3 0 2,16
23 70 0,3 0 1,728
24 20 0 0,05 4,885
25 25 0 0,05 4,236
26 35 0 0,05 3,121
27 40 0 0,05 2,655
28 45 0 0,05 2,402
29 50 0 0,05 2,109
30 60 0 0,05 1,662
31 70 0 0,05 1,289
32 20 0 0,3 7,132
33 25 0 0,3 5,865
34 30 0 0,3 4,944
35 35 0 0,3 4,354
36 45 0 0,3 3,561
37 55 0 0,3 2,838
38 60 0 0,3 2,538
39 70 0 0,3 1,9097

BIN
data/vtree.model.sav Normal file

Binary file not shown.

956
density_fuzzy.ipynb Normal file

File diff suppressed because one or more lines are too long

795
density_regression.ipynb Normal file
View File

@ -0,0 +1,795 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Density</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.06250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05979</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05404</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 20 0.0 0.0 1.06250\n",
"1 25 0.0 0.0 1.05979\n",
"2 35 0.0 0.0 1.05404"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Density</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>1.05696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>55</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>1.04158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" <td>1.08438</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 30 0.00 0.0 1.05696\n",
"1 55 0.00 0.0 1.04158\n",
"2 25 0.05 0.0 1.08438"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"density_train = pd.read_csv(\"data/density_train.csv\", sep=\";\", decimal=\",\")\n",
"density_test = pd.read_csv(\"data/density_test.csv\", sep=\";\", decimal=\",\")\n",
"\n",
"display(density_train.head(3))\n",
"display(density_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 20 0.0 0.0\n",
"1 25 0.0 0.0\n",
"2 35 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 1.06250\n",
"1 1.05979\n",
"2 1.05404\n",
"Name: Density, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>55</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 30 0.00 0.0\n",
"1 55 0.00 0.0\n",
"2 25 0.05 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 1.05696\n",
"1 1.04158\n",
"2 1.08438\n",
"Name: Density, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"density_y_train = density_train[\"Density\"]\n",
"density_train = density_train.drop([\"Density\"], axis=1)\n",
"\n",
"display(density_train.head(3))\n",
"display(density_y_train.head(3))\n",
"\n",
"density_y_test = density_test[\"Density\"]\n",
"density_test = density_test.drop([\"Density\"], axis=1)\n",
"\n",
"display(density_test.head(3))\n",
"display(density_y_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"import math\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" density_train.values, density_y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(density_train.values)\n",
" y_test_pred = fitted_model.predict(density_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"train_preds\"] = y_train_pred\n",
" models[model_name][\"preds\"] = y_test_pred\n",
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
" metrics.mean_squared_error(density_y_train, y_train_pred)\n",
" )\n",
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(density_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(density_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(density_y_test, y_test_pred)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_472ca_row0_col0, #T_472ca_row0_col1, #T_472ca_row4_col0 {\n",
" background-color: #26818e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row0_col2, #T_472ca_row6_col3 {\n",
" background-color: #4e02a2;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row0_col3, #T_472ca_row1_col3, #T_472ca_row2_col3, #T_472ca_row6_col2 {\n",
" background-color: #da5a6a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row1_col0, #T_472ca_row1_col1 {\n",
" background-color: #26828e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row1_col2 {\n",
" background-color: #5c01a6;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row2_col0 {\n",
" background-color: #25848e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row2_col1 {\n",
" background-color: #24868e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row2_col2 {\n",
" background-color: #6a00a8;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row3_col0 {\n",
" background-color: #25858e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row3_col1 {\n",
" background-color: #238a8d;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row3_col2 {\n",
" background-color: #7a02a8;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row3_col3, #T_472ca_row4_col3 {\n",
" background-color: #d9586a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row4_col1 {\n",
" background-color: #228c8d;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row4_col2 {\n",
" background-color: #8104a7;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row5_col0, #T_472ca_row5_col1 {\n",
" background-color: #1e9c89;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row5_col2 {\n",
" background-color: #a01a9c;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row5_col3 {\n",
" background-color: #d35171;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_472ca_row6_col0, #T_472ca_row6_col1 {\n",
" background-color: #a8db34;\n",
" color: #000000;\n",
"}\n",
"</style>\n",
"<table id=\"T_472ca\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_472ca_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
" <th id=\"T_472ca_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
" <th id=\"T_472ca_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
" <th id=\"T_472ca_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row0\" class=\"row_heading level0 row0\" >linear_poly</th>\n",
" <td id=\"T_472ca_row0_col0\" class=\"data row0 col0\" >0.000319</td>\n",
" <td id=\"T_472ca_row0_col1\" class=\"data row0 col1\" >0.000362</td>\n",
" <td id=\"T_472ca_row0_col2\" class=\"data row0 col2\" >0.016643</td>\n",
" <td id=\"T_472ca_row0_col3\" class=\"data row0 col3\" >0.999965</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row1\" class=\"row_heading level0 row1\" >linear_interact</th>\n",
" <td id=\"T_472ca_row1_col0\" class=\"data row1 col0\" >0.001131</td>\n",
" <td id=\"T_472ca_row1_col1\" class=\"data row1 col1\" >0.001491</td>\n",
" <td id=\"T_472ca_row1_col2\" class=\"data row1 col2\" >0.033198</td>\n",
" <td id=\"T_472ca_row1_col3\" class=\"data row1 col3\" >0.999413</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row2\" class=\"row_heading level0 row2\" >linear</th>\n",
" <td id=\"T_472ca_row2_col0\" class=\"data row2 col0\" >0.002464</td>\n",
" <td id=\"T_472ca_row2_col1\" class=\"data row2 col1\" >0.003261</td>\n",
" <td id=\"T_472ca_row2_col2\" class=\"data row2 col2\" >0.049891</td>\n",
" <td id=\"T_472ca_row2_col3\" class=\"data row2 col3\" >0.997191</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row3\" class=\"row_heading level0 row3\" >random_forest</th>\n",
" <td id=\"T_472ca_row3_col0\" class=\"data row3 col0\" >0.002716</td>\n",
" <td id=\"T_472ca_row3_col1\" class=\"data row3 col1\" >0.005575</td>\n",
" <td id=\"T_472ca_row3_col2\" class=\"data row3 col2\" >0.067298</td>\n",
" <td id=\"T_472ca_row3_col3\" class=\"data row3 col3\" >0.991788</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row4\" class=\"row_heading level0 row4\" >decision_tree</th>\n",
" <td id=\"T_472ca_row4_col0\" class=\"data row4 col0\" >0.000346</td>\n",
" <td id=\"T_472ca_row4_col1\" class=\"data row4 col1\" >0.006433</td>\n",
" <td id=\"T_472ca_row4_col2\" class=\"data row4 col2\" >0.076138</td>\n",
" <td id=\"T_472ca_row4_col3\" class=\"data row4 col3\" >0.989067</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row5\" class=\"row_heading level0 row5\" >ridge</th>\n",
" <td id=\"T_472ca_row5_col0\" class=\"data row5 col0\" >0.013989</td>\n",
" <td id=\"T_472ca_row5_col1\" class=\"data row5 col1\" >0.015356</td>\n",
" <td id=\"T_472ca_row5_col2\" class=\"data row5 col2\" >0.116380</td>\n",
" <td id=\"T_472ca_row5_col3\" class=\"data row5 col3\" >0.937703</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_472ca_level0_row6\" class=\"row_heading level0 row6\" >knn</th>\n",
" <td id=\"T_472ca_row6_col0\" class=\"data row6 col0\" >0.053108</td>\n",
" <td id=\"T_472ca_row6_col1\" class=\"data row6 col1\" >0.056776</td>\n",
" <td id=\"T_472ca_row6_col2\" class=\"data row6 col2\" >0.217611</td>\n",
" <td id=\"T_472ca_row6_col3\" class=\"data row6 col3\" >0.148414</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x21f02523b00>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\user\\Projects\\python\\fuzzy\\.venv\\Lib\\site-packages\\numpy\\ma\\core.py:2881: RuntimeWarning: invalid value encountered in cast\n",
" _data = np.array(data, dtype=dtype, copy=copy,\n"
]
},
{
"data": {
"text/plain": [
"{'criterion': 'absolute_error', 'max_depth': 7}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sklearn import model_selection\n",
"\n",
"parameters = {\n",
" \"criterion\": [\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"],\n",
" \"max_depth\": np.arange(1, 21).tolist()[0::2],\n",
" # \"min_samples_split\": np.arange(2, 11).tolist()[0::2],\n",
"}\n",
"\n",
"grid = model_selection.GridSearchCV(\n",
" tree.DecisionTreeRegressor(random_state=random_state), parameters, n_jobs=-1\n",
")\n",
"\n",
"grid.fit(density_train, density_y_train)\n",
"grid.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'RMSE_test': 0.006433043831746894,\n",
" 'RMAE_test': 0.07613841884048704,\n",
" 'R2_test': 0.989067217447684}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'RMSE_test': 0.005040505635233745,\n",
" 'RMAE_test': 0.06943469212568175,\n",
" 'R2_test': 0.9932880934907101}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = grid.best_estimator_\n",
"y_pred = model.predict(density_test)\n",
"old_metrics = {\n",
" \"RMSE_test\": models[\"decision_tree\"][\"RMSE_test\"],\n",
" \"RMAE_test\": models[\"decision_tree\"][\"RMAE_test\"],\n",
" \"R2_test\": models[\"decision_tree\"][\"R2_test\"],\n",
"}\n",
"new_metrics = {}\n",
"new_metrics[\"RMSE_test\"] = math.sqrt(metrics.mean_squared_error(density_y_test, y_pred))\n",
"new_metrics[\"RMAE_test\"] = math.sqrt(metrics.mean_absolute_error(density_y_test, y_pred))\n",
"new_metrics[\"R2_test\"] = metrics.r2_score(density_y_test, y_pred)\n",
"\n",
"display(old_metrics)\n",
"display(new_metrics)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- Al2O3 <= 0.18\n",
"| |--- TiO2 <= 0.18\n",
"| | |--- T <= 32.50\n",
"| | | |--- TiO2 <= 0.03\n",
"| | | | |--- Al2O3 <= 0.03\n",
"| | | | | |--- T <= 22.50\n",
"| | | | | | |--- value: [1.06]\n",
"| | | | | |--- T > 22.50\n",
"| | | | | | |--- value: [1.06]\n",
"| | | | |--- Al2O3 > 0.03\n",
"| | | | | |--- value: [1.09]\n",
"| | | |--- TiO2 > 0.03\n",
"| | | | |--- T <= 27.50\n",
"| | | | | |--- T <= 22.50\n",
"| | | | | | |--- value: [1.09]\n",
"| | | | | |--- T > 22.50\n",
"| | | | | | |--- value: [1.09]\n",
"| | | | |--- T > 27.50\n",
"| | | | | |--- value: [1.08]\n",
"| | |--- T > 32.50\n",
"| | | |--- TiO2 <= 0.03\n",
"| | | | |--- Al2O3 <= 0.03\n",
"| | | | | |--- T <= 55.00\n",
"| | | | | | |--- T <= 47.50\n",
"| | | | | | | |--- value: [1.05]\n",
"| | | | | | |--- T > 47.50\n",
"| | | | | | | |--- value: [1.04]\n",
"| | | | | |--- T > 55.00\n",
"| | | | | | |--- T <= 62.50\n",
"| | | | | | | |--- value: [1.04]\n",
"| | | | | | |--- T > 62.50\n",
"| | | | | | | |--- value: [1.03]\n",
"| | | | |--- Al2O3 > 0.03\n",
"| | | | | |--- T <= 60.00\n",
"| | | | | | |--- T <= 52.50\n",
"| | | | | | | |--- value: [1.07]\n",
"| | | | | | |--- T > 52.50\n",
"| | | | | | | |--- value: [1.06]\n",
"| | | | | |--- T > 60.00\n",
"| | | | | | |--- T <= 67.50\n",
"| | | | | | | |--- value: [1.06]\n",
"| | | | | | |--- T > 67.50\n",
"| | | | | | | |--- value: [1.05]\n",
"| | | |--- TiO2 > 0.03\n",
"| | | | |--- T <= 50.00\n",
"| | | | | |--- T <= 37.50\n",
"| | | | | | |--- value: [1.08]\n",
"| | | | | |--- T > 37.50\n",
"| | | | | | |--- value: [1.08]\n",
"| | | | |--- T > 50.00\n",
"| | | | | |--- T <= 67.50\n",
"| | | | | | |--- T <= 62.50\n",
"| | | | | | | |--- value: [1.06]\n",
"| | | | | | |--- T > 62.50\n",
"| | | | | | | |--- value: [1.06]\n",
"| | | | | |--- T > 67.50\n",
"| | | | | | |--- value: [1.06]\n",
"| |--- TiO2 > 0.18\n",
"| | |--- T <= 40.00\n",
"| | | |--- T <= 30.00\n",
"| | | | |--- value: [1.22]\n",
"| | | |--- T > 30.00\n",
"| | | | |--- value: [1.21]\n",
"| | |--- T > 40.00\n",
"| | | |--- T <= 60.00\n",
"| | | | |--- T <= 52.50\n",
"| | | | | |--- T <= 47.50\n",
"| | | | | | |--- value: [1.20]\n",
"| | | | | |--- T > 47.50\n",
"| | | | | | |--- value: [1.19]\n",
"| | | | |--- T > 52.50\n",
"| | | | | |--- value: [1.19]\n",
"| | | |--- T > 60.00\n",
"| | | | |--- value: [1.18]\n",
"|--- Al2O3 > 0.18\n",
"| |--- T <= 35.00\n",
"| | |--- T <= 22.50\n",
"| | | |--- value: [1.19]\n",
"| | |--- T > 22.50\n",
"| | | |--- T <= 27.50\n",
"| | | | |--- value: [1.18]\n",
"| | | |--- T > 27.50\n",
"| | | | |--- value: [1.18]\n",
"| |--- T > 35.00\n",
"| | |--- T <= 52.50\n",
"| | | |--- T <= 42.50\n",
"| | | | |--- value: [1.17]\n",
"| | | |--- T > 42.50\n",
"| | | | |--- T <= 47.50\n",
"| | | | | |--- value: [1.17]\n",
"| | | | |--- T > 47.50\n",
"| | | | | |--- value: [1.16]\n",
"| | |--- T > 52.50\n",
"| | | |--- T <= 65.00\n",
"| | | | |--- T <= 57.50\n",
"| | | | | |--- value: [1.16]\n",
"| | | | |--- T > 57.50\n",
"| | | | | |--- value: [1.15]\n",
"| | | |--- T > 65.00\n",
"| | | | |--- value: [1.14]\n",
"\n"
]
}
],
"source": [
"rules = tree.export_text(\n",
" model,\n",
" feature_names=density_train.columns.values.tolist()\n",
")\n",
"print(rules)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"pickle.dump(model, open(\"data/dtree.model.sav\", \"wb\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

1699
density_tree.ipynb Normal file

File diff suppressed because one or more lines are too long

BIN
docs/path1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
docs/path2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

BIN
docs/path3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

BIN
docs/path4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

350
fluid.ipynb Normal file

File diff suppressed because one or more lines are too long

2955
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

2
poetry.toml Normal file
View File

@ -0,0 +1,2 @@
[virtualenvs]
in-project = true

21
pyproject.toml Normal file
View File

@ -0,0 +1,21 @@
[tool.poetry]
name = "fuzzy"
version = "1.0.0"
description = "Fuzzy Controller"
authors = ["Aleksey Filippov <al.filippov@ulstu.ru>"]
readme = "readme.md"
package-mode = false
[tool.poetry.dependencies]
python = "^3.12"
jupyter = "^1.1.1"
numpy = "^2.1.0"
pandas = "^2.2.2"
matplotlib = "^3.9.2"
scikit-learn = "^1.5.2"
scikit-fuzzy = "^0.5.0"
networkx = "^3.4.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

55
readme.md Normal file
View File

@ -0,0 +1,55 @@
## Окружение и примеры для выполнения лабораторных работ по дисциплине "Методы ИИ"
### Python
Используется Python версии 3.12
Установщик https://www.python.org/ftp/python/3.12.5/python-3.12.5-amd64.exe
### Poetry
Для создания и настройки окружения проекта необходимо установить poetry
**Для Windows (Powershell)**
```
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
```
**Linux, macOS, Windows (WSL)**
```
curl -sSL https://install.python-poetry.org | python3 -
```
**Добавление poetry в PATH**
1. Открыть настройки переменных среды \
\
<img src="docs/path1.png" width="300"> \
\
<img src="docs/path2.png" width="400"> \
2. Изменить переменную Path текущего пользователя \
\
<img src="docs/path3.png" width="500"> \
3. Добавить путь `%APPDATA%\Python\Scripts` до исполняемого файла poetry \
\
<img src="docs/path4.png" width="400">
### Создание окружения
```
poetry install
```
### Запуск тестового сервиса
Запустить тестовый сервис можно с помощью VSCode (см. launch.json в каталоге .vscode).
Также запустить тестовый сервис можно с помощью командной строки:
1. Активация виртуального окружения -- `poetry shell`
2. Запуск сервиса -- `python run.py`
Для выхода из виртуального окружения используется команда `exit`

36
src/cluster_helper.py Normal file
View File

@ -0,0 +1,36 @@
from typing import Dict
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame
from sklearn import cluster, metrics
def get_best_clusters_num(
    X: DataFrame, random_state: int, max_clusters: int = 10
) -> Dict[int, float]:
    """Compute the silhouette score of a K-Means clustering for each cluster count.

    Args:
        X: Samples to cluster; passed unchanged to ``KMeans.fit_predict`` and
            ``silhouette_score``.
        random_state: Seed forwarded to every ``KMeans`` instance.
        max_clusters: Largest cluster count to try (inclusive). Counting starts at 2.

    Returns:
        Mapping ``cluster count -> silhouette score`` for every count in
        ``2..max_clusters``, in increasing order of the count.
    """

    def _silhouette_for(n_clusters: int) -> float:
        # Fit a fresh model per candidate so runs do not influence each other.
        model = cluster.KMeans(n_clusters=n_clusters, random_state=random_state)
        assigned = model.fit_predict(X)
        return float(metrics.silhouette_score(X, assigned))

    return {
        n_clusters: _silhouette_for(n_clusters)
        for n_clusters in range(2, max_clusters + 1)
    }
def draw_best_clusters_plot(clusters_score: Dict[int, float]):
    """Plot silhouette scores against the number of clusters and show the figure.

    Args:
        clusters_score: Mapping ``cluster count -> silhouette score``; keys go on
            the x axis and values on the y axis, in the mapping's iteration order.
    """
    counts = list(clusters_score.keys())
    scores = list(clusters_score.values())

    plt.figure(figsize=(4, 4))
    # "bo-": blue circle markers joined by a solid line.
    plt.plot(counts, scores, "bo-")
    plt.title("The Silhouette score")
    plt.xlabel("Clusters count", fontsize=8)
    plt.ylabel("Silhouette score", fontsize=8)
    plt.show()
def print_cluster_result(X: DataFrame, clusters_num: int, labels: np.ndarray):
    """Print, cluster by cluster, the index labels of the rows assigned to it.

    Args:
        X: Frame whose index values are printed (one per line, ``;``-separated).
        clusters_num: Number of clusters; ids ``0..clusters_num - 1`` are reported.
        labels: Cluster id per row position of ``X``.
    """
    for cluster_id in range(clusters_num):
        # Row positions whose label equals the current cluster id.
        members = np.nonzero(labels == cluster_id)[0]
        print(f"Кластер {cluster_id + 1} ({len(members)}):")
        print(";\n".join(str(X.index[position]) for position in members))
        print("--------")

360
src/rules.py Normal file
View File

@ -0,0 +1,360 @@
import enum
import sys
from functools import reduce
from operator import and_
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
from skfuzzy.control.fuzzyvariable import FuzzyVariable
from skfuzzy.control.rule import Rule as FuzzyRule
from skfuzzy.control.term import Term
from sklearn.tree._tree import TREE_UNDEFINED # type: ignore
class ComparisonType(enum.Enum):
    """Comparison operator of a rule atom; each member's value is its printed symbol."""

    # variable <= threshold
    LESS = "<="
    # variable > threshold
    GREATER = ">"
    # variable = value
    EQUALS = "="
class RuleAtom:
    """One condition of a rule antecedent: ``<variable> <comparison> <value>``."""

    def __init__(self, variable: str, type: ComparisonType, value: float) -> None:
        # ``type`` shadows the builtin, but the name is part of the constructor's
        # keyword interface and is therefore kept.
        self._variable = variable
        self._type = type
        self._value = value

    def get_varaible(self) -> str:
        """Return the variable name (misspelled name kept for existing callers)."""
        return self._variable

    def get_variable(self) -> str:
        """Return the variable name; correctly spelled alias of ``get_varaible``."""
        return self._variable

    def get_type(self) -> ComparisonType:
        """Return the comparison operator of this atom."""
        return self._type

    def get_value(self) -> float:
        """Return the value the variable is compared against."""
        return self._value

    def __repr__(self) -> str:
        # The value is rounded to 3 decimals for display only; the stored value
        # is left untouched.
        return f"({self._variable} {self._type.value} {np.round(self._value, 3)})"

    def __eq__(self, other: object) -> bool:
        """Atoms are equal when variable, comparison type and exact value all match."""
        if self is other:
            return True
        if not isinstance(other, RuleAtom):
            return False
        return (
            self._variable == other._variable
            and self._type == other._type
            and self._value == other._value
        )

    def __hash__(self) -> int:
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable; hash the same fields that __eq__ compares.
        return hash((self._variable, self._type, self._value))
class Rule:
    """A crisp rule: a conjunction of atoms (antecedent) and a numeric consequent."""

    def __init__(self, antecedent: List[RuleAtom], consequent: float) -> None:
        # The list is stored as given (not copied).
        self._antecedent = antecedent
        self._consequent = consequent

    def get_antecedent(self) -> List[RuleAtom]:
        """Return the list of atoms held by this rule."""
        return self._antecedent

    def set_antecedent(self, antecedent: List[RuleAtom]):
        """Replace the antecedent with a fresh copy of ``antecedent``."""
        self._antecedent = list(antecedent)

    def get_consequent(self) -> float:
        """Return the rule's output value."""
        return self._consequent

    def set_consequent(self, value: float):
        """Overwrite the rule's output value."""
        self._consequent = value

    def __repr__(self) -> str:
        # The consequent is rounded to 3 decimals for display only.
        conditions = " and ".join(str(atom) for atom in self._antecedent)
        return f"if {conditions} -> {np.round(self._consequent, 3)}"
# https://mljar.com/blog/extract-rules-decision-tree/
def get_rules(tree, feature_names) -> List[Rule]:
    """Extract one rule per leaf of a decision tree, ordered by consequent value.

    Args:
        tree: Object exposing ``tree_`` with ``feature``, ``threshold``,
            ``children_left``, ``children_right`` and ``value`` arrays
            (presumably a fitted scikit-learn decision tree regressor).
        feature_names: Names indexed by the feature ids stored in ``tree_.feature``.

    Returns:
        Rules whose antecedent is the list of split conditions on the path from
        the root to a leaf and whose consequent is ``tree_.value[leaf][0][0]``,
        sorted by consequent in ascending order.
    """
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != TREE_UNDEFINED else "undefined!" for i in tree_.feature
    ]
    rules: List[Rule] = []

    def recurse(node, antecedent):
        if tree_.feature[node] != TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree_.threshold[node]
            # Each branch gets its own copy of the path so siblings do not share state.
            left_path = list(antecedent)
            left_path.append(RuleAtom(name, ComparisonType.LESS, threshold))
            recurse(tree_.children_left[node], left_path)
            right_path = list(antecedent)
            right_path.append(RuleAtom(name, ComparisonType.GREATER, threshold))
            recurse(tree_.children_right[node], right_path)
        else:
            # Leaf: the accumulated path is the antecedent.
            rules.append(Rule(antecedent, tree_.value[node][0][0]))

    recurse(0, [])

    # Sort by values. argpartition(values, 1) only guarantees the position of the
    # element at index 1 and raises when there is a single rule; argsort orders
    # all rules. The stable kind keeps traversal order for equal consequents.
    values = [rule.get_consequent() for rule in rules]
    sorted_index = np.argsort(values, kind="stable")
    return [rules[i] for i in sorted_index]
def normalise_rules(rules: List[Rule]) -> List[Rule]:
    """Collapse repeated conditions on the same variable inside every rule.

    For each variable at most one atom per comparison type is kept:

    * ``<=`` atoms keep the largest threshold  -- max(<=)
    * ``>``  atoms keep the smallest threshold -- min(>)
    * ``=``  atoms keep the value of the last such atom

    Example (as computed by this function):
        from
            if (Al2O3 <= 0.175) and (TiO2 <= 0.175) and (T > 32.5) and (TiO2 <= 0.025)
            and (Al2O3 <= 0.025) and (T > 55.0) and (T > 62.5)
        to
            if (Al2O3 <= 0.175) and (TiO2 <= 0.175) and (T > 32.5)

    The rules are modified in place (their antecedent is replaced) and the same
    list is returned. Atoms are emitted in order of first appearance of the
    variable, then of the comparison type.
    """
    for rule in rules:
        # variable name -> comparison type -> retained value
        bounds: Dict[str, Dict[ComparisonType, float]] = {}
        for atom in rule.get_antecedent():
            per_variable = bounds.setdefault(atom.get_varaible(), {})
            kind = atom.get_type()
            value = atom.get_value()
            previous = per_variable.get(kind)
            if previous is not None:
                if kind == ComparisonType.GREATER:
                    value = min(previous, value)
                elif kind == ComparisonType.LESS:
                    value = max(previous, value)
            # EQUALS atoms fall through with their own value instead of being
            # overwritten by 0.
            per_variable[kind] = value
        rule.set_antecedent(
            [
                RuleAtom(variable, kind, value)
                for variable, by_kind in bounds.items()
                for kind, value in by_kind.items()
            ]
        )
    return rules
def _is_same_rules(rule1: Rule, rule2: Rule) -> bool:
    """Tell whether two rules have equivalent antecedents.

    The antecedents must have the same length and every atom of ``rule1`` must
    occur (by ``==``) in ``rule2``; atom order is ignored. Consequents are not
    compared.
    """
    left = rule1.get_antecedent()
    right = rule2.get_antecedent()
    if len(left) != len(right):
        return False
    return all(atom in right for atom in left)
def _get_rules_accum(rules: List[Rule]) -> Dict[str, Dict[str, float]]:
    """Accumulate consequents of rules sharing the same printed antecedent.

    Returns:
        Mapping keyed by ``str(rule.get_antecedent())``. Each entry holds ``"V"``
        (sum of the consequents) and ``"C"`` (number of rules with that key).
    """
    totals: Dict[str, Dict[str, float]] = {}
    for rule in rules:
        signature = str(rule.get_antecedent())
        bucket = totals.setdefault(signature, {})
        bucket["V"] = bucket.get("V", 0) + rule.get_consequent()
        bucket["C"] = bucket.get("C", 0) + 1
    return totals
def _recalculate_consequents(
    accum_dict: Dict[str, Dict[str, float]], rules: List[Rule]
) -> List[Rule]:
    """Set each rule's consequent to the mean accumulated for its antecedent.

    Args:
        accum_dict: Mapping keyed by the printed antecedent with ``"V"`` (sum) and
            ``"C"`` (count) entries, in the shape built by ``_get_rules_accum``.
        rules: Rules to update in place; every rule's key must exist in
            ``accum_dict``.

    Returns:
        The same ``rules`` list. Rules whose count is 1 are left untouched.
    """
    for rule in rules:
        entry = accum_dict[str(rule.get_antecedent())]
        total: float = entry["V"]
        occurrences: int = int(entry["C"])
        if occurrences != 1:
            rule.set_consequent(total / occurrences)
    return rules
def delete_same_rules(rules: List[Rule]) -> List[Rule]:
    """Drop duplicate rules and average the consequents of the merged ones.

    A rule is dropped when some later rule in the list has the same antecedent
    according to ``_is_same_rules``, so the last occurrence of each group is
    kept. The sums and counts are gathered over the full input before anything
    is removed, then applied to the survivors.
    """
    accum_dict: Dict[str, Dict[str, float]] = _get_rules_accum(rules)
    survivors = [
        rule
        for position, rule in enumerate(rules)
        if not any(_is_same_rules(rule, later) for later in rules[position + 1 :])
    ]
    return _recalculate_consequents(accum_dict, survivors)
def get_features(rules: List[Rule], exclude: List[str] | None = None) -> List[str]:
    """Collect the distinct printed atoms used across all rules.

    Args:
        rules: Rules whose antecedent atoms are inspected.
        exclude: Optional variable names; atoms on these variables are skipped.

    Returns:
        Sorted list of unique ``str(atom)`` values.
    """
    distinct = {
        str(atom)
        for rule in rules
        for atom in rule.get_antecedent()
        if exclude is None or atom.get_varaible() not in exclude
    }
    return sorted(distinct)
def vectorize_rules(rules: List[Rule], features: List[str]) -> pd.DataFrame:
    """Encode rules as rows of a frame with one indicator column per feature.

    Args:
        rules: Rules to encode, one row each.
        features: Printed atoms (``str(atom)``) that become the indicator columns.

    Returns:
        Frame indexed by ``str(rule)`` with the feature columns followed by a
        ``"consequent"`` column. Every row is assembled with ``np.append``
        starting from a string, so the stored cell values are strings.
    """
    header: List[str] = ["rule", *features, "consequent"]
    frame = pd.DataFrame(columns=header)
    for rule in rules:
        present = [str(atom) for atom in rule.get_antecedent()]
        flags = np.isin(list(features), present).astype(int)
        # Starting from a list of str makes np.append coerce the whole row to strings.
        row = np.append(np.append([str(rule)], flags), rule.get_consequent())
        frame.loc[len(frame)] = pd.Series(data=row, index=frame.columns)
    return frame.set_index("rule")
def _get_clustered_rules(
    rules: List[Rule], clusters_num: int, labels: np.ndarray
) -> List[List[Rule]]:
    """Group rules by cluster label.

    Returns:
        A list with ``clusters_num`` entries; entry ``k`` holds the rules whose
        position in ``rules`` carries label ``k`` in ``labels``, in original order.
    """
    return [
        [rules[position] for position in np.where(labels == cluster_id)[0]]
        for cluster_id in range(clusters_num)
    ]
def _get_variables_minmax(X: pd.DataFrame) -> Dict[str, Tuple[float, float]]:
itervals: Dict[str, Tuple[float, float]] = {}
for column in X.columns:
itervals[column] = (X[column].min(), X[column].max())
return itervals
def _get_varibles_interval(
    antecedent: List[RuleAtom],
) -> Dict[str, Tuple[float | None, float | None]]:
    """Turn a list of atoms into a ``(lower, upper)`` pair per variable.

    A ``>`` atom sets the lower bound and a ``<=`` atom sets the upper bound;
    a later atom of the same kind overwrites an earlier one. A bound that is
    never set stays ``None``. Atoms of any other type only register the variable.
    """
    intervals: Dict[str, Tuple[float | None, float | None]] = {}
    for atom in antecedent:
        name = atom.get_varaible()
        lower, upper = intervals.get(name, (None, None))
        kind = atom.get_type()
        if kind == ComparisonType.GREATER:
            lower = atom.get_value()
        elif kind == ComparisonType.LESS:
            upper = atom.get_value()
        intervals[name] = (lower, upper)
    return intervals
def simplify_and_group_rules(
    X: pd.DataFrame, rules: List[Rule], clusters_num: int, clusters_labels: np.ndarray
):
    """Replace interval conditions by single points and group rules by cluster.

    For every variable of a rule the ``(lower, upper)`` interval is reduced to
    one value, stored as an ``EQUALS`` atom:

    * both bounds known -> their midpoint
    * only the upper bound known -> the column minimum of ``X``
    * only the lower bound known -> the column maximum of ``X``
    * no bound known -> 0

    Returns:
        One list of new ``Rule`` objects per cluster id ``0..clusters_num - 1``;
        consequents are carried over unchanged.
    """
    minmax = _get_variables_minmax(X)

    def _point(name, lower, upper) -> float:
        if lower is not None and upper is not None:
            return (lower + upper) / 2
        if upper is not None:
            return minmax[name][0]
        if lower is not None:
            return minmax[name][1]
        return 0

    grouped: List[List[Rule]] = []
    for members in _get_clustered_rules(rules, clusters_num, clusters_labels):
        simplified: List[Rule] = []
        for rule in members:
            bounds = _get_varibles_interval(rule.get_antecedent())
            atoms = [
                RuleAtom(name, ComparisonType.EQUALS, _point(name, lower, upper))
                for name, (lower, upper) in bounds.items()
            ]
            simplified.append(Rule(atoms, rule.get_consequent()))
        grouped.append(simplified)
    return grouped
def _get_fuzzy_rule_atom(
    fuzzy_variable: FuzzyVariable, value: float
) -> Tuple[Term, float]:
    """Pick the term of ``fuzzy_variable`` with the highest membership at ``value``.

    Membership is obtained by linear interpolation (``np.interp``) of each
    term's ``mf`` over the variable's ``universe``.

    Returns:
        ``(term, membership)`` for the best term; on ties the term iterated
        first wins.
    """
    memberships = {
        label: np.interp(value, fuzzy_variable.universe, fuzzy_variable[label].mf)
        for label in fuzzy_variable.terms
    }
    ranked = list(memberships.items())
    # list.sort is stable, so equal memberships keep their iteration order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    best_label, best_membership = ranked[0]
    return (fuzzy_variable[best_label], best_membership)
def _get_fuzzy_rules(
    rules: List[Rule], fuzzy_variables: Dict[str, FuzzyVariable]
) -> List[Tuple[List[RuleAtom], Term, float]]:
    """Map crisp rules onto fuzzy terms.

    Args:
        rules: Rules whose atom values and consequents are fuzzified.
        fuzzy_variables: Fuzzy variables keyed by variable name; the output
            variable is looked up under the key ``"consequent"``.

    Returns:
        One ``(antecedent terms, consequent term, weight)`` tuple per rule, where
        the weight is the sum of the membership degrees of the antecedent terms.
        NOTE(review): the annotation says ``List[RuleAtom]`` for the first item,
        but the elements are the first items returned by ``_get_fuzzy_rule_atom``
        (annotated there as ``Term``) -- confirm and adjust.
    """

    def _convert(rule: Rule):
        matched = [
            _get_fuzzy_rule_atom(fuzzy_variables[atom.get_varaible()], atom.get_value())
            for atom in rule.get_antecedent()
        ]
        target = _get_fuzzy_rule_atom(
            fuzzy_variables["consequent"], rule.get_consequent()
        )[0]
        terms = [term for term, _ in matched]
        weight = sum([membership for _, membership in matched])
        return (terms, target, weight)

    return [_convert(rule) for rule in rules]
def _delete_same_fuzzy_rules(
    rules_cluster: List[Tuple[List[RuleAtom], Term, float]]
) -> List[Tuple[List[RuleAtom], Term, float]]:
    """Remove duplicate and conflicting fuzzy rules within one cluster.

    Each rule is compared with the rules after it; antecedents and consequents
    are compared through their string forms. At the first later rule with the
    same antecedent:

    * same consequent -> the earlier rule is dropped;
    * different consequent -> the rule with the higher weight is dropped (the
      later one on a tie).

    Weights (item 2 of each tuple) are compared numerically.

    NOTE(review): dropping the *higher*-weight rule follows the original comment
    and code; confirm this is the intended conflict policy.

    Returns:
        The surviving rules in their original order.
    """
    dropped: set = set()
    for first_index, first in enumerate(rules_cluster):
        for second_index in range(first_index + 1, len(rules_cluster)):
            second = rules_cluster[second_index]
            if str(first[0]) != str(second[0]):
                continue
            if str(first[1]) == str(second[1]):
                # Exact duplicate: keep the later copy.
                dropped.add(first_index)
            elif first[2] <= second[2]:
                # Numeric comparison; comparing str(weight) is lexicographic and
                # mis-orders values such as "1e-05" and "0.5".
                dropped.add(second_index)
            else:
                dropped.add(first_index)
            # Only the first later rule with the same antecedent is considered.
            break
    return [
        rule for index, rule in enumerate(rules_cluster) if index not in dropped
    ]
def get_fuzzy_rules(
    clustered_rules: List[List[Rule]], fuzzy_variables: Dict[str, FuzzyVariable]
) -> List[FuzzyRule]:
    """Build fuzzy rules from clustered crisp rules.

    Every cluster is fuzzified with ``_get_fuzzy_rules`` and de-duplicated with
    ``_delete_same_fuzzy_rules``; the survivors of all clusters are then turned
    into ``FuzzyRule`` objects whose antecedent is the ``&``-combination of the
    rule's terms.

    Returns:
        Flat list of fuzzy rules, cluster by cluster.
    """
    per_cluster: List[List[Tuple[List[RuleAtom], Term, float]]] = [
        _delete_same_fuzzy_rules(_get_fuzzy_rules(rules, fuzzy_variables))
        for rules in clustered_rules
    ]
    result: List[FuzzyRule] = []
    for cluster in per_cluster:
        for terms, consequent, _weight in cluster:
            # reduce(and_, ...) folds the terms into a single conjunction.
            result.append(FuzzyRule(reduce(and_, terms), consequent))
    return result

814
viscosity_regression.ipynb Normal file
View File

@ -0,0 +1,814 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Viscosity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.707</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.361</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Viscosity\n",
"0 20 0.0 0.0 3.707\n",
"1 25 0.0 0.0 3.180\n",
"2 35 0.0 0.0 2.361"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Viscosity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>40</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>60</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.329</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Viscosity\n",
"0 30 0.0 0.0 2.716\n",
"1 40 0.0 0.0 2.073\n",
"2 60 0.0 0.0 1.329"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"viscosity_train = pd.read_csv(\"data/viscosity_train.csv\", sep=\";\", decimal=\",\")\n",
"viscosity_test = pd.read_csv(\"data/viscosity_test.csv\", sep=\";\", decimal=\",\")\n",
"\n",
"display(viscosity_train.head(3))\n",
"display(viscosity_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 20 0.0 0.0\n",
"1 25 0.0 0.0\n",
"2 35 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 3.707\n",
"1 3.180\n",
"2 2.361\n",
"Name: Viscosity, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>40</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>60</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 30 0.0 0.0\n",
"1 40 0.0 0.0\n",
"2 60 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 2.716\n",
"1 2.073\n",
"2 1.329\n",
"Name: Viscosity, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"viscosity_y_train = viscosity_train[\"Viscosity\"]\n",
"viscosity_train = viscosity_train.drop([\"Viscosity\"], axis=1)\n",
"\n",
"display(viscosity_train.head(3))\n",
"display(viscosity_y_train.head(3))\n",
"\n",
"viscosity_y_test = viscosity_test[\"Viscosity\"]\n",
"viscosity_test = viscosity_test.drop([\"Viscosity\"], axis=1)\n",
"\n",
"display(viscosity_test.head(3))\n",
"display(viscosity_y_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"import math\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" viscosity_train.values, viscosity_y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(viscosity_train.values)\n",
" y_test_pred = fitted_model.predict(viscosity_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"train_preds\"] = y_train_pred\n",
" models[model_name][\"preds\"] = y_test_pred\n",
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
" metrics.mean_squared_error(viscosity_y_train, y_train_pred)\n",
" )\n",
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(viscosity_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(viscosity_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(viscosity_y_test, y_test_pred)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_0b35b_row0_col0 {\n",
" background-color: #21918c;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row0_col1, #T_0b35b_row4_col0 {\n",
" background-color: #26818e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row0_col2, #T_0b35b_row6_col3 {\n",
" background-color: #4e02a2;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row0_col3, #T_0b35b_row6_col2 {\n",
" background-color: #da5a6a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row1_col0 {\n",
" background-color: #31b57b;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row1_col1 {\n",
" background-color: #22a884;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row1_col2 {\n",
" background-color: #a31e9a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row1_col3 {\n",
" background-color: #c13b82;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row2_col0 {\n",
" background-color: #1f9e89;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row2_col1 {\n",
" background-color: #2cb17e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row2_col2 {\n",
" background-color: #ab2494;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row2_col3 {\n",
" background-color: #b7318a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row3_col0 {\n",
" background-color: #54c568;\n",
" color: #000000;\n",
"}\n",
"#T_0b35b_row3_col1, #T_0b35b_row4_col1 {\n",
" background-color: #3aba76;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row3_col2 {\n",
" background-color: #b02991;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row3_col3 {\n",
" background-color: #ad2793;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row4_col2 {\n",
" background-color: #b83289;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row4_col3 {\n",
" background-color: #ac2694;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row5_col0 {\n",
" background-color: #48c16e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row5_col1 {\n",
" background-color: #52c569;\n",
" color: #000000;\n",
"}\n",
"#T_0b35b_row5_col2 {\n",
" background-color: #c23c81;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row5_col3 {\n",
" background-color: #9a169f;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_0b35b_row6_col0, #T_0b35b_row6_col1 {\n",
" background-color: #a8db34;\n",
" color: #000000;\n",
"}\n",
"</style>\n",
"<table id=\"T_0b35b\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_0b35b_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
" <th id=\"T_0b35b_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
" <th id=\"T_0b35b_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
" <th id=\"T_0b35b_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row0\" class=\"row_heading level0 row0\" >linear_poly</th>\n",
" <td id=\"T_0b35b_row0_col0\" class=\"data row0 col0\" >0.150745</td>\n",
" <td id=\"T_0b35b_row0_col1\" class=\"data row0 col1\" >0.139507</td>\n",
" <td id=\"T_0b35b_row0_col2\" class=\"data row0 col2\" >0.336239</td>\n",
" <td id=\"T_0b35b_row0_col3\" class=\"data row0 col3\" >0.978119</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row1\" class=\"row_heading level0 row1\" >linear_interact</th>\n",
" <td id=\"T_0b35b_row1_col0\" class=\"data row1 col0\" >0.361309</td>\n",
" <td id=\"T_0b35b_row1_col1\" class=\"data row1 col1\" >0.303389</td>\n",
" <td id=\"T_0b35b_row1_col2\" class=\"data row1 col2\" >0.527911</td>\n",
" <td id=\"T_0b35b_row1_col3\" class=\"data row1 col3\" >0.896517</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row2\" class=\"row_heading level0 row2\" >random_forest</th>\n",
" <td id=\"T_0b35b_row2_col0\" class=\"data row2 col0\" >0.226420</td>\n",
" <td id=\"T_0b35b_row2_col1\" class=\"data row2 col1\" >0.341014</td>\n",
" <td id=\"T_0b35b_row2_col2\" class=\"data row2 col2\" >0.545765</td>\n",
" <td id=\"T_0b35b_row2_col3\" class=\"data row2 col3\" >0.869259</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row3\" class=\"row_heading level0 row3\" >ridge</th>\n",
" <td id=\"T_0b35b_row3_col0\" class=\"data row3 col0\" >0.472399</td>\n",
" <td id=\"T_0b35b_row3_col1\" class=\"data row3 col1\" >0.378573</td>\n",
" <td id=\"T_0b35b_row3_col2\" class=\"data row3 col2\" >0.559409</td>\n",
" <td id=\"T_0b35b_row3_col3\" class=\"data row3 col3\" >0.838873</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row4\" class=\"row_heading level0 row4\" >decision_tree</th>\n",
" <td id=\"T_0b35b_row4_col0\" class=\"data row4 col0\" >0.054533</td>\n",
" <td id=\"T_0b35b_row4_col1\" class=\"data row4 col1\" >0.379017</td>\n",
" <td id=\"T_0b35b_row4_col2\" class=\"data row4 col2\" >0.587467</td>\n",
" <td id=\"T_0b35b_row4_col3\" class=\"data row4 col3\" >0.838495</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row5\" class=\"row_heading level0 row5\" >linear</th>\n",
" <td id=\"T_0b35b_row5_col0\" class=\"data row5 col0\" >0.441760</td>\n",
" <td id=\"T_0b35b_row5_col1\" class=\"data row5 col1\" >0.428940</td>\n",
" <td id=\"T_0b35b_row5_col2\" class=\"data row5 col2\" >0.617212</td>\n",
" <td id=\"T_0b35b_row5_col3\" class=\"data row5 col3\" >0.793147</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_0b35b_level0_row6\" class=\"row_heading level0 row6\" >knn</th>\n",
" <td id=\"T_0b35b_row6_col0\" class=\"data row6 col0\" >0.666903</td>\n",
" <td id=\"T_0b35b_row6_col1\" class=\"data row6 col1\" >0.566901</td>\n",
" <td id=\"T_0b35b_row6_col2\" class=\"data row6 col2\" >0.702700</td>\n",
" <td id=\"T_0b35b_row6_col3\" class=\"data row6 col3\" >0.638689</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x24995879c40>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'criterion': 'poisson', 'max_depth': 9, 'min_samples_split': 2}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sklearn import model_selection\n",
"\n",
"parameters = {\n",
" \"criterion\": [\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"],\n",
" \"max_depth\": np.arange(1, 21).tolist()[0::2],\n",
" \"min_samples_split\": np.arange(2, 20).tolist()[0::2],\n",
"}\n",
"\n",
"grid = model_selection.GridSearchCV(\n",
" tree.DecisionTreeRegressor(random_state=random_state), parameters, n_jobs=-1\n",
")\n",
"\n",
"grid.fit(viscosity_train, viscosity_y_train)\n",
"grid.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'RMSE_test': 0.37901722760783496,\n",
" 'RMAE_test': 0.5874671455143883,\n",
" 'R2_test': 0.8384951109125148}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'RMSE_test': 0.39412315184917696,\n",
" 'RMAE_test': 0.593196723643326,\n",
" 'R2_test': 0.8253648477295591}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = grid.best_estimator_\n",
"y_pred = model.predict(viscosity_test)\n",
"old_metrics = {\n",
" \"RMSE_test\": models[\"decision_tree\"][\"RMSE_test\"],\n",
" \"RMAE_test\": models[\"decision_tree\"][\"RMAE_test\"],\n",
" \"R2_test\": models[\"decision_tree\"][\"R2_test\"],\n",
"}\n",
"new_metrics = {}\n",
"new_metrics[\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(viscosity_y_test, y_pred)\n",
")\n",
"new_metrics[\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(viscosity_y_test, y_pred)\n",
")\n",
"new_metrics[\"R2_test\"] = metrics.r2_score(viscosity_y_test, y_pred)\n",
"\n",
"display(old_metrics)\n",
"display(new_metrics)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- T <= 32.50\n",
"| |--- TiO2 <= 0.18\n",
"| | |--- Al2O3 <= 0.18\n",
"| | | |--- T <= 22.50\n",
"| | | | |--- TiO2 <= 0.03\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [3.71]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [4.66]\n",
"| | | | |--- TiO2 > 0.03\n",
"| | | | | |--- value: [4.88]\n",
"| | | |--- T > 22.50\n",
"| | | | |--- TiO2 <= 0.03\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [3.18]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [3.38]\n",
"| | | | |--- TiO2 > 0.03\n",
"| | | | | |--- value: [4.24]\n",
"| | |--- Al2O3 > 0.18\n",
"| | | |--- T <= 22.50\n",
"| | | | |--- value: [6.67]\n",
"| | | |--- T > 22.50\n",
"| | | | |--- T <= 27.50\n",
"| | | | | |--- value: [5.59]\n",
"| | | | |--- T > 27.50\n",
"| | | | | |--- value: [4.73]\n",
"| |--- TiO2 > 0.18\n",
"| | |--- T <= 22.50\n",
"| | | |--- value: [7.13]\n",
"| | |--- T > 22.50\n",
"| | | |--- T <= 27.50\n",
"| | | | |--- value: [5.87]\n",
"| | | |--- T > 27.50\n",
"| | | | |--- value: [4.94]\n",
"|--- T > 32.50\n",
"| |--- T <= 47.50\n",
"| | |--- TiO2 <= 0.18\n",
"| | | |--- Al2O3 <= 0.18\n",
"| | | | |--- T <= 42.50\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | | |--- value: [2.36]\n",
"| | | | | | |--- Al2O3 > 0.03\n",
"| | | | | | | |--- value: [2.68]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- T <= 37.50\n",
"| | | | | | | |--- value: [3.12]\n",
"| | | | | | |--- T > 37.50\n",
"| | | | | | | |--- value: [2.65]\n",
"| | | | |--- T > 42.50\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- value: [1.83]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- value: [2.40]\n",
"| | | |--- Al2O3 > 0.18\n",
"| | | | |--- T <= 37.50\n",
"| | | | | |--- value: [4.12]\n",
"| | | | |--- T > 37.50\n",
"| | | | | |--- value: [3.56]\n",
"| | |--- TiO2 > 0.18\n",
"| | | |--- T <= 40.00\n",
"| | | | |--- value: [4.35]\n",
"| | | |--- T > 40.00\n",
"| | | | |--- value: [3.56]\n",
"| |--- T > 47.50\n",
"| | |--- TiO2 <= 0.18\n",
"| | | |--- Al2O3 <= 0.18\n",
"| | | | |--- T <= 52.50\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | | |--- value: [1.63]\n",
"| | | | | | |--- Al2O3 > 0.03\n",
"| | | | | | | |--- value: [1.90]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- value: [2.11]\n",
"| | | | |--- T > 52.50\n",
"| | | | | |--- T <= 65.00\n",
"| | | | | | |--- TiO2 <= 0.03\n",
"| | | | | | | |--- value: [1.55]\n",
"| | | | | | |--- TiO2 > 0.03\n",
"| | | | | | | |--- value: [1.66]\n",
"| | | | | |--- T > 65.00\n",
"| | | | | | |--- TiO2 <= 0.03\n",
"| | | | | | | |--- value: [1.19]\n",
"| | | | | | |--- TiO2 > 0.03\n",
"| | | | | | | |--- value: [1.29]\n",
"| | | |--- Al2O3 > 0.18\n",
"| | | | |--- T <= 65.00\n",
"| | | | | |--- T <= 57.50\n",
"| | | | | | |--- value: [2.43]\n",
"| | | | | |--- T > 57.50\n",
"| | | | | | |--- value: [2.16]\n",
"| | | | |--- T > 65.00\n",
"| | | | | |--- value: [1.73]\n",
"| | |--- TiO2 > 0.18\n",
"| | | |--- T <= 65.00\n",
"| | | | |--- T <= 57.50\n",
"| | | | | |--- value: [2.84]\n",
"| | | | |--- T > 57.50\n",
"| | | | | |--- value: [2.54]\n",
"| | | |--- T > 65.00\n",
"| | | | |--- value: [1.91]\n",
"\n"
]
}
],
"source": [
"rules = tree.export_text(\n",
" models[\"decision_tree\"][\"fitted\"],\n",
" feature_names=viscosity_train.columns.values.tolist(),\n",
")\n",
"print(rules)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"pickle.dump(models[\"decision_tree\"][\"fitted\"], open(\"data/vtree.model.sav\", \"wb\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

1769
viscosity_tree.ipynb Normal file

File diff suppressed because one or more lines are too long