ckiias/lec4-2-nlp-dense.ipynb

1730 lines
74 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "0fd64751",
"metadata": {},
"source": [
"#### Инициализация Keras"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "507915ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.9.2\n"
]
}
],
"source": [
"import os\n",
"\n",
"os.environ[\"KERAS_BACKEND\"] = \"torch\"\n",
"import keras\n",
"\n",
"print(keras.__version__)"
]
},
{
"cell_type": "markdown",
"id": "c5e00991",
"metadata": {},
"source": [
"#### Загрузка данных для классификации с помощью глубоких сетей\n",
"\n",
"В качестве набора данных используется набор отзывов к фильмам с сайта IMDB.\n",
"\n",
"Набор включает 50 000 отзывов, половина из которых находится в обучающем наборе данных (x_train), а половина - в тестовом (x_valid). \n",
"\n",
"Данные уже предобработаны для простоты работы с ними.\n",
"\n",
"unique_words - в векторное пространство включается только слова, которые встречаются в корпусе не менее 5000 раз.\n",
"\n",
"max_length - максимальная длина отзыва (если больше, то обрезается, если меньше, то дополняется \"пустыми\" словами)."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e0043e5c",
"metadata": {},
"outputs": [],
"source": [
"from keras.api.datasets import imdb\n",
"import os\n",
"\n",
"unique_words = 5000\n",
"max_length = 100\n",
"\n",
"output_dir = \"tmp\"\n",
"if not os.path.exists(output_dir):\n",
" os.makedirs(output_dir)\n",
"\n",
"(X_train, y_train), (X_valid, y_valid) = imdb.load_data(num_words=unique_words, skip_top=50)"
]
},
{
"cell_type": "markdown",
"id": "022cf1f8",
"metadata": {},
"source": [
"#### Исследование набора данных\n",
"\n",
"Все слова закодированы числовыми идентификаторами для снижения расхода памяти\n",
"\n",
"Идентификаторы 0, 1 и 2 зарезервированы:\n",
"- 0 (PAD) - заполняющее (\"пустое\") слово для дополнения отзывов до длины 100;\n",
"- 1 (START) - определяет начло отзыва;\n",
"- 2 (UNK) - отфильтрованные при загрузке отзывов слова (редкие слова или стоп-слов).\n",
"\n",
"Далее идентификаторы определяют слова в порядке снижения частоты их встречаемости в корпусе."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "aadc3471",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{34704: 'fawn',\n",
" 52009: 'tsukino',\n",
" 52010: 'nunnery',\n",
" 16819: 'sonja',\n",
" 63954: 'vani',\n",
" 1411: 'woods',\n",
" 16118: 'spiders',\n",
" 2348: 'hanging',\n",
" 2292: 'woody',\n",
" 52011: 'trawling',\n",
" 52012: \"hold's\",\n",
" 11310: 'comically',\n",
" 40833: 'localized',\n",
" 30571: 'disobeying',\n",
" 52013: \"'royale\",\n",
" 40834: \"harpo's\",\n",
" 52014: 'canet',\n",
" 19316: 'aileen',\n",
" 52015: 'acurately',\n",
" 52016: \"diplomat's\",\n",
" 25245: 'rickman',\n",
" 6749: 'arranged',\n",
" 52017: 'rumbustious',\n",
" 52018: 'familiarness',\n",
" 52019: \"spider'\",\n",
" 68807: 'hahahah',\n",
" 52020: \"wood'\",\n",
" 40836: 'transvestism',\n",
" 34705: \"hangin'\",\n",
" 2341: 'bringing',\n",
" 40837: 'seamier',\n",
" 34706: 'wooded',\n",
" 52021: 'bravora',\n",
" 16820: 'grueling',\n",
" 1639: 'wooden',\n",
" 16821: 'wednesday',\n",
" 52022: \"'prix\",\n",
" 34707: 'altagracia',\n",
" 52023: 'circuitry',\n",
" 11588: 'crotch',\n",
" 57769: 'busybody',\n",
" 52024: \"tart'n'tangy\",\n",
" 14132: 'burgade',\n",
" 52026: 'thrace',\n",
" 11041: \"tom's\",\n",
" 52028: 'snuggles',\n",
" 29117: 'francesco',\n",
" 52030: 'complainers',\n",
" 52128: 'templarios',\n",
" 40838: '272',\n",
" 52031: '273',\n",
" 52133: 'zaniacs',\n",
" 34709: '275',\n",
" 27634: 'consenting',\n",
" 40839: 'snuggled',\n",
" 15495: 'inanimate',\n",
" 52033: 'uality',\n",
" 11929: 'bronte',\n",
" 4013: 'errors',\n",
" 3233: 'dialogs',\n",
" 52034: \"yomada's\",\n",
" 34710: \"madman's\",\n",
" 30588: 'dialoge',\n",
" 52036: 'usenet',\n",
" 40840: 'videodrome',\n",
" 26341: \"kid'\",\n",
" 52037: 'pawed',\n",
" 30572: \"'girlfriend'\",\n",
" 52038: \"'pleasure\",\n",
" 52039: \"'reloaded'\",\n",
" 40842: \"kazakos'\",\n",
" 52040: 'rocque',\n",
" 52041: 'mailings',\n",
" 11930: 'brainwashed',\n",
" 16822: 'mcanally',\n",
" 52042: \"tom''\",\n",
" 25246: 'kurupt',\n",
" 21908: 'affiliated',\n",
" 52043: 'babaganoosh',\n",
" 40843: \"noe's\",\n",
" 40844: 'quart',\n",
" 362: 'kids',\n",
" 5037: 'uplifting',\n",
" 7096: 'controversy',\n",
" 21909: 'kida',\n",
" 23382: 'kidd',\n",
" 52044: \"error'\",\n",
" 52045: 'neurologist',\n",
" 18513: 'spotty',\n",
" 30573: 'cobblers',\n",
" 9881: 'projection',\n",
" 40845: 'fastforwarding',\n",
" 52046: 'sters',\n",
" 52047: \"eggar's\",\n",
" 52048: 'etherything',\n",
" 40846: 'gateshead',\n",
" 34711: 'airball',\n",
" 25247: 'unsinkable',\n",
" 7183: 'stern',\n",
" 52049: \"cervi's\",\n",
" 40847: 'dnd',\n",
" 11589: 'dna',\n",
" 20601: 'insecurity',\n",
" 52050: \"'reboot'\",\n",
" 11040: 'trelkovsky',\n",
" 52051: 'jaekel',\n",
" 52052: 'sidebars',\n",
" 52053: \"sforza's\",\n",
" 17636: 'distortions',\n",
" 52054: 'mutinies',\n",
" 30605: 'sermons',\n",
" 40849: '7ft',\n",
" 52055: 'boobage',\n",
" 52056: \"o'bannon's\",\n",
" 23383: 'populations',\n",
" 52057: 'chulak',\n",
" 27636: 'mesmerize',\n",
" 52058: 'quinnell',\n",
" 10310: 'yahoo',\n",
" 52060: 'meteorologist',\n",
" 42580: 'beswick',\n",
" 15496: 'boorman',\n",
" 40850: 'voicework',\n",
" 52061: \"ster'\",\n",
" 22925: 'blustering',\n",
" 52062: 'hj',\n",
" 27637: 'intake',\n",
" 5624: 'morally',\n",
" 40852: 'jumbling',\n",
" 52063: 'bowersock',\n",
" 52064: \"'porky's'\",\n",
" 16824: 'gershon',\n",
" 40853: 'ludicrosity',\n",
" 52065: 'coprophilia',\n",
" 40854: 'expressively',\n",
" 19503: \"india's\",\n",
" 34713: \"post's\",\n",
" 52066: 'wana',\n",
" 5286: 'wang',\n",
" 30574: 'wand',\n",
" 25248: 'wane',\n",
" 52324: 'edgeways',\n",
" 34714: 'titanium',\n",
" 40855: 'pinta',\n",
" 181: 'want',\n",
" 30575: 'pinto',\n",
" 52068: 'whoopdedoodles',\n",
" 21911: 'tchaikovsky',\n",
" 2106: 'travel',\n",
" 52069: \"'victory'\",\n",
" 11931: 'copious',\n",
" 22436: 'gouge',\n",
" 52070: \"chapters'\",\n",
" 6705: 'barbra',\n",
" 30576: 'uselessness',\n",
" 52071: \"wan'\",\n",
" 27638: 'assimilated',\n",
" 16119: 'petiot',\n",
" 52072: 'most\\x85and',\n",
" 3933: 'dinosaurs',\n",
" 355: 'wrong',\n",
" 52073: 'seda',\n",
" 52074: 'stollen',\n",
" 34715: 'sentencing',\n",
" 40856: 'ouroboros',\n",
" 40857: 'assimilates',\n",
" 40858: 'colorfully',\n",
" 27639: 'glenne',\n",
" 52075: 'dongen',\n",
" 4763: 'subplots',\n",
" 52076: 'kiloton',\n",
" 23384: 'chandon',\n",
" 34716: \"effect'\",\n",
" 27640: 'snugly',\n",
" 40859: 'kuei',\n",
" 9095: 'welcomed',\n",
" 30074: 'dishonor',\n",
" 52078: 'concurrence',\n",
" 23385: 'stoicism',\n",
" 14899: \"guys'\",\n",
" 52080: \"beroemd'\",\n",
" 6706: 'butcher',\n",
" 40860: \"melfi's\",\n",
" 30626: 'aargh',\n",
" 20602: 'playhouse',\n",
" 11311: 'wickedly',\n",
" 1183: 'fit',\n",
" 52081: 'labratory',\n",
" 40862: 'lifeline',\n",
" 1930: 'screaming',\n",
" 4290: 'fix',\n",
" 52082: 'cineliterate',\n",
" 52083: 'fic',\n",
" 52084: 'fia',\n",
" 34717: 'fig',\n",
" 52085: 'fmvs',\n",
" 52086: 'fie',\n",
" 52087: 'reentered',\n",
" 30577: 'fin',\n",
" 52088: 'doctresses',\n",
" 52089: 'fil',\n",
" 12609: 'zucker',\n",
" 31934: 'ached',\n",
" 52091: 'counsil',\n",
" 52092: 'paterfamilias',\n",
" 13888: 'songwriter',\n",
" 34718: 'shivam',\n",
" 9657: 'hurting',\n",
" 302: 'effects',\n",
" 52093: 'slauther',\n",
" 52094: \"'flame'\",\n",
" 52095: 'sommerset',\n",
" 52096: 'interwhined',\n",
" 27641: 'whacking',\n",
" 52097: 'bartok',\n",
" 8778: 'barton',\n",
" 21912: 'frewer',\n",
" 52098: \"fi'\",\n",
" 6195: 'ingrid',\n",
" 30578: 'stribor',\n",
" 52099: 'approporiately',\n",
" 52100: 'wobblyhand',\n",
" 52101: 'tantalisingly',\n",
" 52102: 'ankylosaurus',\n",
" 17637: 'parasites',\n",
" 52103: 'childen',\n",
" 52104: \"jenkins'\",\n",
" 52105: 'metafiction',\n",
" 17638: 'golem',\n",
" 40863: 'indiscretion',\n",
" 23386: \"reeves'\",\n",
" 57784: \"inamorata's\",\n",
" 52107: 'brittannica',\n",
" 7919: 'adapt',\n",
" 30579: \"russo's\",\n",
" 48249: 'guitarists',\n",
" 10556: 'abbott',\n",
" 40864: 'abbots',\n",
" 17652: 'lanisha',\n",
" 40866: 'magickal',\n",
" 52108: 'mattter',\n",
" 52109: \"'willy\",\n",
" 34719: 'pumpkins',\n",
" 52110: 'stuntpeople',\n",
" 30580: 'estimate',\n",
" 40867: 'ugghhh',\n",
" 11312: 'gameplay',\n",
" 52111: \"wern't\",\n",
" 40868: \"n'sync\",\n",
" 16120: 'sickeningly',\n",
" 40869: 'chiara',\n",
" 4014: 'disturbed',\n",
" 40870: 'portmanteau',\n",
" 52112: 'ineffectively',\n",
" 82146: \"duchonvey's\",\n",
" 37522: \"nasty'\",\n",
" 1288: 'purpose',\n",
" 52115: 'lazers',\n",
" 28108: 'lightened',\n",
" 52116: 'kaliganj',\n",
" 52117: 'popularism',\n",
" 18514: \"damme's\",\n",
" 30581: 'stylistics',\n",
" 52118: 'mindgaming',\n",
" 46452: 'spoilerish',\n",
" 52120: \"'corny'\",\n",
" 34721: 'boerner',\n",
" 6795: 'olds',\n",
" 52121: 'bakelite',\n",
" 27642: 'renovated',\n",
" 27643: 'forrester',\n",
" 52122: \"lumiere's\",\n",
" 52027: 'gaskets',\n",
" 887: 'needed',\n",
" 34722: 'smight',\n",
" 1300: 'master',\n",
" 25908: \"edie's\",\n",
" 40871: 'seeber',\n",
" 52123: 'hiya',\n",
" 52124: 'fuzziness',\n",
" 14900: 'genesis',\n",
" 12610: 'rewards',\n",
" 30582: 'enthrall',\n",
" 40872: \"'about\",\n",
" 52125: \"recollection's\",\n",
" 11042: 'mutilated',\n",
" 52126: 'fatherlands',\n",
" 52127: \"fischer's\",\n",
" 5402: 'positively',\n",
" 34708: '270',\n",
" 34723: 'ahmed',\n",
" 9839: 'zatoichi',\n",
" 13889: 'bannister',\n",
" 52130: 'anniversaries',\n",
" 30583: \"helm's\",\n",
" 52131: \"'work'\",\n",
" 34724: 'exclaimed',\n",
" 52132: \"'unfunny'\",\n",
" 52032: '274',\n",
" 547: 'feeling',\n",
" 52134: \"wanda's\",\n",
" 33269: 'dolan',\n",
" 52136: '278',\n",
" 52137: 'peacoat',\n",
" 40873: 'brawny',\n",
" 40874: 'mishra',\n",
" 40875: 'worlders',\n",
" 52138: 'protags',\n",
" 52139: 'skullcap',\n",
" 57599: 'dastagir',\n",
" 5625: 'affairs',\n",
" 7802: 'wholesome',\n",
" 52140: 'hymen',\n",
" 25249: 'paramedics',\n",
" 52141: 'unpersons',\n",
" 52142: 'heavyarms',\n",
" 52143: 'affaire',\n",
" 52144: 'coulisses',\n",
" 40876: 'hymer',\n",
" 52145: 'kremlin',\n",
" 30584: 'shipments',\n",
" 52146: 'pixilated',\n",
" 30585: \"'00s\",\n",
" 18515: 'diminishing',\n",
" 1360: 'cinematic',\n",
" 14901: 'resonates',\n",
" 40877: 'simplify',\n",
" 40878: \"nature'\",\n",
" 40879: 'temptresses',\n",
" 16825: 'reverence',\n",
" 19505: 'resonated',\n",
" 34725: 'dailey',\n",
" 52147: '2\\x85',\n",
" 27644: 'treize',\n",
" 52148: 'majo',\n",
" 21913: 'kiya',\n",
" 52149: 'woolnough',\n",
" 39800: 'thanatos',\n",
" 35734: 'sandoval',\n",
" 40882: 'dorama',\n",
" 52150: \"o'shaughnessy\",\n",
" 4991: 'tech',\n",
" 32021: 'fugitives',\n",
" 30586: 'teck',\n",
" 76128: \"'e'\",\n",
" 40884: 'doesnt',\n",
" 52152: 'purged',\n",
" 660: 'saying',\n",
" 41098: \"martians'\",\n",
" 23421: 'norliss',\n",
" 27645: 'dickey',\n",
" 52155: 'dicker',\n",
" 52156: \"'sependipity\",\n",
" 8425: 'padded',\n",
" 57795: 'ordell',\n",
" 40885: \"sturges'\",\n",
" 52157: 'independentcritics',\n",
" 5748: 'tempted',\n",
" 34727: \"atkinson's\",\n",
" 25250: 'hounded',\n",
" 52158: 'apace',\n",
" 15497: 'clicked',\n",
" 30587: \"'humor'\",\n",
" 17180: \"martino's\",\n",
" 52159: \"'supporting\",\n",
" 52035: 'warmongering',\n",
" 34728: \"zemeckis's\",\n",
" 21914: 'lube',\n",
" 52160: 'shocky',\n",
" 7479: 'plate',\n",
" 40886: 'plata',\n",
" 40887: 'sturgess',\n",
" 40888: \"nerds'\",\n",
" 20603: 'plato',\n",
" 34729: 'plath',\n",
" 40889: 'platt',\n",
" 52162: 'mcnab',\n",
" 27646: 'clumsiness',\n",
" 3902: 'altogether',\n",
" 42587: 'massacring',\n",
" 52163: 'bicenntinial',\n",
" 40890: 'skaal',\n",
" 14363: 'droning',\n",
" 8779: 'lds',\n",
" 21915: 'jaguar',\n",
" 34730: \"cale's\",\n",
" 1780: 'nicely',\n",
" 4591: 'mummy',\n",
" 18516: \"lot's\",\n",
" 10089: 'patch',\n",
" 50205: 'kerkhof',\n",
" 52164: \"leader's\",\n",
" 27647: \"'movie\",\n",
" 52165: 'uncomfirmed',\n",
" 40891: 'heirloom',\n",
" 47363: 'wrangle',\n",
" 52166: 'emotion\\x85',\n",
" 52167: \"'stargate'\",\n",
" 40892: 'pinoy',\n",
" 40893: 'conchatta',\n",
" 41131: 'broeke',\n",
" 40894: 'advisedly',\n",
" 17639: \"barker's\",\n",
" 52169: 'descours',\n",
" 775: 'lots',\n",
" 9262: 'lotr',\n",
" 9882: 'irs',\n",
" 52170: 'lott',\n",
" 40895: 'xvi',\n",
" 34731: 'irk',\n",
" 52171: 'irl',\n",
" 6890: 'ira',\n",
" 21916: 'belzer',\n",
" 52172: 'irc',\n",
" 27648: 'ire',\n",
" 40896: 'requisites',\n",
" 7696: 'discipline',\n",
" 52964: 'lyoko',\n",
" 11313: 'extend',\n",
" 876: 'nature',\n",
" 52173: \"'dickie'\",\n",
" 40897: 'optimist',\n",
" 30589: 'lapping',\n",
" 3903: 'superficial',\n",
" 52174: 'vestment',\n",
" 2826: 'extent',\n",
" 52175: 'tendons',\n",
" 52176: \"heller's\",\n",
" 52177: 'quagmires',\n",
" 52178: 'miyako',\n",
" 20604: 'moocow',\n",
" 52179: \"coles'\",\n",
" 40898: 'lookit',\n",
" 52180: 'ravenously',\n",
" 40899: 'levitating',\n",
" 52181: 'perfunctorily',\n",
" 30590: 'lookin',\n",
" 40901: \"lot'\",\n",
" 52182: 'lookie',\n",
" 34873: 'fearlessly',\n",
" 52184: 'libyan',\n",
" 40902: 'fondles',\n",
" 35717: 'gopher',\n",
" 40904: 'wearying',\n",
" 52185: \"nz's\",\n",
" 27649: 'minuses',\n",
" 52186: 'puposelessly',\n",
" 52187: 'shandling',\n",
" 31271: 'decapitates',\n",
" 11932: 'humming',\n",
" 40905: \"'nother\",\n",
" 21917: 'smackdown',\n",
" 30591: 'underdone',\n",
" 40906: 'frf',\n",
" 52188: 'triviality',\n",
" 25251: 'fro',\n",
" 8780: 'bothers',\n",
" 52189: \"'kensington\",\n",
" 76: 'much',\n",
" 34733: 'muco',\n",
" 22618: 'wiseguy',\n",
" 27651: \"richie's\",\n",
" 40907: 'tonino',\n",
" 52190: 'unleavened',\n",
" 11590: 'fry',\n",
" 40908: \"'tv'\",\n",
" 40909: 'toning',\n",
" 14364: 'obese',\n",
" 30592: 'sensationalized',\n",
" 40910: 'spiv',\n",
" 6262: 'spit',\n",
" 7367: 'arkin',\n",
" 21918: 'charleton',\n",
" 16826: 'jeon',\n",
" 21919: 'boardroom',\n",
" 4992: 'doubts',\n",
" 3087: 'spin',\n",
" 53086: 'hepo',\n",
" 27652: 'wildcat',\n",
" 10587: 'venoms',\n",
" 52194: 'misconstrues',\n",
" 18517: 'mesmerising',\n",
" 40911: 'misconstrued',\n",
" 52195: 'rescinds',\n",
" 52196: 'prostrate',\n",
" 40912: 'majid',\n",
" 16482: 'climbed',\n",
" 34734: 'canoeing',\n",
" 52198: 'majin',\n",
" 57807: 'animie',\n",
" 40913: 'sylke',\n",
" 14902: 'conditioned',\n",
" 40914: 'waddell',\n",
" 52199: '3\\x85',\n",
" 41191: 'hyperdrive',\n",
" 34735: 'conditioner',\n",
" 53156: 'bricklayer',\n",
" 2579: 'hong',\n",
" 52201: 'memoriam',\n",
" 30595: 'inventively',\n",
" 25252: \"levant's\",\n",
" 20641: 'portobello',\n",
" 52203: 'remand',\n",
" 19507: 'mummified',\n",
" 27653: 'honk',\n",
" 19508: 'spews',\n",
" 40915: 'visitations',\n",
" 52204: 'mummifies',\n",
" 25253: 'cavanaugh',\n",
" 23388: 'zeon',\n",
" 40916: \"jungle's\",\n",
" 34736: 'viertel',\n",
" 27654: 'frenchmen',\n",
" 52205: 'torpedoes',\n",
" 52206: 'schlessinger',\n",
" 34737: 'torpedoed',\n",
" 69879: 'blister',\n",
" 52207: 'cinefest',\n",
" 34738: 'furlough',\n",
" 52208: 'mainsequence',\n",
" 40917: 'mentors',\n",
" 9097: 'academic',\n",
" 20605: 'stillness',\n",
" 40918: 'academia',\n",
" 52209: 'lonelier',\n",
" 52210: 'nibby',\n",
" 52211: \"losers'\",\n",
" 40919: 'cineastes',\n",
" 4452: 'corporate',\n",
" 40920: 'massaging',\n",
" 30596: 'bellow',\n",
" 19509: 'absurdities',\n",
" 53244: 'expetations',\n",
" 40921: 'nyfiken',\n",
" 75641: 'mehras',\n",
" 52212: 'lasse',\n",
" 52213: 'visability',\n",
" 33949: 'militarily',\n",
" 52214: \"elder'\",\n",
" 19026: 'gainsbourg',\n",
" 20606: 'hah',\n",
" 13423: 'hai',\n",
" 34739: 'haj',\n",
" 25254: 'hak',\n",
" 4314: 'hal',\n",
" 4895: 'ham',\n",
" 53262: 'duffer',\n",
" 52216: 'haa',\n",
" 69: 'had',\n",
" 11933: 'advancement',\n",
" 16828: 'hag',\n",
" 25255: \"hand'\",\n",
" 13424: 'hay',\n",
" 20607: 'mcnamara',\n",
" 52217: \"mozart's\",\n",
" 30734: 'duffel',\n",
" 30597: 'haq',\n",
" 13890: 'har',\n",
" 47: 'has',\n",
" 2404: 'hat',\n",
" 40922: 'hav',\n",
" 30598: 'haw',\n",
" 52218: 'figtings',\n",
" 15498: 'elders',\n",
" 52219: 'underpanted',\n",
" 52220: 'pninson',\n",
" 27655: 'unequivocally',\n",
" 23676: \"barbara's\",\n",
" 52222: \"bello'\",\n",
" 13000: 'indicative',\n",
" 40923: 'yawnfest',\n",
" 52223: 'hexploitation',\n",
" 52224: \"loder's\",\n",
" 27656: 'sleuthing',\n",
" 32625: \"justin's\",\n",
" 52225: \"'ball\",\n",
" 52226: \"'summer\",\n",
" 34938: \"'demons'\",\n",
" 52228: \"mormon's\",\n",
" 34740: \"laughton's\",\n",
" 52229: 'debell',\n",
" 39727: 'shipyard',\n",
" 30600: 'unabashedly',\n",
" 40404: 'disks',\n",
" 2293: 'crowd',\n",
" 10090: 'crowe',\n",
" 56437: \"vancouver's\",\n",
" 34741: 'mosques',\n",
" 6630: 'crown',\n",
" 52230: 'culpas',\n",
" 27657: 'crows',\n",
" 53347: 'surrell',\n",
" 52232: 'flowless',\n",
" 52233: 'sheirk',\n",
" 40926: \"'three\",\n",
" 52234: \"peterson'\",\n",
" 52235: 'ooverall',\n",
" 40927: 'perchance',\n",
" 1324: 'bottom',\n",
" 53366: 'chabert',\n",
" 52236: 'sneha',\n",
" 13891: 'inhuman',\n",
" 52237: 'ichii',\n",
" 52238: 'ursla',\n",
" 30601: 'completly',\n",
" 40928: 'moviedom',\n",
" 52239: 'raddick',\n",
" 51998: 'brundage',\n",
" 40929: 'brigades',\n",
" 1184: 'starring',\n",
" 52240: \"'goal'\",\n",
" 52241: 'caskets',\n",
" 52242: 'willcock',\n",
" 52243: \"threesome's\",\n",
" 52244: \"mosque'\",\n",
" 52245: \"cover's\",\n",
" 17640: 'spaceships',\n",
" 40930: 'anomalous',\n",
" 27658: 'ptsd',\n",
" 52246: 'shirdan',\n",
" 21965: 'obscenity',\n",
" 30602: 'lemmings',\n",
" 30603: 'duccio',\n",
" 52247: \"levene's\",\n",
" 52248: \"'gorby'\",\n",
" 25258: \"teenager's\",\n",
" 5343: 'marshall',\n",
" 9098: 'honeymoon',\n",
" 3234: 'shoots',\n",
" 12261: 'despised',\n",
" 52249: 'okabasho',\n",
" 8292: 'fabric',\n",
" 18518: 'cannavale',\n",
" 3540: 'raped',\n",
" 52250: \"tutt's\",\n",
" 17641: 'grasping',\n",
" 18519: 'despises',\n",
" 40931: \"thief's\",\n",
" 8929: 'rapes',\n",
" 52251: 'raper',\n",
" 27659: \"eyre'\",\n",
" 52252: 'walchek',\n",
" 23389: \"elmo's\",\n",
" 40932: 'perfumes',\n",
" 21921: 'spurting',\n",
" 52253: \"exposition'\\x85\",\n",
" 52254: 'denoting',\n",
" 34743: 'thesaurus',\n",
" 40933: \"shoot'\",\n",
" 49762: 'bonejack',\n",
" 52256: 'simpsonian',\n",
" 30604: 'hebetude',\n",
" 34744: \"hallow's\",\n",
" 52257: 'desperation\\x85',\n",
" 34745: 'incinerator',\n",
" 10311: 'congratulations',\n",
" 52258: 'humbled',\n",
" 5927: \"else's\",\n",
" 40848: 'trelkovski',\n",
" 52259: \"rape'\",\n",
" 59389: \"'chapters'\",\n",
" 52260: '1600s',\n",
" 7256: 'martian',\n",
" 25259: 'nicest',\n",
" 52262: 'eyred',\n",
" 9460: 'passenger',\n",
" 6044: 'disgrace',\n",
" 52263: 'moderne',\n",
" 5123: 'barrymore',\n",
" 52264: 'yankovich',\n",
" 40934: 'moderns',\n",
" 52265: 'studliest',\n",
" 52266: 'bedsheet',\n",
" 14903: 'decapitation',\n",
" 52267: 'slurring',\n",
" 52268: \"'nunsploitation'\",\n",
" 34746: \"'character'\",\n",
" 9883: 'cambodia',\n",
" 52269: 'rebelious',\n",
" 27660: 'pasadena',\n",
" 40935: 'crowne',\n",
" 52270: \"'bedchamber\",\n",
" 52271: 'conjectural',\n",
" 52272: 'appologize',\n",
" 52273: 'halfassing',\n",
" 57819: 'paycheque',\n",
" 20609: 'palms',\n",
" 52274: \"'islands\",\n",
" 40936: 'hawked',\n",
" 21922: 'palme',\n",
" 40937: 'conservatively',\n",
" 64010: 'larp',\n",
" 5561: 'palma',\n",
" 21923: 'smelling',\n",
" 13001: 'aragorn',\n",
" 52275: 'hawker',\n",
" 52276: 'hawkes',\n",
" 3978: 'explosions',\n",
" 8062: 'loren',\n",
" 52277: \"pyle's\",\n",
" 6707: 'shootout',\n",
" 18520: \"mike's\",\n",
" 52278: \"driscoll's\",\n",
" 40938: 'cogsworth',\n",
" 52279: \"britian's\",\n",
" 34747: 'childs',\n",
" 52280: \"portrait's\",\n",
" 3629: 'chain',\n",
" 2500: 'whoever',\n",
" 52281: 'puttered',\n",
" 52282: 'childe',\n",
" 52283: 'maywether',\n",
" 3039: 'chair',\n",
" 52284: \"rance's\",\n",
" 34748: 'machu',\n",
" 4520: 'ballet',\n",
" 34749: 'grapples',\n",
" 76155: 'summerize',\n",
" 30606: 'freelance',\n",
" 52286: \"andrea's\",\n",
" 52287: '\\x91very',\n",
" 45882: 'coolidge',\n",
" 18521: 'mache',\n",
" 52288: 'balled',\n",
" 40940: 'grappled',\n",
" 18522: 'macha',\n",
" 21924: 'underlining',\n",
" 5626: 'macho',\n",
" 19510: 'oversight',\n",
" 25260: 'machi',\n",
" 11314: 'verbally',\n",
" 21925: 'tenacious',\n",
" 40941: 'windshields',\n",
" 18560: 'paychecks',\n",
" 3399: 'jerk',\n",
" 11934: \"good'\",\n",
" 34751: 'prancer',\n",
" 21926: 'prances',\n",
" 52289: 'olympus',\n",
" 21927: 'lark',\n",
" 10788: 'embark',\n",
" 7368: 'gloomy',\n",
" 52290: 'jehaan',\n",
" 52291: 'turaqui',\n",
" 20610: \"child'\",\n",
" 2897: 'locked',\n",
" 52292: 'pranced',\n",
" 2591: 'exact',\n",
" 52293: 'unattuned',\n",
" 786: 'minute',\n",
" 16121: 'skewed',\n",
" 40943: 'hodgins',\n",
" 34752: 'skewer',\n",
" 52294: 'think\\x85',\n",
" 38768: 'rosenstein',\n",
" 52295: 'helmit',\n",
" 34753: 'wrestlemanias',\n",
" 16829: 'hindered',\n",
" 30607: \"martha's\",\n",
" 52296: 'cheree',\n",
" 52297: \"pluckin'\",\n",
" 40944: 'ogles',\n",
" 11935: 'heavyweight',\n",
" 82193: 'aada',\n",
" 11315: 'chopping',\n",
" 61537: 'strongboy',\n",
" 41345: 'hegemonic',\n",
" 40945: 'adorns',\n",
" 41349: 'xxth',\n",
" 34754: 'nobuhiro',\n",
" 52301: 'capitães',\n",
" 52302: 'kavogianni',\n",
" 13425: 'antwerp',\n",
" 6541: 'celebrated',\n",
" 52303: 'roarke',\n",
" 40946: 'baggins',\n",
" 31273: 'cheeseburgers',\n",
" 52304: 'matras',\n",
" 52305: \"nineties'\",\n",
" 52306: \"'craig'\",\n",
" 13002: 'celebrates',\n",
" 3386: 'unintentionally',\n",
" 14365: 'drafted',\n",
" 52307: 'climby',\n",
" 52308: '303',\n",
" 18523: 'oldies',\n",
" 9099: 'climbs',\n",
" 9658: 'honour',\n",
" 34755: 'plucking',\n",
" 30077: '305',\n",
" 5517: 'address',\n",
" 40947: 'menjou',\n",
" 42595: \"'freak'\",\n",
" 19511: 'dwindling',\n",
" 9461: 'benson',\n",
" 52310: 'whites',\n",
" 40948: 'shamelessness',\n",
" 21928: 'impacted',\n",
" 52311: 'upatz',\n",
" 3843: 'cusack',\n",
" 37570: \"flavia's\",\n",
" 52312: 'effette',\n",
" 34756: 'influx',\n",
" 52313: 'boooooooo',\n",
" 52314: 'dimitrova',\n",
" 13426: 'houseman',\n",
" 25262: 'bigas',\n",
" 52315: 'boylen',\n",
" 52316: 'phillipenes',\n",
" 40949: 'fakery',\n",
" 27661: \"grandpa's\",\n",
" 27662: 'darnell',\n",
" 19512: 'undergone',\n",
" 52318: 'handbags',\n",
" 21929: 'perished',\n",
" 37781: 'pooped',\n",
" 27663: 'vigour',\n",
" 3630: 'opposed',\n",
" 52319: 'etude',\n",
" 11802: \"caine's\",\n",
" 52320: 'doozers',\n",
" 34757: 'photojournals',\n",
" 52321: 'perishes',\n",
" 34758: 'constrains',\n",
" 40951: 'migenes',\n",
" 30608: 'consoled',\n",
" 16830: 'alastair',\n",
" 52322: 'wvs',\n",
" 52323: 'ooooooh',\n",
" 34759: 'approving',\n",
" 40952: 'consoles',\n",
" 52067: 'disparagement',\n",
" 52325: 'futureistic',\n",
" 52326: 'rebounding',\n",
" 52327: \"'date\",\n",
" 52328: 'gregoire',\n",
" 21930: 'rutherford',\n",
" 34760: 'americanised',\n",
" 82199: 'novikov',\n",
" 1045: 'following',\n",
" 34761: 'munroe',\n",
" 52329: \"morita'\",\n",
" 52330: 'christenssen',\n",
" 23109: 'oatmeal',\n",
" 25263: 'fossey',\n",
" 40953: 'livered',\n",
" 13003: 'listens',\n",
" 76167: \"'marci\",\n",
" 52333: \"otis's\",\n",
" 23390: 'thanking',\n",
" 16022: 'maude',\n",
" 34762: 'extensions',\n",
" 52335: 'ameteurish',\n",
" 52336: \"commender's\",\n",
" 27664: 'agricultural',\n",
" 4521: 'convincingly',\n",
" 17642: 'fueled',\n",
" 54017: 'mahattan',\n",
" 40955: \"paris's\",\n",
" 52339: 'vulkan',\n",
" 52340: 'stapes',\n",
" 52341: 'odysessy',\n",
" 12262: 'harmon',\n",
" 4255: 'surfing',\n",
" 23497: 'halloran',\n",
" 49583: 'unbelieveably',\n",
" 52342: \"'offed'\",\n",
" 30610: 'quadrant',\n",
" 19513: 'inhabiting',\n",
" 34763: 'nebbish',\n",
" 40956: 'forebears',\n",
" 34764: 'skirmish',\n",
" 52343: 'ocassionally',\n",
" 52344: \"'resist\",\n",
" 21931: 'impactful',\n",
" 52345: 'spicier',\n",
" 40957: 'touristy',\n",
" 52346: \"'football'\",\n",
" 40958: 'webpage',\n",
" 52348: 'exurbia',\n",
" 52349: 'jucier',\n",
" 14904: 'professors',\n",
" 34765: 'structuring',\n",
" 30611: 'jig',\n",
" 40959: 'overlord',\n",
" 25264: 'disconnect',\n",
" 82204: 'sniffle',\n",
" 40960: 'slimeball',\n",
" 40961: 'jia',\n",
" 16831: 'milked',\n",
" 40962: 'banjoes',\n",
" 1240: 'jim',\n",
" 52351: 'workforces',\n",
" 52352: 'jip',\n",
" 52353: 'rotweiller',\n",
" 34766: 'mundaneness',\n",
" 52354: \"'ninja'\",\n",
" 11043: \"dead'\",\n",
" 40963: \"cipriani's\",\n",
" 20611: 'modestly',\n",
" 52355: \"professor'\",\n",
" 40964: 'shacked',\n",
" 34767: 'bashful',\n",
" 23391: 'sorter',\n",
" 16123: 'overpowering',\n",
" 18524: 'workmanlike',\n",
" 27665: 'henpecked',\n",
" 18525: 'sorted',\n",
" 52357: \"jōb's\",\n",
" 52358: \"'always\",\n",
" 34768: \"'baptists\",\n",
" 52359: 'dreamcatchers',\n",
" 52360: \"'silence'\",\n",
" 21932: 'hickory',\n",
" 52361: 'fun\\x97yet',\n",
" 52362: 'breakumentary',\n",
" 15499: 'didn',\n",
" 52363: 'didi',\n",
" 52364: 'pealing',\n",
" 40965: 'dispite',\n",
" 25265: \"italy's\",\n",
" 21933: 'instability',\n",
" 6542: 'quarter',\n",
" 12611: 'quartet',\n",
" 52365: 'padmé',\n",
" 52366: \"'bleedmedry\",\n",
" 52367: 'pahalniuk',\n",
" 52368: 'honduras',\n",
" 10789: 'bursting',\n",
" 41468: \"pablo's\",\n",
" 52370: 'irremediably',\n",
" 40966: 'presages',\n",
" 57835: 'bowlegged',\n",
" 65186: 'dalip',\n",
" 6263: 'entering',\n",
" 76175: 'newsradio',\n",
" 54153: 'presaged',\n",
" 27666: \"giallo's\",\n",
" 40967: 'bouyant',\n",
" 52371: 'amerterish',\n",
" 18526: 'rajni',\n",
" 30613: 'leeves',\n",
" 34770: 'macauley',\n",
" 615: 'seriously',\n",
" 52372: 'sugercoma',\n",
" 52373: 'grimstead',\n",
" 52374: \"'fairy'\",\n",
" 30614: 'zenda',\n",
" 52375: \"'twins'\",\n",
" 17643: 'realisation',\n",
" 27667: 'highsmith',\n",
" 7820: 'raunchy',\n",
" 40968: 'incentives',\n",
" 52377: 'flatson',\n",
" 35100: 'snooker',\n",
" 16832: 'crazies',\n",
" 14905: 'crazier',\n",
" 7097: 'grandma',\n",
" 52378: 'napunsaktha',\n",
" 30615: 'workmanship',\n",
" 52379: 'reisner',\n",
" 61309: \"sanford's\",\n",
" 52380: '\\x91doña',\n",
" 6111: 'modest',\n",
" 19156: \"everything's\",\n",
" 40969: 'hamer',\n",
" 52382: \"couldn't'\",\n",
" 13004: 'quibble',\n",
" 52383: 'socking',\n",
" 21934: 'tingler',\n",
" 52384: 'gutman',\n",
" 40970: 'lachlan',\n",
" 52385: 'tableaus',\n",
" 52386: 'headbanger',\n",
" 2850: 'spoken',\n",
" 34771: 'cerebrally',\n",
" 23493: \"'road\",\n",
" 21935: 'tableaux',\n",
" 40971: \"proust's\",\n",
" 40972: 'periodical',\n",
" 52388: \"shoveller's\",\n",
" 25266: 'tamara',\n",
" 17644: 'affords',\n",
" 3252: 'concert',\n",
" 87958: \"yara's\",\n",
" 52389: 'someome',\n",
" 8427: 'lingering',\n",
" 41514: \"abraham's\",\n",
" 34772: 'beesley',\n",
" 34773: 'cherbourg',\n",
" 28627: 'kagan',\n",
" 9100: 'snatch',\n",
" 9263: \"miyazaki's\",\n",
" 25267: 'absorbs',\n",
" 40973: \"koltai's\",\n",
" 64030: 'tingled',\n",
" 19514: 'crossroads',\n",
" 16124: 'rehab',\n",
" 52392: 'falworth',\n",
" 52393: 'sequals',\n",
" ...}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"word_index = imdb.get_word_index()\n",
"word_index = { k: (v + 3) for k, v in word_index.items() }\n",
"word_index[\"PAD\"] = 0\n",
"word_index[\"START\"] = 1\n",
"word_index[\"UNK\"] = 2\n",
"index_word = { v: k for k, v in word_index.items() }\n",
"index_word"
]
},
{
"cell_type": "markdown",
"id": "a04781ef",
"metadata": {},
"source": [
"#### Вывод первого отзыва из тренировочной выборки\n",
"\n",
"Отзывы содержат только идентификаторы для экономии памяти"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "059670a8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 530,\n",
" 973,\n",
" 1622,\n",
" 1385,\n",
" 65,\n",
" 458,\n",
" 4468,\n",
" 66,\n",
" 3941,\n",
" 2,\n",
" 173,\n",
" 2,\n",
" 256,\n",
" 2,\n",
" 2,\n",
" 100,\n",
" 2,\n",
" 838,\n",
" 112,\n",
" 50,\n",
" 670,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 480,\n",
" 284,\n",
" 2,\n",
" 150,\n",
" 2,\n",
" 172,\n",
" 112,\n",
" 167,\n",
" 2,\n",
" 336,\n",
" 385,\n",
" 2,\n",
" 2,\n",
" 172,\n",
" 4536,\n",
" 1111,\n",
" 2,\n",
" 546,\n",
" 2,\n",
" 2,\n",
" 447,\n",
" 2,\n",
" 192,\n",
" 50,\n",
" 2,\n",
" 2,\n",
" 147,\n",
" 2025,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 1920,\n",
" 4613,\n",
" 469,\n",
" 2,\n",
" 2,\n",
" 71,\n",
" 87,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 530,\n",
" 2,\n",
" 76,\n",
" 2,\n",
" 2,\n",
" 1247,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 515,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 626,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 62,\n",
" 386,\n",
" 2,\n",
" 2,\n",
" 316,\n",
" 2,\n",
" 106,\n",
" 2,\n",
" 2,\n",
" 2223,\n",
" 2,\n",
" 2,\n",
" 480,\n",
" 66,\n",
" 3785,\n",
" 2,\n",
" 2,\n",
" 130,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 619,\n",
" 2,\n",
" 2,\n",
" 124,\n",
" 51,\n",
" 2,\n",
" 135,\n",
" 2,\n",
" 2,\n",
" 1415,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 215,\n",
" 2,\n",
" 77,\n",
" 52,\n",
" 2,\n",
" 2,\n",
" 407,\n",
" 2,\n",
" 82,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 107,\n",
" 117,\n",
" 2,\n",
" 2,\n",
" 256,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 3766,\n",
" 2,\n",
" 723,\n",
" 2,\n",
" 71,\n",
" 2,\n",
" 530,\n",
" 476,\n",
" 2,\n",
" 400,\n",
" 317,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 1029,\n",
" 2,\n",
" 104,\n",
" 88,\n",
" 2,\n",
" 381,\n",
" 2,\n",
" 297,\n",
" 98,\n",
" 2,\n",
" 2071,\n",
" 56,\n",
" 2,\n",
" 141,\n",
" 2,\n",
" 194,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 226,\n",
" 2,\n",
" 2,\n",
" 134,\n",
" 476,\n",
" 2,\n",
" 480,\n",
" 2,\n",
" 144,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 51,\n",
" 2,\n",
" 2,\n",
" 224,\n",
" 92,\n",
" 2,\n",
" 104,\n",
" 2,\n",
" 226,\n",
" 65,\n",
" 2,\n",
" 2,\n",
" 1334,\n",
" 88,\n",
" 2,\n",
" 2,\n",
" 283,\n",
" 2,\n",
" 2,\n",
" 4472,\n",
" 113,\n",
" 103,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 178,\n",
" 2]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train[0]"
]
},
{
"cell_type": "markdown",
"id": "d0eca5f5",
"metadata": {},
"source": [
"#### Можно заменить идентификаторы на реальные слова с учетом предобработки"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "4c1912fa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"UNK UNK UNK UNK UNK brilliant casting location scenery story direction everyone's really suited UNK part UNK played UNK UNK could UNK imagine being there robert UNK UNK UNK amazing actor UNK now UNK same being director UNK father came UNK UNK same scottish island UNK myself UNK UNK loved UNK fact there UNK UNK real connection UNK UNK UNK UNK witty remarks throughout UNK UNK were great UNK UNK UNK brilliant UNK much UNK UNK bought UNK UNK UNK soon UNK UNK UNK released UNK UNK UNK would recommend UNK UNK everyone UNK watch UNK UNK fly UNK UNK amazing really cried UNK UNK end UNK UNK UNK sad UNK UNK know what UNK say UNK UNK cry UNK UNK UNK UNK must UNK been good UNK UNK definitely UNK also UNK UNK UNK two little UNK UNK played UNK UNK UNK norman UNK paul UNK were UNK brilliant children UNK often left UNK UNK UNK UNK list UNK think because UNK stars UNK play them UNK grown up UNK such UNK big UNK UNK UNK whole UNK UNK these children UNK amazing UNK should UNK UNK UNK what UNK UNK done don't UNK think UNK whole story UNK UNK lovely because UNK UNK true UNK UNK someone's life after UNK UNK UNK UNK UNK us UNK\""
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\" \".join(index_word[id] for id in X_train[0])"
]
},
{
"cell_type": "markdown",
"id": "ab903f3e",
"metadata": {},
"source": [
"#### Можно вывести изначальный отзыв (если выключить удаление редких слов и стоп-слов)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "faf79c8d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"START this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert redford's is an amazing actor and now the same being director norman's father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for retail and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also congratulations to the two little boy's that played the part's of norman and paul they were just brilliant children are often left out of the praising list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all\""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(textual_X_train, _), _ = imdb.load_data()\n",
"\" \".join(index_word[id] for id in textual_X_train[0])"
]
},
{
"cell_type": "markdown",
"id": "35e4c578",
"metadata": {},
"source": [
"#### Приведение отзывов к длине max_length (100)\n",
"\n",
"padding и truncating - дополнение и обрезка отзывов начинается с начала (учитывается специфика затухания градиента в рекуррентных сетях)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "131e125a",
"metadata": {},
"outputs": [],
"source": [
"from keras.api.preprocessing.sequence import pad_sequences\n",
"\n",
"X_train = pad_sequences(X_train, maxlen=max_length, padding=\"pre\", truncating=\"pre\", value=0)\n",
"X_valid = pad_sequences(X_valid, maxlen=max_length, padding=\"pre\", truncating=\"pre\", value=0)"
]
},
{
"cell_type": "markdown",
"id": "87eac800",
"metadata": {},
"source": [
"#### Формирование архитектуры глубокой полносвязанной сети\n",
"\n",
"Первый слой (Embedding) выполняет векторизацию"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "6a2e7a0e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential_1\"</span>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1mModel: \"sequential_1\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">100</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">320,000</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Flatten</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">6400</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">409,664</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_3 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">65</span> │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"</pre>\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m100\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m320,000\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten_1 (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6400\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m409,664\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_3 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">729,729</span> (2.78 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m729,729\u001b[0m (2.78 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">729,729</span> (2.78 MB)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m729,729\u001b[0m (2.78 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from keras.api.models import Sequential\n",
"from keras.api.layers import Dense, Flatten, Dropout, Embedding, InputLayer\n",
"\n",
"simple_model = Sequential()\n",
"simple_model.add(InputLayer(shape=(max_length,), dtype=\"float32\"))\n",
"simple_model.add(Embedding(unique_words, 64))\n",
"simple_model.add(Flatten())\n",
"simple_model.add(Dense(64, activation=\"relu\"))\n",
"simple_model.add(Dropout(0.5))\n",
"simple_model.add(Dense(1, activation=\"sigmoid\"))\n",
"\n",
"simple_model.summary()"
]
},
{
"cell_type": "markdown",
"id": "0ff9c40a",
"metadata": {},
"source": [
"#### Обучение модели\n",
"\n",
"Веса модели сохраняются в каталог tmp после каждой эпохи обучения с помощью callback-параметра\n",
"\n",
"В дальнейшем веса можно загрузить"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "52043fc5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/4\n",
"\u001b[1m196/196\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 19ms/step - accuracy: 0.5753 - loss: 0.6517 - val_accuracy: 0.8346 - val_loss: 0.3689\n",
"Epoch 2/4\n",
"\u001b[1m196/196\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 17ms/step - accuracy: 0.8922 - loss: 0.2751 - val_accuracy: 0.8460 - val_loss: 0.3510\n",
"Epoch 3/4\n",
"\u001b[1m196/196\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 18ms/step - accuracy: 0.9724 - loss: 0.1080 - val_accuracy: 0.8335 - val_loss: 0.4402\n",
"Epoch 4/4\n",
"\u001b[1m196/196\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 18ms/step - accuracy: 0.9974 - loss: 0.0224 - val_accuracy: 0.8337 - val_loss: 0.5407\n"
]
},
{
"data": {
"text/plain": [
"<keras.src.callbacks.history.History at 0x365389a90>"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.api.callbacks import ModelCheckpoint\n",
"\n",
"simple_model.compile(\n",
" loss=\"binary_crossentropy\",\n",
" optimizer=\"adam\",\n",
" metrics=[\"accuracy\"],\n",
")\n",
"\n",
"simple_model.fit(\n",
" X_train,\n",
" y_train,\n",
" batch_size=128,\n",
" epochs=4,\n",
" validation_data=(X_valid, y_valid),\n",
" callbacks=[ModelCheckpoint(filepath=output_dir + \"/simple_weights.{epoch:02d}.keras\")],\n",
")"
]
},
{
"cell_type": "markdown",
"id": "3c495301",
"metadata": {},
"source": [
"#### Загрузка лучшей модели и оценка ее качества\n",
"\n",
"Качество модели - 84.6 %."
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "73443ddb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 5ms/step - accuracy: 0.8436 - loss: 0.3559\n"
]
},
{
"data": {
"text/plain": [
"[0.3510318398475647, 0.8459600210189819]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"simple_model.load_weights(output_dir + \"/simple_weights.02.keras\")\n",
"simple_model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "markdown",
"id": "b1157104",
"metadata": {},
"source": [
"#### Визуализация распределения вероятностей результатов модели на валидационной выборке"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "069236c0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAGdCAYAAAAMm0nCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvmklEQVR4nO3deXQUZb7/8U8WuhOW7gCabjIEiHIFoqgDjNAqzqC5RIxeHXAUzSAj24UJ3iE5l+3KIOICooKgLFdRwpyBQZgjXiUCxiBwlLAYjUaWqAMaZrAbHUwaELLW7w9+KWkBh45ZeOL7dU6d06nnW9XfegTqY6WqO8KyLEsAAAAGiWzqBgAAAMJFgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGCe6qRtoKDU1NTp06JDatGmjiIiIpm4HAACcB8uydPToUSUkJCgy8tzXWZptgDl06JASExObug0AAFAHBw8eVMeOHc853mwDTJs2bSSdmgCXy9XE3QCoN1XHpVcSTr0efEiKbtW0/QCoV8FgUImJifZ5/FyabYCp/bWRy+UiwADNSVWU1PL/v3a5CDBAM/Wvbv/gJl4AAGCcsAJMdXW1/vjHPyopKUmxsbG69NJL9cgjj+j0L7S2LEvTp09Xhw4dFBsbq5SUFH366ach+zly5IjS09PlcrkUFxenkSNH6tixYyE1H330kfr376+YmBglJiZqzpw5P+IwAQBAcxJWgHniiSe0ePFiPffcc9q7d6+eeOIJzZkzR88++6xdM2fOHC1YsEBLlizRjh071KpVK6WmpurkyZN2TXp6unbv3q3c3FytW7dOW7du1ZgxY+zxYDCogQMHqnPnziooKNCTTz6pGTNm6Pnnn6+HQwYAAMazwpCWlmaNGDEiZN3gwYOt9PR0y7Isq6amxvJ6vdaTTz5pj5eWllpOp9P6y1/+YlmWZe3Zs8eSZO3atcuuWb9+vRUREWH94x//sCzLshYtWmS1bdvWKi8vt2smT55sdevW7bx7LSsrsyRZZWVl4RwigAtd5THLWqFTS+Wxpu4GQD073/N3WFdgrr32WuXl5emTTz6RJH344Yd65513NGjQIEnSgQMH5Pf7lZKSYm/jdrvVt29f5efnS5Ly8/MVFxenPn362DUpKSmKjIzUjh077JobbrhBDofDrklNTVVxcbG++eabs/ZWXl6uYDAYsgAAgOYprKeQpkyZomAwqO7duysqKkrV1dV67LHHlJ6eLkny+/2SJI/HE7Kdx+Oxx/x+v+Lj40ObiI5Wu3btQmqSkpLO2EftWNu2bc/obdasWXr44YfDORwAAGCosK7ArF69WitWrNDKlSv1/vvva/ny5Xrqqae0fPnyhurvvE2dOlVlZWX2cvDgwaZuCQAANJCwrsBMnDhRU6ZM0dChQyVJPXv21BdffKFZs2Zp+PDh8nq9kqRAIKAOHTrY2wUCAV199dWSJK/Xq8OHD4fst6qqSkeOHLG393q9CgQCITW1P9fWfJ/T6ZTT6QzncAAAgKHCugLz7bffnvG9BFFRUaqpqZEkJSUlyev1Ki8vzx4PBoPasWOHfD6fJMnn86m0tFQFBQV2zaZNm1RTU6O+ffvaNVu3blVlZaVdk5ubq27dup3110cAAOCnJawAc9ttt+mxxx5TTk6OPv/8c61du1Zz587Vr3/9a0mnPjVvwoQJevTRR/Xaa6+pqKhI9913nxISEnTHHXdIknr06KGbb75Zo0eP1s6dO/Xuu+9q/PjxGjp0qBISTn08+L333iuHw6GRI0dq9+7devnllzV//nxlZWXV79EDAAAzhfNoUzAYtP7whz9YnTp1smJiYqxLLrnEevDBB0Med66pqbH++Mc/Wh6Px3I6ndZNN91kFRcXh+znn//8p3XPPfdYrVu3tlwul3X//fdbR48eDan58MMPreuvv95yOp3Wz372M2v27NnhtMpj1EBzxWPUQLN2vufvCMs67WN0m5FgMCi3262ysjK+CwloTqqOS6tbn3p91zG+CwloZs73/M13IQEAAOMQYAAAgHHCeowaAADUvy5Tcpq6hbB9PjutSd+fKzAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOOEFWC6dOmiiIiIM5aMjAxJ0smTJ5WRkaH27durdevWGjJkiAKBQMg+SkpKlJaWppYtWyo+Pl4TJ05UVVVVSM3mzZvVq1cvOZ1Ode3aVdnZ2T/uKAEAQLMSVoDZtWuXvvzyS3vJzc2VJP3mN7+RJGVmZur111/XmjVrtGXLFh06dEiDBw+2t6+urlZaWpoqKiq0bds2LV++XNnZ2Zo+fbpdc+DAAaWlpWnAgAEqLCzUhAkTNGrUKG3cuLE+jhcAADQDEZZlWXXdeMKECVq3bp0+/fRTBYNBXXzxxVq5cqXuvPNOSdK+ffvUo0cP5efnq1+/flq/fr1uvfVWHTp0SB6PR5K0ZMkSTZ48WV999ZUcDocmT56snJwcffzxx/b7DB06VKWlpdqwYcN59xYMBuV2u1VWViaXy1XXQwRwoak6Lq1ufer1Xcek6FZN2w9QD7pMyWnqFsL2+ey0Btnv+Z6/63wPTEVFhf785z9rxIgRioiIUEFBgSorK5WSkmLXdO/eXZ06dVJ+fr4kKT8/Xz179rTDiySlpqYqGAxq9+7dds3p+6itqd3HuZSXlysYDIYsAACgeapzgHn11VdVWlqq3/3ud5Ikv98vh8OhuLi4kDqPxyO/32/XnB5easdrx36oJhgM6sSJE+fsZ9asWXK73faSmJhY10MDAAAXuDoHmBdffFGDBg1SQkJCffZTZ1OnTlVZWZm9HDx4sKlbAgAADSS6Lht98cUXeuutt/TKK6/Y67xeryoqKlRaWhpyFSYQCMjr9do1O3fuDNlX7VNKp9d8/8mlQCAgl8ul2NjYc/bkdDrldDrrcjgAAMAwdboCs2zZMsXHxyst7bsbeHr37q0WLVooLy/PXldcXKySkhL5fD5Jks/nU1FRkQ4fPmzX5ObmyuVyKTk52a45fR+1NbX7AAAACDvA1NTUaNmyZRo+fLiio7+7gON2uzVy5EhlZWXp7bffVkFBge6//375fD7169dPkjRw4EAlJydr2LBh+vDDD7Vx40ZNmzZNGRkZ9tWTsWPHav/+/Zo0aZL27dunRYsWafXq1crMzKynQwYAAKYL+1dIb731lkpKSjRixIgzxubNm6fIyEgNGTJE5eXlSk1N1aJFi+zxqKgorVu3TuPGjZPP51OrVq00fPhwzZw5065JSkpSTk6OMjMzNX/+fHXs2FFLly5VampqHQ8RAAA0Nz/qc2AuZHwODNBM8TkwaIb4HJjvNPjnwAAAADQVAgwAADBOnR6j/qnjUh8AAE2LKzAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOOEHWD+8Y9/6Le//a3at2+v2NhY9ezZU++99549blmWpk+frg4dOig2NlYpKSn69NNPQ/Zx5MgRpaeny+VyKS4uTiNHjtSxY8dCaj766CP1799fMTExSkxM1Jw5c+p4iAAAoLkJK8B88803uu6669SiRQutX79ee/bs0dNPP622bdvaNXPmzNGCBQu0ZMkS7dixQ61atVJqaqpOnjxp16Snp2v37t3Kzc3VunXrtHXrVo0ZM8YeDwaDGjhwoDp37qyCggI9+eSTmjFjhp5//vl6OGQAAGC66HCKn3jiCSUmJmrZsmX2uqSkJPu1ZVl65plnNG3aNN1+++2SpD/96U/yeDx69dVXNXToUO3du1cbNmzQrl271KdPH0nSs88+q1tuuUVPPfWUEhIStGLFClVUVOill16Sw+HQ5ZdfrsLCQs2dOzck6AAAgJ+msK7AvPbaa+rTp49+85vfKD4+Xj//+c/1wgsv2OMHDhyQ3+9XSkqKvc7tdqtv377Kz8+XJOXn5ysuLs4OL5KUkpKiyMhI7dixw6654YYb5HA47JrU1FQVFxfrm2++OWtv5eXlCgaDIQsAAGiewgow+/fv1+LFi/Vv//Zv2rhxo8aNG6f/+q//0vLlyyVJfr9fkuTxeEK283g89pjf71d8fHzIeHR0tNq1axdSc7Z9nP4e3zdr1iy53W57SUxMDOfQAACAQcIKMDU1NerVq5cef/xx/fznP9eYMWM0evRoLVmypKH6O29Tp05VWVmZvRw8eLCpWwIAAA0krADToUMHJScnh6zr0aOHSkpKJEler1eSFAgEQmoCgYA95vV6dfjw4ZDxqqoqHTlyJKTmbPs4/T2+z+l0yuVyhSwAAKB5CivAXHfddSouLg5Z98knn6hz586STt3Q6/V6lZeXZ48Hg0Ht2LFDPp9PkuTz+VRaWqqCggK7ZtOmTaqpqVHfvn3tmq1bt6qystKuyc3NVbdu3UKeeAIAAD9NYQWYzMxMbd++XY8//rg+++wzrVy5Us8//7wyMjIkSREREZowYYIeffRRvfbaayoqKtJ9992nhIQE3XHHHZJOXbG5+eabNXr0aO3cuVPvvvuuxo8fr6FDhyohIUGSdO+998rhcGjkyJHavXu3Xn75Zc2fP19ZWVn1e/QAAMBIYT1G/Ytf/EJr167V1KlTNXPmTCUlJemZZ55Renq6XTNp0iQdP35cY8aMUWlpqa6//npt2LBBMTExds2KFSs0fvx43XTTTYqMjNSQIUO0YMECe9ztduvNN99URkaGevfurYsuukjTp0/nEWoAACBJirAsy2rqJhpCMBiU2+1WWVlZvd8P02VKTr3urzF8PjutqVsA6kfVcWl161Ov7zomRbdq2n6AesB55Tvne/7mu5AAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDhhBZgZM2YoIiIiZOnevbs9fvLkSWVkZKh9+/Zq3bq1hgwZokAgELKPkpISpaWlqWXLloqPj9fEiRNVVVUVUrN582b16tVLTqdTXbt2VXZ2dt2PEAAANDthX4G5/PLL9eWXX9rLO++8Y49lZmbq9ddf15o1a7RlyxYdOnRIgwcPtserq6uVlpamiooKbdu2TcuXL1d2dramT59u1xw4cEBpaWkaMGCACgsLNWHCBI0aNUobN278kYcKAACai+iwN4iOltfrPWN9WVmZXnzxRa1cuVI33nijJGnZsmXq0aOHtm/frn79+unNN9/Unj179NZbb8nj8ejqq6/WI488osmTJ2vGjBlyOBxasmSJkpKS9PTTT0uSevTooXfeeUfz5s1TamrqjzxcAADQHIR9BebTTz9VQkKCLrnkEqWnp6ukpESSVFBQoMrKSqWkpNi13bt3V6dOnZSfny9Jys/PV8+ePeXxeOya1NRUBYNB7d692645fR+1NbX7OJfy8nIFg8GQBQAANE9hBZi+ffsqOztbGzZs0OLFi3XgwAH1799fR48eld/vl8PhUFxcXMg2Ho9Hfr9fkuT3+0PCS+147dgP1QSDQZ04ceKcvc2aNUtut9teEhMTwzk0AABgkLB+hTRo0CD79ZVXXqm+ffuqc+fOWr16tWJjY+u9uXBMnTpVWVlZ9s/BYJAQAwBAM/WjHqOOi4vTZZddps8++0xer1cVFRUqLS0NqQkEAvY9M16v94ynkmp//lc1LpfrB0OS0+mUy+UKWQAAQPP0owLMsWPH9Le//U0dOnRQ79691aJFC+Xl5dnjxcXFKikpkc/nkyT5fD4VFRXp8OHDdk1ubq5cLpeSk5PtmtP3UVtTuw8AAICwAsx///d/a8uWLfr888+1bds2/frXv1ZUVJTuueceud1ujRw5UllZWXr77bdVUFCg+++/Xz6fT/369ZMkDRw4UMnJyRo2bJg+/PBDbdy4UdOmTVNGRoacTqckaezYsdq/f78mTZqkffv2adGiRVq9erUyMzPr/+gBAICRwroH5u9//7vuuece/fOf/9TFF1+s66+/Xtu3b9fFF18sSZo3b54iIyM1ZMgQlZeXKzU1VYsWLbK3j4qK0rp16zRu3Dj5fD61atVKw4cP18yZM+2apKQk5eTkKDMzU/Pnz1fHjh21dOlSHqEGAAC2CMuyrKZuoiEEg0G53W6VlZXV+/0wXabk1Ov+GsPns9OaugWgflQdl1a3PvX6rmNSdKum7QeoB5xXvnO+52++CwkAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjPOjAszs2bMVERGhCRMm2OtOnjypjIwMtW/fXq1bt9aQIUMUCARCtispKVFaWppatmyp+Ph4TZw4UVVVVSE1mzdvVq9eveR0OtW1a1dlZ2f/mFYBAEAzUucAs2vXLv3v//6vrrzyypD1mZmZev3117VmzRpt2bJFhw4d0uDBg+3x6upqpaWlqaKiQtu2bdPy5cuVnZ2t6dOn2zUHDhxQWlqaBgwYoMLCQk2YMEGjRo3Sxo0b69ouAABoRuoUYI4dO6b09HS98MILatu2rb2+rKxML774oubOnasbb7xRvXv31rJly7Rt2zZt375dkvTmm29qz549+vOf/6yrr75agwYN0iOPPKKFCxeqoqJCkrRkyRIlJSXp6aefVo8ePTR+/HjdeeedmjdvXj0cMgAAMF2dAkxGRobS0tKUkpISsr6goECVlZUh67t3765OnTopPz9fkpSfn6+ePXvK4/HYNampqQoGg9q9e7dd8/19p6am2vs4m/LycgWDwZAFAAA0T9HhbrBq1Sq9//772rVr1xljfr9fDodDcXFxIes9Ho/8fr9dc3p4qR2vHfuhmmAwqBMnTig2NvaM9541a5YefvjhcA8HAAAYKKwrMAcPHtQf/vAHrVixQjExMQ3VU51MnTpVZWVl9nLw4MGmbgkAADSQsAJMQUGBDh8+rF69eik6OlrR0dHasmWLFixYoOjoaHk8HlVUVKi0tDRku0AgIK/XK0nyer1nPJVU+/O/qnG5XGe9+iJJTqdTLpcrZAEAAM1TWAHmpptuUlFRkQoLC+2lT58+Sk9Pt1+3aNFCeXl59jbFxcUqKSmRz+eTJPl8PhUVFenw4cN2TW5urlwul5KTk+2a0/dRW1O7DwAA8NMW1j0wbdq00RVXXBGyrlWrVmrfvr29fuTIkcrKylK7du3kcrn0wAMPyOfzqV+/fpKkgQMHKjk5WcOGDdOcOXPk9/s1bdo0ZWRkyOl0SpLGjh2r5557TpMmTdKIESO0adMmrV69Wjk5OfVxzAAAwHBh38T7r8ybN0+RkZEaMmSIysvLlZqaqkWLFtnjUVFRWrduncaNGyefz6dWrVpp+PDhmjlzpl2TlJSknJwcZWZmav78+erYsaOWLl2q1NTU+m4XAAAYKMKyLKupm2gIwWBQbrdbZWVl9X4/TJcp5l0J+nx2WlO3ANSPquPS6tanXt91TIpu1bT9APWA88p3zvf8zXchAQAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4YQWYxYsX68orr5TL5ZLL5ZLP59P69evt8ZMnTyojI0Pt27dX69atNWTIEAUCgZB9lJSUKC0tTS1btlR8fLwmTpyoqqqqkJrNmzerV69ecjqd6tq1q7Kzs+t+hAAAoNkJK8B07NhRs2fPVkFBgd577z3deOONuv3227V7925JUmZmpl5//XWtWbNGW7Zs0aFDhzR48GB7++rqaqWlpamiokLbtm3T8uXLlZ2drenTp9s1Bw4cUFpamgYMGKDCwkJNmDBBo0aN0saNG+vpkAEAgOkiLMuyfswO2rVrpyeffFJ33nmnLr74Yq1cuVJ33nmnJGnfvn3q0aOH8vPz1a9fP61fv1633nqrDh06JI/HI0lasmSJJk+erK+++koOh0OTJ09WTk6OPv74Y/s9hg4dqtLSUm3YsOG8+woGg3K73SorK5PL5foxh3iGLlNy6nV/jeHz2WlN3QJQP6qOS6tbn3p91zEpulXT9gPUA84r3znf83ed74Gprq7WqlWrdPz4cfl8PhUUFKiyslIpKSl2Tffu3dWpUyfl5+dLkvLz89WzZ087vEhSamqqgsGgfRUnPz8/ZB+1NbX7OJfy8nIFg8GQBQAANE9hB5iioiK1bt1aTqdTY8eO1dq1a5WcnCy/3y+Hw6G4uLiQeo/HI7/fL0ny+/0h4aV2vHbsh2qCwaBOnDhxzr5mzZolt9ttL4mJieEeGgAAMETYAaZbt24qLCzUjh07NG7cOA0fPlx79uxpiN7CMnXqVJWVldnLwYMHm7olAADQQKLD3cDhcKhr166SpN69e2vXrl2aP3++7r77blVUVKi0tDTkKkwgEJDX65Ukeb1e7dy5M2R/tU8pnV7z/SeXAoGAXC6XYmNjz9mX0+mU0+kM93AAAICBfvTnwNTU1Ki8vFy9e/dWixYtlJeXZ48VFxerpKREPp9PkuTz+VRUVKTDhw/bNbm5uXK5XEpOTrZrTt9HbU3tPgAAAMK6AjN16lQNGjRInTp10tGjR7Vy5Upt3rxZGzdulNvt1siRI5WVlaV27drJ5XLpgQcekM/nU79+/SRJAwcOVHJysoYNG6Y5c+bI7/dr2rRpysjIsK+ejB07Vs8995wmTZqkESNGaNOmTVq9erVycsy7QxsAADSMsALM4cOHdd999+nLL7+U2+3WlVdeqY0bN+rf//3fJUnz5s1TZGSkhgwZovLycqWmpmrRokX29lFRUVq3bp3GjRsnn8+nVq1aafjw4Zo5c6Zdk5SUpJycHGVmZmr+/Pnq2LGjli5dqtTU1Ho6ZAAAYLof/TkwFyo+ByYUnwODZoPPgUEzxHnlOw3+OTAAAABNhQADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA40Q3dQMAANSnLlNymroFNAKuwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwTlgBZtasWfrFL36hNm3aKD4+XnfccYeKi4tDak6ePKmMjAy1b99erVu31pAhQxQIBEJqSkpKlJaWppYtWyo+Pl4TJ05UVVVVSM3mzZvVq1cvOZ1Ode3aVdnZ2XU7QgAA0OyEFWC2bNmijIwMbd++Xbm5uaqsrNTAgQN1/PhxuyYzM1Ovv/661qxZoy1btujQoUMaPHiwPV5dXa20tDRVVFRo27ZtWr58ubKzszV9+nS75sCBA0pLS9OAAQNUWFioCRMmaNSoUdq4cWM9HDIAADBdhGVZVl03/uqrrxQfH68tW7bohhtuUFlZmS6++GKtXLlSd955pyRp37596tGjh/Lz89WvXz+tX79et956qw4dOiSPxyNJWrJkiSZPnqyvvvpKDodDkydPVk5Ojj7++GP7vYYOHarS0lJt2LDhvHoLBoNyu90qKyuTy+Wq6yGeVZcpOfW6v8bw+ey0pm4BqB9Vx6XVrU+9vuuYFN2qafvBBcfEf6NN1FDnlfM9f/+oe2DKysokSe3atZMkFRQUqLKyUikpKXZN9+7d1alTJ+Xn50uS8vPz1bNnTzu8SFJqaqqCwaB2795t15y+j9qa2n2cTXl5uYLBYMgCAACapzoHmJqaGk2YMEHXXXedrrjiCkmS3++Xw+FQXFxcSK3H45Hf77drTg8vteO1Yz9UEwwGdeLEibP2M2vWLLndbntJTEys66EBAIALXJ0DTEZGhj7++GOtWrWqPvups6lTp6qsrMxeDh482NQtAQCABhJdl43Gjx+vdevWaevWrerYsaO93uv1qqKiQqWlpSFXYQKBgLxer12zc+fOkP3VPqV0es33n1wKBAJyuVyKjY09a09Op1NOp7Muh/OTYOrvhLl3BwBwNmFdgbEsS+PHj9fatWu1adMmJSUlhYz37t1bLVq0UF5enr2uuLhYJSUl8vl8kiSfz6eioiIdPnzYrsnNzZXL5VJycrJdc/o+amtq9wEAAH7awroCk5GRoZUrV+r//u//1KZNG/ueFbfbrdjYWLndbo0cOVJZWVlq166dXC6XHnjgAfl8PvXr10+SNHDgQCUnJ2vYsGGaM2eO/H6/pk2bpoyMDPsKytixY/Xcc89p0qRJGjFihDZt2qTVq1crJ8fMqwgAAKB+hXUFZvHixSorK9OvfvUrdejQwV5efvllu2bevHm69dZbNWTIEN1www3yer165ZVX7PGoqCitW7dOUVFR8vl8+u1vf6v77rtPM2fOtGuSkpKUk5Oj3NxcXXXVVXr66ae1dOlSpaam1sMhAwAA04V1BeZ8PjImJiZGCxcu1MKFC89Z07lzZ73xxhs/uJ9f/epX+uCDD8JpDwAA/ETwXUgAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjRDd1AwCAC1eXKTlN3QJwVlyBAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDo9R44Jm4iOcn89Oa+oWAKDZ4woMAAAwDgEGAAAYhwADAACME3aA2bp1q2677TYlJCQoIiJCr776asi4ZVmaPn26OnTooNjYWKWkpOjTTz8NqTly5IjS09PlcrkUFxenkSNH6tixYyE1H330kfr376+YmBglJiZqzpw54R8dAABolsIOMMePH9dVV12lhQsXnnV8zpw5WrBggZYsWaIdO3aoVatWSk1N1cmTJ+2a9PR07d69W7m5uVq3bp22bt2qMWPG2OPBYFADBw5U586dVVBQoCeffFIzZszQ888/X4dDBAAAzU3YTyENGjRIgwYNOuuYZVl65plnNG3aNN1+++2SpD/96U/yeDx69dVXNXToUO3du1cbNmzQrl271KdPH0nSs88+q1tuuUVPPfWUEhIStGLFClVUVOill16Sw+HQ5ZdfrsLCQs2dOzck6AAAgJ+mer0H5sCBA/L7/UpJSbHXud1u9e3bV/n5+ZKk/Px8xcXF2eFFklJSUhQZGakdO3bYNTfccIMcDoddk5qaquLiYn3zzTdnfe/y8nIFg8GQBQAANE/1GmD8fr8kyePxhKz3eDz2mN/vV3x8fMh4dHS02rVrF1Jztn2c/h7fN2vWLLndbntJTEz88QcEAAAuSM3mg+ymTp2qrKws++dgMEiIAXBBMfGDGYELVb1egfF6vZKkQCAQsj4QCNhjXq9Xhw8fDhmvqqrSkSNHQmrOto/T3+P7nE6nXC5XyAIAAJqneg0wSUlJ8nq9ysvLs9cFg0Ht2LFDPp9PkuTz+VRaWqqCggK7ZtOmTaqpqVHfvn3tmq1bt6qystKuyc3NVbdu3dS2bdv6bBkAABgo7ABz7NgxFRYWqrCwUNKpG3cLCwtVUlKiiIgITZgwQY8++qhee+01FRUV6b777lNCQoLuuOMOSVKPHj108803a/To0dq5c6feffddjR8/XkOHDlVCQoIk6d5775XD4dDIkSO1e/duvfzyy5o/f37Ir4gAAMBPV9j3wLz33nsaMGCA/XNtqBg+fLiys7M1adIkHT9+XGPGjFFpaamuv/56bdiwQTExMfY2K1as0Pjx43XTTTcpMjJSQ4YM0YIFC+xxt9utN998UxkZGerdu7cuuugiTZ8+nUeoYQQT73PgCygBmCbCsiyrqZtoCMFgUG63W2VlZfV+P4yJJyjghxgVYKqOS6tbS5J6FP1VJ6yYf7EBgIbQUP9unO/5u9k8hQSg7kwK5bERJ7W3Z1N3AaCp8WWOAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4F3SAWbhwobp06aKYmBj17dtXO3fubOqWAADABeCCDTAvv/yysrKy9NBDD+n999/XVVddpdTUVB0+fLipWwMAAE3sgg0wc+fO1ejRo3X//fcrOTlZS5YsUcuWLfXSSy81dWsAAKCJRTd1A2dTUVGhgoICTZ061V4XGRmplJQU5efnn3Wb8vJylZeX2z+XlZVJkoLBYL33V1P+bb3vE8D5qY44qeD//ytYXf6taqyapm0I+IlqiPPr6fu1LOsH6y7IAPP111+rurpaHo8nZL3H49G+ffvOus2sWbP08MMPn7E+MTGxQXoE0HTc9qv7mrAL4KfN/UzD7v/o0aNyu93nHL8gA0xdTJ06VVlZWfbPNTU1OnLkiNq3b6+IiIh6e59gMKjExEQdPHhQLper3vaLMzHXjYN5bhzMc+NgnhtHQ86zZVk6evSoEhISfrDuggwwF110kaKiohQIBELWBwIBeb3es27jdDrldDpD1sXFxTVUi3K5XPzlaCTMdeNgnhsH89w4mOfG0VDz/ENXXmpdkDfxOhwO9e7dW3l5efa6mpoa5eXlyefzNWFnAADgQnBBXoGRpKysLA0fPlx9+vTRNddco2eeeUbHjx/X/fff39StAQCAJnbBBpi7775bX331laZPny6/36+rr75aGzZsOOPG3sbmdDr10EMPnfHrKtQ/5rpxMM+Ng3luHMxz47gQ5jnC+lfPKQEAAFxgLsh7YAAAAH4IAQYAABiHAAMAAIxDgAEAAMYhwJzFwoUL1aVLF8XExKhv377auXPnD9avWbNG3bt3V0xMjHr27Kk33nijkTo1Xzhz/cILL6h///5q27at2rZtq5SUlH/53wanhPtnutaqVasUERGhO+64o2EbbCbCnefS0lJlZGSoQ4cOcjqduuyyy/j34zyEO8/PPPOMunXrptjYWCUmJiozM1MnT55spG7NtHXrVt12221KSEhQRESEXn311X+5zebNm9WrVy85nU517dpV2dnZDdukhRCrVq2yHA6H9dJLL1m7d++2Ro8ebcXFxVmBQOCs9e+++64VFRVlzZkzx9qzZ481bdo0q0WLFlZRUVEjd26ecOf63nvvtRYuXGh98MEH1t69e63f/e53ltvttv7+9783cudmCXeeax04cMD62c9+ZvXv39+6/fbbG6dZg4U7z+Xl5VafPn2sW265xXrnnXesAwcOWJs3b7YKCwsbuXOzhDvPK1assJxOp7VixQrrwIED1saNG60OHTpYmZmZjdy5Wd544w3rwQcftF555RVLkrV27dofrN+/f7/VsmVLKysry9qzZ4/17LPPWlFRUdaGDRsarEcCzPdcc801VkZGhv1zdXW1lZCQYM2aNeus9XfddZeVlpYWsq5v377Wf/7nfzZon81BuHP9fVVVVVabNm2s5cuXN1SLzUJd5rmqqsq69tprraVLl1rDhw8nwJyHcOd58eLF1iWXXGJVVFQ0VovNQrjznJGRYd14440h67KysqzrrruuQftsTs4nwEyaNMm6/PLLQ9bdfffdVmpqaoP1xa+QTlNRUaGCggKlpKTY6yIjI5WSkqL8/PyzbpOfnx9SL0mpqannrMcpdZnr7/v2229VWVmpdu3aNVSbxqvrPM+cOVPx8fEaOXJkY7RpvLrM82uvvSafz6eMjAx5PB5dccUVevzxx1VdXd1YbRunLvN87bXXqqCgwP410/79+/XGG2/olltuaZSefyqa4lx4wX4Sb1P4+uuvVV1dfcan/Xo8Hu3bt++s2/j9/rPW+/3+BuuzOajLXH/f5MmTlZCQcMZfGnynLvP8zjvv6MUXX1RhYWEjdNg81GWe9+/fr02bNik9PV1vvPGGPvvsM/3+979XZWWlHnroocZo2zh1med7771XX3/9ta6//npZlqWqqiqNHTtW//M//9MYLf9knOtcGAwGdeLECcXGxtb7e3IFBkaaPXu2Vq1apbVr1yomJqap22k2jh49qmHDhumFF17QRRdd1NTtNGs1NTWKj4/X888/r969e+vuu+/Wgw8+qCVLljR1a83K5s2b9fjjj2vRokV6//339corrygnJ0ePPPJIU7eGH4krMKe56KKLFBUVpUAgELI+EAjI6/WedRuv1xtWPU6py1zXeuqppzR79my99dZbuvLKKxuyTeOFO89/+9vf9Pnnn+u2226z19XU1EiSoqOjVVxcrEsvvbRhmzZQXf48d+jQQS1atFBUVJS9rkePHvL7/aqoqJDD4WjQnk1Ul3n+4x//qGHDhmnUqFGSpJ49e+r48eMaM2aMHnzwQUVG8v/x9eFc50KXy9UgV18krsCEcDgc6t27t/Ly8ux1NTU1ysvLk8/nO+s2Pp8vpF6ScnNzz1mPU+oy15I0Z84cPfLII9qwYYP69OnTGK0aLdx57t69u4qKilRYWGgv//Ef/6EBAwaosLBQiYmJjdm+Mery5/m6667TZ599ZgdESfrkk0/UoUMHwss51GWev/322zNCSm1otPgqwHrTJOfCBrs92FCrVq2ynE6nlZ2dbe3Zs8caM2aMFRcXZ/n9fsuyLGvYsGHWlClT7Pp3333Xio6Otp566ilr79691kMPPcRj1Ocp3LmePXu25XA4rL/+9a/Wl19+aS9Hjx5tqkMwQrjz/H08hXR+wp3nkpISq02bNtb48eOt4uJia926dVZ8fLz16KOPNtUhGCHceX7ooYesNm3aWH/5y1+s/fv3W2+++aZ16aWXWnfddVdTHYIRjh49an3wwQfWBx98YEmy5s6da33wwQfWF198YVmWZU2ZMsUaNmyYXV/7GPXEiROtvXv3WgsXLuQx6qbw7LPPWp06dbIcDod1zTXXWNu3b7fHfvnLX1rDhw8PqV+9erV12WWXWQ6Hw7r88sutnJycRu7YXOHMdefOnS1JZywPPfRQ4zdumHD/TJ+OAHP+wp3nbdu2WX379rWcTqd1ySWXWI899phVVVXVyF2bJ5x5rqystGbMmGFdeumlVkxMjJWYmGj9/ve/t7755pvGb9wgb7/99ln/va2d2+HDh1u//OUvz9jm6quvthwOh3XJJZdYy5Yta9AeIyyLa2gAAMAs3AMDAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHH+H/d199tGubfiAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"plt.hist(simple_model.predict(X_valid))\n",
"_ = plt.axvline(x=0.5, color=\"orange\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv (3.12.10)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}