From 9c09239b0294471a2943b0d876b72c6e9b3d41a4 Mon Sep 17 00:00:00 2001 From: Aleksey Filippov Date: Fri, 6 Dec 2024 15:38:26 +0400 Subject: [PATCH] Lec3 --- data/orders/customers.csv | 101 + data/orders/order_items.csv | 116 + data/orders/orders.csv | 101 + data/orders/products.csv | 101 + data/orders/sellers.csv | 88 + lec3.ipynb | 4278 +++++++++++++++++++++++++++++++++++ poetry.lock | 128 +- pyproject.toml | 1 + 8 files changed, 4913 insertions(+), 1 deletion(-) create mode 100644 data/orders/customers.csv create mode 100644 data/orders/order_items.csv create mode 100644 data/orders/orders.csv create mode 100644 data/orders/products.csv create mode 100644 data/orders/sellers.csv create mode 100644 lec3.ipynb diff --git a/data/orders/customers.csv b/data/orders/customers.csv new file mode 100644 index 0000000..f3427ae --- /dev/null +++ b/data/orders/customers.csv @@ -0,0 +1,101 @@ +customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state +41ce2a54c0b03bf3443c3d931a367089,3a653a41f6f9fc3d2a113cf8398680e8,75265,vianopolis,GO +7f8c8b9c2ae27bf3300f670c3d478be8,634f09f6075fe9032e6c19609ffe995a,44024,feira de santana,BA +569cf68214806a39acc0f39344aea67f,c2551ea089b7ebbc67a2ea8757152514,44380,cruz das almas,BA +d3e3b74c766bc6214e0c830b17ee2341,e97109680b052ee858d93a539597bba7,35400,ouro preto,MG +d2b091571da224a1b36412c18bc3bbfe,d699688533772c15a061e8ce81cb56df,4001,sao paulo,SP +3b6828a50ffe546942b7a473d70ac0fc,ccafc1c3f270410521c3c6f3b249870f,74820,goiania,GO +148348ff65384b4249b762579532e248,db979bdfe0bbba29ecd3df3f6c50bea2,87711,paranavai,PR +4f28355e5c17a4a42d3ce2439a1d4501,4acce2834231e13b1514915adda5ec2b,21910,rio de janeiro,RJ +3187789bec990987628d7a9beb4dd6ac,6087cfc70fd833cf2db637a5e6e9d76b,88780,imbituba,SC +8628fac2267e8c8804525da99c10ed0e,7973a6ba9c81ecaeb3d628c33c7c7c48,85555,palmas,PR +4fa1cd166fa598be6de80fa84eaade43,68954feaafe4dd638f3bd3e2afa174ec,8473,sao paulo,SP 
+cf8ffeddf027932e51e4eae73b384059,6cbe8a392b76916e84c2faf69d0d0da0,13454,santa barbara d'oeste,SP +48558a50a7ba1aab61891936d2ca7681,42f80af2e6c585667e4eb416859ae153,39370,jequitai,MG +8b212b9525f9e74e85e37ed6df37693e,f4a7ef6bd931f83d75d83b71c94e90df,13568,sao carlos,SP +c77ee2d8ba1614a4d489a44166894938,9c9cef121cb812cb301babddc2d8331e,38067,uberaba,MG +be8c14c16a4d47194ccdfe10f1fc5b1a,c86a25b8f5f6c203bb3471553bdc3200,13157,cosmopolis,SP +f5618502bee8eafdee72fb6955e2ebdf,fa0ee7ceb94193fb02aa78ce3a55695a,6395,carapicuiba,SP +3df704f53d3f1d4818840b34ec672a9f,04cf8185c71090d28baa4407b2e6d600,5271,sao paulo,SP +67407057a7d5ee17d1cd09523f484d13,7cfba6e55439cae3fd2479d62fafe67f,22240,rio de janeiro,RJ +afb19a4b667cb708caab312757ec3d3f,a7e7b19ff34ab885f7b7331de2417cf3,78043,cuiaba,MT +caded193e8e47b8362864762a83db3c5,08fb46d35bb3ab4037202c23592d1259,13215,jundiai,SP +241e78de29b3090cfa1b5d73a8130c72,c63e44efa43f3947087aee96b388d949,4658,sao paulo,SP +62b423aab58096ca514ba6aa06be2f98,9c9242ad7f1b52d926ea76778e1c0c57,18052,sorocaba,SP +494dded5b201313c64ed7f100595b95c,f2a85dec752b8517b5e58a06ff3cd937,20780,rio de janeiro,RJ +e28dd4261bed9c7ba89ecaf411b88f7c,b6aa1d5781553afaa244c3e42246d93c,88302,itajai,SC +4afc1dcca5fe8926fc97d60a4497f8ab,a464f750556546a0989d9326ec003ccf,8220,sao paulo,SP +761df82feda9778854c6dafdaeb567e4,1428917cd397d4f9ac0fde76dd6f2266,69317,boa vista,RR +82f0b75bb50fcb30711e5277e36b3983,4a8c8f751984985cd49f74249da95aae,8485,sao paulo,SP +f5458ddc3545711efa883dd7ae7c4497,661a5e18a28b34880ccc60112f2b8e8e,62360,ibiapina,CE +295ae9b35379e077273387ff64354b6f,f1f4f45c8602d0db1329eed1c8e935d4,19780,quata,SP +3a874b4d4c4b6543206ff5d89287f0c3,a25d5f94840d3c6a1a49f271ed83f4ec,21715,rio de janeiro,RJ +f54a9f0e6b351c431402b8461ea51999,39382392765b6dc74812866ee5ee92a7,99655,faxinalzinho,RS +456dc10730fbdba34615447ea195d643,1974875b4a1d2e2ee6d586e3ba4d7602,5634,sao paulo,SP +f178c1827f67a8467b0385b7378d951a,9d9ab3b77f0416765b3fbedf94a942a4,12070,taubate,SP 
+0bf19317b1830a69e55b40710576aa7a,5ddb4fdd9cef2450d17ae20639815885,13218,jundiai,SP +8644be24d48806bc3a88fd59fb47ceb1,4ca5f90433afb5493247f0bafb583483,73350,brasilia,DF +3391c4bc11a817e7973e498b0b023158,1b542f810484d8c042aed33a7c61a218,4561,sao paulo,SP +fee181bf648906d1c57f84f216976286,4754e3b66497719a91b36268ed9c5718,13760,tapiratiba,SP +74805bc388861fa350ed2fade8444e0b,5d710d9a48ebb7fe5ffc2940ff29f346,38401,uberlandia,MG +6772a0a230a2667d16c3620f000e1348,c7a9a76a4b24a7e7b2caff982409b7ee,58600,santa luzia,PB +4632eb5a8f175f6fe020520ae0c678f3,6da92ae920ab16fc4eceb8fcd7bd43ce,8280,sao paulo,SP +d9ef95f98d8da3b492bb8c0447910498,a2649503b92028291f011a976619b322,26572,mesquita,RJ +059f7fc5719c7da6cbafe370971a8d70,d0ff1a7468fcc46b8fc658ab35d2a12c,13186,hortolandia,SP +ddaff536587109b89777e0353215e150,c796780c7daeab9e94cc052b1f103b21,26600,paracambi,RJ +dd5095632e3953fc0947b8ab5176b0be,da45a9a1df408c39f013b9b0b505042c,70680,brasilia,DF +df9b032b2ad0fd6bf37dfb48e5f83845,410979f3cfd34e467d4fad78bd0f0219,89440,irineopolis,SC +684fa6da5134b9e4dab731e00011712d,ddf60e20e6e262e2136801ce5cd628b0,49030,aracaju,SE +2b56e94c2f66f2d97cfa63356f69cee8,cc1a30280651daf2d918ed7868575771,95270,flores da cunha,RS +9f6618c17568ac301465fe7ad056c674,e3bcfea9bab07b492391664fc1ffc28a,44180,antonio cardoso,BA +29cb486c739f9774c8eb542e07b56cd2,2ae3c67452283d5a0d30b32e0d33296e,71505,brasilia,DF +5f16605299d698660e0606f7eae2d2f9,92fd8aa5948e20c43a014c44c025c5e1,77480,alvorada,TO +f88197465ea7920adcdbec7375364d82,7c142cf63193a1473d2e66489a9ae977,59296,sao goncalo do amarante,RN +5dda11942d4f77bee3a46d71e442aec4,6a0e43f0d7e1b5539e4c58a26ebe35da,46740,boninal,BA +a90391a47de936d56c66a5366cba1462,32de2a7a93dbfc527b3f584744b9c6ce,37310,bom jardim de minas,MG +9916715c2ab6ee1710c9c32f0a534ad2,bf0303939d54b8df5da4762bbaab1955,22631,rio de janeiro,RJ +636e15840ab051faa13d3f781b6e4233,65e5aaf9f721945f29cdba45c206cb83,14090,ribeirao preto,SP 
+8ab97904e6daea8866dbdbc4fb7aad2c,72632f0f9dd73dfee390c9b22eb56dd6,9195,santo andre,SP +388025bec8128ff20ec1a316ed4dcf02,f9effeed3df9ae063a58c0759b96f8b2,85804,cascavel,PR +c7340080e394356141681bd4c9b8fe31,3e4fd73f1e86b135b9b121d6abbe9597,19400,presidente venceslau,SP +503740e9ca751ccdda7ba28e9ab8f608,80bb27c7c16e8f973207a5086ab329e2,86320,congonhinhas,PR +816f8653d5361cbf94e58c33f2502a5c,37363700139c1aef873bbcd916e57dfd,5778,sao paulo,SP +9ef432eb6251297304e76186b10a928d,7c396fd4830fd04220f754e42b4e5bff,3149,sao paulo,SP +68451b39b1314302c08c65a29f1140fc,781ae350edb16842380e81d7c7feb431,20740,rio de janeiro,RJ +b673f0597cb0c4d12778f731045f361a,04e495a3f45df8b41be2e934bbc16961,94055,gravatai,RS +3a897024068ed42a183de61d5727d866,adeefbe14d26d3bf90facfeaae35d605,4845,sao paulo,SP +52142aa69d8d0e1247ab0cada0f76023,a6fefcd9f434474cf6fcd8ed1102fd63,55540,palmares,PE +55e6b290205c84ddd23ddf5eb134efd4,7f2eb9cf900070f2e7a7f0e95719f85b,13145,paulinia,SP +ed0271e0b7da060a393796590e7b737a,36edbb3fb164b1f16485364b6fb04c73,98900,santa rosa,RS +a9d37ddc8ba4d9f6dbac7d8ec378cc95,3c0402bcc3ec3b33fc4430eb6c08720a,89225,joinville,SC +b0830fb4747a6c6d20dea0b8c802d7ef,af07308b275d755c9edb36a90c618231,47813,barreiras,BA +bb2f5e670f7155dc622c57e4b31d0a69,31b8fa2573bde01af4737e8ed29c348b,2346,sao paulo,SP +756fb9391752dad934e0fe3733378e57,394b2ce444baae9ae609f5d32000de0f,47850,luis eduardo magalhaes,BA +5bb39c890c91b1d26801aa19a9336eac,a71cac9f356cfeb9db35061020806212,2407,sao paulo,SP +7e20bf5ca92da68200643bda76c504c6,576ea0cab426cd8a00fad9a9c90a4494,41213,salvador,BA +2a1dfb647f32f4390e7b857c67458536,5f7d7732b351ce851a158528581af05f,54330,jaboatao dos guararapes,PE +cce89a605105b148387c52e286ac8335,bd13608b9c6033892ce62269b50a0afc,9182,santo andre,SP +738b086814c6fcc74b8cc583f8516ee3,6e26bbeaa107ec34112c64e1ee31c0f5,21381,rio de janeiro,RJ +81e08b08e5ed4472008030d70327c71f,0e764fc1a13e47e900c3d59a989753e8,36045,juiz de fora,MG 
+911e4c37f5cafe1604fe6767034bf1ae,51838d41add414a0b1b989b7d251d9ee,13068,campinas,SP +f26a435864aebedff7f7c84f82ee229f,bb4d84a2b45b22ed710ac8c0dec63d1a,8552,poa,SP +9bdf08b4b3b52b5526ff42d37d47f222,932afa1e708222e5821dac9cd5db4cae,26525,nilopolis,RJ +64fb950e760ec8b0db79154a1fa9c1bf,b11b7871c2b8be2d11fab954f58542f2,18017,sorocaba,SP +bf141bf67fbe428d558bcf0e018eab60,c756e1910755edd88c00ac3f46baba4b,31255,belo horizonte,MG +3135962ee745ef39b85576df7ddbaa99,00b2ca23369b68c4d4105ecea9c0cb93,62970,alto santo,CE +1833a0540067becaf59368fe4cd4303a,ca73adc05ad5d0d880de79b5ea3253b3,4053,sao paulo,SP +7f2178c5d771e17f507d3c1637339298,12e7a2c201751ddc979e7a45cef500f3,1038,sao paulo,SP +c622b892a190735ef81c0087973fa16d,439ced9aafa171a1ac88efa951c7db0a,85618,flor da serra do sul,PR +79183cd650e2bb0d475b0067d45946ac,c77154776ead8e798c2d684205938f71,90620,porto alegre,RS +332df68ccac2f2f7d9e11299188f8bce,bb7ef994cc22b1fc694ac59fb377b824,39135,presidente kubitschek,MG +a166da34890074091a942054b36e4265,451e48381edab7f1f6dbfa6d728616ff,89070,blumenau,SC +f5afca14dfa9dc64251cf2b45c54c363,38cad70d154a4dcc42b598d5c01f7ef1,25211,duque de caxias,RJ +31ad1d1b63eb9962463f764d4e6e0c9d,299905e3934e9e181bfb2e164dd4b4f8,18075,sorocaba,SP +7711cf624183d843aafe81855097bc37,782987b81c92239d922aa49d6bd4200b,4278,sao paulo,SP +72ae281627a6102d9b3718528b420f8a,b8df986511d928829c3192c2ed081eba,3323,sao paulo,SP +12fd2740039676063a874b9567dfa651,372e0fc66eacb8698e4f9997d366d961,12230,sao jose dos campos,SP +19402a48fe860416adf93348aba37740,e2dfa3127fedbbca9707b36304996dab,4812,sao paulo,SP +9b18f3fc296990b97854e351334a32f6,b2cac0b16835dabf811b204127f58afa,6330,carapicuiba,SP +05e996469a2bf9559c7122b87e156724,5229b8e4d7d2b9b676c2083c17b1ecd0,93180,portao,RS +2932d241d1f31e6df6c701d52370ae02,f7603d34c795584792a484186233e6e5,3942,sao paulo,SP +93ada7a24817edda9f4ab998fa823d16,cd148470c375939669971e8a032b16b4,14091,ribeirao preto,SP diff --git a/data/orders/order_items.csv b/data/orders/order_items.csv 
new file mode 100644 index 0000000..fa664bb --- /dev/null +++ b/data/orders/order_items.csv @@ -0,0 +1,116 @@ +order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value +0a4a2fccb27bd83a892fa503987a595b,1,f7d7b5c58704fb359a74580622800051,4a3ca9315b744ce9f8e9374361493884,2017-04-28 20:55:09,38.5,24.84 +0e782c3705510e717d28907746cbda82,1,79da264732f717f10ebf5d102aa6c32a,562fc2f2c2863ab7e79a9e4388a58a14,2018-05-07 08:52:58,29.99,7.39 +10c320f977c6a18f91b2d14be13128c6,1,b3be1f83cef05668c25e134852d44545,3b15288545f8928d3e65a8f949a28291,2017-05-16 21:02:45,110.99,21.27 +116f0b09343b49556bbad5f35bee0cdf,1,a47295965bd091207681b541b26e40a5,ea8482cd71df3c1969d7b9473ff13abc,2018-01-02 23:50:22,27.99,15.1 +136cce7faa42fdb2cefd53fdc79a6098,1,a1804276d9941ac0733cfd409f5206eb,dc8798cbf453b7e0f98745e396cc5616,2017-04-19 13:25:17,49.9,16.05 +138849fd84dff2fb4ca70a0a34c4aa1c,1,304fad8dc4d2012dc4062839972f2d96,6860153b69cc696d5dcfe1cdaaafcf62,2018-02-08 02:53:07,39.47,13.37 +1790eea0b567cf50911c057cf20f90f9,1,2d8f2be4f08788ee3bf5356af2b2ee6c,d91fb3b7d041e83b64a00a3edfb37e4f,2018-04-22 22:10:26,186.9,38.0 +1e7aff52cdbb2451ace09d0f848c3699,1,8c591ab0ca519558779df02023177f44,a1043bafd471dff536d0c462352beb48,2017-05-25 19:05:17,119.99,34.2 +203096f03d82e0dffbc41ebc2e2bcfb7,1,5ac9d9e379c606e36a8094a6046f75dc,633ecdf879b94b5337cca303328e4a25,2017-09-25 04:04:09,109.9,8.96 +20e0101b20700188cadb288126949685,1,64d0feb1bcf9c7fe7b5dad3271c10910,e5a38146df062edaf55c38afa99e42dc,2018-01-26 19:36:35,89.18,16.38 +23f553848a03aaab35bb3f9f87725125,1,cac9e5692471a0700418aa3400b9b2b1,36890be00bbfc1cdb9a4a38a6af05a69,2018-06-15 09:31:23,99.2,18.57 +25f4376934e13d3508486352e11a5db0,1,aca2eb7d00ea1a7b8ebd4e68314663af,955fee9216a65b617aa5c0531780ce60,2018-05-22 01:17:39,69.9,12.43 +2711a938db643b3f0b62ee2c8a2784aa,1,ad1128daf194f4b6ac4256e16233497c,1ca7077d890b907f89be8c954a02686a,2017-12-29 02:15:31,45.0,14.1 
+2807d0e504d6d4894d41672727bc139f,1,6893767814d1ac82a81bcd365e1cc918,8b321bb669392f5163d04c59e235e066,2018-02-08 20:50:22,9.5,7.78 +2ce1ad82022c1ba30c2079502ac725aa,1,f35927953ed82e19d06ad3aac2f06353,669ae81880e08f269a64487cfb287169,2017-08-17 04:15:29,115.0,15.56 +2edfd6d1f0b4cd0db4bf37b1b224d855,1,30469bb5ea377eae7121981e2f0778e4,80e6699fe29150b372a0c8a1ebf7dcc8,2017-06-21 03:05:45,113.0,28.15 +34513ce0c4fab462a55830c0989c7edb,1,f7e0fa615b386bc9a8b9eb52bc1fff76,87142160b41353c4e5fca2360caf6f92,2017-07-19 20:10:08,98.0,16.13 +3bc77ce8be27211bac313c2daa402d1a,1,f497ba62f1d6b4f6a3a3266fa8623ad3,6df688df543f90e9b38f4319e75a9d88,2017-04-12 22:50:24,58.2,8.78 +403b97836b0c04a622354cf531062e5f,1,638bbb2a5e4f360b71f332ddfebfd672,c4af86330efa7a2620772227d2d670c9,2018-01-12 19:09:04,1299.0,77.45 +40c5e18f7d112b59b3e5113a59a905b3,1,595fac2a385ac33a80bd5114aec74eb8,ef0ace09169ac090589d85746e3e036f,2018-06-15 10:58:32,119.9,8.78 +41bb5cee06dbf170878a9ef93ac7e7f5,1,43ee88561093499d9e571d4db5f20b79,23613d49c3ac2bd302259e55c06c050c,2018-05-28 08:52:24,10.9,12.79 +432aaf21d85167c2c86ec9448c4e42cc,1,72d3bf1d3a790f8874096fcf860e3eff,0bae85eb84b9fb3bd773911e89288d54,2018-03-07 15:10:47,38.25,16.11 +434d158e96bdd6972ad6e6d73ddcfd22,1,c7df652246ed7b3300aaf46960c141e4,a5cba26a62b8b4d0145b68b841e62e7f,2018-06-13 03:35:15,445.0,63.17 +47770eb9100c2d0c44946d9cf07ec65d,1,aa4383b373c6aca5d8797843e5594415,4869f7a5dfa277a7dca6462dcf3b52b2,2018-08-13 08:55:23,159.9,19.22 +47aa4816b27ba60ec948cd019cc1afc1,1,1501b0033c68a37fa9560033a440e529,33cbbec1e7e1044aaf11d152172c776f,2018-06-29 03:31:40,53.44,18.47 +53cdb2fc8bc7dce0b6741e2150273451,1,595fac2a385ac33a80bd5114aec74eb8,289cdb325fb7e7f891c38608bf9e0962,2018-07-30 03:24:27,118.7,22.76 +5820a1100976432c7968a52da59e9364,1,1deda1acffb44ed38494667d7e49a9f3,f52c2422904463fdd7741f99045fecb6,2018-07-31 11:44:19,33.9,18.34 +5acce57f8d9dfd55fa48e212a641a69d,1,0cd9f302c8a5b076ffa5c3567c6705fd,85d9eb9ddc5d00ca9336a2219c97bb13,2017-08-08 02:56:02,27.9,15.1 
+5ff96c15d0b717ac6ad1f3d77225a350,1,10adb53d8faa890ca7c2f0cbcb68d777,1900267e848ceeba8fa32d80c1a5f5a8,2018-07-27 17:55:14,19.9,12.8 +60550084e6b4c0cb89a87df1f3e5ebd9,1,9b37a918bcf2c8e1064e867cf1df4637,f27e33c6d29b5138fa9967bcd445b6d5,2018-03-01 02:10:52,39.9,26.89 +634e8f4c0f6744a626f77f39770ac6aa,1,69d980b4120a76616d7b237d731d6156,744dac408745240a2c2528fb1b6028f3,2017-08-15 18:45:18,219.0,15.28 +641fb0752bf5b5940c376b3a8bb9dc52,1,60184212dae4e6b0da32bf54271a8c4a,b33e7c55446eabf8fe1a42d037ac7d6d,2017-12-21 00:14:55,369.0,17.33 +6514b8ad8028c9f2cc2374ded245783f,1,4520766ec412348b8d4caa5e8a18c464,16090f2ca825584b5a147ab24aa30c86,2017-05-22 13:22:11,59.99,15.17 +66e4624ae69e7dc89bd50222b59f581f,1,b37b72d5a56f887725c2862184b8cab8,db4350fd57ae30082dec7acbaacc17f9,2018-03-15 15:30:45,22.99,22.85 +66e4624ae69e7dc89bd50222b59f581f,2,b37b72d5a56f887725c2862184b8cab8,db4350fd57ae30082dec7acbaacc17f9,2018-03-15 15:30:45,22.99,22.85 +686541986ecfb7d9296eb67719973bf0,1,3014e35fd70fce29095ced5cdc89f4ce,5656537e588803a555b8eb41f07a944b,2018-02-15 13:35:31,24.89,15.1 +688052146432ef8253587b930b01a06d,1,d1c427060a0f73f6b889a5c7c61f2ac4,a1043bafd471dff536d0c462352beb48,2018-04-26 09:31:11,119.0,24.97 +688052146432ef8253587b930b01a06d,2,db56f6d2b04c89eae4daba188842fd7b,2a84855fd20af891be03bc5924d2b453,2018-04-26 09:31:11,199.0,3.12 +68873cf91053cd11e6b49a766db5af1a,1,15a9e834e89eab39d973492882c658d6,a673821011d0cec28146ea42f5ab767f,2017-12-07 02:51:18,79.9,11.76 +68e48e68da1f50f7c5838ea75e3a20dd,1,a659cb33082b851fb87a33af8f0fff29,817245bcc3badd82bbd222e0366951a6,2018-06-22 17:00:57,84.9,13.25 +68e48e68da1f50f7c5838ea75e3a20dd,2,a659cb33082b851fb87a33af8f0fff29,817245bcc3badd82bbd222e0366951a6,2018-06-22 17:00:57,84.9,13.25 +68e48e68da1f50f7c5838ea75e3a20dd,3,a659cb33082b851fb87a33af8f0fff29,817245bcc3badd82bbd222e0366951a6,2018-06-22 17:00:57,84.9,13.25 +68e48e68da1f50f7c5838ea75e3a20dd,4,a659cb33082b851fb87a33af8f0fff29,817245bcc3badd82bbd222e0366951a6,2018-06-22 17:00:57,84.9,13.25 
+6a0a8bfbbe700284feb0845d95e0867f,1,f8a8f05a35976a91aed5cccc3992c357,4a3ca9315b744ce9f8e9374361493884,2017-11-28 11:46:50,83.9,17.84 +6abaad69b8b349c3a529b4b91ce18e46,1,3dd6c9d499e7c311a29e08afe1fd8fc6,537eb890efff034a88679788b647c564,2018-02-21 09:47:59,42.9,14.1 +6b860b35691d486e45dc98e3514ec5f6,1,c827fb43ad0fb8708f34c2911fdc164b,76d5af76d0271110f9af36c92573f765,2017-12-14 02:49:54,544.0,30.36 +6d25592267349b322799e2beb687871e,1,c3ba4e8d3cb30049213b682e751e9d00,6560211a19b47992c3666cc44a7e94c0,2018-08-30 04:10:18,93.0,7.91 +6d25592267349b322799e2beb687871e,2,c3ba4e8d3cb30049213b682e751e9d00,6560211a19b47992c3666cc44a7e94c0,2018-08-30 04:10:18,93.0,7.91 +6ea2f835b4556291ffdc53fa0b3b95e8,1,be021417a6acb56b9b50d3fd2714baa8,f5f46307a4d15880ca14fab4ad9dfc9b,2017-11-30 00:21:09,339.0,17.12 +6ebaec694d7025e2ad4a05dba887c032,1,e251ebd2858be1aa7d9b2087a6992580,001cca7ae9ae17fb1caed9dfb1094831,2017-05-24 14:05:17,139.0,14.72 +7206b86ea789983f7a273ea7fa0bc2a8,1,9a469eaf45dfbc43d39ba1977a3c07af,d2374cbcbb3ca4ab1086534108cc3ab7,2018-03-30 17:27:57,36.9,12.79 +734e7d1bbaeb2ff82521ca0fe6fb6f79,1,278b3c6462e86b4556b99989513ddf73,d1ef48b38baca7e831711c4a0aeb398f,2018-06-13 08:31:12,29.99,13.47 +76c6e866289321a7c93b82b54852dc33,1,ac1789e492dcd698c5c10b97a671243a,63b9ae557efed31d1f7687917d248a8d,2017-01-27 18:29:09,19.9,16.05 +77e9941864fc840be8e4b1ba5347c0f7,1,a01d1cbb398e386a4a8f8364401a7584,d566c37fa119d5e66c4e9052e83ee4ea,2018-08-07 09:10:14,65.9,37.37 +82566a660a982b15fb86e904c8d32918,1,72a97c271b2e429974398f46b93ae530,094ced053e257ae8cae57205592d6712,2018-06-18 03:13:12,31.9,18.23 +82bce245b1c9148f8d19a55b9ff70644,1,a5a0e71a81ae65aa335e71c06261e260,c8417879a15366a17c30af34c798c332,2017-04-27 05:15:56,38.0,15.56 +82bce245b1c9148f8d19a55b9ff70644,2,a5a0e71a81ae65aa335e71c06261e260,c8417879a15366a17c30af34c798c332,2017-04-27 05:15:56,38.0,15.56 +82bce245b1c9148f8d19a55b9ff70644,3,a5a0e71a81ae65aa335e71c06261e260,c8417879a15366a17c30af34c798c332,2017-04-27 05:15:56,38.0,15.56 
+82bce245b1c9148f8d19a55b9ff70644,4,a5a0e71a81ae65aa335e71c06261e260,c8417879a15366a17c30af34c798c332,2017-04-27 05:15:56,38.0,15.56 +82bce245b1c9148f8d19a55b9ff70644,5,a5a0e71a81ae65aa335e71c06261e260,c8417879a15366a17c30af34c798c332,2017-04-27 05:15:56,38.0,15.56 +83018ec114eee8641c97e08f7b4e926f,1,c35498fbb4358837ae16850f50c3fd22,70a12e78e608ac31179aea7f8422044b,2017-11-01 16:07:35,76.0,16.97 +8447ff843b2616c50c0ced28ab1dae03,1,7a10781637204d8d10485c71a6108a2e,4869f7a5dfa277a7dca6462dcf3b52b2,2017-12-29 02:37:45,219.9,18.79 +8563039e855156e48fccee4d611a3196,1,bff2010b28e8fbcff5a9db9d3fea5ac4,955fee9216a65b617aa5c0531780ce60,2018-02-22 15:15:34,78.0,28.95 +85ce859fd6dc634de8d2f1e290444043,1,cce679660c66e6fbd5c8091dfd29e9cd,d2374cbcbb3ca4ab1086534108cc3ab7,2017-11-29 00:14:22,17.9,11.85 +86f21bf63784876b9fd6d35f46581d72,1,5526b1ae9ab2688cf600783cece249df,0b90b6df587eb83608a64ea8b390cf07,2018-04-23 22:49:32,98.44,22.4 +8f06cc6465925031568537b815f1198d,1,12087840651e83b48206b82c213b76fd,5b925e1d006e9476d738aa200751b73b,2017-11-21 11:46:42,299.0,18.34 +91b2a010e1e45e6ba3d133fa997597be,1,ba74c6b75d2ad7503175809688d5a03c,7d13fca15225358621be4086e1eb0964,2018-05-09 12:55:01,178.99,13.69 +948097deef559c742e7ce321e5e58919,1,cd935d283d47f1050c505e1c39c48b67,a3a38f4affed601eb87a97788c949667,2017-08-10 17:25:11,69.9,25.77 +949d5b44dbf5de918fe9c16f97b45f8a,1,d0b61bfb1de832b15ba9d266ca96e5b0,66922902710d126a0e7d26b0e3805106,2017-11-23 19:45:59,45.0,27.2 +95266dbfb7e20354baba07964dac78d5,1,bb7181410b4e02f93f3697f765db53c7,855668e0971d4dfd7bef1b6a4133b41b,2018-01-26 08:07:31,129.99,57.58 +974c1993ab8024d3ed16229183c2308d,1,5e2ba75ad255ff60b1c76c5bf526ae9b,f84a00e60c73a49e7e851c9bdca3a5bb,2017-02-24 11:45:39,69.9,14.66 +989225ba6d0ebd5873335f7e01de2ae7,1,6b64362e89896be7589621df54be089e,77530e9772f57a62c906e1c21538ab82,2017-12-20 13:54:13,49.0,14.1 +9defaf92cff22420e4e8ef7784815a55,1,cf944645d4ff2a3eed3ae17f641ea861,a6fe7de3d16f6149ffe280349a8535a0,2018-05-23 13:30:30,49.9,12.79 
+9faeb9b2746b9d7526aef5acb08e2aa0,1,f48eb5c2fde13ca63664f0bb05f55346,f7ba60f8c3f99e7ee4042fdef03b70c4,2018-07-30 14:55:10,60.0,15.52 +9faeb9b2746b9d7526aef5acb08e2aa0,2,f48eb5c2fde13ca63664f0bb05f55346,f7ba60f8c3f99e7ee4042fdef03b70c4,2018-07-30 14:55:10,60.0,15.52 +a4591c265e18cb1dcee52889e2d8acc3,1,060cb19345d90064d1015407193c233d,8581055ce74af1daba164fdbd55a40de,2017-07-13 22:10:13,147.9,27.36 +a685d016c8a26f71a0bb67821070e398,1,ebd7c847c1e1cb69ec374ae0ebee1f4c,391fc6631aebcf3004804e51b40bcf1e,2017-03-17 18:14:36,84.9,14.36 +a6aeb116d2cb5013eb8a94585b71ffef,1,163e6400e6dadd0fe04775c5e9331fda,855668e0971d4dfd7bef1b6a4133b41b,2017-09-19 14:44:39,50.0,9.34 +a910f58086d58b3ae6f37aa712d377b9,1,75d6b6963340c6063f7f4cfcccfe6a30,cc419e0650a3c5ba77189a1882b7556a,2017-09-22 09:35:18,56.99,15.84 +a910f58086d58b3ae6f37aa712d377b9,2,75d6b6963340c6063f7f4cfcccfe6a30,cc419e0650a3c5ba77189a1882b7556a,2017-09-22 09:35:18,56.99,15.84 +acce194856392f074dbf9dada14d8d82,1,d70f38e7f79c630f8ea00c993897042c,977f9f63dd360c2a32ece2f93ad6d306,2018-06-13 00:35:10,90.9,48.64 +acce194856392f074dbf9dada14d8d82,2,9451e630d725c4bb7a5a206b48b99486,d673a59aac7a70d8b01e6902bf090a11,2018-06-13 00:35:10,39.5,48.64 +ad21c59c0840e6cb83a9ceb5573f8159,1,65266b2da20d04dbe00c5c2d3bb7859e,2c9e548be18521d1c43cde1c582c6de8,2018-02-19 20:31:37,19.9,8.72 +b276e4f8c0fb86bd82fce576f21713e0,1,c6c1f263e076bd9c1f1640250a5d0c29,fe2032dab1a61af8794248c8196565c9,2018-08-02 23:45:15,179.0,9.41 +b52cc4919de82b4d696a4380d10804a3,1,7564c1759c04fc0a38f2aa84f7a370ee,6860153b69cc696d5dcfe1cdaaafcf62,2018-06-19 02:30:26,42.99,12.03 +b8801cccd8068de30112e4f49903d74a,1,154e7e31ebfa092203795c972e5804a6,cc419e0650a3c5ba77189a1882b7556a,2017-08-08 03:25:08,19.99,7.78 +bd4bd0194d6d29f83b8557d4b89b572a,1,7f457254a89d62960399e075711b3deb,ea8482cd71df3c1969d7b9473ff13abc,2018-08-02 03:50:24,24.99,12.84 +cadbb3657dac2dbbd5b84b12e7b78aad,1,9d2ff462feaaf88912539b8647e17ab4,00fc707aaaad2d31347cf883cd2dfe10,2018-03-13 02:48:54,394.9,14.89 
+ccbabeb0b02433bd0fcbac46e70339f2,1,89321f94e35fc6d7903d36f74e351d40,16090f2ca825584b5a147ab24aa30c86,2018-02-27 03:31:34,27.9,15.1 +d17dc4a904426827ca80f2ccb3a6be56,1,ba4bfbf74dbe7ab37e263b9326da0523,f8db351d8c4c4c22c6835c19a46f01b0,2017-05-18 20:42:45,36.9,17.92 +d22e9fa5731b9e30e8b27afcdc2f8563,1,f410090aec61f7c73748ca894286edcd,980640c45d7a4635885491d077167e4d,2018-08-07 23:35:13,99.0,22.62 +d3d6788577c9592da441752e8a1dd5e3,1,7c1bd920dbdf22470b68bde975dd3ccf,cc419e0650a3c5ba77189a1882b7556a,2017-09-27 07:55:14,58.99,17.66 +d887b52c6516beb39e8cd44a5f8b60f7,1,39a9942865c056ed2006a5e8c11d9537,ba5daa4041e1f15cdf34b76e3e18a450,2018-02-08 12:50:30,84.9,15.35 +dcb36b511fcac050b97cd5c05de84dc3,1,009c09f439988bc06a93d6b8186dce73,89a51f50b8095ea78d5768f34c13a76f,2018-06-18 18:59:02,132.4,14.05 +dd78f560c270f1909639c11b925620ea,1,00baba5b58e274d0332a0c8a0a66f877,d3f39f05462b79a4562d35893a28f159,2018-03-16 02:30:56,47.9,12.79 +e425680f760cbc130be3e53a9773c584,1,9ecadb84c81da840dbf3564378b586e9,1025f0e2d44d7041d6cf58b6550e0bfa,2017-09-08 08:30:17,38.4,11.85 +e481f51cbdc54678b7cc49136f2d6af7,1,87285b34884572647811a353c7ac498a,3504c0cb71d7fa48d967e0e4c94d59d9,2017-10-06 11:07:15,29.99,8.72 +e4de6d53ecff736bc68804b0b6e9f635,1,90b58782fdd04cb829667fcc41fb65f5,7c67e1448b00f6e969d365cea6b010ab,2017-10-27 03:49:34,179.99,51.13 +e69bfb5eb88e0ed6a785585b27e16dbf,1,9a78fb9862b10749a117f7fc3c31f051,7c67e1448b00f6e969d365cea6b010ab,2017-08-11 12:05:32,149.99,19.77 +e6ce16cb79ec1d90b1da9085a6118aeb,1,08574b074924071f4e201e151b152b4e,001cca7ae9ae17fb1caed9dfb1094831,2017-05-22 19:50:18,99.0,30.53 +e6ce16cb79ec1d90b1da9085a6118aeb,2,08574b074924071f4e201e151b152b4e,001cca7ae9ae17fb1caed9dfb1094831,2017-05-22 19:50:18,99.0,30.53 +ec341c54a5ebf8ee0a67a8632aa7579b,1,22f5b63060a1185e5ec7721efd622321,4c8b8048e33af2bf94f2eb547746a916,2017-08-31 17:04:12,14.82,15.1 +ecab90c9933c58908d3d6add7c6f5ae3,1,c0db539123a403f670c50237d970b215,f7720c4fa8e3aba4546301ab80ea1f1b,2018-03-01 14:28:03,30.1,33.24 
+ee64d42b8cf066f35eac1cf57de1aa85,1,c50ca07e9e4db9ea5011f06802c0aea0,e9779976487b77c6d4ac45f75ec7afe9,2018-06-13 04:30:33,14.49,7.87 +f169bd689fb8b32ccd62df9050aebc0b,1,20a8603c265d777e25da064113d556f5,e70053bf73d1b5863932e53a9fa47496,2018-04-29 23:31:10,759.0,13.08 +f271576bed568e896f99eb710cd3a6f8,1,d457916b4fdc60154ed93b5dd3e6fd69,76d64c4aca3a7baf218bf93ef7fa768d,2018-01-11 21:51:28,329.9,82.48 +f271576bed568e896f99eb710cd3a6f8,2,d457916b4fdc60154ed93b5dd3e6fd69,76d64c4aca3a7baf218bf93ef7fa768d,2018-01-11 21:51:28,329.9,82.48 +f346ad4ee8f630e5e4ddaf862a34e6dd,1,4ce99ff9dcb7821acd8e599d5d4a6531,70125af26c2d6d4ef401a1d02ae7701f,2018-08-07 13:24:34,39.9,13.76 +f3e7c359154d965827355f39d6b1fdac,1,e99d69efe684efaa643f99805f7c81bc,55c26bcb609f480eb7868594245febb5,2018-08-14 03:24:51,89.9,14.21 +f70a0aff17df5a6cdd9a7196128bd354,1,cafd558df4c3c9d1c338ba6930ea9a62,5dceca129747e92ff8ef7a997dc4f8ca,2017-08-17 02:45:24,279.0,34.19 +f7959f8385f34c4f645327465a1c9fc4,1,c1234c80dafde7ef3311b3eabd5069ed,dc4a0fc896dc34b0d5bfec8438291c80,2017-04-11 08:05:08,17.9,10.96 +f848643eec1d69395095eb3840d2051e,1,2b4609f8948be18874494203496bc318,cc419e0650a3c5ba77189a1882b7556a,2018-03-23 09:09:31,79.99,8.91 +fa516182d28f96f5f5c651026b0749ee,1,e932008cf0ea7c93a077dd8d7e5f49eb,fcdd820084f17e9982427971e4e9d47f,2018-04-19 13:30:02,190.0,19.41 +fbf9ac61453ac646ce8ad9783d7d0af6,1,7b717060aa783eb7f23a747a3a733dd7,c0563dd588b775f2e37747ef6ad6c92c,2018-02-28 02:30:44,109.9,15.53 +fdf128b3630c21adc9ca4fb8a51b68ec,1,89321f94e35fc6d7903d36f74e351d40,16090f2ca825584b5a147ab24aa30c86,2018-07-18 14:31:10,27.9,18.3 diff --git a/data/orders/orders.csv b/data/orders/orders.csv new file mode 100644 index 0000000..608714f --- /dev/null +++ b/data/orders/orders.csv @@ -0,0 +1,101 @@ +order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date 
+e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00 +53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13 00:00:00 +47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04 00:00:00 +949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15 00:00:00 +ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26 00:00:00 +a4591c265e18cb1dcee52889e2d8acc3,503740e9ca751ccdda7ba28e9ab8f608,delivered,2017-07-09 21:57:05,2017-07-09 22:10:13,2017-07-11 14:58:04,2017-07-26 10:57:55,2017-08-01 00:00:00 +136cce7faa42fdb2cefd53fdc79a6098,ed0271e0b7da060a393796590e7b737a,invoiced,2017-04-11 12:22:08,2017-04-13 13:25:17,,,2017-05-09 00:00:00 +6514b8ad8028c9f2cc2374ded245783f,9bdf08b4b3b52b5526ff42d37d47f222,delivered,2017-05-16 13:10:30,2017-05-16 13:22:11,2017-05-22 10:07:46,2017-05-26 12:55:51,2017-06-07 00:00:00 +76c6e866289321a7c93b82b54852dc33,f54a9f0e6b351c431402b8461ea51999,delivered,2017-01-23 18:29:09,2017-01-25 02:50:47,2017-01-26 14:16:31,2017-02-02 14:08:10,2017-03-06 00:00:00 +e69bfb5eb88e0ed6a785585b27e16dbf,31ad1d1b63eb9962463f764d4e6e0c9d,delivered,2017-07-29 11:55:02,2017-07-29 12:05:32,2017-08-10 19:45:24,2017-08-16 17:14:30,2017-08-23 00:00:00 +e6ce16cb79ec1d90b1da9085a6118aeb,494dded5b201313c64ed7f100595b95c,delivered,2017-05-16 19:41:10,2017-05-16 19:50:18,2017-05-18 11:40:40,2017-05-29 11:18:31,2017-06-07 00:00:00 +34513ce0c4fab462a55830c0989c7edb,7711cf624183d843aafe81855097bc37,delivered,2017-07-13 
19:58:11,2017-07-13 20:10:08,2017-07-14 18:43:29,2017-07-19 14:04:48,2017-08-08 00:00:00 +82566a660a982b15fb86e904c8d32918,d3e3b74c766bc6214e0c830b17ee2341,delivered,2018-06-07 10:06:19,2018-06-09 03:13:12,2018-06-11 13:29:00,2018-06-19 12:05:52,2018-07-18 00:00:00 +5ff96c15d0b717ac6ad1f3d77225a350,19402a48fe860416adf93348aba37740,delivered,2018-07-25 17:44:10,2018-07-25 17:55:14,2018-07-26 13:16:00,2018-07-30 15:52:25,2018-08-08 00:00:00 +432aaf21d85167c2c86ec9448c4e42cc,3df704f53d3f1d4818840b34ec672a9f,delivered,2018-03-01 14:14:28,2018-03-01 15:10:47,2018-03-02 21:09:20,2018-03-12 23:36:26,2018-03-21 00:00:00 +dcb36b511fcac050b97cd5c05de84dc3,3b6828a50ffe546942b7a473d70ac0fc,delivered,2018-06-07 19:03:12,2018-06-12 23:31:02,2018-06-11 14:54:00,2018-06-21 15:34:32,2018-07-04 00:00:00 +403b97836b0c04a622354cf531062e5f,738b086814c6fcc74b8cc583f8516ee3,delivered,2018-01-02 19:00:43,2018-01-02 19:09:04,2018-01-03 18:19:09,2018-01-20 01:38:59,2018-02-06 00:00:00 +116f0b09343b49556bbad5f35bee0cdf,3187789bec990987628d7a9beb4dd6ac,delivered,2017-12-26 23:41:31,2017-12-26 23:50:22,2017-12-28 18:33:05,2018-01-08 22:36:36,2018-01-29 00:00:00 +85ce859fd6dc634de8d2f1e290444043,059f7fc5719c7da6cbafe370971a8d70,delivered,2017-11-21 00:03:41,2017-11-21 00:14:22,2017-11-23 21:32:26,2017-11-27 18:28:00,2017-12-11 00:00:00 +83018ec114eee8641c97e08f7b4e926f,7f8c8b9c2ae27bf3300f670c3d478be8,delivered,2017-10-26 15:54:26,2017-10-26 16:08:14,2017-10-26 21:46:53,2017-11-08 22:22:00,2017-11-23 00:00:00 +203096f03d82e0dffbc41ebc2e2bcfb7,d2b091571da224a1b36412c18bc3bbfe,delivered,2017-09-18 14:31:30,2017-09-19 04:04:09,2017-10-06 17:50:03,2017-10-09 22:23:46,2017-09-28 00:00:00 +f848643eec1d69395095eb3840d2051e,4fa1cd166fa598be6de80fa84eaade43,delivered,2018-03-15 08:52:40,2018-03-15 09:09:31,2018-03-15 19:52:48,2018-03-19 18:08:32,2018-03-29 00:00:00 +2807d0e504d6d4894d41672727bc139f,72ae281627a6102d9b3718528b420f8a,delivered,2018-02-03 20:37:35,2018-02-03 20:50:22,2018-02-05 
22:37:28,2018-02-08 16:13:46,2018-02-21 00:00:00 +95266dbfb7e20354baba07964dac78d5,a166da34890074091a942054b36e4265,delivered,2018-01-08 07:55:29,2018-01-08 08:07:31,2018-01-24 23:16:37,2018-01-26 17:32:38,2018-02-21 00:00:00 +f3e7c359154d965827355f39d6b1fdac,62b423aab58096ca514ba6aa06be2f98,delivered,2018-08-09 11:44:40,2018-08-10 03:24:51,2018-08-10 12:29:00,2018-08-13 18:24:27,2018-08-17 00:00:00 +fbf9ac61453ac646ce8ad9783d7d0af6,3a874b4d4c4b6543206ff5d89287f0c3,delivered,2018-02-20 23:46:53,2018-02-22 02:30:46,2018-02-26 22:25:22,2018-03-21 22:03:54,2018-03-12 00:00:00 +acce194856392f074dbf9dada14d8d82,7e20bf5ca92da68200643bda76c504c6,delivered,2018-06-04 00:00:13,2018-06-05 00:35:10,2018-06-05 13:24:00,2018-06-16 15:20:55,2018-07-18 00:00:00 +dd78f560c270f1909639c11b925620ea,8b212b9525f9e74e85e37ed6df37693e,delivered,2018-03-12 01:50:26,2018-03-12 03:28:34,2018-03-12 21:06:37,2018-03-21 14:41:50,2018-03-28 00:00:00 +91b2a010e1e45e6ba3d133fa997597be,cce89a605105b148387c52e286ac8335,delivered,2018-05-02 11:45:38,2018-05-03 12:55:01,2018-05-10 16:16:00,2018-05-16 20:56:24,2018-05-23 00:00:00 +ecab90c9933c58908d3d6add7c6f5ae3,761df82feda9778854c6dafdaeb567e4,delivered,2018-02-25 13:50:30,2018-02-25 14:47:35,2018-02-26 22:28:50,2018-03-27 23:29:14,2018-04-13 00:00:00 +f70a0aff17df5a6cdd9a7196128bd354,456dc10730fbdba34615447ea195d643,delivered,2017-08-10 11:58:33,2017-08-12 02:45:24,2017-08-17 15:35:07,2017-08-18 14:28:02,2017-08-23 00:00:00 +1790eea0b567cf50911c057cf20f90f9,52142aa69d8d0e1247ab0cada0f76023,delivered,2018-04-16 21:15:39,2018-04-16 22:10:26,2018-04-18 13:05:09,2018-05-05 12:28:34,2018-05-15 00:00:00 +989225ba6d0ebd5873335f7e01de2ae7,816f8653d5361cbf94e58c33f2502a5c,delivered,2017-12-12 13:56:04,2017-12-14 13:54:13,2017-12-16 00:18:57,2018-01-03 18:03:36,2018-01-08 00:00:00 +d887b52c6516beb39e8cd44a5f8b60f7,d9ef95f98d8da3b492bb8c0447910498,delivered,2018-02-03 12:38:58,2018-02-03 12:50:30,2018-02-05 21:26:53,2018-02-22 00:07:55,2018-03-07 00:00:00 
+b276e4f8c0fb86bd82fce576f21713e0,cf8ffeddf027932e51e4eae73b384059,delivered,2018-07-29 23:34:51,2018-07-29 23:45:15,2018-07-30 14:43:00,2018-07-31 22:48:50,2018-08-06 00:00:00 +8563039e855156e48fccee4d611a3196,5f16605299d698660e0606f7eae2d2f9,delivered,2018-02-17 15:59:46,2018-02-17 16:15:34,2018-02-20 23:03:56,2018-03-20 00:59:25,2018-03-20 00:00:00 +60550084e6b4c0cb89a87df1f3e5ebd9,f5458ddc3545711efa883dd7ae7c4497,delivered,2018-02-21 18:15:12,2018-02-23 02:10:52,2018-02-27 18:52:09,2018-03-13 23:58:43,2018-03-29 00:00:00 +5acce57f8d9dfd55fa48e212a641a69d,295ae9b35379e077273387ff64354b6f,delivered,2017-07-31 21:37:10,2017-08-02 02:56:02,2017-08-03 18:32:48,2017-08-08 21:24:41,2017-08-22 00:00:00 +434d158e96bdd6972ad6e6d73ddcfd22,2a1dfb647f32f4390e7b857c67458536,delivered,2018-06-01 12:23:13,2018-06-05 03:35:15,2018-06-08 11:49:00,2018-06-18 21:32:52,2018-07-17 00:00:00 +7206b86ea789983f7a273ea7fa0bc2a8,3391c4bc11a817e7973e498b0b023158,delivered,2018-03-26 17:12:18,2018-03-26 17:28:27,2018-03-28 17:22:53,2018-04-05 22:11:18,2018-04-12 00:00:00 +1e7aff52cdbb2451ace09d0f848c3699,ddaff536587109b89777e0353215e150,delivered,2017-05-19 18:53:40,2017-05-19 19:05:17,2017-05-22 10:16:07,2017-05-31 13:58:46,2017-06-12 00:00:00 +6ea2f835b4556291ffdc53fa0b3b95e8,c7340080e394356141681bd4c9b8fe31,delivered,2017-11-24 21:27:48,2017-11-25 00:21:09,2017-12-13 21:14:05,2017-12-28 18:59:23,2017-12-21 00:00:00 +948097deef559c742e7ce321e5e58919,8644be24d48806bc3a88fd59fb47ceb1,delivered,2017-08-04 17:10:39,2017-08-04 17:25:11,2017-08-07 17:52:01,2017-08-12 14:08:40,2017-09-01 00:00:00 +d22e9fa5731b9e30e8b27afcdc2f8563,756fb9391752dad934e0fe3733378e57,delivered,2018-08-04 23:25:30,2018-08-04 23:35:13,2018-08-06 15:03:00,2018-08-13 23:34:42,2018-09-13 00:00:00 +ee64d42b8cf066f35eac1cf57de1aa85,caded193e8e47b8362864762a83db3c5,shipped,2018-06-04 16:44:48,2018-06-05 04:31:18,2018-06-05 14:32:00,,2018-06-28 00:00:00 
+6ebaec694d7025e2ad4a05dba887c032,4f28355e5c17a4a42d3ce2439a1d4501,delivered,2017-05-18 13:55:47,2017-05-18 14:05:17,2017-05-19 12:01:38,2017-05-29 12:47:20,2017-06-09 00:00:00 +d17dc4a904426827ca80f2ccb3a6be56,569cf68214806a39acc0f39344aea67f,delivered,2017-05-14 20:28:25,2017-05-14 20:42:45,2017-05-16 08:17:46,2017-05-25 09:14:31,2017-06-12 00:00:00 +25f4376934e13d3508486352e11a5db0,12fd2740039676063a874b9567dfa651,delivered,2018-05-17 16:59:11,2018-05-18 01:17:39,2018-05-18 13:02:00,2018-05-21 15:22:11,2018-05-25 00:00:00 +5820a1100976432c7968a52da59e9364,2b56e94c2f66f2d97cfa63356f69cee8,delivered,2018-07-29 11:24:17,2018-07-29 11:44:19,2018-07-30 13:47:00,2018-08-02 22:09:11,2018-08-13 00:00:00 +2ce1ad82022c1ba30c2079502ac725aa,7f2178c5d771e17f507d3c1637339298,delivered,2017-08-09 20:19:05,2017-08-11 04:15:29,2017-08-11 17:52:32,2017-08-16 17:16:44,2017-08-31 00:00:00 +138849fd84dff2fb4ca70a0a34c4aa1c,9b18f3fc296990b97854e351334a32f6,delivered,2018-02-01 14:02:19,2018-02-03 02:53:07,2018-02-06 19:13:26,2018-02-14 13:41:59,2018-02-23 00:00:00 +47aa4816b27ba60ec948cd019cc1afc1,148348ff65384b4249b762579532e248,delivered,2018-06-26 13:42:52,2018-06-27 08:35:32,2018-06-27 13:20:00,2018-07-03 18:37:46,2018-07-20 00:00:00 +9faeb9b2746b9d7526aef5acb08e2aa0,79183cd650e2bb0d475b0067d45946ac,delivered,2018-07-26 14:39:59,2018-07-26 14:55:10,2018-07-27 12:04:00,2018-07-31 22:26:55,2018-08-16 00:00:00 +641fb0752bf5b5940c376b3a8bb9dc52,f5afca14dfa9dc64251cf2b45c54c363,delivered,2017-12-15 00:06:10,2017-12-15 00:14:55,2017-12-19 01:58:00,2018-01-03 15:09:32,2018-01-16 00:00:00 +e425680f760cbc130be3e53a9773c584,f178c1827f67a8467b0385b7378d951a,delivered,2017-08-31 08:15:24,2017-08-31 08:30:17,2017-08-31 20:06:14,2017-09-04 20:59:55,2017-09-20 00:00:00 +40c5e18f7d112b59b3e5113a59a905b3,67407057a7d5ee17d1cd09523f484d13,delivered,2018-06-11 10:25:52,2018-06-11 10:58:32,2018-06-14 13:03:00,2018-06-19 00:31:13,2018-07-16 00:00:00 
+734e7d1bbaeb2ff82521ca0fe6fb6f79,2932d241d1f31e6df6c701d52370ae02,delivered,2018-06-11 08:18:19,2018-06-11 08:31:50,2018-06-11 14:54:00,2018-06-14 21:32:21,2018-07-05 00:00:00 +66e4624ae69e7dc89bd50222b59f581f,684fa6da5134b9e4dab731e00011712d,delivered,2018-03-09 14:50:15,2018-03-09 15:40:39,2018-03-15 00:31:19,2018-04-03 13:28:46,2018-04-02 00:00:00 +a685d016c8a26f71a0bb67821070e398,911e4c37f5cafe1604fe6767034bf1ae,delivered,2017-03-13 18:14:36,2017-03-13 18:14:36,2017-03-22 14:03:09,2017-04-06 13:37:16,2017-03-30 00:00:00 +2edfd6d1f0b4cd0db4bf37b1b224d855,241e78de29b3090cfa1b5d73a8130c72,delivered,2017-06-13 21:11:26,2017-06-15 03:05:45,2017-06-16 14:55:37,2017-06-19 18:51:28,2017-07-06 00:00:00 +68873cf91053cd11e6b49a766db5af1a,4632eb5a8f175f6fe020520ae0c678f3,delivered,2017-11-30 22:02:15,2017-12-02 02:51:18,2017-12-04 22:07:01,2017-12-05 20:28:40,2017-12-18 00:00:00 +f346ad4ee8f630e5e4ddaf862a34e6dd,dd5095632e3953fc0947b8ab5176b0be,delivered,2018-08-05 13:09:48,2018-08-05 13:24:34,2018-08-06 13:41:00,2018-08-10 18:35:40,2018-08-15 00:00:00 +8f06cc6465925031568537b815f1198d,9916715c2ab6ee1710c9c32f0a534ad2,delivered,2017-11-15 11:31:41,2017-11-15 11:46:42,2017-11-16 22:03:00,2017-11-22 22:41:07,2017-12-05 00:00:00 +ccbabeb0b02433bd0fcbac46e70339f2,c77ee2d8ba1614a4d489a44166894938,delivered,2018-02-19 20:31:09,2018-02-21 06:15:25,2018-02-22 21:04:23,2018-03-09 22:22:25,2018-03-13 00:00:00 +688052146432ef8253587b930b01a06d,81e08b08e5ed4472008030d70327c71f,delivered,2018-04-22 08:48:13,2018-04-24 18:25:22,2018-04-23 19:19:14,2018-04-24 19:31:58,2018-05-15 00:00:00 +f271576bed568e896f99eb710cd3a6f8,5dda11942d4f77bee3a46d71e442aec4,delivered,2018-01-07 21:44:54,2018-01-07 21:51:28,2018-01-10 21:56:09,2018-01-17 20:26:31,2018-02-14 00:00:00 +686541986ecfb7d9296eb67719973bf0,74805bc388861fa350ed2fade8444e0b,delivered,2018-02-10 13:26:59,2018-02-10 13:35:31,2018-02-14 20:47:38,2018-02-20 22:13:08,2018-03-12 00:00:00 
+68e48e68da1f50f7c5838ea75e3a20dd,4afc1dcca5fe8926fc97d60a4497f8ab,delivered,2018-06-18 16:02:23,2018-06-18 17:00:57,2018-06-19 15:55:00,2018-06-22 21:18:51,2018-07-13 00:00:00 +b52cc4919de82b4d696a4380d10804a3,be8c14c16a4d47194ccdfe10f1fc5b1a,delivered,2018-06-13 13:47:39,2018-06-15 02:37:29,2018-06-15 14:22:00,2018-06-18 22:32:44,2018-06-26 00:00:00 +fdf128b3630c21adc9ca4fb8a51b68ec,a9d37ddc8ba4d9f6dbac7d8ec378cc95,delivered,2018-07-15 08:33:19,2018-07-16 14:31:10,2018-07-17 15:33:00,2018-07-24 16:41:18,2018-08-02 00:00:00 +a6aeb116d2cb5013eb8a94585b71ffef,bb2f5e670f7155dc622c57e4b31d0a69,delivered,2017-09-13 14:27:11,2017-09-13 14:44:39,2017-09-15 18:42:29,2017-09-16 15:40:08,2017-09-25 00:00:00 +fa516182d28f96f5f5c651026b0749ee,55e6b290205c84ddd23ddf5eb134efd4,delivered,2018-04-13 08:44:17,2018-04-13 13:30:02,2018-04-13 22:19:21,2018-04-19 20:41:45,2018-05-08 00:00:00 +6abaad69b8b349c3a529b4b91ce18e46,f5618502bee8eafdee72fb6955e2ebdf,delivered,2018-02-15 10:33:30,2018-02-15 10:47:59,2018-02-20 14:15:09,2018-02-24 19:15:56,2018-03-07 00:00:00 +974c1993ab8024d3ed16229183c2308d,a90391a47de936d56c66a5366cba1462,delivered,2017-02-20 11:45:39,2017-02-22 03:10:20,2017-02-23 06:47:35,2017-03-09 14:27:58,2017-03-21 00:00:00 +82bce245b1c9148f8d19a55b9ff70644,388025bec8128ff20ec1a316ed4dcf02,delivered,2017-04-20 17:15:46,2017-04-21 05:15:56,2017-04-24 09:34:13,2017-05-10 09:17:55,2017-05-12 00:00:00 +a910f58086d58b3ae6f37aa712d377b9,afb19a4b667cb708caab312757ec3d3f,delivered,2017-09-15 09:19:48,2017-09-15 09:35:18,2017-09-18 18:20:00,2017-09-25 20:14:48,2017-10-11 00:00:00 +bd4bd0194d6d29f83b8557d4b89b572a,636e15840ab051faa13d3f781b6e4233,delivered,2018-07-28 16:52:55,2018-07-31 03:50:24,2018-08-01 16:01:00,2018-08-06 18:44:46,2018-08-08 00:00:00 +634e8f4c0f6744a626f77f39770ac6aa,05e996469a2bf9559c7122b87e156724,delivered,2017-08-09 18:32:47,2017-08-09 18:45:18,2017-08-10 20:21:53,2017-08-16 18:17:54,2017-08-31 00:00:00 
+6d25592267349b322799e2beb687871e,5bb39c890c91b1d26801aa19a9336eac,delivered,2018-08-26 22:04:34,2018-08-28 04:10:18,2018-08-28 12:56:00,2018-08-29 12:40:53,2018-08-30 00:00:00 +b8801cccd8068de30112e4f49903d74a,f26a435864aebedff7f7c84f82ee229f,delivered,2017-07-30 03:06:35,2017-07-30 03:25:08,2017-07-31 16:42:54,2017-08-01 14:27:31,2017-08-16 00:00:00 +2711a938db643b3f0b62ee2c8a2784aa,29cb486c739f9774c8eb542e07b56cd2,delivered,2017-12-22 00:17:37,2017-12-23 02:15:31,2017-12-27 19:54:46,2018-01-09 19:52:32,2018-01-19 00:00:00 +3bc77ce8be27211bac313c2daa402d1a,bf141bf67fbe428d558bcf0e018eab60,delivered,2017-04-06 22:39:29,2017-04-06 22:50:24,2017-04-07 14:54:18,2017-04-11 12:31:36,2017-04-27 00:00:00 +10c320f977c6a18f91b2d14be13128c6,b673f0597cb0c4d12778f731045f361a,delivered,2017-05-09 20:48:59,2017-05-09 21:02:45,2017-05-10 11:22:15,2017-05-18 13:22:35,2017-06-01 00:00:00 +0a4a2fccb27bd83a892fa503987a595b,6772a0a230a2667d16c3620f000e1348,delivered,2017-04-20 20:42:44,2017-04-20 20:55:09,2017-04-25 08:23:08,2017-05-11 13:07:46,2017-05-25 00:00:00 +e4de6d53ecff736bc68804b0b6e9f635,9f6618c17568ac301465fe7ad056c674,delivered,2017-10-16 14:56:50,2017-10-17 03:49:34,2017-10-27 22:14:21,2017-11-08 21:25:24,2017-11-21 00:00:00 +6b860b35691d486e45dc98e3514ec5f6,fee181bf648906d1c57f84f216976286,delivered,2017-12-08 09:42:43,2017-12-09 02:49:54,2017-12-11 15:19:04,2017-12-19 18:43:35,2018-01-03 00:00:00 +ec341c54a5ebf8ee0a67a8632aa7579b,df9b032b2ad0fd6bf37dfb48e5f83845,delivered,2017-08-26 16:53:30,2017-08-27 17:04:12,2017-08-30 13:26:32,2017-09-08 20:39:56,2017-09-21 00:00:00 +cadbb3657dac2dbbd5b84b12e7b78aad,93ada7a24817edda9f4ab998fa823d16,delivered,2018-02-27 12:55:42,2018-03-01 02:48:54,2018-03-03 02:27:03,2018-03-16 14:59:01,2018-03-29 00:00:00 +9defaf92cff22420e4e8ef7784815a55,64fb950e760ec8b0db79154a1fa9c1bf,delivered,2018-05-11 13:10:51,2018-05-11 13:36:50,2018-05-16 14:43:00,2018-05-21 16:09:55,2018-06-05 00:00:00 
+20e0101b20700188cadb288126949685,48558a50a7ba1aab61891936d2ca7681,delivered,2018-01-22 19:22:22,2018-01-22 19:36:35,2018-01-24 23:32:21,2018-02-15 20:08:15,2018-02-19 00:00:00 +0e782c3705510e717d28907746cbda82,3a897024068ed42a183de61d5727d866,delivered,2018-05-01 08:12:37,2018-05-01 08:52:58,2018-05-02 19:01:00,2018-05-04 14:02:26,2018-05-16 00:00:00 +d3d6788577c9592da441752e8a1dd5e3,8628fac2267e8c8804525da99c10ed0e,delivered,2017-09-19 22:17:15,2017-09-20 07:55:14,2017-09-22 17:23:09,2017-10-10 18:43:53,2017-10-13 00:00:00 +86f21bf63784876b9fd6d35f46581d72,332df68ccac2f2f7d9e11299188f8bce,delivered,2018-04-11 22:32:31,2018-04-11 22:49:32,2018-04-14 00:02:39,2018-04-27 23:14:42,2018-05-21 00:00:00 +8447ff843b2616c50c0ced28ab1dae03,e28dd4261bed9c7ba89ecaf411b88f7c,delivered,2017-12-20 23:45:07,2017-12-22 02:37:45,2017-12-23 13:10:45,2018-01-09 18:14:02,2018-01-22 00:00:00 +f169bd689fb8b32ccd62df9050aebc0b,82f0b75bb50fcb30711e5277e36b3983,delivered,2018-04-22 23:23:18,2018-04-24 19:24:14,2018-04-27 13:46:00,2018-04-30 17:57:25,2018-05-07 00:00:00 +77e9941864fc840be8e4b1ba5347c0f7,3135962ee745ef39b85576df7ddbaa99,delivered,2018-08-03 08:59:39,2018-08-03 09:31:36,2018-08-03 10:10:00,2018-08-17 00:49:41,2018-08-27 00:00:00 +41bb5cee06dbf170878a9ef93ac7e7f5,1833a0540067becaf59368fe4cd4303a,delivered,2018-05-14 08:35:33,2018-05-14 08:52:24,2018-05-16 14:46:00,2018-05-18 14:48:38,2018-06-08 00:00:00 +6a0a8bfbbe700284feb0845d95e0867f,68451b39b1314302c08c65a29f1140fc,delivered,2017-11-22 11:32:22,2017-11-22 11:46:50,2017-11-27 13:39:35,2017-12-28 19:43:00,2017-12-11 00:00:00 +f7959f8385f34c4f645327465a1c9fc4,0bf19317b1830a69e55b40710576aa7a,delivered,2017-03-30 07:50:33,2017-03-30 08:05:08,2017-03-30 10:55:54,2017-04-10 02:59:52,2017-04-26 00:00:00 +23f553848a03aaab35bb3f9f87725125,c622b892a190735ef81c0087973fa16d,delivered,2018-06-05 09:10:34,2018-06-05 09:32:22,2018-06-06 15:37:00,2018-06-18 12:36:54,2018-07-23 00:00:00 diff --git a/data/orders/products.csv 
b/data/orders/products.csv new file mode 100644 index 0000000..22db433 --- /dev/null +++ b/data/orders/products.csv @@ -0,0 +1,101 @@ +product_id,product_category_name,product_name_lenght,product_description_lenght,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm +278b3c6462e86b4556b99989513ddf73,eletroportateis,58.0,587.0,3.0,350.0,20.0,20.0,20.0 +3014e35fd70fce29095ced5cdc89f4ce,telefonia,51.0,244.0,1.0,125.0,17.0,10.0,14.0 +15a9e834e89eab39d973492882c658d6,cama_mesa_banho,52.0,530.0,6.0,949.0,30.0,20.0,26.0 +db56f6d2b04c89eae4daba188842fd7b,malas_acessorios,56.0,450.0,3.0,12450.0,40.0,25.0,57.0 +154e7e31ebfa092203795c972e5804a6,beleza_saude,48.0,575.0,1.0,100.0,20.0,15.0,15.0 +20a8603c265d777e25da064113d556f5,telefonia,59.0,474.0,3.0,475.0,17.0,14.0,14.0 +87285b34884572647811a353c7ac498a,utilidades_domesticas,40.0,268.0,4.0,500.0,19.0,8.0,13.0 +7c1bd920dbdf22470b68bde975dd3ccf,beleza_saude,59.0,492.0,2.0,200.0,22.0,10.0,18.0 +b37b72d5a56f887725c2862184b8cab8,telefonia,59.0,566.0,1.0,150.0,19.0,4.0,11.0 +ac1789e492dcd698c5c10b97a671243a,moveis_decoracao,41.0,432.0,2.0,300.0,35.0,35.0,15.0 +f410090aec61f7c73748ca894286edcd,papelaria,60.0,1847.0,3.0,450.0,35.0,50.0,12.0 +e251ebd2858be1aa7d9b2087a6992580,ferramentas_jardim,34.0,511.0,4.0,8875.0,40.0,14.0,43.0 +1501b0033c68a37fa9560033a440e529,eletroportateis,58.0,1160.0,6.0,410.0,24.0,22.0,17.0 +43ee88561093499d9e571d4db5f20b79,moveis_decoracao,39.0,161.0,3.0,200.0,20.0,20.0,20.0 +f8a8f05a35976a91aed5cccc3992c357,moveis_decoracao,63.0,418.0,1.0,1500.0,45.0,15.0,35.0 +7564c1759c04fc0a38f2aa84f7a370ee,construcao_ferramentas_construcao,59.0,2432.0,3.0,1200.0,16.0,11.0,11.0 +ebd7c847c1e1cb69ec374ae0ebee1f4c,moveis_decoracao,50.0,228.0,3.0,1200.0,40.0,15.0,30.0 +2b4609f8948be18874494203496bc318,beleza_saude,59.0,492.0,3.0,250.0,22.0,10.0,18.0 +c7df652246ed7b3300aaf46960c141e4,beleza_saude,28.0,1455.0,1.0,683.0,29.0,15.0,22.0 
+2d8f2be4f08788ee3bf5356af2b2ee6c,climatizacao,52.0,331.0,4.0,100.0,27.0,13.0,17.0 +f7d7b5c58704fb359a74580622800051,cama_mesa_banho,53.0,223.0,1.0,950.0,45.0,15.0,35.0 +304fad8dc4d2012dc4062839972f2d96,construcao_ferramentas_construcao,59.0,1775.0,2.0,1700.0,16.0,11.0,11.0 +60184212dae4e6b0da32bf54271a8c4a,relogios_presentes,59.0,476.0,2.0,394.0,17.0,11.0,14.0 +d1c427060a0f73f6b889a5c7c61f2ac4,informatica_acessorios,59.0,1893.0,1.0,6550.0,20.0,20.0,20.0 +cf944645d4ff2a3eed3ae17f641ea861,fashion_underwear_e_moda_praia,52.0,579.0,1.0,450.0,42.0,4.0,14.0 +6893767814d1ac82a81bcd365e1cc918,eletronicos,26.0,511.0,1.0,200.0,25.0,7.0,16.0 +d70f38e7f79c630f8ea00c993897042c,bebes,53.0,233.0,1.0,10950.0,41.0,40.0,40.0 +4520766ec412348b8d4caa5e8a18c464,automotivo,59.0,956.0,1.0,50.0,16.0,16.0,17.0 +5ac9d9e379c606e36a8094a6046f75dc,beleza_saude,46.0,2345.0,6.0,525.0,21.0,16.0,13.0 +9d2ff462feaaf88912539b8647e17ab4,informatica_acessorios,42.0,315.0,1.0,813.0,32.0,16.0,16.0 +72d3bf1d3a790f8874096fcf860e3eff,brinquedos,57.0,341.0,2.0,583.0,20.0,21.0,20.0 +64d0feb1bcf9c7fe7b5dad3271c10910,moveis_decoracao,58.0,696.0,7.0,750.0,25.0,15.0,35.0 +08574b074924071f4e201e151b152b4e,ferramentas_jardim,36.0,450.0,1.0,9000.0,42.0,12.0,39.0 +30469bb5ea377eae7121981e2f0778e4,esporte_lazer,57.0,574.0,4.0,5950.0,20.0,30.0,80.0 +9b37a918bcf2c8e1064e867cf1df4637,eletronicos,57.0,1710.0,6.0,1207.0,20.0,10.0,20.0 +65266b2da20d04dbe00c5c2d3bb7859e,papelaria,38.0,316.0,4.0,250.0,51.0,15.0,15.0 +00baba5b58e274d0332a0c8a0a66f877,perfumaria,27.0,406.0,4.0,200.0,18.0,7.0,12.0 +aa4383b373c6aca5d8797843e5594415,automotivo,46.0,232.0,1.0,420.0,24.0,19.0,21.0 +f497ba62f1d6b4f6a3a3266fa8623ad3,beleza_saude,45.0,1276.0,1.0,83.0,13.0,8.0,12.0 +a5a0e71a81ae65aa335e71c06261e260,utilidades_domesticas,57.0,698.0,3.0,705.0,34.0,22.0,28.0 +c0db539123a403f670c50237d970b215,ferramentas_jardim,56.0,1313.0,2.0,850.0,20.0,20.0,20.0 +aca2eb7d00ea1a7b8ebd4e68314663af,moveis_decoracao,44.0,903.0,6.0,2600.0,50.0,10.0,30.0 
+c6c1f263e076bd9c1f1640250a5d0c29,perfumaria,32.0,102.0,1.0,425.0,24.0,12.0,16.0 +6b64362e89896be7589621df54be089e,moveis_decoracao,57.0,2435.0,2.0,3000.0,69.0,11.0,11.0 +f7e0fa615b386bc9a8b9eb52bc1fff76,informatica_acessorios,59.0,2574.0,1.0,325.0,21.0,21.0,21.0 +7a10781637204d8d10485c71a6108a2e,relogios_presentes,42.0,236.0,1.0,342.0,18.0,13.0,15.0 +9451e630d725c4bb7a5a206b48b99486,bebes,52.0,300.0,1.0,350.0,31.0,10.0,12.0 +ba4bfbf74dbe7ab37e263b9326da0523,esporte_lazer,60.0,521.0,1.0,650.0,24.0,10.0,20.0 +90b58782fdd04cb829667fcc41fb65f5,moveis_escritorio,34.0,794.0,1.0,7417.0,102.0,46.0,11.0 +79da264732f717f10ebf5d102aa6c32a,telefonia,59.0,675.0,5.0,150.0,17.0,8.0,14.0 +c827fb43ad0fb8708f34c2911fdc164b,esporte_lazer,53.0,699.0,1.0,10600.0,26.0,30.0,26.0 +ba74c6b75d2ad7503175809688d5a03c,relogios_presentes,59.0,1088.0,2.0,292.0,17.0,8.0,12.0 +c1234c80dafde7ef3311b3eabd5069ed,cama_mesa_banho,55.0,122.0,1.0,300.0,20.0,2.0,15.0 +5e2ba75ad255ff60b1c76c5bf526ae9b,beleza_saude,47.0,1346.0,2.0,500.0,20.0,8.0,20.0 +ad1128daf194f4b6ac4256e16233497c,telefonia,32.0,580.0,2.0,100.0,16.0,3.0,11.0 +163e6400e6dadd0fe04775c5e9331fda,bebes,29.0,462.0,1.0,500.0,47.0,10.0,36.0 +009c09f439988bc06a93d6b8186dce73,perfumaria,39.0,991.0,3.0,150.0,20.0,20.0,20.0 +7b717060aa783eb7f23a747a3a733dd7,cool_stuff,46.0,595.0,2.0,500.0,16.0,12.0,22.0 +9ecadb84c81da840dbf3564378b586e9,moveis_decoracao,41.0,789.0,1.0,950.0,20.0,35.0,20.0 +060cb19345d90064d1015407193c233d,automotivo,49.0,608.0,1.0,7150.0,65.0,10.0,65.0 +7f457254a89d62960399e075711b3deb,automotivo,60.0,558.0,6.0,300.0,17.0,4.0,12.0 +72a97c271b2e429974398f46b93ae530,perfumaria,59.0,685.0,1.0,450.0,16.0,17.0,16.0 +a47295965bd091207681b541b26e40a5,telefonia,60.0,818.0,6.0,300.0,17.0,4.0,12.0 +bb7181410b4e02f93f3697f765db53c7,bebes,36.0,1058.0,1.0,14950.0,77.0,20.0,53.0 +595fac2a385ac33a80bd5114aec74eb8,perfumaria,29.0,178.0,1.0,400.0,19.0,13.0,19.0 +22f5b63060a1185e5ec7721efd622321,cama_mesa_banho,32.0,606.0,2.0,400.0,90.0,6.0,12.0 
+bff2010b28e8fbcff5a9db9d3fea5ac4,ferramentas_jardim,58.0,769.0,6.0,850.0,90.0,20.0,20.0 +c35498fbb4358837ae16850f50c3fd22,telefonia,59.0,973.0,1.0,325.0,19.0,8.0,22.0 +4ce99ff9dcb7821acd8e599d5d4a6531,esporte_lazer,51.0,192.0,2.0,450.0,35.0,10.0,11.0 +b3be1f83cef05668c25e134852d44545,cama_mesa_banho,52.0,413.0,1.0,1750.0,42.0,11.0,36.0 +c3ba4e8d3cb30049213b682e751e9d00,relogios_presentes,58.0,737.0,3.0,350.0,16.0,2.0,20.0 +a1804276d9941ac0733cfd409f5206eb,,,,,600.0,35.0,35.0,15.0 +cafd558df4c3c9d1c338ba6930ea9a62,bebes,45.0,1009.0,1.0,16450.0,44.0,70.0,32.0 +d457916b4fdc60154ed93b5dd3e6fd69,construcao_ferramentas_construcao,57.0,424.0,1.0,10000.0,30.0,20.0,30.0 +638bbb2a5e4f360b71f332ddfebfd672,construcao_ferramentas_construcao,38.0,143.0,2.0,20850.0,100.0,25.0,50.0 +75d6b6963340c6063f7f4cfcccfe6a30,perfumaria,51.0,999.0,2.0,400.0,18.0,11.0,20.0 +a659cb33082b851fb87a33af8f0fff29,automotivo,60.0,380.0,1.0,150.0,16.0,6.0,11.0 +e932008cf0ea7c93a077dd8d7e5f49eb,climatizacao,60.0,3270.0,4.0,7350.0,105.0,10.0,40.0 +89321f94e35fc6d7903d36f74e351d40,alimentos,59.0,982.0,1.0,150.0,17.0,13.0,13.0 +0cd9f302c8a5b076ffa5c3567c6705fd,informatica_acessorios,22.0,716.0,2.0,200.0,36.0,2.0,28.0 +c50ca07e9e4db9ea5011f06802c0aea0,beleza_saude,59.0,1782.0,1.0,125.0,25.0,14.0,18.0 +e99d69efe684efaa643f99805f7c81bc,papelaria,56.0,115.0,1.0,600.0,33.0,13.0,25.0 +be021417a6acb56b9b50d3fd2714baa8,utilidades_domesticas,48.0,664.0,6.0,14300.0,38.0,34.0,34.0 +cce679660c66e6fbd5c8091dfd29e9cd,cama_mesa_banho,43.0,125.0,1.0,250.0,40.0,4.0,30.0 +3dd6c9d499e7c311a29e08afe1fd8fc6,cool_stuff,60.0,396.0,4.0,250.0,19.0,12.0,12.0 +9a469eaf45dfbc43d39ba1977a3c07af,cama_mesa_banho,44.0,192.0,1.0,700.0,40.0,4.0,30.0 +cac9e5692471a0700418aa3400b9b2b1,bebes,57.0,2440.0,1.0,375.0,29.0,14.0,20.0 +5526b1ae9ab2688cf600783cece249df,informatica_acessorios,49.0,385.0,1.0,200.0,16.0,16.0,16.0 +9a78fb9862b10749a117f7fc3c31f051,moveis_escritorio,45.0,527.0,1.0,9750.0,42.0,41.0,42.0 
+1deda1acffb44ed38494667d7e49a9f3,esporte_lazer,53.0,891.0,2.0,1150.0,27.0,12.0,17.0 +10adb53d8faa890ca7c2f0cbcb68d777,cama_mesa_banho,52.0,155.0,1.0,200.0,16.0,10.0,16.0 +d0b61bfb1de832b15ba9d266ca96e5b0,pet_shop,59.0,468.0,3.0,450.0,30.0,10.0,20.0 +8c591ab0ca519558779df02023177f44,ferramentas_jardim,47.0,1893.0,1.0,6050.0,20.0,20.0,20.0 +cd935d283d47f1050c505e1c39c48b67,esporte_lazer,32.0,658.0,5.0,281.0,30.0,14.0,25.0 +a01d1cbb398e386a4a8f8364401a7584,esporte_lazer,58.0,757.0,2.0,500.0,50.0,5.0,30.0 +12087840651e83b48206b82c213b76fd,esporte_lazer,27.0,521.0,1.0,1813.0,30.0,13.0,28.0 +f48eb5c2fde13ca63664f0bb05f55346,esporte_lazer,60.0,1153.0,2.0,100.0,20.0,11.0,11.0 +69d980b4120a76616d7b237d731d6156,relogios_presentes,60.0,1362.0,3.0,600.0,16.0,11.0,12.0 +39a9942865c056ed2006a5e8c11d9537,brinquedos,47.0,556.0,5.0,800.0,37.0,14.0,37.0 +f35927953ed82e19d06ad3aac2f06353,livros_interesse_geral,39.0,724.0,1.0,450.0,20.0,20.0,20.0 diff --git a/data/orders/sellers.csv b/data/orders/sellers.csv new file mode 100644 index 0000000..6585ba2 --- /dev/null +++ b/data/orders/sellers.csv @@ -0,0 +1,88 @@ +seller_id,seller_zip_code_prefix,seller_city,seller_state +669ae81880e08f269a64487cfb287169,89160,rio do sul,SC +817245bcc3badd82bbd222e0366951a6,17056,bauru,SP +7d13fca15225358621be4086e1eb0964,14050,ribeirao preto,SP +a3a38f4affed601eb87a97788c949667,89204,joinville,SC +744dac408745240a2c2528fb1b6028f3,83408,colombo,PR +8b321bb669392f5163d04c59e235e066,1212,sao paulo,SP +76d64c4aca3a7baf218bf93ef7fa768d,80215,curitiba,PR +537eb890efff034a88679788b647c564,20270,rio de janeiro,RJ +955fee9216a65b617aa5c0531780ce60,4782,sao paulo,SP +ba5daa4041e1f15cdf34b76e3e18a450,4363,sao paulo,SP +d3f39f05462b79a4562d35893a28f159,13730,mococa,SP +d1ef48b38baca7e831711c4a0aeb398f,86800,apucarana,PR +f7ba60f8c3f99e7ee4042fdef03b70c4,9628,sao bernardo do campo,SP +f84a00e60c73a49e7e851c9bdca3a5bb,20756,rio de janeiro,RJ +e5a38146df062edaf55c38afa99e42dc,1233,sao paulo,SP 
+ef0ace09169ac090589d85746e3e036f,24451,sao goncalo,RJ +87142160b41353c4e5fca2360caf6f92,90230,porto alegre,RS +289cdb325fb7e7f891c38608bf9e0962,31570,belo horizonte,SP +3504c0cb71d7fa48d967e0e4c94d59d9,9350,maua,SP +23613d49c3ac2bd302259e55c06c050c,13660,porto ferreira,SP +391fc6631aebcf3004804e51b40bcf1e,14940,ibitinga,SP +33cbbec1e7e1044aaf11d152172c776f,95705,bento goncalves,RS +e9779976487b77c6d4ac45f75ec7afe9,11701,praia grande,SP +db4350fd57ae30082dec7acbaacc17f9,3126,sao paulo,SP +70125af26c2d6d4ef401a1d02ae7701f,74435,goiania,GO +6560211a19b47992c3666cc44a7e94c0,5849,sao paulo,SP +0b90b6df587eb83608a64ea8b390cf07,87025,maringa,PR +55c26bcb609f480eb7868594245febb5,14910,tabatinga,SP +6df688df543f90e9b38f4319e75a9d88,31230,belo horizonte,MG +d673a59aac7a70d8b01e6902bf090a11,14940,ibitinga,SP +f52c2422904463fdd7741f99045fecb6,9230,santo andre/sao paulo,SP +ea8482cd71df3c1969d7b9473ff13abc,4160,sao paulo,SP +5b925e1d006e9476d738aa200751b73b,4567,sao paulo,SP +fe2032dab1a61af8794248c8196565c9,13030,campinas,SP +2a84855fd20af891be03bc5924d2b453,30111,belo horizonte,MG +c4af86330efa7a2620772227d2d670c9,8840,mogi das cruzes,SP +001cca7ae9ae17fb1caed9dfb1094831,29156,cariacica,ES +d91fb3b7d041e83b64a00a3edfb37e4f,11704,praia grande,SP +e70053bf73d1b5863932e53a9fa47496,5059,sao paulo,SP +7c67e1448b00f6e969d365cea6b010ab,8577,itaquaquecetuba,SP +980640c45d7a4635885491d077167e4d,13501,rio claro,SP +d2374cbcbb3ca4ab1086534108cc3ab7,14940,ibitinga,SP +0bae85eb84b9fb3bd773911e89288d54,88301,itajai,SP +6860153b69cc696d5dcfe1cdaaafcf62,13360,capivari,SP +76d5af76d0271110f9af36c92573f765,3194,sao paulo,SP +cc419e0650a3c5ba77189a1882b7556a,9015,santo andre,SP +977f9f63dd360c2a32ece2f93ad6d306,14910,tabatinga,SP +2c9e548be18521d1c43cde1c582c6de8,8752,mogi das cruzes,SP +855668e0971d4dfd7bef1b6a4133b41b,13257,itatiba,SP +77530e9772f57a62c906e1c21538ab82,80310,curitiba,PR +f7720c4fa8e3aba4546301ab80ea1f1b,81350,curitiba,PR +8581055ce74af1daba164fdbd55a40de,7112,guarulhos,SP 
+c8417879a15366a17c30af34c798c332,4445,sao paulo,SP +16090f2ca825584b5a147ab24aa30c86,12940,atibaia,SP +4c8b8048e33af2bf94f2eb547746a916,14940,ibitinga,SP +00fc707aaaad2d31347cf883cd2dfe10,87025,maringa,PR +562fc2f2c2863ab7e79a9e4388a58a14,13070,campinas,SP +85d9eb9ddc5d00ca9336a2219c97bb13,31255,belo horizonte,MG +a673821011d0cec28146ea42f5ab767f,3809,sao paulo,SP +dc4a0fc896dc34b0d5bfec8438291c80,14940,ibitinga,SP +a6fe7de3d16f6149ffe280349a8535a0,14401,franca,SP +f5f46307a4d15880ca14fab4ad9dfc9b,89165,rio do sul,SC +fcdd820084f17e9982427971e4e9d47f,14075,ribeirao preto,SP +36890be00bbfc1cdb9a4a38a6af05a69,6040,osasco,SP +f27e33c6d29b5138fa9967bcd445b6d5,4273,sao paulo,SP +c0563dd588b775f2e37747ef6ad6c92c,9220,santo andre,SP +633ecdf879b94b5337cca303328e4a25,4438,sao paulo,SP +80e6699fe29150b372a0c8a1ebf7dcc8,83323,pinhais,PR +5656537e588803a555b8eb41f07a944b,72015,brasilia,DF +5dceca129747e92ff8ef7a997dc4f8ca,13450,santa barbara d´oeste,SP +dc8798cbf453b7e0f98745e396cc5616,5455,sao paulo,SP +4a3ca9315b744ce9f8e9374361493884,14940,ibitinga,SP +b33e7c55446eabf8fe1a42d037ac7d6d,14850,pradopolis,SP +3b15288545f8928d3e65a8f949a28291,14940,ibitinga,SP +4869f7a5dfa277a7dca6462dcf3b52b2,14840,guariba,SP +094ced053e257ae8cae57205592d6712,14095,ribeirao preto,SP +a5cba26a62b8b4d0145b68b841e62e7f,87303,campo mourao,PR +1025f0e2d44d7041d6cf58b6550e0bfa,3204,sao paulo,SP +a1043bafd471dff536d0c462352beb48,37175,ilicinea,MG +f8db351d8c4c4c22c6835c19a46f01b0,13324,salto,SP +1ca7077d890b907f89be8c954a02686a,6506,santana de parnaiba,SP +89a51f50b8095ea78d5768f34c13a76f,71931,brasilia,DF +d566c37fa119d5e66c4e9052e83ee4ea,4131,sao paulo,SP +70a12e78e608ac31179aea7f8422044b,12327,jacarei,SP +66922902710d126a0e7d26b0e3805106,31842,belo horizonte,MG +1900267e848ceeba8fa32d80c1a5f5a8,14940,ibitinga,SP +63b9ae557efed31d1f7687917d248a8d,13720,sao jose do rio pardo,SP diff --git a/lec3.ipynb b/lec3.ipynb new file mode 100644 index 0000000..1006fc0 --- /dev/null +++ b/lec3.ipynb @@ -0,0 
+1,4278 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Загрузка набора данных Titanic" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
PassengerId
103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
503Allen, Mr. William Henrymale35.0003734508.0500NaNS
....................................
88702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88903Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN12W./C. 660723.4500NaNS
89011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ
\n", + "

891 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Survived Pclass \\\n", + "PassengerId \n", + "1 0 3 \n", + "2 1 1 \n", + "3 1 3 \n", + "4 1 1 \n", + "5 0 3 \n", + "... ... ... \n", + "887 0 2 \n", + "888 1 1 \n", + "889 0 3 \n", + "890 1 1 \n", + "891 0 3 \n", + "\n", + " Name Sex Age \\\n", + "PassengerId \n", + "1 Braund, Mr. Owen Harris male 22.0 \n", + "2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", + "3 Heikkinen, Miss. Laina female 26.0 \n", + "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", + "5 Allen, Mr. William Henry male 35.0 \n", + "... ... ... ... \n", + "887 Montvila, Rev. Juozas male 27.0 \n", + "888 Graham, Miss. Margaret Edith female 19.0 \n", + "889 Johnston, Miss. Catherine Helen \"Carrie\" female NaN \n", + "890 Behr, Mr. Karl Howell male 26.0 \n", + "891 Dooley, Mr. Patrick male 32.0 \n", + "\n", + " SibSp Parch Ticket Fare Cabin Embarked \n", + "PassengerId \n", + "1 1 0 A/5 21171 7.2500 NaN S \n", + "2 1 0 PC 17599 71.2833 C85 C \n", + "3 0 0 STON/O2. 3101282 7.9250 NaN S \n", + "4 1 0 113803 53.1000 C123 S \n", + "5 0 0 373450 8.0500 NaN S \n", + "... ... ... ... ... ... ... \n", + "887 0 0 211536 13.0000 NaN S \n", + "888 0 0 112053 30.0000 B42 S \n", + "889 1 2 W./C. 
6607 23.4500 NaN S \n", + "890 0 0 111369 30.0000 C148 C \n", + "891 0 0 370376 7.7500 NaN Q \n", + "\n", + "[891 rows x 11 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "titanic = pd.read_csv(\"data/titanic.csv\", index_col=\"PassengerId\")\n", + "\n", + "titanic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Унитарное кодирование\n", + "\n", + "Преобразование категориального признака в несколько бинарных признаков" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Унитарное кодирование признаков Пол (Sex) и Порт посадки (Embarked)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Кодирование" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Embarked_QEmbarked_SEmbarked_nanSex_male
00.01.00.01.0
10.00.00.00.0
20.01.00.00.0
30.01.00.00.0
40.01.00.01.0
...............
8860.01.00.01.0
8870.01.00.00.0
8880.01.00.00.0
8890.00.00.01.0
8901.00.00.01.0
\n", + "

891 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " Embarked_Q Embarked_S Embarked_nan Sex_male\n", + "0 0.0 1.0 0.0 1.0\n", + "1 0.0 0.0 0.0 0.0\n", + "2 0.0 1.0 0.0 0.0\n", + "3 0.0 1.0 0.0 0.0\n", + "4 0.0 1.0 0.0 1.0\n", + ".. ... ... ... ...\n", + "886 0.0 1.0 0.0 1.0\n", + "887 0.0 1.0 0.0 0.0\n", + "888 0.0 1.0 0.0 0.0\n", + "889 0.0 0.0 0.0 1.0\n", + "890 1.0 0.0 0.0 1.0\n", + "\n", + "[891 rows x 4 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "import numpy as np\n", + "\n", + "encoder = OneHotEncoder(sparse_output=False, drop=\"first\")\n", + "\n", + "encoded_values = encoder.fit_transform(titanic[[\"Embarked\", \"Sex\"]])\n", + "\n", + "encoded_columns = encoder.get_feature_names_out([\"Embarked\", \"Sex\"])\n", + "\n", + "encoded_values_df = pd.DataFrame(encoded_values, columns=encoded_columns)\n", + "\n", + "encoded_values_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Добавление признаков в исходный Dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
SurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedEmbarked_QEmbarked_SEmbarked_nanSex_male
10.03.0Braund, Mr. Owen Harrismale22.01.00.0A/5 211717.2500NaNS0.00.00.00.0
21.01.0Cumings, Mrs. John Bradley (Florence Briggs Th...female38.01.00.0PC 1759971.2833C85C0.01.00.00.0
31.03.0Heikkinen, Miss. Lainafemale26.00.00.0STON/O2. 31012827.9250NaNS0.01.00.00.0
41.01.0Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01.00.011380353.1000C123S0.01.00.01.0
50.03.0Allen, Mr. William Henrymale35.00.00.03734508.0500NaNS1.00.00.01.0
................................................
8881.01.0Graham, Miss. Margaret Edithfemale19.00.00.011205330.0000B42S0.01.00.00.0
8890.03.0Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN1.02.0W./C. 660723.4500NaNS0.00.00.01.0
8901.01.0Behr, Mr. Karl Howellmale26.00.00.011136930.0000C148C1.00.00.01.0
8910.03.0Dooley, Mr. Patrickmale32.00.00.03703767.7500NaNQNaNNaNNaNNaN
0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.00.01.0
\n", + "

892 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " Survived Pclass Name \\\n", + "1 0.0 3.0 Braund, Mr. Owen Harris \n", + "2 1.0 1.0 Cumings, Mrs. John Bradley (Florence Briggs Th... \n", + "3 1.0 3.0 Heikkinen, Miss. Laina \n", + "4 1.0 1.0 Futrelle, Mrs. Jacques Heath (Lily May Peel) \n", + "5 0.0 3.0 Allen, Mr. William Henry \n", + ".. ... ... ... \n", + "888 1.0 1.0 Graham, Miss. Margaret Edith \n", + "889 0.0 3.0 Johnston, Miss. Catherine Helen \"Carrie\" \n", + "890 1.0 1.0 Behr, Mr. Karl Howell \n", + "891 0.0 3.0 Dooley, Mr. Patrick \n", + "0 NaN NaN NaN \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \\\n", + "1 male 22.0 1.0 0.0 A/5 21171 7.2500 NaN S \n", + "2 female 38.0 1.0 0.0 PC 17599 71.2833 C85 C \n", + "3 female 26.0 0.0 0.0 STON/O2. 3101282 7.9250 NaN S \n", + "4 female 35.0 1.0 0.0 113803 53.1000 C123 S \n", + "5 male 35.0 0.0 0.0 373450 8.0500 NaN S \n", + ".. ... ... ... ... ... ... ... ... \n", + "888 female 19.0 0.0 0.0 112053 30.0000 B42 S \n", + "889 female NaN 1.0 2.0 W./C. 6607 23.4500 NaN S \n", + "890 male 26.0 0.0 0.0 111369 30.0000 C148 C \n", + "891 male 32.0 0.0 0.0 370376 7.7500 NaN Q \n", + "0 NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " Embarked_Q Embarked_S Embarked_nan Sex_male \n", + "1 0.0 0.0 0.0 0.0 \n", + "2 0.0 1.0 0.0 0.0 \n", + "3 0.0 1.0 0.0 0.0 \n", + "4 0.0 1.0 0.0 1.0 \n", + "5 1.0 0.0 0.0 1.0 \n", + ".. ... ... ... ... 
\n", + "888 0.0 1.0 0.0 0.0 \n", + "889 0.0 0.0 0.0 1.0 \n", + "890 1.0 0.0 0.0 1.0 \n", + "891 NaN NaN NaN NaN \n", + "0 0.0 1.0 0.0 1.0 \n", + "\n", + "[892 rows x 15 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic = pd.concat([titanic, encoded_values_df], axis=1)\n", + "\n", + "titanic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Дискретизация признаков" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Равномерное разделение данных на 3 группы" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "labels = [\"young\", \"middle-aged\", \"old\"]\n", + "num_bins = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 0.42 , 26.94666667, 53.47333333, 80. ]),\n", + " array([319, 523, 50]))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hist1, bins1 = np.histogram(titanic[\"Age\"].fillna(titanic[\"Age\"].median()), bins=num_bins)\n", + "bins1, hist1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0(0.42, 26.947]
238.0(26.947, 53.473]
326.0(0.42, 26.947]
435.0(26.947, 53.473]
535.0(26.947, 53.473]
6NaNNaN
754.0(53.473, 80.0]
82.0(0.42, 26.947]
927.0(26.947, 53.473]
1014.0(0.42, 26.947]
114.0(0.42, 26.947]
1258.0(53.473, 80.0]
1320.0(0.42, 26.947]
1439.0(26.947, 53.473]
1514.0(0.42, 26.947]
1655.0(53.473, 80.0]
172.0(0.42, 26.947]
18NaNNaN
1931.0(26.947, 53.473]
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 (0.42, 26.947]\n", + "2 38.0 (26.947, 53.473]\n", + "3 26.0 (0.42, 26.947]\n", + "4 35.0 (26.947, 53.473]\n", + "5 35.0 (26.947, 53.473]\n", + "6 NaN NaN\n", + "7 54.0 (53.473, 80.0]\n", + "8 2.0 (0.42, 26.947]\n", + "9 27.0 (26.947, 53.473]\n", + "10 14.0 (0.42, 26.947]\n", + "11 4.0 (0.42, 26.947]\n", + "12 58.0 (53.473, 80.0]\n", + "13 20.0 (0.42, 26.947]\n", + "14 39.0 (26.947, 53.473]\n", + "15 14.0 (0.42, 26.947]\n", + "16 55.0 (53.473, 80.0]\n", + "17 2.0 (0.42, 26.947]\n", + "18 NaN NaN\n", + "19 31.0 (26.947, 53.473]\n", + "20 NaN NaN" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.cut(titanic[\"Age\"], list(bins1))], axis=1).head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0young
238.0middle-aged
326.0young
435.0middle-aged
535.0middle-aged
6NaNNaN
754.0old
82.0young
927.0middle-aged
1014.0young
114.0young
1258.0old
1320.0young
1439.0middle-aged
1514.0young
1655.0old
172.0young
18NaNNaN
1931.0middle-aged
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 young\n", + "2 38.0 middle-aged\n", + "3 26.0 young\n", + "4 35.0 middle-aged\n", + "5 35.0 middle-aged\n", + "6 NaN NaN\n", + "7 54.0 old\n", + "8 2.0 young\n", + "9 27.0 middle-aged\n", + "10 14.0 young\n", + "11 4.0 young\n", + "12 58.0 old\n", + "13 20.0 young\n", + "14 39.0 middle-aged\n", + "15 14.0 young\n", + "16 55.0 old\n", + "17 2.0 young\n", + "18 NaN NaN\n", + "19 31.0 middle-aged\n", + "20 NaN NaN" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.cut(titanic[\"Age\"], list(bins1), labels=labels)], axis=1).head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Равномерное разделение данных на 3 группы c установкой собственной границы диапазона значений (от 0 до 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 0. , 33.33333333, 66.66666667, 100. ]),\n", + " array([641, 244, 7]))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bins2 = np.linspace(0, 100, 4)\n", + "tmp_bins2 = np.digitize(titanic[\"Age\"].fillna(titanic[\"Age\"].median()), bins2)\n", + "hist2 = np.bincount(tmp_bins2 - 1)\n", + "bins2, hist2" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0(0.0, 33.333]
238.0(33.333, 66.667]
326.0(0.0, 33.333]
435.0(33.333, 66.667]
535.0(33.333, 66.667]
6NaNNaN
754.0(33.333, 66.667]
82.0(0.0, 33.333]
927.0(0.0, 33.333]
1014.0(0.0, 33.333]
114.0(0.0, 33.333]
1258.0(33.333, 66.667]
1320.0(0.0, 33.333]
1439.0(33.333, 66.667]
1514.0(0.0, 33.333]
1655.0(33.333, 66.667]
172.0(0.0, 33.333]
18NaNNaN
1931.0(0.0, 33.333]
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 (0.0, 33.333]\n", + "2 38.0 (33.333, 66.667]\n", + "3 26.0 (0.0, 33.333]\n", + "4 35.0 (33.333, 66.667]\n", + "5 35.0 (33.333, 66.667]\n", + "6 NaN NaN\n", + "7 54.0 (33.333, 66.667]\n", + "8 2.0 (0.0, 33.333]\n", + "9 27.0 (0.0, 33.333]\n", + "10 14.0 (0.0, 33.333]\n", + "11 4.0 (0.0, 33.333]\n", + "12 58.0 (33.333, 66.667]\n", + "13 20.0 (0.0, 33.333]\n", + "14 39.0 (33.333, 66.667]\n", + "15 14.0 (0.0, 33.333]\n", + "16 55.0 (33.333, 66.667]\n", + "17 2.0 (0.0, 33.333]\n", + "18 NaN NaN\n", + "19 31.0 (0.0, 33.333]\n", + "20 NaN NaN" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.cut(titanic[\"Age\"], list(bins2))], axis=1).head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0young
238.0middle-aged
326.0young
435.0middle-aged
535.0middle-aged
6NaNNaN
754.0middle-aged
82.0young
927.0young
1014.0young
114.0young
1258.0middle-aged
1320.0young
1439.0middle-aged
1514.0young
1655.0middle-aged
172.0young
18NaNNaN
1931.0young
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 young\n", + "2 38.0 middle-aged\n", + "3 26.0 young\n", + "4 35.0 middle-aged\n", + "5 35.0 middle-aged\n", + "6 NaN NaN\n", + "7 54.0 middle-aged\n", + "8 2.0 young\n", + "9 27.0 young\n", + "10 14.0 young\n", + "11 4.0 young\n", + "12 58.0 middle-aged\n", + "13 20.0 young\n", + "14 39.0 middle-aged\n", + "15 14.0 young\n", + "16 55.0 middle-aged\n", + "17 2.0 young\n", + "18 NaN NaN\n", + "19 31.0 young\n", + "20 NaN NaN" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.cut(titanic[\"Age\"], list(bins2), labels=labels)], axis=1).head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Равномерное разделение данных на 3 группы c установкой собственных интервалов (0 - 39, 40 - 60, 61 - 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 0, 40, 60, 100]), array([729, 137, 26]))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hist3, bins3 = np.histogram(\n", + " titanic[\"Age\"].fillna(titanic[\"Age\"].median()), bins=[0, 40, 60, 100]\n", + ")\n", + "bins3, hist3" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0(0.0, 40.0]
238.0(0.0, 40.0]
326.0(0.0, 40.0]
435.0(0.0, 40.0]
535.0(0.0, 40.0]
6NaNNaN
754.0(40.0, 60.0]
82.0(0.0, 40.0]
927.0(0.0, 40.0]
1014.0(0.0, 40.0]
114.0(0.0, 40.0]
1258.0(40.0, 60.0]
1320.0(0.0, 40.0]
1439.0(0.0, 40.0]
1514.0(0.0, 40.0]
1655.0(40.0, 60.0]
172.0(0.0, 40.0]
18NaNNaN
1931.0(0.0, 40.0]
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 (0.0, 40.0]\n", + "2 38.0 (0.0, 40.0]\n", + "3 26.0 (0.0, 40.0]\n", + "4 35.0 (0.0, 40.0]\n", + "5 35.0 (0.0, 40.0]\n", + "6 NaN NaN\n", + "7 54.0 (40.0, 60.0]\n", + "8 2.0 (0.0, 40.0]\n", + "9 27.0 (0.0, 40.0]\n", + "10 14.0 (0.0, 40.0]\n", + "11 4.0 (0.0, 40.0]\n", + "12 58.0 (40.0, 60.0]\n", + "13 20.0 (0.0, 40.0]\n", + "14 39.0 (0.0, 40.0]\n", + "15 14.0 (0.0, 40.0]\n", + "16 55.0 (40.0, 60.0]\n", + "17 2.0 (0.0, 40.0]\n", + "18 NaN NaN\n", + "19 31.0 (0.0, 40.0]\n", + "20 NaN NaN" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.cut(titanic[\"Age\"], list(bins3))], axis=1).head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0young
238.0young
326.0young
435.0young
535.0young
6NaNNaN
754.0middle-aged
82.0young
927.0young
1014.0young
114.0young
1258.0middle-aged
1320.0young
1439.0young
1514.0young
1655.0middle-aged
172.0young
18NaNNaN
1931.0young
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 young\n", + "2 38.0 young\n", + "3 26.0 young\n", + "4 35.0 young\n", + "5 35.0 young\n", + "6 NaN NaN\n", + "7 54.0 middle-aged\n", + "8 2.0 young\n", + "9 27.0 young\n", + "10 14.0 young\n", + "11 4.0 young\n", + "12 58.0 middle-aged\n", + "13 20.0 young\n", + "14 39.0 young\n", + "15 14.0 young\n", + "16 55.0 middle-aged\n", + "17 2.0 young\n", + "18 NaN NaN\n", + "19 31.0 young\n", + "20 NaN NaN" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.cut(titanic[\"Age\"], list(bins3), labels=labels)], axis=1).head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Квантильное разделение данных на 3 группы" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.00.0
238.02.0
326.01.0
435.02.0
535.02.0
6NaNNaN
754.02.0
82.00.0
927.01.0
1014.00.0
114.00.0
1258.02.0
1320.00.0
1439.02.0
1514.00.0
1655.02.0
172.00.0
18NaNNaN
1931.01.0
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 0.0\n", + "2 38.0 2.0\n", + "3 26.0 1.0\n", + "4 35.0 2.0\n", + "5 35.0 2.0\n", + "6 NaN NaN\n", + "7 54.0 2.0\n", + "8 2.0 0.0\n", + "9 27.0 1.0\n", + "10 14.0 0.0\n", + "11 4.0 0.0\n", + "12 58.0 2.0\n", + "13 20.0 0.0\n", + "14 39.0 2.0\n", + "15 14.0 0.0\n", + "16 55.0 2.0\n", + "17 2.0 0.0\n", + "18 NaN NaN\n", + "19 31.0 1.0\n", + "20 NaN NaN" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.qcut(titanic[\"Age\"], q=3, labels=False)], axis=1).head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeAge
122.0young
238.0old
326.0middle-aged
435.0old
535.0old
6NaNNaN
754.0old
82.0young
927.0middle-aged
1014.0young
114.0young
1258.0old
1320.0young
1439.0old
1514.0young
1655.0old
172.0young
18NaNNaN
1931.0middle-aged
20NaNNaN
\n", + "
" + ], + "text/plain": [ + " Age Age\n", + "1 22.0 young\n", + "2 38.0 old\n", + "3 26.0 middle-aged\n", + "4 35.0 old\n", + "5 35.0 old\n", + "6 NaN NaN\n", + "7 54.0 old\n", + "8 2.0 young\n", + "9 27.0 middle-aged\n", + "10 14.0 young\n", + "11 4.0 young\n", + "12 58.0 old\n", + "13 20.0 young\n", + "14 39.0 old\n", + "15 14.0 young\n", + "16 55.0 old\n", + "17 2.0 young\n", + "18 NaN NaN\n", + "19 31.0 middle-aged\n", + "20 NaN NaN" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([titanic[\"Age\"], pd.qcut(titanic[\"Age\"], q=3, labels=labels)], axis=1).head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Пример конструирования признаков на основе существующих\n", + "\n", + "Title - обращение к пассажиру (Mr, Mrs, Miss)\n", + "\n", + "Is_married - замужняя ли женщина\n", + "\n", + "Cabin_type - палуба (тип каюты)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleIs_marriedCabin_type
21.01.0Cumings, Mrs. John Bradley (Florence Briggs Th...female38.01.00.0PC 1759971.2833C85CMrs1C
41.01.0Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01.00.011380353.1000C123SMrs1C
70.01.0McCarthy, Mr. Timothy Jmale54.00.00.01746351.8625E46SMr0E
111.03.0Sandstrom, Miss. Marguerite Rutfemale4.01.01.0PP 954916.7000G6SMiss0G
121.01.0Bonnell, Miss. Elizabethfemale58.00.00.011378326.5500C103SMiss0C
.............................................
8721.01.0Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.01.01.01175152.5542D35SMrs1D
8730.01.0Carlsson, Mr. Frans Olofmale33.00.00.06955.0000B51 B53 B55SMr0B
8801.01.0Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.00.01.01176783.1583C50CMrs1C
8881.01.0Graham, Miss. Margaret Edithfemale19.00.00.011205330.0000B42SMiss0B
8901.01.0Behr, Mr. Karl Howellmale26.00.00.011136930.0000C148CMr0C
\n", + "

183 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Survived Pclass Name \\\n", + "2 1.0 1.0 Cumings, Mrs. John Bradley (Florence Briggs Th... \n", + "4 1.0 1.0 Futrelle, Mrs. Jacques Heath (Lily May Peel) \n", + "7 0.0 1.0 McCarthy, Mr. Timothy J \n", + "11 1.0 3.0 Sandstrom, Miss. Marguerite Rut \n", + "12 1.0 1.0 Bonnell, Miss. Elizabeth \n", + ".. ... ... ... \n", + "872 1.0 1.0 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) \n", + "873 0.0 1.0 Carlsson, Mr. Frans Olof \n", + "880 1.0 1.0 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) \n", + "888 1.0 1.0 Graham, Miss. Margaret Edith \n", + "890 1.0 1.0 Behr, Mr. Karl Howell \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \\\n", + "2 female 38.0 1.0 0.0 PC 17599 71.2833 C85 C \n", + "4 female 35.0 1.0 0.0 113803 53.1000 C123 S \n", + "7 male 54.0 0.0 0.0 17463 51.8625 E46 S \n", + "11 female 4.0 1.0 1.0 PP 9549 16.7000 G6 S \n", + "12 female 58.0 0.0 0.0 113783 26.5500 C103 S \n", + ".. ... ... ... ... ... ... ... ... \n", + "872 female 47.0 1.0 1.0 11751 52.5542 D35 S \n", + "873 male 33.0 0.0 0.0 695 5.0000 B51 B53 B55 S \n", + "880 female 56.0 0.0 1.0 11767 83.1583 C50 C \n", + "888 female 19.0 0.0 0.0 112053 30.0000 B42 S \n", + "890 male 26.0 0.0 0.0 111369 30.0000 C148 C \n", + "\n", + " Title Is_married Cabin_type \n", + "2 Mrs 1 C \n", + "4 Mrs 1 C \n", + "7 Mr 0 E \n", + "11 Miss 0 G \n", + "12 Miss 0 C \n", + ".. ... ... ... 
\n", + "872 Mrs 1 D \n", + "873 Mr 0 B \n", + "880 Mrs 1 C \n", + "888 Miss 0 B \n", + "890 Mr 0 C \n", + "\n", + "[183 rows x 14 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_cl = titanic.drop(\n", + " [\"Embarked_Q\", \"Embarked_S\", \"Embarked_nan\", \"Sex_male\"], axis=1, errors=\"ignore\"\n", + ")\n", + "titanic_cl = titanic_cl.dropna()\n", + "\n", + "titanic_cl[\"Title\"] = [\n", + " i.split(\",\")[1].split(\".\")[0].strip() for i in titanic_cl[\"Name\"]\n", + "]\n", + "\n", + "titanic_cl[\"Is_married\"] = [1 if i == \"Mrs\" else 0 for i in titanic_cl[\"Title\"]]\n", + "\n", + "titanic_cl[\"Cabin_type\"] = [i[0] for i in titanic_cl[\"Cabin\"]]\n", + "\n", + "titanic_cl" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Пример использования библиотеки Featuretools для автоматического конструирования (синтеза) признаков\n", + "\n", + "https://featuretools.alteryx.com/en/stable/getting_started/using_entitysets.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Загрузка данных\n", + "\n", + "За основу был взят набор данных \"Ecommerce Orders Data Set\" из Kaggle\n", + "\n", + "Используется только 100 первых заказов и связанные с ними объекты\n", + "\n", + "https://www.kaggle.com/datasets/sangamsharmait/ecommerce-orders-data-analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import featuretools as ft\n", + "from woodwork.logical_types import Categorical, Datetime\n", + "\n", + "customers = pd.read_csv(\"data/orders/customers.csv\")\n", + "sellers = pd.read_csv(\"data/orders/sellers.csv\")\n", + "products = pd.read_csv(\"data/orders/products.csv\")\n", + "orders = pd.read_csv(\"data/orders/orders.csv\")\n", + "orders.fillna({\"order_delivered_carrier_date\": pd.to_datetime(\n", + " \"1900-01-01 00:00:00\"\n", + ")}, inplace=True)\n", + 
"orders.fillna(\n", + " {\"order_delivered_customer_date\": pd.to_datetime(\"1900-01-01 00:00:00\")},\n", + " inplace=True,\n", + ")\n", + "order_items = pd.read_csv(\"data/orders/order_items.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Создание сущностей в featuretools\n", + "\n", + "Добавление dataframe'ов с данными в EntitySet с указанием параметров: название сущности (таблицы), первичный ключ, категориальные атрибуты (в том числе даты)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. 
To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. 
To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. 
To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n" + ] + }, + { + "data": { + "text/plain": [ + "Entityset: orders\n", + " DataFrames:\n", + " customers [Rows: 100, Columns: 5]\n", + " sellers [Rows: 87, Columns: 4]\n", + " products [Rows: 100, Columns: 9]\n", + " orders [Rows: 100, Columns: 8]\n", + " order_items [Rows: 115, Columns: 8]\n", + " Relationships:\n", + " No relationships" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "es = ft.EntitySet(id=\"orders\")\n", + "\n", + "es = es.add_dataframe(\n", + " dataframe_name=\"customers\",\n", + " dataframe=customers,\n", + " index=\"customer_id\",\n", + " logical_types={\n", + " \"customer_unique_id\": Categorical,\n", + " \"customer_zip_code_prefix\": Categorical,\n", + " \"customer_city\": Categorical,\n", + " \"customer_state\": Categorical,\n", + " },\n", + ")\n", + "es = es.add_dataframe(\n", + " dataframe_name=\"sellers\",\n", + " dataframe=sellers,\n", + " index=\"seller_id\",\n", + " logical_types={\n", + " \"seller_zip_code_prefix\": Categorical,\n", + " \"seller_city\": Categorical,\n", + " \"seller_state\": Categorical,\n", + " },\n", + ")\n", + "es = es.add_dataframe(\n", + " dataframe_name=\"products\",\n", + " dataframe=products,\n", + " index=\"product_id\",\n", + " logical_types={\n", + " \"product_category_name\": Categorical,\n", + " \"product_name_lenght\": Categorical,\n", + " \"product_description_lenght\": Categorical,\n", + " \"product_photos_qty\": Categorical,\n", + " },\n", + ")\n", + "es = es.add_dataframe(\n", + " dataframe_name=\"orders\",\n", + " dataframe=orders,\n", + " index=\"order_id\",\n", + " logical_types={\n", + " \"order_status\": Categorical,\n", + " \"order_purchase_timestamp\": Datetime,\n", + " \"order_approved_at\": Datetime,\n", + " \"order_delivered_carrier_date\": Datetime,\n", + " \"order_delivered_customer_date\": Datetime,\n", + " 
\"order_estimated_delivery_date\": Datetime,\n", + " },\n", + ")\n", + "es = es.add_dataframe(\n", + " dataframe_name=\"order_items\",\n", + " dataframe=order_items,\n", + " index=\"orderitem_id\",\n", + " make_index=True,\n", + " logical_types={\"shipping_limit_date\": Datetime},\n", + ")\n", + "\n", + "es" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Настройка связей между сущностями featuretools\n", + "\n", + "Настройка связей между таблицами на уровне ключей\n", + "\n", + "Связь указывается от родителя к потомкам (таблица-родитель, первичный ключ, таблица-потомок, внешний ключ)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: orders\n", + " DataFrames:\n", + " customers [Rows: 100, Columns: 5]\n", + " sellers [Rows: 87, Columns: 4]\n", + " products [Rows: 100, Columns: 9]\n", + " orders [Rows: 100, Columns: 8]\n", + " order_items [Rows: 115, Columns: 8]\n", + " Relationships:\n", + " orders.customer_id -> customers.customer_id\n", + " order_items.order_id -> orders.order_id\n", + " order_items.product_id -> products.product_id\n", + " order_items.seller_id -> sellers.seller_id" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "es = es.add_relationship(\"customers\", \"customer_id\", \"orders\", \"customer_id\")\n", + "es = es.add_relationship(\"orders\", \"order_id\", \"order_items\", \"order_id\")\n", + "es = es.add_relationship(\"products\", \"product_id\", \"order_items\", \"product_id\")\n", + "es = es.add_relationship(\"sellers\", \"seller_id\", \"order_items\", \"seller_id\")\n", + "\n", + "es" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Автоматическое конструирование признаков с помощью featuretools\n", + "\n", + "Библиотека применяет различные функции агрегации и трансформации к атрибутам таблицы order_items с учетом отношений\n", 
+ "\n", + "Результат помещается в Dataframe feature_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\dfs.py:321: UnusedPrimitiveWarning: Some specified primitives were not used during DFS:\n", + " agg_primitives: ['any', 'mode']\n", + "This may be caused by a using a value of max_depth that is too small, not setting interesting values, or it may indicate no compatible columns for the primitive were found in the data. If the DFS call contained multiple instances of a primitive in the list above, none of them were used.\n", + " warnings.warn(warning_msg, UnusedPrimitiveWarning)\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"mean\" instead.\n", + " ).agg(to_agg)\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"mean\" instead.\n", + " ).agg(to_agg)\n", + "c:\\Users\\user\\Projects\\python\\ckmai\\.venv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:785: FutureWarning: The provided callable is currently using SeriesGroupBy.mean. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"mean\" instead.\n", + " ).agg(to_agg)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
order_item_idpricefreight_valueHOUR(shipping_limit_date)WEEKDAY(shipping_limit_date)orders.order_statusproducts.product_category_nameproducts.product_name_lenghtproducts.product_description_lenghtproducts.product_photos_qty...orders.customers.customer_cityorders.customers.customer_stateproducts.COUNT(order_items)products.MEAN(order_items.freight_value)products.MEAN(order_items.order_item_id)products.MEAN(order_items.price)sellers.COUNT(order_items)sellers.MEAN(order_items.freight_value)sellers.MEAN(order_items.order_item_id)sellers.MEAN(order_items.price)
orderitem_id
0138.5024.84204deliveredcama_mesa_banho53.0223.01.0...santa luziaPB124.841.038.50221.3401.061.200000
1129.997.3980deliveredtelefonia59.0675.05.0...sao pauloSP17.391.029.9917.3901.029.990000
21110.9921.27211deliveredcama_mesa_banho52.0413.01.0...gravataiRS121.271.0110.99121.2701.0110.990000
3127.9915.10231deliveredtelefonia60.0818.06.0...imbitubaSC115.101.027.99213.9701.026.490000
4149.9016.05132invoicedNaNNaNNaNNaN...santa rosaRS116.051.049.90116.0501.049.900000
..................................................................
110117.9010.9681deliveredcama_mesa_banho55.0122.01.0...jundiaiSP110.961.017.90110.9601.017.900000
111179.998.9194deliveredbeleza_saude59.0492.03.0...sao pauloSP18.911.079.99513.2061.254.590000
1121190.0019.41133deliveredclimatizacao60.03270.04.0...pauliniaSP119.411.0190.00119.4101.0190.000000
1131109.9015.5322deliveredcool_stuff46.0595.02.0...rio de janeiroRJ115.531.0109.90115.5301.0109.900000
114127.9018.30142deliveredalimentos59.0982.01.0...joinvilleSC216.701.027.90316.1901.038.596667
\n", + "

115 rows × 43 columns

\n", + "
" + ], + "text/plain": [ + " order_item_id price freight_value HOUR(shipping_limit_date) \\\n", + "orderitem_id \n", + "0 1 38.50 24.84 20 \n", + "1 1 29.99 7.39 8 \n", + "2 1 110.99 21.27 21 \n", + "3 1 27.99 15.10 23 \n", + "4 1 49.90 16.05 13 \n", + "... ... ... ... ... \n", + "110 1 17.90 10.96 8 \n", + "111 1 79.99 8.91 9 \n", + "112 1 190.00 19.41 13 \n", + "113 1 109.90 15.53 2 \n", + "114 1 27.90 18.30 14 \n", + "\n", + " WEEKDAY(shipping_limit_date) orders.order_status \\\n", + "orderitem_id \n", + "0 4 delivered \n", + "1 0 delivered \n", + "2 1 delivered \n", + "3 1 delivered \n", + "4 2 invoiced \n", + "... ... ... \n", + "110 1 delivered \n", + "111 4 delivered \n", + "112 3 delivered \n", + "113 2 delivered \n", + "114 2 delivered \n", + "\n", + " products.product_category_name products.product_name_lenght \\\n", + "orderitem_id \n", + "0 cama_mesa_banho 53.0 \n", + "1 telefonia 59.0 \n", + "2 cama_mesa_banho 52.0 \n", + "3 telefonia 60.0 \n", + "4 NaN NaN \n", + "... ... ... \n", + "110 cama_mesa_banho 55.0 \n", + "111 beleza_saude 59.0 \n", + "112 climatizacao 60.0 \n", + "113 cool_stuff 46.0 \n", + "114 alimentos 59.0 \n", + "\n", + " products.product_description_lenght products.product_photos_qty \\\n", + "orderitem_id \n", + "0 223.0 1.0 \n", + "1 675.0 5.0 \n", + "2 413.0 1.0 \n", + "3 818.0 6.0 \n", + "4 NaN NaN \n", + "... ... ... \n", + "110 122.0 1.0 \n", + "111 492.0 3.0 \n", + "112 3270.0 4.0 \n", + "113 595.0 2.0 \n", + "114 982.0 1.0 \n", + "\n", + " ... orders.customers.customer_city \\\n", + "orderitem_id ... \n", + "0 ... santa luzia \n", + "1 ... sao paulo \n", + "2 ... gravatai \n", + "3 ... imbituba \n", + "4 ... santa rosa \n", + "... ... ... \n", + "110 ... jundiai \n", + "111 ... sao paulo \n", + "112 ... paulinia \n", + "113 ... rio de janeiro \n", + "114 ... 
joinville \n", + "\n", + " orders.customers.customer_state products.COUNT(order_items) \\\n", + "orderitem_id \n", + "0 PB 1 \n", + "1 SP 1 \n", + "2 RS 1 \n", + "3 SC 1 \n", + "4 RS 1 \n", + "... ... ... \n", + "110 SP 1 \n", + "111 SP 1 \n", + "112 SP 1 \n", + "113 RJ 1 \n", + "114 SC 2 \n", + "\n", + " products.MEAN(order_items.freight_value) \\\n", + "orderitem_id \n", + "0 24.84 \n", + "1 7.39 \n", + "2 21.27 \n", + "3 15.10 \n", + "4 16.05 \n", + "... ... \n", + "110 10.96 \n", + "111 8.91 \n", + "112 19.41 \n", + "113 15.53 \n", + "114 16.70 \n", + "\n", + " products.MEAN(order_items.order_item_id) \\\n", + "orderitem_id \n", + "0 1.0 \n", + "1 1.0 \n", + "2 1.0 \n", + "3 1.0 \n", + "4 1.0 \n", + "... ... \n", + "110 1.0 \n", + "111 1.0 \n", + "112 1.0 \n", + "113 1.0 \n", + "114 1.0 \n", + "\n", + " products.MEAN(order_items.price) sellers.COUNT(order_items) \\\n", + "orderitem_id \n", + "0 38.50 2 \n", + "1 29.99 1 \n", + "2 110.99 1 \n", + "3 27.99 2 \n", + "4 49.90 1 \n", + "... ... ... \n", + "110 17.90 1 \n", + "111 79.99 5 \n", + "112 190.00 1 \n", + "113 109.90 1 \n", + "114 27.90 3 \n", + "\n", + " sellers.MEAN(order_items.freight_value) \\\n", + "orderitem_id \n", + "0 21.340 \n", + "1 7.390 \n", + "2 21.270 \n", + "3 13.970 \n", + "4 16.050 \n", + "... ... \n", + "110 10.960 \n", + "111 13.206 \n", + "112 19.410 \n", + "113 15.530 \n", + "114 16.190 \n", + "\n", + " sellers.MEAN(order_items.order_item_id) \\\n", + "orderitem_id \n", + "0 1.0 \n", + "1 1.0 \n", + "2 1.0 \n", + "3 1.0 \n", + "4 1.0 \n", + "... ... \n", + "110 1.0 \n", + "111 1.2 \n", + "112 1.0 \n", + "113 1.0 \n", + "114 1.0 \n", + "\n", + " sellers.MEAN(order_items.price) \n", + "orderitem_id \n", + "0 61.200000 \n", + "1 29.990000 \n", + "2 110.990000 \n", + "3 26.490000 \n", + "4 49.900000 \n", + "... ... 
\n", + "110 17.900000 \n", + "111 54.590000 \n", + "112 190.000000 \n", + "113 109.900000 \n", + "114 38.596667 \n", + "\n", + "[115 rows x 43 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_matrix, feature_defs = ft.dfs(\n", + " entityset=es,\n", + " target_dataframe_name=\"order_items\",\n", + " agg_primitives=[\"mean\", \"count\", \"mode\", \"any\"],\n", + " trans_primitives=[\"hour\", \"weekday\"],\n", + " max_depth=2,\n", + ")\n", + "\n", + "feature_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Полученные признаки\n", + "\n", + "Список колонок полученного dataframe'а" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_defs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Отсечение значений признаков" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Определение выбросов с помощью boxplot" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAqrElEQVR4nO3dfXRU9YH/8U9IJpMnEkqUSVISyI8ooRqkIiWpAkLzsEg5ZglF+dnf0hbXnpaHhqCUuIrGBaNYHo7IQ92ThdUWVNjIFuQhWdQQaoIQC4VqI9gIKCQ+JgMJmUwm8/vDZbYRVCaZfCcP79c5OcP93u/c+cA5N/Ph3jtzA9xut1sAAACG9PN3AAAA0LdQPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYFeTvAF/W1tams2fPqn///goICPB3HAAAcBXcbrfOnz+vuLg49ev39cc2ul35OHv2rOLj4/0dAwAAdMCZM2c0ePDgr53T7cpH//79JX0RPjIy0s9pAPiS0+lUSUmJMjMzZbFY/B0HgA/Z7XbFx8d73se/TrcrH5dOtURGRlI+gF7G6XQqLCxMkZGRlA+gl7qaSya44BQAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDgBEul0tlZWXav3+/ysrK5HK5/B0JgJ94VT5cLpcefvhhJSYmKjQ0VMOGDdO//uu/yu12e+a43W4tWbJEsbGxCg0NVXp6uk6cOOHz4AB6juLiYiUlJSkjI0MrV65URkaGkpKSVFxc7O9oAPzAq/Lx5JNPav369XrmmWf0zjvv6Mknn9Ty5cu1Zs0az5zly5fr6aef1oYNG3Tw4EGFh4crKytLzc3NPg8PoPsrLi7W9OnTlZKSovLycm3ZskXl5eVKSUnR9OnTKSBAHxTg/vvDFt/ghz/8oWw2m4qKijxjOTk5Cg0N1e9+9zu53W7FxcVp4cKFuv/++yVJDQ0Nstls2rRpk+6+++5vfA273a6oqCg1NDRwbxegh3O5XEpKSlJKSoq2b98ul8ulXbt26Y477lBgYKCys7N1/PhxnThxQoGBgf6OC6ATvHn/9urGct///vf17LPP6t1339X111+vo0eP6sCBA1q5cqUkqaamRrW1tUpPT/c8JyoqSmPHjlVFRcUVy4fD4ZDD4WgXXvriBlROp9ObeAC6mbKyMr3//vt6/vnn5XK5PPv0pccHHnhA48eP12uvvaYJEyb4MyqATvLmPdur8rF48WLZ7XYlJycrMDBQLpdLy5Yt0z333CNJqq2tlSTZbLZ2z7PZbJ51X1ZYWKiCgoLLxktKShQWFuZNPADdzP79+yVJH3zwgT799FPPeGlpqSTp4sWLkqTdu3ersbHRfEAAPtPU1HTVc70qHy+99JJ+//vfa/Pmzbrhhht05MgR5ebmKi4uTrNmzfI6qCTl5+crLy/Ps2y32xUfH6/MzExOuwA9XHh4uFauXKnBgwdr7NixcjqdKi0tVUZGhiwWiyorKyVJkydP5sgH0MNdOnNxNbwqHw888IAWL17sOX2SkpKiU6dOqbCwULNmzVJMTIwkqa6uTrGxsZ7n1dXVadSoUVfcptVqldVqvWzcYrHIYrF4Ew9ANzNx4kQNHTpUy5cv1/bt2z3jFotFgYGBeuqpp5SYmKiJEydyzQfQw3nznu3Vp12amprUr1/7pwQGBqqtrU2SlJiYqJiYGO3bt8+z3m636+DBg0pLS/PmpQD0AoGBgVqxYoV27typ7OxsVVZW6uLFi6qsrFR2drZ27typ3/zmNxQPoI/x6sjH1KlTtWzZMiUkJOiGG27Qn/70J61cuVI/+9nPJEkBAQHKzc3V0qVLdd111ykxMVEPP/yw4uLilJ2d3RX5AXRz06ZN07Zt27Rw4UKNHz/eM56YmKht27Zp2rRpfkwHwB+8+qjt+fPn9fDDD+vll1/WRx99pLi4OM2cOVNLlix
RcHCwpC++ZOyRRx7Rs88+q/r6et12221at26drr/++qt6DT5qC/ROLpdLr732mnbv3q3JkydzqgXoZbx5//aqfJhA+QB6L6fT6fmeD67pAnoXb96/ubcLAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAMMLlcqmsrEz79+9XWVmZXC6XvyMB8BPKB4AuV1xcrKSkJGVkZGjlypXKyMhQUlKSiouL/R0NgB9QPgB0qeLiYk2fPl0pKSkqLy/Xli1bVF5erpSUFE2fPp0CAvRBAW632+3vEH/PbrcrKipKDQ0NioyM9HccAJ3gcrmUlJSklJQUbd++XS6XS7t27dIdd9yhwMBAZWdn6/jx4zpx4oQCAwP9HRdAJ3jz/s2RDwBdpry8XO+//74efPBB9evX/tdNv379lJ+fr5qaGpWXl/spIQB/oHwA6DLnzp2TJN14441XXH9p/NI8AH0D5QNAl4mNjZUkHT9+/IrrL41fmgegb6B8AOgy48aN09ChQ/X444+rra2t3bq2tjYVFhYqMTFR48aN81NCAP5A+QDQZQIDA7VixQrt3LlT2dnZqqys1MWLF1VZWans7Gzt3LlTv/nNb7jYFOhjgvwdAEDvNm3aNG3btk0LFy7U+PHjPeOJiYnatm2bpk2b5sd0APyBj9oCMMLlcum1117T7t27NXnyZE2cOJEjHkAv4s37N0c+ABgRGBioCRMmqLGxURMmTKB4AH0Y5QOAES0tLVqzZo1effVVnTx5UvPmzVNwcLC/YwHwAy44BdDlFi1apPDwcN1///3atWuX7r//foWHh2vRokX+jgbADzjyAaBLLVq0SE899ZRsNpsKCgpktVrlcDj0yCOP6KmnnpIkLV++3M8pAZjEkQ8AXaalpUWrVq2SzWbTqVOnNGzYMB07dkzDhg3TqVOnZLPZtGrVKrW0tPg7KgCDKB8Ausy6devU2tqqadOmKTk5WRkZGVq5cqUyMjKUnJysf/zHf1Rra6vWrVvn76gADOK0C4Au895770mS1q9frylTpmjq1Kmqrq7W8OHD9be//U0bNmxoNw9A3+DVkY+hQ4cqICDgsp85c+ZIkpqbmzVnzhxFR0crIiJCOTk5qqur65LgALq/oUOHSpKuvfZa7d27V2vWrFFJSYnWrFmjvXv36tprr203D0Df4FX5OHTokM6dO+f5KS0tlST96Ec/kiQtWLBAO3bs0NatW1VWVqazZ8/y7YVAH5aSkiJJ+vjjjxUdHa0NGzZo48aN2rBhg6Kjo/Xxxx+3mwegb/DqtMul/6Vc8sQTT2jYsGGaMGGCGhoaVFRUpM2bN2vSpEmSpI0bN2rEiBGqrKxUamqq71ID6BH+/shnW1ub2tra5Ha7PX++0jwAvV+Hr/loaWnR7373O+Xl5SkgIEBVVVVyOp1KT0/3zElOTlZCQoIqKiq+snw4HA45HA7Pst1ulyQ5nU45nc6OxgPQDbzxxhuSpDFjxuhPf/qTfvnLX3rWBQUF6ZZbbtHhw4f1xhtv6O677/ZXTAA+4M17dofLx/bt21VfX6+f/OQnkqTa2loFBwdrwIAB7ebZbDbV1tZ+5XYKCwtVUFBw2XhJSYnCwsI6Gg9AN/D+++9L+t//rJSUlKi2tlYxMTHKzMxUYWGhZ96uXbv8mBRAZzU1NV313A6Xj6KiIk2ePFlxcXEd3YQkKT8/X3l5eZ5lu92u+Ph4ZWZmcmM5oIc7efKk9uzZo6NHj+r555/XwoULVVdXJ5vNphUrVujo0aOSpPT0dN1xxx1+TgugMy6dubgaHSofp06d0n//93+ruLjYMxYTE6OWlhbV19e3O/pRV1enmJiYr9yW1WqV1Wq9bNxischisXQkHoBuYt68eVq8eLHCw8N17Ngxz/VgkjRkyBBFRUWpsbFR8+bNY38Hejhv9uEOfcnYxo0
bNWjQIE2ZMsUzNnr0aFksFu3bt88zVl1drdOnTystLa0jLwOghwsODtaCBQvU0NAgh8Oh3Nxc3XfffcrNzVVzc7MaGhq0YMECbjAH9DFeH/loa2vTxo0bNWvWLAUF/e/To6KiNHv2bOXl5WngwIGKjIzUvHnzlJaWxiddgD7s0n1bVq1apdWrV3vGg4KC9MADD3BfF6APCnC73W5vnlBSUqKsrCxVV1fr+uuvb7euublZCxcu1JYtW+RwOJSVlaV169Z97WmXL7Pb7YqKilJDQwPXfAC9SEtLi9asWaNXX31VkyZN0rx58zjiAfQi3rx/e10+uhrlA+i9nE6ndu3apTvuuINrPIBexpv3b24sBwAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAMMLlcqmsrEz79+9XWVmZXC6XvyMB8BPKB4AuV1xcrKSkJGVkZGjlypXKyMhQUlJSu1s0AOg7KB8AulRxcbGmT5+ulJQUlZeXa8uWLSovL1dKSoqmT59OAQH6IL5kDECXcblcSkpKUkpKirZv3y6Xy+X5krHAwEBlZ2fr+PHjOnHihAIDA/0dF0An8CVjALqF8vJyvf/++3rwwQflcDg0f/58Pfroo5o/f74cDofy8/NVU1Oj8vJyf0cFYJDXN5YDgKt17tw5SdLSpUv1yiuveMaPHDmiDRs2eO6MfWkegL6B8gGgy8TGxkqSXnnlFQUHBys3N1eJiYmqqanR6tWrPYXk0jwAfQPXfADoMhcuXFD//v0VEBCgpqYmBQYGeq75cLlcCgsLk9vt1vnz5xUREeHvuAA6gWs+AHQLixcvliS53W7NmDFDlZWVunjxoiorKzVjxgxd+r/PpXkA+gbKB4Auc+LECUnSM888o2PHjmn8+PGaOXOmxo8fr+PHj2vNmjXt5gHoGygfALrMddddJ0n64IMPdPLkSZWWliovL0+lpaU6ceKEzpw5024egL6Baz4AdJmLFy8qLCxMwcHBOn/+vAICAjzXfLjdbvXv318tLS1qampSaGiov+MC6ARv3r/5tAuAb9TU1KS//vWvHXruhAkTVFZWpoiICE2fMUNh1wzWi1u3attLL8npdGrChAl65513OpwtOTlZYWFhHX4+APM48gHgG7311lsaPXq0v2NcUVVVlW6++WZ/xwD6PI58APCp5ORkVVVVdWobFy9e1KOPL1fZn6o14bvD9eiDi3xyqiU5ObnT2wBgFkc+ABhz5NSnyl5fqe2/SNWoIdH+jgPAh/ieDwAA0G1RPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABGUT4AAIBRXpePDz/8UD/+8Y8VHR2t0NBQpaSk6PDhw571brdbS5YsUWxsrEJDQ5Wenq4TJ074NDQAAOi5vCofn3/+uW699VZZLBbt3r1bb7/9tlasWKFvfetbnjnLly/X008/rQ0bNujgwYMKDw9XVlaWmpubfR4eAAD0PF7d1fbJJ59UfHy8Nm7c6BlLTEz0/Nntdmv16tV66KGHdOedd0qSnnvuOdlsNm3fvl133323j2IDAICeyqvy8Yc//EFZWVn60Y9+pLKyMn3729/WL3/5S/3zP/+zJKmmpka1tbVKT0/3PCcqKkpjx45VRUXFFcuHw+GQw+HwLNvtdkmS0+mU0+ns0F8KQPfU2trqeWT/BnoXb/Zpr8rH3/72N61fv155eXl68MEHdejQIc2fP1/BwcGaNWuWamtrJUk2m63d82w2m2fdlxUWFqqgoOCy8ZKSEoWFhXkTD0A3d+aCJAWpsrJSHx73dxoAvtTU1HTVc70qH21tbbrlllv0+OOPS5K++93v6vjx49qwYYNmzZrlXcr/kZ+fr7y8PM+y3W5XfHy8MjMzFRkZ2aFtAuiejp7+TDp2WKmpqbopYaC/4wDwoUtnLq6GV+UjNjZW3/nOd9qNjRgxQv/5n/8pSYqJiZEk1dXVKTY21jOnrq5Oo0aNuuI
2rVarrFbrZeMWi0UWi8WbeAC6uaCgIM8j+zfQu3izT3v1aZdbb71V1dXV7cbeffddDRkyRNIXF5/GxMRo3759nvV2u10HDx5UWlqaNy8FAAB6Ka+OfCxYsEDf//739fjjj2vGjBl688039eyzz+rZZ5+VJAUEBCg3N1dLly7Vddddp8TERD388MOKi4tTdnZ2V+QHAAA9jFflY8yYMXr55ZeVn5+vxx57TImJiVq9erXuuecez5xFixapsbFR9913n+rr63Xbbbdpz549CgkJ8Xl4AADQ8wS43W63v0P8PbvdrqioKDU0NHDBKdDLHDn1qbLXV2r7L1I1aki0v+MA8CFv3r+5twsAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAo7wqH48++qgCAgLa/SQnJ3vWNzc3a86cOYqOjlZERIRycnJUV1fn89AAAKDn8vrIxw033KBz5855fg4cOOBZt2DBAu3YsUNbt25VWVmZzp49q2nTpvk0MAAA6NmCvH5CUJBiYmIuG29oaFBRUZE2b96sSZMmSZI2btyoESNGqLKyUqmpqZ1PCwAAejyvy8eJEycUFxenkJAQpaWlqbCwUAkJCaqqqpLT6VR6erpnbnJyshISElRRUfGV5cPhcMjhcHiW7Xa7JMnpdMrpdHobD0A31tra6nlk/wZ6F2/2aa/Kx9ixY7Vp0yYNHz5c586dU0FBgcaNG6fjx4+rtrZWwcHBGjBgQLvn2Gw21dbWfuU2CwsLVVBQcNl4SUmJwsLCvIkHoJs7c0GSglRZWakPj/s7DQBfampquuq5XpWPyZMne/48cuRIjR07VkOGDNFLL72k0NBQbzblkZ+fr7y8PM+y3W5XfHy8MjMzFRkZ2aFtAuiejp7+TDp2WKmpqbopYaC/4wDwoUtnLq6G16dd/t6AAQN0/fXX6+TJk8rIyFBLS4vq6+vbHf2oq6u74jUil1itVlmt1svGLRaLLBZLZ+IB6GaCgoI8j+zfQO/izT7dqe/5uHDhgt577z3FxsZq9OjRslgs2rdvn2d9dXW1Tp8+rbS0tM68DAAA6EW8OvJx//33a+rUqRoyZIjOnj2rRx55RIGBgZo5c6aioqI0e/Zs5eXlaeDAgYqMjNS8efOUlpbGJ10AAICHV+Xjgw8+0MyZM/Xpp5/q2muv1W233abKykpde+21kqRVq1apX79+ysnJkcPhUFZWltatW9clwQEAQM/kVfl44YUXvnZ9SEiI1q5dq7Vr13YqFAAA6L24twsAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAozpVPp544gkFBAQoNzfXM9bc3Kw5c+YoOjpaERERysnJUV1dXWdzAgCAXqLD5ePQoUP67W9/q5EjR7YbX7BggXbs2KGtW7eqrKxMZ8+e1bRp0zodFAAA9A4dKh8XLlzQPffco3/7t3/Tt771Lc94Q0ODioqKtHLlSk2aNEmjR4/Wxo0b9cYbb6iystJnoQEAQM8V1JEnzZkzR1OmTFF6erqWLl3qGa+qqpLT6VR6erpnLDk5WQkJCaqoqFBqaupl23I4HHI4HJ5lu90uSXI6nXI6nR2JB6Cbam1t9TyyfwO9izf
7tNfl44UXXtBbb72lQ4cOXbautrZWwcHBGjBgQLtxm82m2traK26vsLBQBQUFl42XlJQoLCzM23gAurEzFyQpSJWVlfrwuL/TAPClpqamq57rVfk4c+aMfvWrX6m0tFQhISFeB7uS/Px85eXleZbtdrvi4+OVmZmpyMhIn7wGgO7h6OnPpGOHlZqaqpsSBvo7DgAfunTm4mp4VT6qqqr00Ucf6eabb/aMuVwu7d+/X88884z27t2rlpYW1dfXtzv6UVdXp5iYmCtu02q1ymq1XjZusVhksVi8iQegmwsKCvI8sn8DvYs3+7RX5eMHP/iBjh071m7spz/9qZKTk/XrX/9a8fHxslgs2rdvn3JyciRJ1dXVOn36tNLS0rx5KQAA0Et5VT769++vG2+8sd1YeHi4oqOjPeOzZ89WXl6eBg4cqMjISM2bN09paWlXvNgUAAD0PR36tMvXWbVqlfr166ecnBw5HA5lZWVp3bp1vn4ZAADQQ3W6fLz++uvtlkNCQrR27VqtXbu2s5sGAAC9EPd2AQAARvn8tAuA7qPmk0Y1Olr9HcPjvY8bPY+XPvnSXYRbg5R4Tbi/YwB9Qvfa+wH4TM0njZr4m9f9HeOKFm479s2T/OC1+2+ngAAGUD6AXurSEY/Vd41S0qAIP6f5QuNFh3a+XqEf3p6m8NDLv9/HX05+dEG5Lx7pVkeJgN6M8gH0ckmDInTjt6P8HUPSF/d+qL1WunnIt/iSMaAP44JTAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAY5VX5WL9+vUaOHKnIyEhFRkYqLS1Nu3fv9qxvbm7WnDlzFB0drYiICOXk5Kiurs7noQEAQM/lVfkYPHiwnnjiCVVVVenw4cOaNGmS7rzzTv3lL3+RJC1YsEA7duzQ1q1bVVZWprNnz2ratGldEhwAAPRMQd5Mnjp1arvlZcuWaf369aqsrNTgwYNVVFSkzZs3a9KkSZKkjRs3asSIEaqsrFRqaqrvUgMAgB6rw9d8uFwuvfDCC2psbFRaWpqqqqrkdDqVnp7umZOcnKyEhARVVFT4JCwAAOj5vDryIUnHjh1TWlqampubFRERoZdfflnf+c53dOTIEQUHB2vAgAHt5ttsNtXW1n7l9hwOhxwOh2fZbrdLkpxOp5xOp7fxAPyP1tZWz2N32Zcu5egueS7pjv9WQE/jzb7jdfkYPny4jhw5ooaGBm3btk2zZs1SWVmZt5vxKCwsVEFBwWXjJSUlCgsL6/B2gb7uzAVJCtKBAwd0KsLfadorLS31d4R2uvO/FdBTNDU1XfXcALfb7e7Mi6Wnp2vYsGG666679IMf/ECff/55u6MfQ4YMUW5urhYsWHDF51/pyEd8fLw++eQTRUZGdiYa0Kf95axd2esrtf0XqbohrnvsS06nU6WlpcrIyJDFYvF3HI/u+G8F9DR2u13XXHONGhoavvH92+sjH1/W1tYmh8Oh0aNHy2KxaN++fcrJyZEkVVdX6/Tp00pLS/vK51utVlmt1svGLRZLt/rlBPQ0QUFBnsfuti91t/27O/9bAT2FN/uOV+UjPz9fkydPVkJCgs6fP6/Nmzfr9ddf1969exUVFaXZs2crLy9PAwcOVGRkpObNm6e0tDQ+6QIAADy8Kh8fffSR/umf/knnzp1TVFSURo4cqb179yojI0OStGrVKvXr1085OTlyOBzKysrSunXruiQ4AADombwqH0VFRV+7PiQkRGvXrtXatWs7FQoAAPRe3NsFAAAY1ekLTgF0XwFBdtXYq9UvpHt8frS1tVVnW8/qnc/e8Vzk2R3U2C8oIMju7xhAn9F99n4APmcZcFA
Pvvm4v2NcZt2e7nctmGXADyTd4e8YQJ9A+QB6MWf9WK2Y8n81bFD3OfLxxwN/1K233dqtjny899EFzf/9e/6OAfQZ3WfvB+Bz7tZIJUYO13eio/wdRdIXXzJWE1SjEQNHdKvv02hrbpC79WN/xwD6DC44BQAARlE+AACAUZQPAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABglFflo7CwUGPGjFH//v01aNAgZWdnq7q6ut2c5uZmzZkzR9HR0YqIiFBOTo7q6up8GhoAAPRcXpWPsrIyzZkzR5WVlSotLZXT6VRmZqYaGxs9cxYsWKAdO3Zo69atKisr09mzZzVt2jSfBwcAAD1TkDeT9+zZ025506ZNGjRokKqqqjR+/Hg1NDSoqKhImzdv1qRJkyRJGzdu1IgRI1RZWanU1FTfJQfwtS46XZKk4x82+DnJ/2q86NDhj6WYU58rPNTq7zgeJz+64O8IQJ/iVfn4soaGL36pDRw4UJJUVVUlp9Op9PR0z5zk5GQlJCSooqLiiuXD4XDI4XB4lu12uyTJ6XTK6XR2Jh7Qp7177ov9c3HxMT8n+bIgPX/ykL9DXJE10M3vHaCDvNl3Olw+2tralJubq1tvvVU33nijJKm2tlbBwcEaMGBAu7k2m021tbVX3E5hYaEKCgouGy8pKVFYWFhH4wFwSnf/nwANCnUruJtcWl53UXr+ZJD+X1KrbKH+TtOeNVB6+2CZ3vZ3EKCHampquuq5HS4fc+bM0fHjx3XgwIGObkKSlJ+fr7y8PM+y3W5XfHy8MjMzFRkZ2altA33dDH8H+JKjpz/T8ycP686JqbopYaC/4wDwoUtnLq5Gh8rH3LlztXPnTu3fv1+DBw/2jMfExKilpUX19fXtjn7U1dUpJibmituyWq2yWi8/92uxWGSxWDoSD0A3FRQU5Hlk/wZ6F2/2aa8Oxrrdbs2dO1cvv/yyXn31VSUmJrZbP3r0aFksFu3bt88zVl1drdOnTystLc2blwIAAL2UV0c+5syZo82bN+u//uu/1L9/f891HFFRUQoNDVVUVJRmz56tvLw8DRw4UJGRkZo3b57S0tL4pAsAAJDkZflYv369JOn2229vN75x40b95Cc/kSStWrVK/fr1U05OjhwOh7KysrRu3TqfhAUAAD2fV+XD7XZ/45yQkBCtXbtWa9eu7XAoAADQe3WTD+ABAIC+gvIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKMoHwAAwCjKBwAAMIryAQAAjKJ8AAAAoygfAADAKMoHAAAwivIBAACMonwAAACjKB8AAMAoygcAADCK8gEAAIyifAAAAKO8Lh/79+/X1KlTFRcXp4CAAG3fvr3derfbrSVLlig2NlahoaFKT0/XiRMnfJUXAAD0cF6Xj8bGRt10001au3btFdcvX75cTz/9tDZs2KCDBw8qPDxcWVlZam5u7nRYAADQ8wV5+4TJkydr8uTJV1zndru1evVqPfTQQ7rzzjslSc8995xsNpu2b9+uu+++u3NpAQBAj+d1+fg6NTU1qq2tVXp6umcsKipKY8eOVUVFxRXLh8PhkMPh8Czb7XZJktPplNPp9GU8AH7W2trqeWT/BnoXb/Zpn5aP2tpaSZLNZms3brPZPOu+rLCwUAUFBZeNl5SUKCwszJfxAPjZmQuSFKTKykp9eNzfaQD4UlNT01XP9Wn56Ij8/Hzl5eV5lu12u+Lj45WZmanIyEg/JgPga0d
PfyYdO6zU1FTdlDDQ33EA+NClMxdXw6flIyYmRpJUV1en2NhYz3hdXZ1GjRp1xedYrVZZrdbLxi0WiywWiy/jAfCzoKAgzyP7N9C7eLNP+/R7PhITExUTE6N9+/Z5xux2uw4ePKi0tDRfvhQAAOihvD7yceHCBZ08edKzXFNToyNHjmjgwIFKSEhQbm6uli5dquuuu06JiYl6+OGHFRcXp+zsbF/mBgAAPZTX5ePw4cOaOHGiZ/nS9RqzZs3Spk2btGjRIjU2Nuq+++5TfX29brvtNu3Zs0chISG+Sw0AAHosr8vH7bffLrfb/ZXrAwIC9Nhjj+mxxx7rVDAAANA7cW8XAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABGUT4AAIBRlA8AAGAU5QMAABhF+QAAAEZ1WflYu3athg4dqpCQEI0dO1ZvvvlmV70UAADoQYK6YqMvvvii8vLytGHDBo0dO1arV69WVlaWqqurNWjQoK54SQBdqKmpSX/96187vZ3qc/Vy1J7UO8dD1fbpgM4Hk5ScnKywsDCfbAuAGQFut9vt642OHTtWY8aM0TPPPCNJamtrU3x8vObNm6fFixd/7XPtdruioqLU0NCgyMhIX0cD0AFvvfWWRo8e7e8YV1RVVaWbb77Z3zGAPs+b92+fH/loaWlRVVWV8vPzPWP9+vVTenq6KioqLpvvcDjkcDg8y3a7XZLkdDrldDp9HQ9ABwwbNkwHDx7s9HYuXHRob/khZY0bo4hQqw+SfZGN3xWA/3mzH/q8fHzyySdyuVyy2Wztxm022xUP2xYWFqqgoOCy8ZKSEg6lAr3Q91OG6Xz9Zzpf75vtnTt3zjcbAtApTU1NVz23S6758EZ+fr7y8vI8y3a7XfHx8crMzOS0C9DLOJ1OlZaWKiMjQxaLxd9xAPjQpTMXV8Pn5eOaa65RYGCg6urq2o3X1dUpJibmsvlWq1VW6+WHXy0WC7+cgF6K/RvofbzZp33+Udvg4GCNHj1a+/bt84y1tbVp3759SktL8/XLAQCAHqZLTrvk5eVp1qxZuuWWW/S9731Pq1evVmNjo3760592xcsBAIAepEvKx1133aWPP/5YS5YsUW1trUaNGqU9e/ZcdhEqAADoe7rsgtO5c+dq7ty5XbV5AADQQ3FvFwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABG+f3Gcl/mdrsleXeDGgA9g9PpVFNTk+x2O/d2AXqZS+/bl97Hv063Kx/nz5+XJMXHx/s5CQAA8Nb58+cVFRX1tXMC3FdTUQxqa2vT2bNn1b9/fwUEBPg7DgAfstvtio+P15kzZxQZGenvOAB8yO126/z584qLi1O/fl9/VUe3Kx8Aei+73a6oqCg1NDRQPoA+jAtOAQCAUZQPAABgFOUDgDFWq1WPPPKIrFarv6MA8COu+QAAAEZx5AMAABhF+QAAAEZRPgAAgFGUDwAAYBTlA4BPVFRUKDAwUFOmTPF3FADdHJ92AeAT9957ryIiIlRUVKTq6mrFxcX5OxKAboojHwA67cKFC3rxxRf1i1/8QlOmTNGmTZvarf/DH/6g6667TiEhIZo4caL+4z/+QwEBAaqvr/fMOXDggMaNG6fQ0FDFx8dr/vz5amxsNPsXAWAE5QNAp7300ktKTk7W8OHD9eMf/1j//u//7rmtdk1NjaZPn67s7GwdPXpUP//5z/Uv//Iv7Z7/3nvv6R/+4R+Uk5OjP//5z3rxxRd14MABzZ071x9/HQBdjNMuADrt1ltv1YwZM/SrX/1Kra2tio2N1datW3X77bdr8eLFeuWVV3T
s2DHP/IceekjLli3T559/rgEDBujee+9VYGCgfvvb33rmHDhwQBMmTFBjY6NCQkL88dcC0EU48gGgU6qrq/Xmm29q5syZkqSgoCDdddddKioq8qwfM2ZMu+d873vfa7d89OhRbdq0SREREZ6frKwstbW1qaamxsxfBIAxQf4OAKBnKyoqUmtra7sLTN1ut6xWq5555pmr2saFCxf085//XPPnz79sXUJCgs+yAugeKB8AOqy1tVXPPfecVqxYoczMzHbrsrOztWXLFg0fPly7du1qt+7QoUPtlm+++Wa9/fbbSkpK6vLMAPyPaz4AdNj27dt111136aOPPlJUVFS7db/+9a/16quv6qWXXtLw4cO1YMECzZ49W0eOHNHChQv1wQcfqL6+XlFRUfrzn/+s1NRU/exnP9O9996r8PBwvf322yotLb3qoycAeg6u+QDQYUVFRUpPT7+seEhSTk6ODh8+rPPnz2vbtm0qLi7WyJEjtX79es+nXaxWqyRp5MiRKisr07vvvqtx48bpu9/9rpYsWcJ3hQC9FEc+ABi3bNkybdiwQWfOnPF3FAB+wDUfALrcunXrNGbMGEVHR+uPf/yjnnrqKb7DA+jDKB8AutyJEye0dOlSffbZZ0pISNDChQuVn5/v71gA/ITTLgAAwCguOAUAAEZRPgAAgFGUDwAAYBTlAwAAGEX5AAAARlE+AACAUZQPAABgFOUDAAAYRfkAAABG/X9Yms5FnRz1tgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "titanic.boxplot(column=\"Age\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Отсечение данных для признака Возраст, значение которых больше 65 лет" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeAgeClip
34Wheadon, Mr. Edward H66.065.0
97Goldschmidt, Mr. George B71.065.0
117Connors, Mr. Patrick70.565.0
494Artagaveytia, Mr. Ramon71.065.0
631Barkworth, Mr. Algernon Henry Wilson80.065.0
673Mitchell, Mr. Henry Michael70.065.0
746Crosby, Capt. Edward Gifford70.065.0
852Svensson, Mr. Johan74.065.0
\n", + "
" + ], + "text/plain": [ + " Name Age AgeClip\n", + "34 Wheadon, Mr. Edward H 66.0 65.0\n", + "97 Goldschmidt, Mr. George B 71.0 65.0\n", + "117 Connors, Mr. Patrick 70.5 65.0\n", + "494 Artagaveytia, Mr. Ramon 71.0 65.0\n", + "631 Barkworth, Mr. Algernon Henry Wilson 80.0 65.0\n", + "673 Mitchell, Mr. Henry Michael 70.0 65.0\n", + "746 Crosby, Capt. Edward Gifford 70.0 65.0\n", + "852 Svensson, Mr. Johan 74.0 65.0" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_norm = titanic.copy()\n", + "\n", + "titanic_norm[\"AgeClip\"] = titanic[\"Age\"].clip(0, 65);\n", + "\n", + "titanic_norm[titanic_norm[\"Age\"] > 65][[\"Name\", \"Age\", \"AgeClip\"]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Винсоризация признака Возраст" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "56.0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeAgeWinsorize
34Wheadon, Mr. Edward H66.054.0
97Goldschmidt, Mr. George B71.054.0
117Connors, Mr. Patrick70.554.0
494Artagaveytia, Mr. Ramon71.054.0
631Barkworth, Mr. Algernon Henry Wilson80.054.0
673Mitchell, Mr. Henry Michael70.054.0
746Crosby, Capt. Edward Gifford70.054.0
852Svensson, Mr. Johan74.054.0
\n", + "
" + ], + "text/plain": [ + " Name Age AgeWinsorize\n", + "34 Wheadon, Mr. Edward H 66.0 54.0\n", + "97 Goldschmidt, Mr. George B 71.0 54.0\n", + "117 Connors, Mr. Patrick 70.5 54.0\n", + "494 Artagaveytia, Mr. Ramon 71.0 54.0\n", + "631 Barkworth, Mr. Algernon Henry Wilson 80.0 54.0\n", + "673 Mitchell, Mr. Henry Michael 70.0 54.0\n", + "746 Crosby, Capt. Edward Gifford 70.0 54.0\n", + "852 Svensson, Mr. Johan 74.0 54.0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.stats.mstats import winsorize\n", + "\n", + "print(titanic_norm[\"Age\"].quantile(q=0.95))\n", + "\n", + "titanic_norm[\"AgeWinsorize\"] = winsorize(\n", + " titanic_norm[\"Age\"].fillna(titanic_norm[\"Age\"].mean()), (0, 0.05), inplace=False\n", + ")\n", + "\n", + "titanic_norm[titanic_norm[\"Age\"] > 65][[\"Name\", \"Age\", \"AgeWinsorize\"]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Нормализация значений" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeAgeNormAgeClipNormAgeWinsorizeNormAgeWinsorizeNorm2
1Braund, Mr. Owen Harris22.00.2711740.3341590.402762-0.194476
2Cumings, Mrs. John Bradley (Florence Briggs Th...38.00.4722290.5819140.7013810.402762
3Heikkinen, Miss. Laina26.00.3214380.3960980.477417-0.045166
4Futrelle, Mrs. Jacques Heath (Lily May Peel)35.00.4345310.5354600.6453900.290780
5Allen, Mr. William Henry35.00.4345310.5354600.6453900.290780
6Moran, Mr. JamesNaNNaNNaN0.5464560.092912
7McCarthy, Mr. Timothy J54.00.6732850.8296691.0000001.000000
8Palsson, Master. Gosta Leonard2.00.0198540.0244660.029489-0.941023
9Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)27.00.3340040.4115830.496081-0.007839
10Nasser, Mrs. Nicholas (Adele Achem)14.00.1706460.2102820.253453-0.493094
11Sandstrom, Miss. Marguerite Rut4.00.0449860.0554350.066816-0.866368
12Bonnell, Miss. Elizabeth58.00.7235490.8916071.0000001.000000
13Saundercock, Mr. William Henry20.00.2460420.3031900.365435-0.269130
14Andersson, Mr. Anders Johan39.00.4847950.5973990.7200450.440090
15Vestrom, Miss. Hulda Amanda Adolfina14.00.1706460.2102820.253453-0.493094
16Hewlett, Mrs. (Mary D Kingcome)55.00.6858510.8451531.0000001.000000
17Rice, Master. Eugene2.00.0198540.0244660.029489-0.941023
18Williams, Mr. Charles EugeneNaNNaNNaN0.5464560.092912
19Vander Planke, Mrs. Julius (Emelia Maria Vande...31.00.3842670.4735210.5707350.141471
20Masselmani, Mrs. FatimaNaNNaNNaN0.5464560.092912
\n", + "
" + ], + "text/plain": [ + " Name Age AgeNorm \\\n", + "1 Braund, Mr. Owen Harris 22.0 0.271174 \n", + "2 Cumings, Mrs. John Bradley (Florence Briggs Th... 38.0 0.472229 \n", + "3 Heikkinen, Miss. Laina 26.0 0.321438 \n", + "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) 35.0 0.434531 \n", + "5 Allen, Mr. William Henry 35.0 0.434531 \n", + "6 Moran, Mr. James NaN NaN \n", + "7 McCarthy, Mr. Timothy J 54.0 0.673285 \n", + "8 Palsson, Master. Gosta Leonard 2.0 0.019854 \n", + "9 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) 27.0 0.334004 \n", + "10 Nasser, Mrs. Nicholas (Adele Achem) 14.0 0.170646 \n", + "11 Sandstrom, Miss. Marguerite Rut 4.0 0.044986 \n", + "12 Bonnell, Miss. Elizabeth 58.0 0.723549 \n", + "13 Saundercock, Mr. William Henry 20.0 0.246042 \n", + "14 Andersson, Mr. Anders Johan 39.0 0.484795 \n", + "15 Vestrom, Miss. Hulda Amanda Adolfina 14.0 0.170646 \n", + "16 Hewlett, Mrs. (Mary D Kingcome) 55.0 0.685851 \n", + "17 Rice, Master. Eugene 2.0 0.019854 \n", + "18 Williams, Mr. Charles Eugene NaN NaN \n", + "19 Vander Planke, Mrs. Julius (Emelia Maria Vande... 31.0 0.384267 \n", + "20 Masselmani, Mrs. 
Fatima NaN NaN \n", + "\n", + " AgeClipNorm AgeWinsorizeNorm AgeWinsorizeNorm2 \n", + "1 0.334159 0.402762 -0.194476 \n", + "2 0.581914 0.701381 0.402762 \n", + "3 0.396098 0.477417 -0.045166 \n", + "4 0.535460 0.645390 0.290780 \n", + "5 0.535460 0.645390 0.290780 \n", + "6 NaN 0.546456 0.092912 \n", + "7 0.829669 1.000000 1.000000 \n", + "8 0.024466 0.029489 -0.941023 \n", + "9 0.411583 0.496081 -0.007839 \n", + "10 0.210282 0.253453 -0.493094 \n", + "11 0.055435 0.066816 -0.866368 \n", + "12 0.891607 1.000000 1.000000 \n", + "13 0.303190 0.365435 -0.269130 \n", + "14 0.597399 0.720045 0.440090 \n", + "15 0.210282 0.253453 -0.493094 \n", + "16 0.845153 1.000000 1.000000 \n", + "17 0.024466 0.029489 -0.941023 \n", + "18 NaN 0.546456 0.092912 \n", + "19 0.473521 0.570735 0.141471 \n", + "20 NaN 0.546456 0.092912 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn import preprocessing\n", + "\n", + "min_max_scaler = preprocessing.MinMaxScaler()\n", + "\n", + "min_max_scaler_2 = preprocessing.MinMaxScaler(feature_range=(-1, 1))\n", + "\n", + "titanic_norm[\"AgeNorm\"] = min_max_scaler.fit_transform(\n", + " titanic_norm[\"Age\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[\"AgeClipNorm\"] = min_max_scaler.fit_transform(\n", + " titanic_norm[\"AgeClip\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[\"AgeWinsorizeNorm\"] = min_max_scaler.fit_transform(\n", + " titanic_norm[\"AgeWinsorize\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[\"AgeWinsorizeNorm2\"] = min_max_scaler_2.fit_transform(\n", + " titanic_norm[\"AgeWinsorize\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[\n", + " [\"Name\", \"Age\", \"AgeNorm\", \"AgeClipNorm\", \"AgeWinsorizeNorm\", \"AgeWinsorizeNorm2\"]\n", 
+ "].head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Стандартизация значений" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeAgeStandAgeClipStandAgeWinsorizeStand
1Braund, Mr. Owen Harris22.0-0.530377-0.532745-0.606602
2Cumings, Mrs. John Bradley (Florence Briggs Th...38.00.5718310.5850600.718863
3Heikkinen, Miss. Laina26.0-0.254825-0.253294-0.275236
4Futrelle, Mrs. Jacques Heath (Lily May Peel)35.00.3651670.3754720.470339
5Allen, Mr. William Henry35.00.3651670.3754720.470339
6Moran, Mr. JamesNaNNaNNaN0.031205
7McCarthy, Mr. Timothy J54.01.6740391.7028662.044329
8Palsson, Master. Gosta Leonard2.0-1.908136-1.930003-2.263435
9Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)27.0-0.185937-0.183431-0.192394
10Nasser, Mrs. Nicholas (Adele Achem)14.0-1.081480-1.091648-1.269335
11Sandstrom, Miss. Marguerite Rut4.0-1.770360-1.790277-2.097751
12Bonnell, Miss. Elizabeth58.01.9495911.9823172.044329
13Saundercock, Mr. William Henry20.0-0.668153-0.672471-0.772286
14Andersson, Mr. Anders Johan39.00.6407190.6549230.801705
15Vestrom, Miss. Hulda Amanda Adolfina14.0-1.081480-1.091648-1.269335
16Hewlett, Mrs. (Mary D Kingcome)55.01.7429271.7727292.044329
17Rice, Master. Eugene2.0-1.908136-1.930003-2.263435
18Williams, Mr. Charles EugeneNaNNaNNaN0.031205
19Vander Planke, Mrs. Julius (Emelia Maria Vande...31.00.0896150.0960200.138972
20Masselmani, Mrs. FatimaNaNNaNNaN0.031205
\n", + "
" + ], + "text/plain": [ + " Name Age AgeStand \\\n", + "1 Braund, Mr. Owen Harris 22.0 -0.530377 \n", + "2 Cumings, Mrs. John Bradley (Florence Briggs Th... 38.0 0.571831 \n", + "3 Heikkinen, Miss. Laina 26.0 -0.254825 \n", + "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) 35.0 0.365167 \n", + "5 Allen, Mr. William Henry 35.0 0.365167 \n", + "6 Moran, Mr. James NaN NaN \n", + "7 McCarthy, Mr. Timothy J 54.0 1.674039 \n", + "8 Palsson, Master. Gosta Leonard 2.0 -1.908136 \n", + "9 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) 27.0 -0.185937 \n", + "10 Nasser, Mrs. Nicholas (Adele Achem) 14.0 -1.081480 \n", + "11 Sandstrom, Miss. Marguerite Rut 4.0 -1.770360 \n", + "12 Bonnell, Miss. Elizabeth 58.0 1.949591 \n", + "13 Saundercock, Mr. William Henry 20.0 -0.668153 \n", + "14 Andersson, Mr. Anders Johan 39.0 0.640719 \n", + "15 Vestrom, Miss. Hulda Amanda Adolfina 14.0 -1.081480 \n", + "16 Hewlett, Mrs. (Mary D Kingcome) 55.0 1.742927 \n", + "17 Rice, Master. Eugene 2.0 -1.908136 \n", + "18 Williams, Mr. Charles Eugene NaN NaN \n", + "19 Vander Planke, Mrs. Julius (Emelia Maria Vande... 31.0 0.089615 \n", + "20 Masselmani, Mrs. 
Fatima NaN NaN \n", + "\n", + " AgeClipStand AgeWinsorizeStand \n", + "1 -0.532745 -0.606602 \n", + "2 0.585060 0.718863 \n", + "3 -0.253294 -0.275236 \n", + "4 0.375472 0.470339 \n", + "5 0.375472 0.470339 \n", + "6 NaN 0.031205 \n", + "7 1.702866 2.044329 \n", + "8 -1.930003 -2.263435 \n", + "9 -0.183431 -0.192394 \n", + "10 -1.091648 -1.269335 \n", + "11 -1.790277 -2.097751 \n", + "12 1.982317 2.044329 \n", + "13 -0.672471 -0.772286 \n", + "14 0.654923 0.801705 \n", + "15 -1.091648 -1.269335 \n", + "16 1.772729 2.044329 \n", + "17 -1.930003 -2.263435 \n", + "18 NaN 0.031205 \n", + "19 0.096020 0.138972 \n", + "20 NaN 0.031205 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn import preprocessing\n", + "\n", + "stndart_scaler = preprocessing.StandardScaler()\n", + "\n", + "titanic_norm[\"AgeStand\"] = stndart_scaler.fit_transform(\n", + " titanic_norm[\"Age\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[\"AgeClipStand\"] = stndart_scaler.fit_transform(\n", + " titanic_norm[\"AgeClip\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[\"AgeWinsorizeStand\"] = stndart_scaler.fit_transform(\n", + " titanic_norm[\"AgeWinsorize\"].to_numpy().reshape(-1, 1)\n", + ").reshape(titanic_norm[\"Age\"].shape)\n", + "\n", + "titanic_norm[[\"Name\", \"Age\", \"AgeStand\", \"AgeClipStand\", \"AgeWinsorizeStand\"]].head(20)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/poetry.lock b/poetry.lock index 
4c0f1c5..7ae692c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -408,6 +408,17 @@ files = [ {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, ] +[[package]] +name = "cloudpickle" +version = "3.1.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cloudpickle-3.1.0-py3-none-any.whl", hash = "sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"}, + {file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -609,6 +620,41 @@ files = [ [package.extras] devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] +[[package]] +name = "featuretools" +version = "1.31.0" +description = "a framework for automated feature engineering" +optional = false +python-versions = "<4,>=3.9" +files = [ + {file = "featuretools-1.31.0-py3-none-any.whl", hash = "sha256:87c94e9ae959c89acd83da96bd2583f3ef0f6daaa9639cbb6e46dbde2c742a18"}, + {file = "featuretools-1.31.0.tar.gz", hash = "sha256:01bfb17fcc1715b4c3623c7bc94a8982122c4a0fa03350ed478601bb81f90155"}, +] + +[package.dependencies] +cloudpickle = ">=1.5.0" +holidays = ">=0.17" +numpy = ">=1.25.0" +packaging = ">=20.0" +pandas = ">=2.0.0" +psutil = ">=5.7.0" +scipy = ">=1.10.0" +tqdm = ">=4.66.3" +woodwork = ">=0.28.0" + +[package.extras] +autonormalize = ["autonormalize (>=2.0.1)"] +complete = ["featuretools[dask,nlp,premium]"] +dask = ["dask[dataframe] (>=2023.2.0)", "distributed (>=2023.2.0)"] +dev = ["black[jupyter] (>=23.1.0)", "featuretools[dask,docs,test]", "pre-commit (>=2.20.0)", "ruff (>=0.1.6)"] +docs = ["Sphinx (==5.1.1)", "autonormalize (>=2.0.1)", "click (>=7.0.0)", "featuretools[dask,test]", "ipython (==8.4.0)", "jupyter (==1.0.0)", "jupyter-client 
(>=8.0.2)", "matplotlib (==3.7.2)", "myst-parser (==0.18.0)", "nbconvert (==6.5.0)", "nbsphinx (==0.8.9)", "pydata-sphinx-theme (==0.9.0)", "sphinx-copybutton (==0.5.0)", "sphinx-inline-tabs (==2022.1.2b11)"] +nlp = ["nlp-primitives (>=2.12.0)"] +premium = ["premium-primitives (>=0.0.3)"] +sklearn = ["featuretools-sklearn-transformer (>=1.0.0)"] +sql = ["featuretools-sql (>=0.0.1)", "psycopg2-binary (>=2.9.3)"] +test = ["boto3 (>=1.34.32)", "composeml (>=0.8.0)", "graphviz (>=0.8.4)", "moto[all] (>=5.0.0)", "pip (>=23.3.0)", "pyarrow (>=14.0.1)", "pympler (>=0.8)", "pytest (>=7.1.2)", "pytest-cov (>=3.0.0)", "pytest-timeout (>=2.1.0)", "pytest-xdist (>=2.5.0)", "smart-open (>=5.0.0)", "urllib3 (>=1.26.18)"] +tsfresh = ["featuretools-tsfresh-primitives (>=1.0.0)"] + [[package]] name = "fonttools" version = "4.55.1" @@ -704,6 +750,20 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "holidays" +version = "0.62" +description = "Open World Holidays Framework" +optional = false +python-versions = ">=3.9" +files = [ + {file = "holidays-0.62-py3-none-any.whl", hash = "sha256:4db5019092279716276a9fdaa65d4edd066257a6b8caecbfde7e4af520b349f2"}, + {file = "holidays-0.62.tar.gz", hash = "sha256:85020562b176f19bb83779d0aa9926ea1dd7fe00568ec119d6e8c907afbdc22c"}, +] + +[package.dependencies] +python-dateutil = "*" + [[package]] name = "httpcore" version = "1.0.7" @@ -787,6 +847,25 @@ examples = ["keras (>=2.4.3)", "matplotlib (>=3.1.2)", "pandas (>=1.0.5)", "seab optional = ["keras (>=2.4.3)", "pandas (>=1.0.5)", "tensorflow (>=2.4.3)"] tests = ["black (>=23.3.0)", "flake8 (>=3.8.2)", "keras (>=2.4.3)", "mypy (>=1.3.0)", "pandas (>=1.0.5)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "tensorflow (>=2.4.3)"] +[[package]] +name = "importlib-resources" +version = "6.4.5" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file 
= "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"}, + {file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"] +type = ["pytest-mypy"] + [[package]] name = "ipykernel" version = "6.29.5" @@ -2801,6 +2880,27 @@ files = [ {file = "tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b"}, ] +[[package]] +name = "tqdm" +version = "4.67.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] +discord = ["requests"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "traitlets" version = "5.14.3" @@ -2929,7 +3029,33 @@ files = [ {file = "widgetsnbextension-4.0.13.tar.gz", hash = "sha256:ffcb67bc9febd10234a362795f643927f4e0c05d9342c727b65d2384f8feacb6"}, ] +[[package]] +name = "woodwork" +version = "0.31.0" +description = "a data typing library for machine learning" +optional = false +python-versions = "<4,>=3.9" +files = [ + {file = "woodwork-0.31.0-py3-none-any.whl", hash = 
"sha256:5cb3370553b5f466f8c8599b1bf559584dc0b798cc1f2da26bbd7029d256c6f9"}, + {file = "woodwork-0.31.0.tar.gz", hash = "sha256:6ef82af1d5b6525b02efe6417c574c810cfdcc606cb266bd0d7fb17a1d066b67"}, +] + +[package.dependencies] +importlib-resources = ">=5.10.0" +numpy = ">=1.25.0" +pandas = ">=2.0.0" +python-dateutil = ">=2.8.2" +scikit-learn = ">=1.1.0" +scipy = ">=1.10.0" + +[package.extras] +complete = ["woodwork[updater]"] +dev = ["click (>=8.1.7)", "pre-commit (>=2.20.0)", "ruff (>=0.1.6)", "woodwork[docs,test]"] +docs = ["Sphinx (==5.1.1)", "ipython (==8.4.0)", "jupyter (==1.0.0)", "myst-parser (==0.18.0)", "nbconvert (==6.5.0)", "nbsphinx (==0.8.9)", "pyarrow (>=14.0.1)", "pydata-sphinx-theme (==0.9.0)", "sphinx-copybutton (==0.5.0)", "sphinx-inline-tabs (==2022.1.2b11)"] +test = ["boto3 (>=1.34.32)", "moto[all] (>=5.0.0)", "pyarrow (>=14.0.1)", "pytest (>=7.0.1)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=2.1.0)", "smart-open (>=5.0.0)"] +updater = ["alteryx-open-src-update-checker (>=3.1.0)"] + [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "d9de29d5a54172d74c1c4d32cc992d85f9ada806a82846ab228dca34419bba41" +content-hash = "14251a2aa051d0453baa081f3c5967a8fc2d57d32f379f3b899973001543c094" diff --git a/pyproject.toml b/pyproject.toml index 012070f..d84d8aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ numpy = "^2.1.0" pandas = "^2.2.2" matplotlib = "^3.9.2" imbalanced-learn = "^0.12.3" +featuretools = "^1.31.0" [build-system]