Created
September 7, 2015 12:48
-
-
Save kuk/b36ead1569f8b1635da3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# target_test.csv и target_train.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%run -n main.py\n", | |
"target = list(read_target(TARGET_TRAIN))\n", | |
"test = list(read_target(TARGET_TEST))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[TargetRecord(month=Timestamp('2014-11-01 00:00:00'), id='pLEqn20t4hK6E1U0fG80TPkusHY=OheXhIX3Negh/F4rqkwt5YiAWvo=', churn=False),\n", | |
" TargetRecord(month=Timestamp('2014-11-01 00:00:00'), id='o5UNJ6bfcvvfZVkr7I5lP0BxnwQ=b1EBM7qDYX5IE8uFfxT+t35GvRg=', churn=False),\n", | |
" TargetRecord(month=Timestamp('2014-11-01 00:00:00'), id='tGerzXbWLZzLQbrKND7aHUviznI=wD4yb2JW15NMTvVKMszthyXnAUg=', churn=False),\n", | |
" TargetRecord(month=Timestamp('2014-11-01 00:00:00'), id='GLiZukGscMDsZ9gXWT3Fcgn74+w=K+iMpCQsduglOsYkdIUQZQMtaDM=', churn=False),\n", | |
" TargetRecord(month=Timestamp('2014-11-01 00:00:00'), id='vdQBJ41tLktl4bAhgmhaFwdK6yI=z8qIYGby7AvDPmhfQuuSyfmQUoY=', churn=False)]" | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"target[:5]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[TargetRecord(month=Timestamp('2015-02-01 00:00:00'), id='pLEqn20t4hK6E1U0fG80TPkusHY=OheXhIX3Negh/F4rqkwt5YiAWvo=', churn=None),\n", | |
" TargetRecord(month=Timestamp('2015-02-01 00:00:00'), id='o5UNJ6bfcvvfZVkr7I5lP0BxnwQ=b1EBM7qDYX5IE8uFfxT+t35GvRg=', churn=None),\n", | |
" TargetRecord(month=Timestamp('2015-02-01 00:00:00'), id='tGerzXbWLZzLQbrKND7aHUviznI=wD4yb2JW15NMTvVKMszthyXnAUg=', churn=None),\n", | |
" TargetRecord(month=Timestamp('2015-02-01 00:00:00'), id='GLiZukGscMDsZ9gXWT3Fcgn74+w=K+iMpCQsduglOsYkdIUQZQMtaDM=', churn=None),\n", | |
" TargetRecord(month=Timestamp('2015-02-01 00:00:00'), id='vdQBJ41tLktl4bAhgmhaFwdK6yI=z8qIYGby7AvDPmhfQuuSyfmQUoY=', churn=None)]" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"test[:5]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"target_ids = [_.id for _ in target]\n", | |
"test_ids = [_.id for _ in test]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Почему-то в тренировочных данных id неуникальные" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total target ids: 50939\n", | |
"Unique target ids: 19697\n" | |
] | |
} | |
], | |
"source": [ | |
"print 'Total target ids:', len(target_ids)\n", | |
"print 'Unique target ids:', len(set(target_ids))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Часто для одного id повторяют \"в ноябре не ушёл, в декабре не ушёл, в январе не ушёл\". Зачем? Лучше бы побольше разных id выгрузили. Период, кстати, тоже блестяще выбраны: НГ, праздники." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>month</th>\n", | |
" <th>2014-11-01 00:00:00</th>\n", | |
" <th>2014-12-01 00:00:00</th>\n", | |
" <th>2015-01-01 00:00:00</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>id</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>++1bFkShFFpFU4GZUgKgyYSB5K0=mRt2ytNDjwH6XemQY6JnqzriKpI=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>++2cBeC/UEz4BrhlxdL8a6oD9Uo=y5L96d4Z+OSXWLMWtjwEjW04evw=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>++7nq+bqX8Vu2+FCk/mEhl0fNnc=zVGW37tHjdXdkItOJCVwt84lB58=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>++Ih/EonzIGxcopatn3bkyj8W14=K+iMpCQsduglOsYkdIUQZQMtaDM=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>++tpcfrV2HNcYhVFta9iSx7FrLE=AN7OinNFjOdMeNN4ECY/S+ozMZk=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>++v0JgSoryGwn/Nw+aN3dQxCVt0=siZqqQbt4dXbF3oDvDcmMi3V06k=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+/UuG4DbGOJ9YjVadWgyQqiBZoE=OvnqPTbqFUMgIXTjwIS9pjx3V5Q=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+06NsHyeXvDxLeud4v1SqIIQ7cA=vTlylpKHECHufqP1D5xnvgigIVk=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+11Bqzv+LadE4UIDKUbH3vn3RIE=ZkL57yyf91+1sF7YB0V7DlNWOxA=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+1WmDXhd0p1mXq/rd/m2zHw6mmQ=ckc1iTjmZCyv4hujh+yrHiqbtzU=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+1lOY+OodyANjh/MAyhcRg3ADxo=K+iMpCQsduglOsYkdIUQZQMtaDM=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+1wXP9/hgxshPXvCEE9HxxHQtpg=BXDmlQtPA+ThDGzw0+zCPIqtMmg=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+1wedOITkZE8g3QPW2usAygfJSo=EkQemVgd6nl8jKy7KQlCJ4KCu3U=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+1z2UOfnS9MFEobW8bmYYaAZseY=vVvEZVO5jH8lEfv+8mAmag9g/8U=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+27tEkVZOq5UfnweykpFVpUH3+8=3S3qVHDoF8Wt8o10so8vdnJNHIM=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+2Xdf3WJ/Rw+dckLWfTuu5g3PFU=/m7o+LfSuugydN4wYE9zv4c3I2c=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+2YxONL1ZWxcc3sRXZKhrupmePA=RmoFR1FKwJoAWR93DTHoHioNSws=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+2aVGQYuoYYZWyCKxXU3MW790yA=RmoFR1FKwJoAWR93DTHoHioNSws=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+2e4OXlm8fJumS3pIZQBf9MAatg=32r5lQVSU6tZPqt77rAjTa8GFUk=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+2wiIBFPxP0n4Bn5qzmkRmS5E1g=tda7jgBOjYVV+oDdThha4EvJasw=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"month 2014-11-01 2014-12-01 \\\n", | |
"id \n", | |
"++1bFkShFFpFU4GZUgKgyYSB5K0=mRt2ytNDjwH6XemQY6J... False False \n", | |
"++2cBeC/UEz4BrhlxdL8a6oD9Uo=y5L96d4Z+OSXWLMWtjw... NaN False \n", | |
"++7nq+bqX8Vu2+FCk/mEhl0fNnc=zVGW37tHjdXdkItOJCV... NaN False \n", | |
"++Ih/EonzIGxcopatn3bkyj8W14=K+iMpCQsduglOsYkdIU... False False \n", | |
"++tpcfrV2HNcYhVFta9iSx7FrLE=AN7OinNFjOdMeNN4ECY... False False \n", | |
"++v0JgSoryGwn/Nw+aN3dQxCVt0=siZqqQbt4dXbF3oDvDc... False False \n", | |
"+/UuG4DbGOJ9YjVadWgyQqiBZoE=OvnqPTbqFUMgIXTjwIS... False False \n", | |
"+06NsHyeXvDxLeud4v1SqIIQ7cA=vTlylpKHECHufqP1D5x... NaN False \n", | |
"+11Bqzv+LadE4UIDKUbH3vn3RIE=ZkL57yyf91+1sF7YB0V... False False \n", | |
"+1WmDXhd0p1mXq/rd/m2zHw6mmQ=ckc1iTjmZCyv4hujh+y... False False \n", | |
"+1lOY+OodyANjh/MAyhcRg3ADxo=K+iMpCQsduglOsYkdIU... NaN NaN \n", | |
"+1wXP9/hgxshPXvCEE9HxxHQtpg=BXDmlQtPA+ThDGzw0+z... NaN NaN \n", | |
"+1wedOITkZE8g3QPW2usAygfJSo=EkQemVgd6nl8jKy7KQl... False False \n", | |
"+1z2UOfnS9MFEobW8bmYYaAZseY=vVvEZVO5jH8lEfv+8mA... False False \n", | |
"+27tEkVZOq5UfnweykpFVpUH3+8=3S3qVHDoF8Wt8o10so8... NaN False \n", | |
"+2Xdf3WJ/Rw+dckLWfTuu5g3PFU=/m7o+LfSuugydN4wYE9... False False \n", | |
"+2YxONL1ZWxcc3sRXZKhrupmePA=RmoFR1FKwJoAWR93DTH... False False \n", | |
"+2aVGQYuoYYZWyCKxXU3MW790yA=RmoFR1FKwJoAWR93DTH... False False \n", | |
"+2e4OXlm8fJumS3pIZQBf9MAatg=32r5lQVSU6tZPqt77rA... NaN False \n", | |
"+2wiIBFPxP0n4Bn5qzmkRmS5E1g=tda7jgBOjYVV+oDdThh... NaN True \n", | |
"\n", | |
"month 2015-01-01 \n", | |
"id \n", | |
"++1bFkShFFpFU4GZUgKgyYSB5K0=mRt2ytNDjwH6XemQY6J... False \n", | |
"++2cBeC/UEz4BrhlxdL8a6oD9Uo=y5L96d4Z+OSXWLMWtjw... False \n", | |
"++7nq+bqX8Vu2+FCk/mEhl0fNnc=zVGW37tHjdXdkItOJCV... False \n", | |
"++Ih/EonzIGxcopatn3bkyj8W14=K+iMpCQsduglOsYkdIU... False \n", | |
"++tpcfrV2HNcYhVFta9iSx7FrLE=AN7OinNFjOdMeNN4ECY... False \n", | |
"++v0JgSoryGwn/Nw+aN3dQxCVt0=siZqqQbt4dXbF3oDvDc... False \n", | |
"+/UuG4DbGOJ9YjVadWgyQqiBZoE=OvnqPTbqFUMgIXTjwIS... False \n", | |
"+06NsHyeXvDxLeud4v1SqIIQ7cA=vTlylpKHECHufqP1D5x... False \n", | |
"+11Bqzv+LadE4UIDKUbH3vn3RIE=ZkL57yyf91+1sF7YB0V... False \n", | |
"+1WmDXhd0p1mXq/rd/m2zHw6mmQ=ckc1iTjmZCyv4hujh+y... False \n", | |
"+1lOY+OodyANjh/MAyhcRg3ADxo=K+iMpCQsduglOsYkdIU... False \n", | |
"+1wXP9/hgxshPXvCEE9HxxHQtpg=BXDmlQtPA+ThDGzw0+z... True \n", | |
"+1wedOITkZE8g3QPW2usAygfJSo=EkQemVgd6nl8jKy7KQl... False \n", | |
"+1z2UOfnS9MFEobW8bmYYaAZseY=vVvEZVO5jH8lEfv+8mA... False \n", | |
"+27tEkVZOq5UfnweykpFVpUH3+8=3S3qVHDoF8Wt8o10so8... False \n", | |
"+2Xdf3WJ/Rw+dckLWfTuu5g3PFU=/m7o+LfSuugydN4wYE9... False \n", | |
"+2YxONL1ZWxcc3sRXZKhrupmePA=RmoFR1FKwJoAWR93DTH... False \n", | |
"+2aVGQYuoYYZWyCKxXU3MW790yA=RmoFR1FKwJoAWR93DTH... False \n", | |
"+2e4OXlm8fJumS3pIZQBf9MAatg=32r5lQVSU6tZPqt77rA... False \n", | |
"+2wiIBFPxP0n4Bn5qzmkRmS5E1g=tda7jgBOjYVV+oDdThh... True " | |
] | |
}, | |
"execution_count": 68, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"table = pd.DataFrame(target)\n", | |
"table.columns = ['month', 'id', 'churn']\n", | |
"table = table.pivot('id', 'month', 'churn')\n", | |
"table.head(20)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Чаще всего для пользователя указано, что он три раза не ушёл. Пользователей, которые ушли вообще крохи, 400 штук, примерно. Есть даже те, кто ушли и потом вернулись: 30 штук." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False False False 12603\n", | |
"NaN False False 5134\n", | |
" NaN False 690\n", | |
" True 206\n", | |
"True NaN NaN 156\n", | |
"False True True 124\n", | |
" False True 99\n", | |
"NaN True NaN 97\n", | |
"False False NaN 92\n", | |
"True True NaN 90\n", | |
"False NaN False 83\n", | |
"NaN False NaN 80\n", | |
" True True 75\n", | |
"False NaN NaN 62\n", | |
"NaN False True 56\n", | |
"False NaN True 20\n", | |
"True False NaN 12\n", | |
"False True False 10\n", | |
"NaN True False 8\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 79, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"combinations = Counter((month1, month2, month3) for id, month1, month2, month3 in table.itertuples())\n", | |
"combinations = pd.Series(combinations).sort(ascending=False, inplace=False)\n", | |
"combinations" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Предсказывать всё нужно будет для февраля" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 82, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{Timestamp('2015-02-01 00:00:00')}" | |
] | |
}, | |
"execution_count": 82, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"set(_.month for _ in test)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Ну слава богу" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total test ids: 26414\n", | |
"Unique test ids: 26414\n" | |
] | |
} | |
], | |
"source": [ | |
"print 'Total test ids:', len(test_ids)\n", | |
"print 'Unique test ids:', len(set(test_ids))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"70% пользователей из тренировочной выборки есть в тестовой" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"18512\n" | |
] | |
} | |
], | |
"source": [ | |
"test_target_ids = set(test_ids) & set(target_ids)\n", | |
"print len(test_target_ids)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Посмотрим на них подробнее" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>month</th>\n", | |
" <th>2014-11-01 00:00:00</th>\n", | |
" <th>2014-12-01 00:00:00</th>\n", | |
" <th>2015-01-01 00:00:00</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>id</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>drzgYfD/KyV312otNerxOMGj9ks=GAiu85msFUVvInkXW7xceeh4EWY=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>XtOPGcypMqRo6hPu9LIfxt3YXg8=C6xty8Qd7bxvJrgpHIl9mBIXZyU=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6hQWRoLaNOHlWP23CipL5f55ECA=DUcjw66omMz1dGlmOB1OFA0WDbM=</th>\n", | |
" <td>False</td>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0x5P9STXkK2LdLjqHFJlu2d68QQ=mkKMHPeV4MK33Q58jH0FIUH+Mdo=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19nI1UsPD0ASvEam6+Efbe+JmSY=FzkSvSPXAOEcKwoqFKubrUb94kA=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>XxUHyN55kbXhl85TeeIItNGTleE=7wSKT3ymGzECUQwlHvs3vxkcNp4=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>fHnnUJLJvWnRmsbQSk0JrPlbQTk=IIknVGMjCFacghhO0Ps6ZxYp+FE=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>finQI2+7lKr+S0FRZ/9zZ/MysLM=2ktQcY+FUZE+l1fkeYFEt8iVnDw=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>+sInCAL0URe3uZu4d3Em9KRdD0k=NSeyFwysXs0kDgAoDPFLKkDRwm8=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>jmwUzn7EJxX6mb58d5tHl9JLkoY=ckc1iTjmZCyv4hujh+yrHiqbtzU=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yXyMVgWRIvGRKDscZsijEJqB4O8=tQPHp9jDKtimAmsoqyA0rpi5Rbw=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>USPeTOffYc3GvmT/WJR4/xZlPcw=y5Ak3NcmmNc1lwm6KMYBJr94Szc=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Rdiq41pEcc2lhu8AO26RUK0Vv08=yL6wlhovxvMF14arIaDiUsz9EO4=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>gsmjLR4qrgdMXDanJ9SzAazlVis=0vTDWr/ZDvHFNFONMPa4HFzmDcw=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>YPB/0H6g2YHK7ZQJT277xNnI290=vVvEZVO5jH8lEfv+8mAmag9g/8U=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>N6RTOPUGaTj0iRjEbakZUjStXXY=U5fXpflRnB0lkFBkk/97vPMlTS0=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>RHpUYZNeHet4PGsA8l1gj6JUCnw=Mz2Cf14MNPf/UbAYPjqTsAbf+sk=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>SuJDJb8yf0BVv7p+ku3fmqcmo1g=vVvEZVO5jH8lEfv+8mAmag9g/8U=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>OJoT5LUq5R5EMOX8AbHTvrEfiSg=dnvpt9S05sn/ThHUJTQJAqH6FZk=</th>\n", | |
" <td>NaN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>HDxTe6JFEqaUr2Bvh0qt4TDPMJc=inwzhgx47lQ01oPY2u1s+Nesj8Q=</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"month 2014-11-01 2014-12-01 \\\n", | |
"id \n", | |
"drzgYfD/KyV312otNerxOMGj9ks=GAiu85msFUVvInkXW7x... False False \n", | |
"XtOPGcypMqRo6hPu9LIfxt3YXg8=C6xty8Qd7bxvJrgpHIl... NaN False \n", | |
"6hQWRoLaNOHlWP23CipL5f55ECA=DUcjw66omMz1dGlmOB1... False NaN \n", | |
"0x5P9STXkK2LdLjqHFJlu2d68QQ=mkKMHPeV4MK33Q58jH0... False False \n", | |
"19nI1UsPD0ASvEam6+Efbe+JmSY=FzkSvSPXAOEcKwoqFKu... False False \n", | |
"XxUHyN55kbXhl85TeeIItNGTleE=7wSKT3ymGzECUQwlHvs... False False \n", | |
"fHnnUJLJvWnRmsbQSk0JrPlbQTk=IIknVGMjCFacghhO0Ps... False False \n", | |
"finQI2+7lKr+S0FRZ/9zZ/MysLM=2ktQcY+FUZE+l1fkeYF... NaN NaN \n", | |
"+sInCAL0URe3uZu4d3Em9KRdD0k=NSeyFwysXs0kDgAoDPF... False False \n", | |
"jmwUzn7EJxX6mb58d5tHl9JLkoY=ckc1iTjmZCyv4hujh+y... False False \n", | |
"yXyMVgWRIvGRKDscZsijEJqB4O8=tQPHp9jDKtimAmsoqyA... False False \n", | |
"USPeTOffYc3GvmT/WJR4/xZlPcw=y5Ak3NcmmNc1lwm6KMY... False False \n", | |
"Rdiq41pEcc2lhu8AO26RUK0Vv08=yL6wlhovxvMF14arIaD... NaN False \n", | |
"gsmjLR4qrgdMXDanJ9SzAazlVis=0vTDWr/ZDvHFNFONMPa... False False \n", | |
"YPB/0H6g2YHK7ZQJT277xNnI290=vVvEZVO5jH8lEfv+8mA... False False \n", | |
"N6RTOPUGaTj0iRjEbakZUjStXXY=U5fXpflRnB0lkFBkk/9... False False \n", | |
"RHpUYZNeHet4PGsA8l1gj6JUCnw=Mz2Cf14MNPf/UbAYPjq... False False \n", | |
"SuJDJb8yf0BVv7p+ku3fmqcmo1g=vVvEZVO5jH8lEfv+8mA... False False \n", | |
"OJoT5LUq5R5EMOX8AbHTvrEfiSg=dnvpt9S05sn/ThHUJTQ... NaN False \n", | |
"HDxTe6JFEqaUr2Bvh0qt4TDPMJc=inwzhgx47lQ01oPY2u1... False False \n", | |
"\n", | |
"month 2015-01-01 \n", | |
"id \n", | |
"drzgYfD/KyV312otNerxOMGj9ks=GAiu85msFUVvInkXW7x... False \n", | |
"XtOPGcypMqRo6hPu9LIfxt3YXg8=C6xty8Qd7bxvJrgpHIl... False \n", | |
"6hQWRoLaNOHlWP23CipL5f55ECA=DUcjw66omMz1dGlmOB1... False \n", | |
"0x5P9STXkK2LdLjqHFJlu2d68QQ=mkKMHPeV4MK33Q58jH0... False \n", | |
"19nI1UsPD0ASvEam6+Efbe+JmSY=FzkSvSPXAOEcKwoqFKu... False \n", | |
"XxUHyN55kbXhl85TeeIItNGTleE=7wSKT3ymGzECUQwlHvs... False \n", | |
"fHnnUJLJvWnRmsbQSk0JrPlbQTk=IIknVGMjCFacghhO0Ps... False \n", | |
"finQI2+7lKr+S0FRZ/9zZ/MysLM=2ktQcY+FUZE+l1fkeYF... False \n", | |
"+sInCAL0URe3uZu4d3Em9KRdD0k=NSeyFwysXs0kDgAoDPF... False \n", | |
"jmwUzn7EJxX6mb58d5tHl9JLkoY=ckc1iTjmZCyv4hujh+y... False \n", | |
"yXyMVgWRIvGRKDscZsijEJqB4O8=tQPHp9jDKtimAmsoqyA... False \n", | |
"USPeTOffYc3GvmT/WJR4/xZlPcw=y5Ak3NcmmNc1lwm6KMY... False \n", | |
"Rdiq41pEcc2lhu8AO26RUK0Vv08=yL6wlhovxvMF14arIaD... False \n", | |
"gsmjLR4qrgdMXDanJ9SzAazlVis=0vTDWr/ZDvHFNFONMPa... False \n", | |
"YPB/0H6g2YHK7ZQJT277xNnI290=vVvEZVO5jH8lEfv+8mA... False \n", | |
"N6RTOPUGaTj0iRjEbakZUjStXXY=U5fXpflRnB0lkFBkk/9... False \n", | |
"RHpUYZNeHet4PGsA8l1gj6JUCnw=Mz2Cf14MNPf/UbAYPjq... False \n", | |
"SuJDJb8yf0BVv7p+ku3fmqcmo1g=vVvEZVO5jH8lEfv+8mA... False \n", | |
"OJoT5LUq5R5EMOX8AbHTvrEfiSg=dnvpt9S05sn/ThHUJTQ... False \n", | |
"HDxTe6JFEqaUr2Bvh0qt4TDPMJc=inwzhgx47lQ01oPY2u1... False " | |
] | |
}, | |
"execution_count": 93, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"table = table.reindex(index=test_target_ids)\n", | |
"table.head(20)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Для 150 я кажется знаю ответ: они уйдут в феврале, потому что уже ушли в январе." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 94, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False False False 12485\n", | |
"NaN False False 5054\n", | |
" NaN False 599\n", | |
"False False True 99\n", | |
" NaN False 62\n", | |
" False NaN 58\n", | |
"NaN False True 56\n", | |
" NaN 45\n", | |
" NaN True 42\n", | |
"False NaN True 10\n", | |
" NaN 2\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 94, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"combinations = Counter((month1, month2, month3) for id, month1, month2, month3 in table.itertuples())\n", | |
"combinations = pd.Series(combinations).sort(ascending=False, inplace=False)\n", | |
"combinations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment