-
-
Save justheuristic/e582879e0aab458a058732245143fc6d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Данные" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from datetime import datetime\n", | |
"\n", | |
"def print_time_range(times):\n", | |
" print 'Time from: {0}, to: {1}'.format(datetime.fromtimestamp(times.min()), datetime.fromtimestamp(times.max()))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Лайки" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(111251, 4)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>user_id</th>\n", | |
" <th>item_id</th>\n", | |
" <th>channel</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>612d8e8eef05acff3278c061ec10f704</td>\n", | |
" <td>7aa5d00445cb9d61d1739dd0df9a0a88</td>\n", | |
" <td>1f0e3dad99908345f7439f8ffabdffc4</td>\n", | |
" <td>1389733974</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>71a7f1d1be96603971ba66e4a17e845c</td>\n", | |
" <td>5edaf734b432e5cc954a10b59cb97e70</td>\n", | |
" <td>ec5decca5ed3d6b8079e2e7e7bacc9f2</td>\n", | |
" <td>1390459377</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>6eaa117728d50265e6b2ac24a80e04ae</td>\n", | |
" <td>8ad97d075fce19c2d182eb2a4539aa1c</td>\n", | |
" <td>98f13708210194c475687be6106a3b84</td>\n", | |
" <td>1391063963</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" user_id item_id \\\n", | |
"0 612d8e8eef05acff3278c061ec10f704 7aa5d00445cb9d61d1739dd0df9a0a88 \n", | |
"1 71a7f1d1be96603971ba66e4a17e845c 5edaf734b432e5cc954a10b59cb97e70 \n", | |
"2 6eaa117728d50265e6b2ac24a80e04ae 8ad97d075fce19c2d182eb2a4539aa1c \n", | |
"\n", | |
" channel time \n", | |
"0 1f0e3dad99908345f7439f8ffabdffc4 1389733974 \n", | |
"1 ec5decca5ed3d6b8079e2e7e7bacc9f2 1390459377 \n", | |
"2 98f13708210194c475687be6106a3b84 1391063963 " | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_likes_df = pd.read_csv('train_likes.csv')\n", | |
"print train_likes_df.shape\n", | |
"train_likes_df.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Time from: 2014-01-10 17:15:37, to: 2016-02-24 15:15:37\n" | |
] | |
} | |
], | |
"source": [ | |
"print_time_range(train_likes_df.time)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Контент" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[u'duration', u'f_110698', u'f_110704', u'f_110705', u'f_119091', u'f_122282', u'f_122494', u'f_130892', u'f_144583', u'f_148957', u'f_148972', u'f_151440', u'f_153601', u'f_158463', u'f_163719', u'f_168477', u'f_187511', u'f_191091', u'f_205162', u'f_207186', u'f_210900', u'f_30859', u'f_39933', u'f_44251', u'f_46518', u'f_49968', u'f_74564', u'genre', u'id', u'year']\n" | |
] | |
} | |
], | |
"source": [ | |
"import json\n", | |
"\n", | |
"with open('items.json') as f:\n", | |
" items_dicts = json.load(f)\n", | |
" \n", | |
"print sorted(items_dicts[0].keys())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"У каждой единицы контента есть четыре обязательные характеристики:\n", | |
"- id --- идентификатор\n", | |
"- duration --- коэффициент продолжительности\n", | |
"- year --- коэффициент года производства\n", | |
"- genre --- жанр (категориальная переменная)\n", | |
"\n", | |
"и множество опциональных переменных вида \"f_N\"." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Замечание\n", | |
"Описание есть не для всего контента (покрыто порядка 2/3 лайков)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Расписание" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"time_end,time_start,item_id,channel\r\n", | |
"1457356808.0,1457354408.0,84868d868783ee3a41b963a2fb2629ec,02522a2b2726fb0a03bb19f2d8d9524d\r\n", | |
"1457360408.0,1457356808.0,84868d868783ee3a41b963a2fb2629ec,02522a2b2726fb0a03bb19f2d8d9524d\r\n", | |
"1457371208.0,1457369408.0,574629dcccbf0f871e9eec4ef14ff270,02522a2b2726fb0a03bb19f2d8d9524d\r\n", | |
"1458467708.0,1458467108.0,1613b303565352254d5551886ec677c2,04025959b191f8f9de3f924f0940515f\r\n", | |
"1458464708.0,1458462008.0,6d38003a9c000dd30e05edad1559b711,43baa6762fa81bb43b39c62553b2970d\r\n", | |
"1457302808.0,1457301008.0,574629dcccbf0f871e9eec4ef14ff270,7f1de29e6da19d22b51c68001e7e0e54\r\n", | |
"1457311808.0,1457310008.0,f5905aaeeece9fe55335b16009dfca7b,7f1de29e6da19d22b51c68001e7e0e54\r\n", | |
"1457313608.0,1457311808.0,574629dcccbf0f871e9eec4ef14ff270,7f1de29e6da19d22b51c68001e7e0e54\r\n", | |
"1457325908.0,1457324408.0,574629dcccbf0f871e9eec4ef14ff270,7f1de29e6da19d22b51c68001e7e0e54\r\n" | |
] | |
} | |
], | |
"source": [ | |
"#первые строки расписания\n", | |
"!head schedule.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(1000, 4)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>time_end</th>\n", | |
" <th>time_start</th>\n", | |
" <th>item_id</th>\n", | |
" <th>channel</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1457356808</td>\n", | |
" <td>1457354408</td>\n", | |
" <td>84868d868783ee3a41b963a2fb2629ec</td>\n", | |
" <td>02522a2b2726fb0a03bb19f2d8d9524d</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1457360408</td>\n", | |
" <td>1457356808</td>\n", | |
" <td>84868d868783ee3a41b963a2fb2629ec</td>\n", | |
" <td>02522a2b2726fb0a03bb19f2d8d9524d</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1457371208</td>\n", | |
" <td>1457369408</td>\n", | |
" <td>574629dcccbf0f871e9eec4ef14ff270</td>\n", | |
" <td>02522a2b2726fb0a03bb19f2d8d9524d</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" time_end time_start item_id \\\n", | |
"0 1457356808 1457354408 84868d868783ee3a41b963a2fb2629ec \n", | |
"1 1457360408 1457356808 84868d868783ee3a41b963a2fb2629ec \n", | |
"2 1457371208 1457369408 574629dcccbf0f871e9eec4ef14ff270 \n", | |
"\n", | |
" channel \n", | |
"0 02522a2b2726fb0a03bb19f2d8d9524d \n", | |
"1 02522a2b2726fb0a03bb19f2d8d9524d \n", | |
"2 02522a2b2726fb0a03bb19f2d8d9524d " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"schedule_df = pd.read_csv('schedule.csv', \n", | |
" nrows=1000,# Первые 1000 строк\n", | |
" )\n", | |
"print schedule_df.shape\n", | |
"schedule_df.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment