Created
May 20, 2016 07:33
-
-
Save ermakovpetr/e71af09554f39ec8f5276a122ffaa019 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from utils.database_utils import *" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>employer_id</th>\n", | |
" <th>manager_id</th>\n", | |
" <th>creation_time</th>\n", | |
" <th>area_id</th>\n", | |
" <th>name</th>\n", | |
" <th>agreement_id</th>\n", | |
" <th>category</th>\n", | |
" <th>url</th>\n", | |
" <th>group_name</th>\n", | |
" <th>on_firstpage</th>\n", | |
" <th>...</th>\n", | |
" <th>vacancy_page_template</th>\n", | |
" <th>country_id</th>\n", | |
" <th>employees_number</th>\n", | |
" <th>registration_completed</th>\n", | |
" <th>dublicated_employer_b</th>\n", | |
" <th>corporate_gift_b</th>\n", | |
" <th>hide_type_b</th>\n", | |
" <th>view_similar_vacancy_b</th>\n", | |
" <th>must_discard_by_robot_b</th>\n", | |
" <th>hidden</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>77453</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2007-03-23 09:52:06.000</td>\n", | |
" <td>1</td>\n", | |
" <td>СС</td>\n", | |
" <td></td>\n", | |
" <td>0</td>\n", | |
" <td>http://</td>\n", | |
" <td>77453</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>None</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>None</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1654338</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2014-09-29 18:08:55.776</td>\n", | |
" <td>2</td>\n", | |
" <td>бургер кинг</td>\n", | |
" <td></td>\n", | |
" <td>0</td>\n", | |
" <td></td>\n", | |
" <td>None</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>None</td>\n", | |
" <td>1</td>\n", | |
" <td>1.0</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2 rows × 50 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" employer_id manager_id creation_time area_id name \\\n", | |
"0 77453 NaN 2007-03-23 09:52:06.000 1 СС \n", | |
"1 1654338 NaN 2014-09-29 18:08:55.776 2 бургер кинг \n", | |
"\n", | |
" agreement_id category url group_name on_firstpage ... \\\n", | |
"0 0 http:// 77453 0 ... \n", | |
"1 0 None 0 ... \n", | |
"\n", | |
" vacancy_page_template country_id employees_number registration_completed \\\n", | |
"0 None 1 NaN None \n", | |
"1 None 1 1.0 False \n", | |
"\n", | |
" dublicated_employer_b corporate_gift_b hide_type_b \\\n", | |
"0 False False False \n", | |
"1 False False False \n", | |
"\n", | |
" view_similar_vacancy_b must_discard_by_robot_b hidden \n", | |
"0 True False False \n", | |
"1 True False False \n", | |
"\n", | |
"[2 rows x 50 columns]" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# сделать обычный запрос, можно делать запросы\n", | |
"# load_pssql, load_pssql_data, load_mssql, load_hive\n", | |
"\n", | |
"employer = load_pssql('SELECT * FROM employer LIMIT 1000')\n", | |
"employer.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 1000 entries, 0 to 999\n", | |
"Data columns (total 50 columns):\n", | |
"employer_id 1000 non-null int64\n", | |
"manager_id 77 non-null float64\n", | |
"creation_time 1000 non-null datetime64[ns]\n", | |
"area_id 1000 non-null int64\n", | |
"name 1000 non-null object\n", | |
"agreement_id 1000 non-null object\n", | |
"category 1000 non-null int64\n", | |
"url 999 non-null object\n", | |
"group_name 162 non-null object\n", | |
"on_firstpage 1000 non-null int64\n", | |
"in_best_list 1000 non-null int64\n", | |
"delivery_address 2 non-null object\n", | |
"discard_status 1000 non-null int64\n", | |
"size 1000 non-null int64\n", | |
"pay_readiness 1000 non-null int64\n", | |
"own_policy 1000 non-null int64\n", | |
"employer_branch_head_company_action_type 1000 non-null int64\n", | |
"employer_branch_type 1000 non-null int64\n", | |
"owning_employer 953 non-null float64\n", | |
"email_for_vacancy_responces 0 non-null object\n", | |
"resume_views 1000 non-null int64\n", | |
"bank_detail_id 28 non-null float64\n", | |
"delivery_person 2 non-null object\n", | |
"main_insider_id 1000 non-null int64\n", | |
"vacancy_response_replay 1000 non-null bool\n", | |
"open_vacancies_number 1000 non-null int64\n", | |
"how_did_you_hear_about_us_id 488 non-null float64\n", | |
"resume_views_limit 1000 non-null int64\n", | |
"registration_platform_id 1000 non-null int64\n", | |
"parent_id 0 non-null object\n", | |
"structure_name 0 non-null object\n", | |
"discard_status_employer_id 306 non-null float64\n", | |
"small_logo_url 0 non-null object\n", | |
"talentlink_client 0 non-null object\n", | |
"page_template 0 non-null object\n", | |
"organization_form 406 non-null float64\n", | |
"state 1000 non-null int64\n", | |
"last_change_time 825 non-null datetime64[ns]\n", | |
"confirmed_anonymous 1000 non-null bool\n", | |
"site_id 712 non-null float64\n", | |
"vacancy_page_template 0 non-null object\n", | |
"country_id 1000 non-null int64\n", | |
"employees_number 361 non-null float64\n", | |
"registration_completed 377 non-null object\n", | |
"dublicated_employer_b 1000 non-null bool\n", | |
"corporate_gift_b 1000 non-null bool\n", | |
"hide_type_b 1000 non-null bool\n", | |
"view_similar_vacancy_b 1000 non-null bool\n", | |
"must_discard_by_robot_b 1000 non-null bool\n", | |
"hidden 1000 non-null bool\n", | |
"dtypes: bool(8), datetime64[ns](2), float64(8), int64(18), object(14)\n", | |
"memory usage: 336.0+ KB\n" | |
] | |
} | |
], | |
"source": [ | |
"employer.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"log: write cache\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>resume.datetime</th>\n", | |
" <th>resume.value</th>\n", | |
" <th>resume.year</th>\n", | |
" <th>resume.month</th>\n", | |
" <th>resume.day</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2015-12-31 00:23:04,191</td>\n", | |
" <td>{\"id\":24406648,\"userId\":2530514,\"platform\":\"HE...</td>\n", | |
" <td>2016</td>\n", | |
" <td>01</td>\n", | |
" <td>01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2015-12-31 00:23:04,201</td>\n", | |
" <td>{\"id\":47975617,\"userId\":2530514,\"platform\":\"HE...</td>\n", | |
" <td>2016</td>\n", | |
" <td>01</td>\n", | |
" <td>01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2015-12-31 00:23:04,214</td>\n", | |
" <td>{\"id\":48062731,\"userId\":2530514,\"platform\":\"HE...</td>\n", | |
" <td>2016</td>\n", | |
" <td>01</td>\n", | |
" <td>01</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" resume.datetime resume.value \\\n", | |
"0 2015-12-31 00:23:04,191 {\"id\":24406648,\"userId\":2530514,\"platform\":\"HE... \n", | |
"1 2015-12-31 00:23:04,201 {\"id\":47975617,\"userId\":2530514,\"platform\":\"HE... \n", | |
"2 2015-12-31 00:23:04,214 {\"id\":48062731,\"userId\":2530514,\"platform\":\"HE... \n", | |
"\n", | |
" resume.year resume.month resume.day \n", | |
"0 2016 01 01 \n", | |
"1 2016 01 01 \n", | |
"2 2016 01 01 " | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# можно закэшировать результат запроса\n", | |
"\n", | |
"load_hive(\"SELECT * FROM moderation.resume WHERE year = '2016' LIMIT 3\", write_cache=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"log: read cache\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>resume.datetime</th>\n", | |
" <th>resume.value</th>\n", | |
" <th>resume.year</th>\n", | |
" <th>resume.month</th>\n", | |
" <th>resume.day</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2015-12-31 00:23:04,191</td>\n", | |
" <td>{\"id\":24406648,\"userId\":2530514,\"platform\":\"HE...</td>\n", | |
" <td>2016</td>\n", | |
" <td>01</td>\n", | |
" <td>01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2015-12-31 00:23:04,201</td>\n", | |
" <td>{\"id\":47975617,\"userId\":2530514,\"platform\":\"HE...</td>\n", | |
" <td>2016</td>\n", | |
" <td>01</td>\n", | |
" <td>01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2015-12-31 00:23:04,214</td>\n", | |
" <td>{\"id\":48062731,\"userId\":2530514,\"platform\":\"HE...</td>\n", | |
" <td>2016</td>\n", | |
" <td>01</td>\n", | |
" <td>01</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" resume.datetime resume.value \\\n", | |
"0 2015-12-31 00:23:04,191 {\"id\":24406648,\"userId\":2530514,\"platform\":\"HE... \n", | |
"1 2015-12-31 00:23:04,201 {\"id\":47975617,\"userId\":2530514,\"platform\":\"HE... \n", | |
"2 2015-12-31 00:23:04,214 {\"id\":48062731,\"userId\":2530514,\"platform\":\"HE... \n", | |
"\n", | |
" resume.year resume.month resume.day \n", | |
"0 2016 01 01 \n", | |
"1 2016 01 01 \n", | |
"2 2016 01 01 " | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# можно прочитать результат запроса из кэша, если он есть (выше мы записали)\n", | |
"\n", | |
"load_hive(\"SELECT * FROM moderation.resume WHERE year = '2016' LIMIT 3\", read_cache=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>resume.datetime</th>\n", | |
" <th>resume.value</th>\n", | |
" <th>resume.year</th>\n", | |
" <th>resume.month</th>\n", | |
" <th>resume.day</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2015-10-02 04:35:55,243</td>\n", | |
" <td>{\"id\":42817070,\"userId\":24395671,\"platform\":\"H...</td>\n", | |
" <td>2015</td>\n", | |
" <td>10</td>\n", | |
" <td>02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2015-10-02 05:24:20,863</td>\n", | |
" <td>{\"id\":31667687,\"userId\":18829490,\"platform\":\"H...</td>\n", | |
" <td>2015</td>\n", | |
" <td>10</td>\n", | |
" <td>02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2015-10-02 07:36:46,968</td>\n", | |
" <td>{\"id\":45251573,\"userId\":25269223,\"platform\":\"H...</td>\n", | |
" <td>2015</td>\n", | |
" <td>10</td>\n", | |
" <td>02</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" resume.datetime resume.value \\\n", | |
"0 2015-10-02 04:35:55,243 {\"id\":42817070,\"userId\":24395671,\"platform\":\"H... \n", | |
"1 2015-10-02 05:24:20,863 {\"id\":31667687,\"userId\":18829490,\"platform\":\"H... \n", | |
"2 2015-10-02 07:36:46,968 {\"id\":45251573,\"userId\":25269223,\"platform\":\"H... \n", | |
"\n", | |
" resume.year resume.month resume.day \n", | |
"0 2015 10 02 \n", | |
"1 2015 10 02 \n", | |
"2 2015 10 02 " | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# результат кэшируется исходя из запроса, меняем запрос и пытаемся прочитать из кэша\n", | |
"# из кэша не прочитанно - ибо для данного запроса кэш не был сохранен\n", | |
"\n", | |
"load_hive(\"SELECT * FROM moderation.resume WHERE year = '2015' LIMIT 3\", read_cache=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"log: write cache\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>resume.datetime</th>\n", | |
" <th>resume.value</th>\n", | |
" <th>resume.year</th>\n", | |
" <th>resume.month</th>\n", | |
" <th>resume.day</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2015-10-02 04:35:55,243</td>\n", | |
" <td>{\"id\":42817070,\"userId\":24395671,\"platform\":\"H...</td>\n", | |
" <td>2015</td>\n", | |
" <td>10</td>\n", | |
" <td>02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2015-10-02 05:24:20,863</td>\n", | |
" <td>{\"id\":31667687,\"userId\":18829490,\"platform\":\"H...</td>\n", | |
" <td>2015</td>\n", | |
" <td>10</td>\n", | |
" <td>02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2015-10-02 07:36:46,968</td>\n", | |
" <td>{\"id\":45251573,\"userId\":25269223,\"platform\":\"H...</td>\n", | |
" <td>2015</td>\n", | |
" <td>10</td>\n", | |
" <td>02</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" resume.datetime resume.value \\\n", | |
"0 2015-10-02 04:35:55,243 {\"id\":42817070,\"userId\":24395671,\"platform\":\"H... \n", | |
"1 2015-10-02 05:24:20,863 {\"id\":31667687,\"userId\":18829490,\"platform\":\"H... \n", | |
"2 2015-10-02 07:36:46,968 {\"id\":45251573,\"userId\":25269223,\"platform\":\"H... \n", | |
"\n", | |
" resume.year resume.month resume.day \n", | |
"0 2015 10 02 \n", | |
"1 2015 10 02 \n", | |
"2 2015 10 02 " | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# можно пытаться читать из кэша, а если читать нечего, то запрос будет выполнен, а результат сохранен\n", | |
"\n", | |
"load_hive(\"SELECT * FROM moderation.resume WHERE year = '2015' LIMIT 3\", read_cache=True, write_cache=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"__cache_wrapper_-1479390422386652675.pckl\r\n", | |
"__cache_wrapper_-4844589160831331624.pckl\r\n", | |
"__cache_wrapper_-6307272719582419216.pckl\r\n", | |
"__cache_wrapper_1254068372247203274.pckl\r\n" | |
] | |
} | |
], | |
"source": [ | |
"# кэш файл кладется в папку вместе с py-файлом из которого вы вызываете функции и выглядит так\n", | |
"\n", | |
"!ls __cache_*" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2 (upstream libs)", | |
"language": "python", | |
"name": "python2-upstream" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment