Skip to content

Instantly share code, notes, and snippets.

@vol1ura
Created December 25, 2021 21:03
Show Gist options
  • Save vol1ura/0e2f8aa924753c763a2c0bb868705a8e to your computer and use it in GitHub Desktop.
Save vol1ura/0e2f8aa924753c763a2c0bb868705a8e to your computer and use it in GitHub Desktop.
clubmates_engirunners.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "clubmates_engirunners.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyOWSRcN76BxAOsrr77rG3Hz",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"4a3d8cc726d04888a03aa610ceaced1e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_820063756eb54fc8a39a84db865d7ea4",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_8c8627c52d344f8cac5b209f2eaa56f9",
"IPY_MODEL_294f52e6617b4f3b922bd647419e6cd2",
"IPY_MODEL_4d869a25e2f54da2a28c926279e511db"
]
}
},
"820063756eb54fc8a39a84db865d7ea4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8c8627c52d344f8cac5b209f2eaa56f9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_770911812c774074a283d41798d1ff5b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_be94329f0da54421a3f16b25bbc07548"
}
},
"294f52e6617b4f3b922bd647419e6cd2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_cc345771c1374fc5a18e67aad15358fb",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 108,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 108,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_a2238e026f2748748920d7dcadf8f93a"
}
},
"4d869a25e2f54da2a28c926279e511db": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_139c00373a514ceb9edbc7710431483b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 108/108 [17:16<00:00, 8.81s/it]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_2c502534ff18428187f87629a87bfc8d"
}
},
"770911812c774074a283d41798d1ff5b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"be94329f0da54421a3f16b25bbc07548": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"cc345771c1374fc5a18e67aad15358fb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"a2238e026f2748748920d7dcadf8f93a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"139c00373a514ceb9edbc7710431483b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"2c502534ff18428187f87629a87bfc8d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/vol1ura/0e2f8aa924753c763a2c0bb868705a8e/clubmates_engirunners.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "zRg0J0r6ztxV"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random\n",
"import re\n",
"import requests\n",
"import time\n",
"from tqdm.notebook import tqdm\n",
"\n",
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"source": [
"club_id = 24630 # ENGIRUNNERS"
],
"metadata": {
"id": "clo0v2xzzyLU"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"headers = {\n",
"    'Host': 'www.parkrun.ru',\n",
"    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0',\n",
"    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\n",
"    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',\n",
"    'Accept-Encoding': 'gzip, deflate',\n",
"    'Connection': 'keep-alive',\n",
"    'Upgrade-Insecure-Requests': '1',\n",
"    'Sec-GPC': '1',\n",
"    'TE': 'Trailers'\n",
"}"
],
"metadata": {
"id": "JMntqd19z4bw"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"page_all_results = requests.get('https://www.parkrun.ru/results/courserecords/', headers=headers)\n",
"data = pd.read_html(page_all_results.text)[0]\n",
"russian_parkruns = data[data.columns[0]]"
],
"metadata": {
"id": "ctIMuJuVz7T9"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"dfs = []\n",
"for parkrun in tqdm(russian_parkruns):\n",
"    time.sleep(3 + 5*random.random())\n",
"    parkrun_trim = re.sub(r'[\\s-]', '', parkrun)\n",
"    url = f'https://www.parkrun.ru/{parkrun_trim}/results/clubhistory/?clubNum={club_id}'\n",
"    club_results = requests.get(url, headers=headers)\n",
"    try:\n",
"        df = pd.read_html(club_results.text)[0]\n",
"        dfs.append(df[df.columns[0]])\n",
"    except Exception:\n",
"        print('ОШИБКА - операция завершилась досрочно. Паркран временно заблокировал IP.')\n",
"        break"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"4a3d8cc726d04888a03aa610ceaced1e",
"820063756eb54fc8a39a84db865d7ea4",
"8c8627c52d344f8cac5b209f2eaa56f9",
"294f52e6617b4f3b922bd647419e6cd2",
"4d869a25e2f54da2a28c926279e511db",
"770911812c774074a283d41798d1ff5b",
"be94329f0da54421a3f16b25bbc07548",
"cc345771c1374fc5a18e67aad15358fb",
"a2238e026f2748748920d7dcadf8f93a",
"139c00373a514ceb9edbc7710431483b",
"2c502534ff18428187f87629a87bfc8d"
]
},
"id": "pB86PSu40JiI",
"outputId": "c8369b97-53c8-4b8b-b0aa-3a195e802fc3"
},
"execution_count": 5,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4a3d8cc726d04888a03aa610ceaced1e",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/108 [00:00<?, ?it/s]"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"def last_name_first(full_name: str):\n",
"    names = full_name.split()\n",
"    if len(names) < 2:\n",
"        return ' '.join(names)\n",
"    last_name = names.pop(1).capitalize()\n",
"    names.insert(0, last_name)\n",
"    return ' '.join(names)"
],
"metadata": {
"id": "JGv4TY_5LOho"
},
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"source": [
"members = pd.concat(dfs).drop_duplicates(keep='last').apply(last_name_first).sort_values()\n",
"pd.concat([pd.Series(['________Фамилия_Имя___']), members], ignore_index=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u-ij29bnAOaq",
"outputId": "1462dc7f-bf69-4293-9944-84c7dee29dd2"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 ________Фамилия_Имя___\n",
"1 Kondratenko Gleb\n",
"2 Martynov Dmitry\n",
"3 Smirnov Kirill\n",
"4 Акинин Алексей\n",
"5 Бакланов Михаил\n",
"6 Ванаг Константин\n",
"7 Дмитренко Наталья\n",
"8 Зенов Роман\n",
"9 Иванов Сергей\n",
"10 Колокольников Алексей\n",
"11 Комаров Алексей\n",
"12 Кравцов Даниил\n",
"13 Лавлинская Дарья\n",
"14 Мартынова Диана\n",
"15 Маяков Денис\n",
"16 Нестеров Алексей\n",
"17 Осипов Федор\n",
"18 Петров Дмитрий\n",
"19 Пьянов Артем\n",
"20 Самошин Илья\n",
"21 Симонов Юрий\n",
"22 Сосновский Виктор\n",
"23 Телепень Николай\n",
"24 Титаренко Наталья\n",
"25 Ходакова Людмила\n",
"26 Шемякина Надежда\n",
"27 Щукина Ольга\n",
"Name: (Unnamed: 0_level_0, Участник), dtype: object"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "526myT8CFL74"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment