Skip to content

Instantly share code, notes, and snippets.

@theotheo
Created February 26, 2019 12:58
Show Gist options
  • Save theotheo/7cf17e47963284000b58852edc5178fb to your computer and use it in GitHub Desktop.
Save theotheo/7cf17e47963284000b58852edc5178fb to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2019-02-26T12:53:52.960343Z",
"start_time": "2019-02-26T12:53:52.701602Z"
}
},
"outputs": [],
"source": [
"import requests \n",
"import parsel\n",
"import logging\n",
"\n",
"\n",
"def get_photo_urls(event='2014 Сестрорецкий полумарафон', number=10):\n",
" URL = 'https://www.marathon-photo.ru/index.php'\n",
"\n",
" params = {\n",
" 'sphoto': 'on', # WHAT IS IT\n",
" 'competition': event,\n",
" 'search': number\n",
" }\n",
" \n",
" res = requests.get(URL.format(10), params=params)\n",
" logging.debug(res.url)\n",
" \n",
" sel = parsel.Selector(res.content.decode())\n",
" \n",
" \n",
" for part in sel.css('.tr2 img::attr(src)').getall():\n",
" yield 'https://www.marathon-photo.ru' + part \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2019-02-26T12:53:52.960343Z",
"start_time": "2019-02-26T12:53:52.701602Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['https://www.marathon-photo.ru/static2/preview1/stock-photo-10-101-223082.jpg']"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(get_photo_urls(number=101))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"main_language": "python",
"text_representation": {
"extension": ".md",
"format_name": "markdown",
"format_version": "1.0",
"jupytext_version": "0.8.5"
}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment