Skip to content

Instantly share code, notes, and snippets.

@rs6000
Created January 26, 2019 12:54
Show Gist options
  • Save rs6000/9c66f5d6575ca7f285adc552e2e8132b to your computer and use it in GitHub Desktop.
Save rs6000/9c66f5d6575ca7f285adc552e2e8132b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-26T12:45:39.087055Z",
"start_time": "2019-01-26T12:45:38.889038Z"
}
},
"outputs": [],
"source": [
"import requests, re, os, csv, wget, time\n",
"from urllib.parse import urljoin\n",
"\n",
"from bs4 import BeautifulSoup\n",
"# Site root; relative links scraped from the forum pages are resolved against it\n",
"base_url = 'http://stockmarketpilipinas.com/'\n",
"# Thread 337 without a page suffix\n",
"url = 'http://stockmarketpilipinas.com/thread-337.html'\n",
"# Page 454 of the same thread\n",
"url2 = 'http://stockmarketpilipinas.com/thread-337-page-454.html'\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-26T12:45:40.187889Z",
"start_time": "2019-01-26T12:45:39.945738Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"檔案名稱:stockquotes_01252019.csv\n",
"檔案連結:http://stockmarketpilipinas.com/attachment.php?aid=3844\n"
]
}
],
"source": [
"# Fetch the thread page; fail fast on HTTP errors instead of parsing an error page,\n",
"# and never hang forever on a stalled connection\n",
"page_html = requests.get(url2, timeout=30)\n",
"page_html.raise_for_status()\n",
"page_soup = BeautifulSoup(page_html.text, 'lxml')\n",
"\n",
"# CSV files uploaded as attachments are listed inside <fieldset> elements\n",
"# of the posts container; fall back to an empty list if the container is missing\n",
"posts_div = page_soup.find('div', {'id': 'posts'})\n",
"title = posts_div.find_all('fieldset') if posts_div is not None else []\n",
"for i in title:\n",
"    # A fieldset may hold several attachments (or none), so walk every link with an href\n",
"    for link in i.find_all('a', href=True):\n",
"        # File name, lower-cased\n",
"        f_name = link.text.lower()\n",
"        # Absolute download link (urljoin also copes with hrefs that are already absolute)\n",
"        f_href = urljoin(base_url, link['href'])\n",
"        # Report inside the loop so every attachment is listed, not only the last one\n",
"        print(\"檔案名稱:{}\\n檔案連結:{}\".format(f_name, f_href))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-26T12:45:41.348034Z",
"start_time": "2019-01-26T12:45:41.326947Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[<fieldset>\n",
" <legend><strong>Attached Files</strong></legend>\n",
" <!-- start: postbit_attachments_attachment -->\n",
" <br/><!-- start: attachment_icon -->\n",
" <img alt=\".csv\" border=\"0\" src=\"http://stockmarketpilipinas.com/images/attachtypes/xls.gif\" title=\"\"/>\n",
" <!-- end: attachment_icon -->  <a href=\"attachment.php?aid=3844\" target=\"_blank\" title=\"Yesterday, 04:12 PM\">stockQuotes_01252019.csv</a> (Size: 11.79 KB / Downloads: 221)\n",
" <!-- end: postbit_attachments_attachment -->\n",
" </fieldset>]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the <fieldset> elements matched by the previous cell\n",
"title"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-26T12:46:52.443659Z",
"start_time": "2019-01-26T12:46:52.185881Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"檔案名稱:stockquotes_01242019.csv\n",
"檔案連結:http://stockmarketpilipinas.com/attachment.php?aid=3843\n",
"檔案名稱:stockquotes_01252019.csv\n",
"檔案連結:http://stockmarketpilipinas.com/attachment.php?aid=3844\n"
]
}
],
"source": [
"# Fetch the thread page; fail fast on HTTP errors and never hang on a stalled connection\n",
"page_html = requests.get(url2, timeout=30)\n",
"page_html.raise_for_status()\n",
"page_soup = BeautifulSoup(page_html.text, 'lxml')\n",
"\n",
"# Scan every <a> in the posts container so CSV links outside a <fieldset> are found as well;\n",
"# fall back to an empty list if the container is missing\n",
"posts_div = page_soup.find('div', {'id': 'posts'})\n",
"title = posts_div.find_all('a') if posts_div is not None else []\n",
"for i in title:\n",
"    # Case-insensitive extension check; anchors without an href cannot be downloaded\n",
"    if not i.text.lower().endswith(\".csv\") or not i.get('href'):\n",
"        continue\n",
"    # File name, lower-cased\n",
"    f_name = i.text.lower()\n",
"    # Absolute download link (urljoin leaves already-absolute hrefs intact)\n",
"    f_href = urljoin(base_url, i['href'])\n",
"    print(\"檔案名稱:{}\\n檔案連結:{}\".format(f_name, f_href))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-26T12:46:10.078107Z",
"start_time": "2019-01-26T12:46:10.064967Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[<a id=\"pid345054\" name=\"pid345054\"></a>,\n",
" <a href=\"user-1.html\"><img alt=\"\" height=\"59\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_1.gif?dateline=1422259061\" width=\"70\"/></a>,\n",
" <a href=\"http://stockmarketpilipinas.com/user-1.html\"><span style=\"color: orange;\"><strong><em>Ollie</em></strong></span></a>,\n",
" <a href=\"reputation.php?uid=1\"><strong class=\"reputation_positive\">198</strong></a>,\n",
" <a href=\"thread-337-post-345054.html#pid345054\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,531</a>,\n",
" <a class=\"mycode_url\" href=\"https://pixiutrades.wordpress.com/2018/12/20/focus-an-intensive-4-week-stock-market-trading-workshop/\" rel=\"noopener\" target=\"_blank\">https://pixiutrades.wordpress.com/2018/1...-workshop/</a>,\n",
" <a class=\"postbit_website\" href=\"http://www.stockmarketpilipinas.com\" rel=\"noopener\" target=\"_blank\" title=\"Visit this user's website\"><span>Website</span></a>,\n",
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&amp;replyto=345054\" title=\"Quote this message in a reply\"><span>Reply</span></a>,\n",
" <a id=\"pid345055\" name=\"pid345055\"></a>,\n",
" <a href=\"user-1.html\"><img alt=\"\" height=\"59\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_1.gif?dateline=1422259061\" width=\"70\"/></a>,\n",
" <a href=\"http://stockmarketpilipinas.com/user-1.html\"><span style=\"color: orange;\"><strong><em>Ollie</em></strong></span></a>,\n",
" <a href=\"reputation.php?uid=1\"><strong class=\"reputation_positive\">198</strong></a>,\n",
" <a href=\"thread-337-post-345055.html#pid345055\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,532</a>,\n",
" <a class=\"mycode_url\" href=\"https://pixiutrades.wordpress.com/2018/12/20/focus-an-intensive-4-week-stock-market-trading-workshop/\" rel=\"noopener\" target=\"_blank\">https://pixiutrades.wordpress.com/2018/1...-workshop/</a>,\n",
" <a class=\"postbit_website\" href=\"http://www.stockmarketpilipinas.com\" rel=\"noopener\" target=\"_blank\" title=\"Visit this user's website\"><span>Website</span></a>,\n",
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&amp;replyto=345055\" title=\"Quote this message in a reply\"><span>Reply</span></a>,\n",
" <a id=\"pid345056\" name=\"pid345056\"></a>,\n",
" <a href=\"user-109.html\"><img alt=\"\" height=\"70\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_109.jpg?dateline=1367581062\" width=\"47\"/></a>,\n",
" <a href=\"http://stockmarketpilipinas.com/user-109.html\">silverhand</a>,\n",
" <a href=\"reputation.php?uid=109\"><strong class=\"reputation_positive\">15</strong></a>,\n",
" <a href=\"thread-337-post-345056.html#pid345056\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,533</a>,\n",
" <a href=\"attachment.php?aid=3843\" target=\"_blank\" title=\"01-24-2019, 05:08 PM\">stockQuotes_01242019.csv</a>,\n",
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&amp;replyto=345056\" title=\"Quote this message in a reply\"><span>Reply</span></a>,\n",
" <a id=\"pid345079\" name=\"pid345079\"></a>,\n",
" <a href=\"user-268.html\"><img alt=\"\" height=\"47\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_268.jpg?dateline=1471506964\" width=\"70\"/></a>,\n",
" <a href=\"http://stockmarketpilipinas.com/user-268.html\"><span style=\"color: #41A317;\"><strong><em>Paul_G</em></strong></span></a>,\n",
" <a href=\"reputation.php?uid=268\"><strong class=\"reputation_positive\">79</strong></a>,\n",
" <a href=\"thread-337-post-345079.html#pid345079\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,534</a>,\n",
" <a href=\"attachment.php?aid=3844\" target=\"_blank\" title=\"Yesterday, 04:12 PM\">stockQuotes_01252019.csv</a>,\n",
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&amp;replyto=345079\" title=\"Quote this message in a reply\"><span>Reply</span></a>]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect every <a> element collected from the posts container\n",
"title"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment