Created
January 26, 2019 12:54
-
-
Save rs6000/9c66f5d6575ca7f285adc552e2e8132b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-01-26T12:45:39.087055Z", | |
"start_time": "2019-01-26T12:45:38.889038Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests, re, os, csv, wget, time\n", | |
"\n", | |
"from bs4 import BeautifulSoup\n", | |
"base_url = \"http://stockmarketpilipinas.com/\"\n", | |
"url='http://stockmarketpilipinas.com/thread-337.html'\n", | |
"url2='http://stockmarketpilipinas.com/thread-337-page-454.html'\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-01-26T12:45:40.187889Z", | |
"start_time": "2019-01-26T12:45:39.945738Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"檔案名稱:stockquotes_01252019.csv\n", | |
"檔案連結:http://stockmarketpilipinas.com/attachment.php?aid=3844\n" | |
] | |
} | |
], | |
"source": [ | |
"page_html = requests.get(url2)\n", | |
"page_soup = BeautifulSoup(page_html.text, 'lxml')\n", | |
"\n", | |
"#抓在附件檔裡的csv\n", | |
"title = page_soup.find('div', {'id': 'posts'}).find_all('fieldset')\n", | |
"for i in title:\n", | |
" #取得檔名 + 轉成小寫\n", | |
" f_name = i.find('a').text.lower()\n", | |
" # 取得檔案連結\n", | |
" f_href = base_url+i.find('a')['href']\n", | |
"print(\"檔案名稱:{}\\n檔案連結:{}\".format(f_name,f_href))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-01-26T12:45:41.348034Z", | |
"start_time": "2019-01-26T12:45:41.326947Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<fieldset>\n", | |
" <legend><strong>Attached Files</strong></legend>\n", | |
" <!-- start: postbit_attachments_attachment -->\n", | |
" <br/><!-- start: attachment_icon -->\n", | |
" <img alt=\".csv\" border=\"0\" src=\"http://stockmarketpilipinas.com/images/attachtypes/xls.gif\" title=\"\"/>\n", | |
" <!-- end: attachment_icon --> <a href=\"attachment.php?aid=3844\" target=\"_blank\" title=\"Yesterday, 04:12 PM\">stockQuotes_01252019.csv</a> (Size: 11.79 KB / Downloads: 221)\n", | |
" <!-- end: postbit_attachments_attachment -->\n", | |
" </fieldset>]" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"title" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-01-26T12:46:52.443659Z", | |
"start_time": "2019-01-26T12:46:52.185881Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"檔案名稱:stockquotes_01242019.csv\n", | |
"檔案連結:http://stockmarketpilipinas.com/attachment.php?aid=3843\n", | |
"檔案名稱:stockquotes_01252019.csv\n", | |
"檔案連結:http://stockmarketpilipinas.com/attachment.php?aid=3844\n" | |
] | |
} | |
], | |
"source": [ | |
"page_html = requests.get(url2)\n", | |
"page_soup = BeautifulSoup(page_html.text, 'lxml')\n", | |
"\n", | |
"title = page_soup.find('div', {'id': 'posts'}).find_all('a')\n", | |
"for i in title:\n", | |
" if i.text.endswith(\".csv\"):\n", | |
" #取得檔名 + 轉成小寫\n", | |
" f_name = i.text.lower()\n", | |
" # 取得檔案連結\n", | |
" f_href = base_url+i['href']\n", | |
" print(\"檔案名稱:{}\\n檔案連結:{}\".format(f_name,f_href))\n", | |
" \n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-01-26T12:46:10.078107Z", | |
"start_time": "2019-01-26T12:46:10.064967Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[<a id=\"pid345054\" name=\"pid345054\"></a>,\n", | |
" <a href=\"user-1.html\"><img alt=\"\" height=\"59\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_1.gif?dateline=1422259061\" width=\"70\"/></a>,\n", | |
" <a href=\"http://stockmarketpilipinas.com/user-1.html\"><span style=\"color: orange;\"><strong><em>Ollie</em></strong></span></a>,\n", | |
" <a href=\"reputation.php?uid=1\"><strong class=\"reputation_positive\">198</strong></a>,\n", | |
" <a href=\"thread-337-post-345054.html#pid345054\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,531</a>,\n", | |
" <a class=\"mycode_url\" href=\"https://pixiutrades.wordpress.com/2018/12/20/focus-an-intensive-4-week-stock-market-trading-workshop/\" rel=\"noopener\" target=\"_blank\">https://pixiutrades.wordpress.com/2018/1...-workshop/</a>,\n", | |
" <a class=\"postbit_website\" href=\"http://www.stockmarketpilipinas.com\" rel=\"noopener\" target=\"_blank\" title=\"Visit this user's website\"><span>Website</span></a>,\n", | |
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&replyto=345054\" title=\"Quote this message in a reply\"><span>Reply</span></a>,\n", | |
" <a id=\"pid345055\" name=\"pid345055\"></a>,\n", | |
" <a href=\"user-1.html\"><img alt=\"\" height=\"59\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_1.gif?dateline=1422259061\" width=\"70\"/></a>,\n", | |
" <a href=\"http://stockmarketpilipinas.com/user-1.html\"><span style=\"color: orange;\"><strong><em>Ollie</em></strong></span></a>,\n", | |
" <a href=\"reputation.php?uid=1\"><strong class=\"reputation_positive\">198</strong></a>,\n", | |
" <a href=\"thread-337-post-345055.html#pid345055\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,532</a>,\n", | |
" <a class=\"mycode_url\" href=\"https://pixiutrades.wordpress.com/2018/12/20/focus-an-intensive-4-week-stock-market-trading-workshop/\" rel=\"noopener\" target=\"_blank\">https://pixiutrades.wordpress.com/2018/1...-workshop/</a>,\n", | |
" <a class=\"postbit_website\" href=\"http://www.stockmarketpilipinas.com\" rel=\"noopener\" target=\"_blank\" title=\"Visit this user's website\"><span>Website</span></a>,\n", | |
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&replyto=345055\" title=\"Quote this message in a reply\"><span>Reply</span></a>,\n", | |
" <a id=\"pid345056\" name=\"pid345056\"></a>,\n", | |
" <a href=\"user-109.html\"><img alt=\"\" height=\"70\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_109.jpg?dateline=1367581062\" width=\"47\"/></a>,\n", | |
" <a href=\"http://stockmarketpilipinas.com/user-109.html\">silverhand</a>,\n", | |
" <a href=\"reputation.php?uid=109\"><strong class=\"reputation_positive\">15</strong></a>,\n", | |
" <a href=\"thread-337-post-345056.html#pid345056\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,533</a>,\n", | |
" <a href=\"attachment.php?aid=3843\" target=\"_blank\" title=\"01-24-2019, 05:08 PM\">stockQuotes_01242019.csv</a>,\n", | |
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&replyto=345056\" title=\"Quote this message in a reply\"><span>Reply</span></a>,\n", | |
" <a id=\"pid345079\" name=\"pid345079\"></a>,\n", | |
" <a href=\"user-268.html\"><img alt=\"\" height=\"47\" src=\"http://stockmarketpilipinas.com/uploads/avatars/avatar_268.jpg?dateline=1471506964\" width=\"70\"/></a>,\n", | |
" <a href=\"http://stockmarketpilipinas.com/user-268.html\"><span style=\"color: #41A317;\"><strong><em>Paul_G</em></strong></span></a>,\n", | |
" <a href=\"reputation.php?uid=268\"><strong class=\"reputation_positive\">79</strong></a>,\n", | |
" <a href=\"thread-337-post-345079.html#pid345079\" title=\"RE: CSV Daily Quotation Report by Silime\">#4,534</a>,\n", | |
" <a href=\"attachment.php?aid=3844\" target=\"_blank\" title=\"Yesterday, 04:12 PM\">stockQuotes_01252019.csv</a>,\n", | |
" <a class=\"postbit_quote\" href=\"newreply.php?tid=337&replyto=345079\" title=\"Quote this message in a reply\"><span>Reply</span></a>]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"title" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment