Last active
December 7, 2018 03:07
-
-
Save baobao/8cec987ef083ae858f72272789b7f040 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""" | |
Script that fetches the favorites list shown on the "My Page" of http://uta.pw/sakusibbs/

1. Log in
2. Obtain the My Page URL
3. Parse the My Page HTML
""" | |
import requests | |
from bs4 import BeautifulSoup | |
from urllib.parse import urljoin | |
# User name and password for the site (replace the placeholders with real values).
USER = "ユーザーID"
PASS = "パスワード"

# Seconds to wait on each HTTP request; requests applies no timeout unless one
# is given, so without this a stalled server would hang the script forever.
TIMEOUT = 10

# NOTE: this endpoint is plain http, so the credentials are sent unencrypted.
url_login = "http://uta.pw/sakusibbs/users.php?action=login&m=try"

# Form fields posted to the login endpoint.
login_info = {
    "username_mmlbbs6": USER,
    "password_mmlbbs6": PASS,
    "back": "index.php",
    "mml_id": "0",
}

# One Session is reused for both requests so the cookies set by the login
# response are sent with the My Page request; the with-block closes the
# session's connections even when the script exits early.
with requests.Session() as session:
    res = session.post(url_login, data=login_info, timeout=TIMEOUT)
    # Raises requests.HTTPError on a 4xx/5xx response.
    res.raise_for_status()

    # Parse the page returned by the login request.
    soup = BeautifulSoup(res.text, "html.parser")

    # Logged-in check: the script relies on an ".islogin a" link being present
    # after a successful login; its href is used as the My Page URL.
    a = soup.select_one(".islogin a")
    if a is None or not a.get("href"):
        print("ログイン失敗")
        # Exit with a non-zero status so callers can detect the failure;
        # quit() is an interactive helper and would exit with status 0.
        raise SystemExit(1)

    # The href may be relative, so resolve it against the login URL.
    url_mypage = urljoin(url_login, a["href"])
    print("マイページURL : ", url_mypage)

    # Fetch the My Page HTML with the logged-in session.
    res = session.get(url_mypage, timeout=TIMEOUT)
    res.raise_for_status()

# Parse My Page and list every favorite as "- title > href".
soup = BeautifulSoup(res.text, "html.parser")
links = soup.select("#favlist li > a")
for link in links:
    href = link.get("href")
    if href is None:
        # An anchor without an href has nothing to report; skip it rather
        # than crash with KeyError.
        continue
    title = link.get_text()
    print("-", title, ">", href)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment