Last active
June 9, 2019 16:24
-
-
Save rinjugatla/c36d1fcba3a6d18756e3ecd48c72f4c0 to your computer and use it in GitHub Desktop.
freemlのメール保存用コード ご利用は自己責任で https://rinatl.blog.fc2.com/blog-entry-793.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------
# Reference:
#   https://qiita.com/shunyooo/items/36af8bcb501baf8c7014
#   "[Python3] Scraping a site that requires login (requests, BeautifulSoup)"
# ---------------------------------------------------------------
# Settings that must be edited before running the script.

# Login credentials: e-mail address and password.
mail = "hoge@hoge.com"
password = "hogehoge"

# Address of the mailing list; the message number is appended to this URL.
mailBaseUrl = "https://www.freeml.com/hogehoge/"

# Number of the last message on the mailing list
# (can be checked at https://www.freeml.com/hogehoge/list).
mailLastNumber = 100

# Lower and upper bound, in seconds, of the random wait between fetches.
randomMin, randomMax = 10, 20
# ---------------------------------------------------------------
import requests | |
from bs4 import BeautifulSoup | |
from urllib.parse import urljoin | |
from pprint import pprint | |
import random | |
import time | |
def ReplacEescapeChar(text):
    """Normalize line endings and strip tabs from *text*.

    Every CRLF sequence is turned into a single LF and every tab character
    is removed. The cleaned string is returned; *text* itself is unchanged.
    """
    unix_newlines = "\n".join(text.split("\r\n"))
    return "".join(unix_newlines.split("\t"))
# Log in once and keep the session object; the message pages are fetched
# through the same session afterwards.
# requests.Session() is used directly because the lowercase
# requests.session() factory is a deprecated backwards-compatibility alias.
session = requests.Session()

# Form fields posted to the login endpoint.
login_info = {
    "email": mail,
    "password": password,
    "save_password": "save_password",
}
url_login = "https://www.freeml.com/ep.umzx/grid/General/node/SpLoginProcess"

res = session.post(url_login, data=login_info)
# Abort right away on an HTTP error status (4xx/5xx).
# NOTE(review): a rejected login may still be answered with HTTP 200, which
# this check would not detect - confirm against the site's behavior.
res.raise_for_status()
# CSS selectors for a message page. selectorMailBase is the common prefix
# (it ends with " > "); each of the other selectors is appended to it.
selectorMailBase = "#main_column > div > div.main_box2 > div.table_bg > table > "
selectorMailSubject = "tr:nth-of-type(1) > td > p > span"
selectorMailFromNoneName = "tr:nth-of-type(2) > td"
selecotrMailFromName = "tr:nth-of-type(2) > td > a"
selecotrMailSendTime = "tr:nth-of-type(3) > td"
selectorMailBody = "tr:nth-of-type(5) > td > div.mlc_text_area1 > p"


def _node_text(node, default=""):
    """Return ``node.text``, or *default* when the selector matched nothing."""
    return node.text if node is not None else default


# Scraped messages, keyed by message number.
mails = {}
# NOTE(review): numbering starts at 0 here - confirm whether the list
# actually has a message 0 or whether the first request is wasted.
for mailNumber in range(mailLastNumber + 1):
    mailUrl = mailBaseUrl + str(mailNumber)
    response = session.get(mailUrl)
    if response.status_code == 200:
        if mailNumber % 50 == 0:
            # Progress indicator.
            print(mailNumber)
        # Name the parser explicitly: without it BeautifulSoup picks
        # whichever parser happens to be installed (and warns), so the
        # parsed tree could differ from machine to machine.
        soup = BeautifulSoup(response.text, "html.parser")
        mailSubject = soup.select_one(selectorMailBase + selectorMailSubject)
        mailFromNoneName = soup.select_one(selectorMailBase + selectorMailFromNoneName)
        mailFromName = soup.select_one(selectorMailBase + selecotrMailFromName)
        mailSendTime = soup.select_one(selectorMailBase + selecotrMailSendTime)
        mailBody = soup.select_one(selectorMailBase + selectorMailBody)
        # A page without a subject element is not stored.
        if mailSubject is not None:
            # Prefer the sender link; fall back to the plain table cell.
            sender = mailFromName if mailFromName is not None else mailFromNoneName
            # Missing elements become "" instead of raising AttributeError,
            # so one unusual page cannot discard everything scraped so far.
            mails[mailNumber] = {
                "subject": ReplacEescapeChar(mailSubject.text),
                "name": ReplacEescapeChar(_node_text(sender)),
                "time": _node_text(mailSendTime),
                "number": mailNumber,
                "body": ReplacEescapeChar(_node_text(mailBody)),
            }
    # Random wait between requests.
    # NOTE(review): indentation was lost in the source; the wait is assumed
    # to apply after every request, whatever its status - confirm.
    time.sleep(random.randint(randomMin, randomMax))
# Save: write all collected messages to mails.json as UTF-8 JSON, keeping
# non-ASCII characters unescaped.
import json

with open("mails.json", "w", encoding="utf-8") as outputFile:
    outputFile.write(json.dumps(mails, ensure_ascii=False))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment