Last active
April 23, 2019 12:18
-
-
Save takemikami/4b462b66fc009f5301d41036d934f92b to your computer and use it in GitHub Desktop.
技術書典: 頒布物支払い状況 をCSVにする使い捨てスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Techbookfest (技術書典): throwaway script that converts the
# "distribution payment status" (頒布物支払い状況) page to CSV.
# It converted my own data to CSV, but I do not know what other page
# patterns exist, so it may or may not work for yours.
#
# 1. Open the distribution payment status page in Chrome
# 2. File -> Save Page As
# 3. Rename the saved html to order.html
# 4. pip install beautifulsoup4
# 5. pip install html5lib
# 6. Put this file in the same directory as order.html
# 7. python parse_techbookfest_orders.py > orders.csv
import csv
import sys

from bs4 import BeautifulSoup
# (output key, page label) pairs for the order-level fields that are read
# from the first <mat-list-item> of every order.
ORDER_FIELDS = (
    ("status", "ステータス:"),
    ("order_datetime", "作成日時:"),
    ("cancel_limit", "キャンセル期限:"),
)


def field_value(text, label):
    """Return the value that follows *label* in *text*, or None.

    Surrounding whitespace is ignored. The value is cut at ``len(label)``
    rather than at a hard-coded offset, so no character of the value is
    lost when the label is not followed by a space.
    """
    text = text.strip()
    if not text.startswith(label):
        return None
    return text[len(label):].strip()


def split_quantity_price(text):
    """Split an item line at the first "¥" into (quantity, price).

    Both parts are stripped. When *text* contains no "¥" the price is an
    empty string instead of raising IndexError.
    """
    quantity, _, price = text.partition("¥")
    return quantity.strip(), price.strip()


def main(path="./order.html", out=None):
    """Convert the saved order page at *path* into CSV rows written to *out*.

    One row is written per ordered item:
    index, status, order_datetime, cancel_limit, title, quantity, price.
    *out* defaults to ``sys.stdout`` so the script can still be used as
    ``python parse_techbookfest_orders.py > orders.csv``.
    """
    if out is None:
        out = sys.stdout

    # NOTE(review): the saved page is assumed to be UTF-8; an explicit
    # encoding avoids depending on the platform default.
    with open(path, mode="r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html5lib")

    # csv.writer quotes fields containing commas or quotes (e.g. a price
    # such as "1,000"), which a plain ','.join() would split into columns.
    writer = csv.writer(out, lineterminator="\n")

    # Loop over orders; the index is the 1-based position of the order.
    for index, mat in enumerate(soup.find_all("mat-list"), start=1):
        list_items = mat.find_all("mat-list-item")
        if not list_items:
            # Nothing to read from an empty <mat-list>.
            continue

        # The first list item carries the order-level fields.
        order = {}
        for paragraph in list_items[0].find_all("p"):
            for key, label in ORDER_FIELDS:
                value = field_value(paragraph.text, label)
                if value is not None:
                    order[key] = value

        # Loop over the detail items (every list item after the first).
        for list_item in list_items[1:]:
            # attrs must be a dict; the previous set literal
            # {"class", "mat-list-text"} only matched by accident.
            text_divs = list_item.find_all(
                "div", attrs={"class": "mat-list-text"}
            )
            for text_div in text_divs:
                quantity, price = split_quantity_price(text_div.find("p").text)
                writer.writerow([
                    index,
                    order.get("status", ""),
                    order.get("order_datetime", ""),
                    order.get("cancel_limit", ""),
                    text_div.find("h4").text,
                    quantity,
                    price,
                ])


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment