# takemikami/parse_techbookfest_orders.py

## parse_techbookfest_orders.py
# 技術書典: 頒布物支払い状況 をCSVにする使い捨てスクリプト
#  わたしの場合はこれでCSV変換できましたが、
#  どんなパターンがあるか分からないので、変換できるかは分からないです
#
# 1. 頒布物支払い状況をChromeで開く
# 2. ファイル→ページを別名で保存
# 3. 保存したhtmlをorder.htmlに名称変更
# 4. pip install beautifulsoup4
# 5. pip install html5lib
# 6. このファイルをorder.htmlと同じディレクトリに置く
# 7. python parse_techbookfest_orders.py > orders.csv

import csv
import io
import sys

from bs4 import BeautifulSoup

# Key of each order-level field paired with the label text that introduces it
# in the paragraphs of an order's first <mat-list-item>.
ORDER_FIELD_LABELS = (
    ("status", "ステータス:"),
    ("order_datetime", "作成日時:"),
    ("cancel_limit", "キャンセル期限:"),
)


def extract_order_header(texts):
    """Collect the order-level fields from the header paragraph texts.

    Args:
        texts: Iterable of paragraph strings taken from an order's first
            ``mat-list-item``.

    Returns:
        A dict with the keys ``status``, ``order_datetime`` and
        ``cancel_limit``. A field whose label never appears is an empty
        string, so one incomplete order does not abort the whole export.
    """
    header = {key: "" for key, _ in ORDER_FIELD_LABELS}
    for text in texts:
        for key, label in ORDER_FIELD_LABELS:
            if text.startswith(label):
                # Slice by the label's real length rather than a hard-coded
                # offset, so no character of the value is ever dropped.
                header[key] = text[len(label):].strip()
    return header


def split_quantity_price(text):
    """Split an item summary such as ``"2 ￥1000"`` into its two parts.

    Args:
        text: Text of the item's summary paragraph.

    Returns:
        A ``(quantity, price)`` tuple of stripped strings. ``price`` is an
        empty string when the text contains no ``￥`` separator.
    """
    parts = text.split("￥")
    quantity = parts[0].strip()
    price = parts[1].strip() if len(parts) > 1 else ""
    return quantity, price


def format_csv_row(fields):
    """Return *fields* as one CSV line terminated by a newline.

    Fields containing a comma, a quote or a line break are quoted by the
    ``csv`` module, so titles and prices like ``1,000`` stay in one column.
    """
    buffer = io.StringIO()
    csv.writer(buffer, lineterminator="\n").writerow(fields)
    return buffer.getvalue()


def iter_order_rows(soup):
    """Yield one row per purchased item found in the parsed page.

    Args:
        soup: Parsed document exposing Beautiful Soup's ``find_all``.

    Yields:
        Lists of seven strings: order number, status, order datetime,
        cancel limit, item title, quantity and price.
    """
    # Loop over orders; every <mat-list> gets a 1-based number in document
    # order, even when it contributes no rows.
    for index, order_node in enumerate(soup.find_all("mat-list"), start=1):
        list_items = order_node.find_all("mat-list-item")
        if not list_items:
            continue

        # The first list item carries the order-level fields.
        header = extract_order_header(
            p.text for p in list_items[0].find_all("p")
        )

        # Every following list item describes purchased items.
        for list_item in list_items[1:]:
            for text_block in list_item.find_all(
                "div", attrs={"class": "mat-list-text"}
            ):
                title_tag = text_block.find("h4")
                summary_tag = text_block.find("p")
                quantity, price = split_quantity_price(
                    summary_tag.text if summary_tag is not None else ""
                )
                yield [
                    str(index),
                    header["status"],
                    header["order_datetime"],
                    header["cancel_limit"],
                    title_tag.text if title_tag is not None else "",
                    quantity,
                    price,
                ]


def main():
    """Read ``./order.html`` and print its orders to stdout as CSV."""
    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the page saved from the browser is UTF-8 — confirm.
    with open("./order.html", mode="r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html5lib")

    for row in iter_order_rows(soup):
        print(format_csv_row(row), end="")


if __name__ == "__main__":
    main()
	# 技術書典: 頒布物支払い状況をCSVにする使い捨てスクリプト
	# わたしの場合はこれでCSV変換できましたが、
	# どんなパターンがあるか分からないので、変換できるかは分からないです
	#
	# 1. 頒布物支払い状況をChromeで開く
	# 2. ファイル→ページを別名で保存
	# 3. 保存したhtmlをorder.htmlに名称変更
	# 4. pip install beautifulsoup4
	# 5. pip install html5lib
	# 6. このファイルをorder.htmlと同じディレクトリに置く
	# 7. python parse_techbookfest_orders.py > orders.csv

	from bs4 import BeautifulSoup

	# Decode explicitly instead of relying on the platform default encoding.
	# NOTE(review): assumes the page saved from the browser is UTF-8 — confirm.
	with open("./order.html", mode="r", encoding="utf-8") as f:
		soup = BeautifulSoup(f.read(), "html5lib")

	# The csv writer quotes fields on demand, so a comma inside a title or a
	# price such as "1,000" stays in a single column.
	writer = csv.writer(sys.stdout, lineterminator="\n")

	# Label text -> key of the order-level field that the label introduces.
	labels = {
		"ステータス:": "status",
		"作成日時:": "order_datetime",
		"キャンセル期限:": "cancel_limit",
	}

	# Loop over orders; every <mat-list> is numbered from 1 in document order.
	for idx, mat in enumerate(soup.find_all("mat-list"), start=1):
		dtl_item = mat.find_all("mat-list-item")
		if not dtl_item:
			continue

		# Fields whose label never appears stay empty instead of raising.
		order = {
			"index": idx,
			"status": "",
			"order_datetime": "",
			"cancel_limit": "",
		}
		# The first list item carries the order-level fields.
		for dtls in dtl_item[0].find_all("p"):
			for label, key in labels.items():
				if dtls.text.startswith(label):
					# Slice by the label's real length so that no character
					# of the value is dropped.
					order[key] = dtls.text[len(label):].strip()

		# Loop over details; every following list item holds purchased items.
		for item in dtl_item[1:]:
			for block in item.find_all("div", attrs={"class": "mat-list-text"}):
				heading = block.find("h4")
				summary = block.find("p")
				parts = (summary.text if summary is not None else "").split("￥")
				writer.writerow([
					str(order["index"]),
					order["status"],
					order["order_datetime"],
					order["cancel_limit"],
					heading.text if heading is not None else "",
					parts[0].strip(),
					# The price is empty when the summary has no "￥" separator.
					parts[1].strip() if len(parts) > 1 else "",
				])