Last active
June 24, 2019 04:50
-
-
Save umihico/7b395560a69f5b875a2803bd89364e9a to your computer and use it in GitHub Desktop.
佐川の支店一覧を出力するスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import sleep

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from umihico.xlsx import to_xlsx
# Single Selenium Chrome session shared by every page load in this script.
c = Chrome()
# Branch-list pages to scrape, one per `sub_b_id` value.  The ids are listed
# in the exact order the pages are visited, which is also the order their
# rows end up in the output sheet.
urls = [
    f"https://www2.sagawa-exp.co.jp/company/branch/list/?sub_b_id={sub_b_id}"
    for sub_b_id in (
        17, 15, 16, 14, 13, 12, 18, 11, 9,
        8, 10, 7, 6, 5, 4, 3, 2, 1,
    )
]
# Column titles of the output sheet.  Besides being written as the first row,
# the titles are used as lookup keys when a detail page is turned into a row
# (each scraped <th> text is matched against these strings), so they must not
# be altered.
col_header = [
    "url",
    "担当営業所",
    "営業所番号",
    "住所",
    "集荷専用電話番号",
    "お問い合わせ電話番号",
    "FAX番号",
    "お持ち込み締切時間(当日発送分)",
    "飛脚クール便お持ち込み締切時間(当日発送分)",
    "荷物の受け取り可能時間(営業所受取)",
]

# Accumulates every sheet row; the header row goes first.
rows = [col_header]
# Pass 1: scrape the branch-list pages.  Every <dl> inside the main section
# is turned into one row: [url, <dt> span texts..., <dd> list-item texts...].
for url in urls:
    c.get(url)
    sleep(1)  # fixed pause after each page load before querying the DOM
    # `find_elements(By.XPATH, ...)` replaces `find_elements_by_xpath`, which
    # was removed in Selenium 4.3; the generic form also works on Selenium 3.
    for elm_office in c.find_elements(
        By.XPATH, "//div[@id='main']/section[@class='section']/dl"
    ):
        row = [url]
        for elm in elm_office.find_elements(By.XPATH, "./dt//span"):
            text = elm.text  # read once; each `.text` access queries the browser
            print(text)
            row.append(text)
        # Only one <dt> span was found: pad with an empty cell so the <dd>
        # values below still land in the same columns as for other offices.
        if len(row) == 2:
            row.append("")
        for elm in elm_office.find_elements(By.XPATH, "./dd//li"):
            text = elm.text
            print(text)
            row.append(text)
        # The page embeds the field label in the cell text; drop the labels
        # so only the values remain.
        row = [
            s.replace("営業所番号:", "")
            .replace("集荷専用電話番号:", "")
            .replace("お問い合わせ電話番号:", "")
            .replace("FAX番号:", "")
            for s in row
        ]
        rows.append(row)
# Pass 2: walk the search pages for p_id 1..47 (presumably one per prefecture
# of Japan - confirm against the site), follow every link in the result
# table, and read each detail page's <th>/<td> table into a row.
for pref_id in range(1, 48):
    pref_url = f"https://www2.sagawa-exp.co.jp/send/branch_search/tanto/select/?p_id={pref_id}"
    c.get(pref_url)
    sleep(1)  # fixed pause after each page load before querying the DOM
    # Read all hrefs before navigating: the loop below loads other pages in
    # this same browser session.  Kept in its own name so the module-level
    # `urls` list is not overwritten.
    office_urls = [
        anchor.get_attribute("href")
        for anchor in c.find_elements(By.XPATH, "//table[@class='addListTable']//a")
    ]
    for url in office_urls:
        c.get(url)
        sleep(1)
        # Map each table heading to its value, e.g. {"住所": "..."}.
        row_dict = {"url": url}
        for tr in c.find_elements(By.XPATH, "//table[@class='table_form01']//tr"):
            heading = tr.find_element(By.XPATH, "./th").text
            row_dict[heading] = tr.find_element(By.XPATH, "./td").text
        # Order the values by the sheet header; headings the page does not
        # provide become empty cells, headings not in the header are dropped.
        row = [row_dict.get(h, "") for h in col_header]
        print(row)
        rows.append(row)
# Write all collected rows (header first) to the workbook, then shut the
# browser down.  `quit()` runs even if the export fails so that no Chrome
# session is left running after the script ends.
try:
    to_xlsx('sagawas.xlsx', rows)
finally:
    c.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment