Skip to content

Instantly share code, notes, and snippets.

@umihico
Last active June 24, 2019 04:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save umihico/7b395560a69f5b875a2803bd89364e9a to your computer and use it in GitHub Desktop.
Save umihico/7b395560a69f5b875a2803bd89364e9a to your computer and use it in GitHub Desktop.
佐川の支店一覧を出力するスクリプト
from selenium.webdriver import Chrome
from umihico.xlsx import to_xlsx
from time import sleep
# Start a Chrome session through Selenium. `c` is the single driver object
# that every page load and element lookup in this script goes through.
c=Chrome()
# Branch-list pages to scrape, one per ``sub_b_id`` query value.
# The ids are listed in the exact order in which the pages are visited.
urls = [
    f"https://www2.sagawa-exp.co.jp/company/branch/list/?sub_b_id={sub_b_id}"
    for sub_b_id in (17, 15, 16, 14, 13, 12, 18, 11, 9, 8, 10, 7, 6, 5, 4, 3, 2, 1)
]
# Column titles of the output sheet, in output order. The per-office detail
# pages are matched against these titles (table header text -> column), so
# the strings must stay exactly as the site prints them.
col_header = [
    "url",  # page the row was scraped from
    "担当営業所",  # responsible sales office
    "営業所番号",  # office number
    "住所",  # address
    "集荷専用電話番号",  # pickup-only phone number
    "お問い合わせ電話番号",  # inquiry phone number
    "FAX番号",  # fax number
    "お持ち込み締切時間(当日発送分)",  # drop-off cutoff (same-day shipping)
    "飛脚クール便お持ち込み締切時間(当日発送分)",  # cool-parcel drop-off cutoff
    "荷物の受け取り可能時間(営業所受取)",  # pickup hours at the office
]

# Accumulator for the whole sheet; the header list itself is the first row.
rows = []
rows.append(col_header)
# Pass 1: scrape the branch-list pages. Every <dl> under the main section is
# treated as one office and becomes one output row:
#   [url, <dt> span texts..., <dd> list-item texts...]
#
# Element lookups use ``find_elements("xpath", ...)`` ("xpath" is the value of
# ``By.XPATH``). The ``find_elements_by_xpath`` shorthand was removed in
# Selenium 4.3, while this form works on both Selenium 3 and 4.
for url in urls:
    c.get(url)
    # Fixed pause after navigation -- presumably to let the page finish
    # rendering and to avoid hammering the server (TODO confirm).
    sleep(1)
    offices = c.find_elements(
        "xpath", "//div[@id='main']/section[@class='section']/dl"
    )
    for elm_office in offices:
        row = [url]
        for elm in elm_office.find_elements("xpath", "./dt//span"):
            print(elm.text)
            row.append(elm.text)
        if len(row) == 2:
            # Only one <span> was found in the <dt>; insert an empty cell so
            # the <dd> values that follow stay in the same columns as for
            # offices that have two spans.
            row.append('')
        for elm in elm_office.find_elements("xpath", "./dd//li"):
            print(elm.text)
            row.append(elm.text)
        # Drop the field labels that the page prints in front of the values,
        # leaving only the values themselves in the cells.
        row = [
            s.replace("営業所番号:", "")
            .replace("集荷専用電話番号:", "")
            .replace("お問い合わせ電話番号:", "")
            .replace("FAX番号:", "")
            for s in row
        ]
        rows.append(row)
# Pass 2: walk the per-area search pages (p_id 1..47 -- presumably one per
# prefecture, TODO confirm), follow every office link found there, and read
# the office's detail table into one output row.
#
# Element lookups use ``find_element(s)("xpath", ...)`` ("xpath" is the value
# of ``By.XPATH``). The ``find_element(s)_by_xpath`` shorthands were removed
# in Selenium 4.3, while this form works on both Selenium 3 and 4.
for pref_id in range(1, 48):
    pref_url = f"https://www2.sagawa-exp.co.jp/send/branch_search/tanto/select/?p_id={pref_id}"
    c.get(pref_url)
    sleep(1)
    # Extract the hrefs as plain strings before navigating: the loop below
    # loads other pages in the same browser, so the link elements themselves
    # cannot be used afterwards. A separate name keeps the module-level
    # ``urls`` list intact.
    office_urls = [
        e.get_attribute('href')
        for e in c.find_elements("xpath", "//table[@class='addListTable']//a")
    ]
    for url in office_urls:
        c.get(url)
        sleep(1)
        # Map each table row's header cell text to its data cell text.
        row_dict = {"url": url}
        for tr in c.find_elements("xpath", "//table[@class='table_form01']//tr"):
            label = tr.find_element("xpath", './th').text
            row_dict[label] = tr.find_element("xpath", './td').text
        # Project onto the sheet's columns; fields the page does not show
        # become empty cells, and labels not in col_header are dropped.
        row = [row_dict.get(h, "") for h in col_header]
        print(row)
        rows.append(row)
# Hand all collected rows (header first) to ``to_xlsx`` -- presumably it
# writes them to the named .xlsx workbook (helper from ``umihico.xlsx``, not
# visible here). The browser is shut down afterwards even if the export
# fails, so no Chrome/chromedriver process is left running.
try:
    to_xlsx('sagawas.xlsx', rows)
finally:
    c.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment