Skip to content

Instantly share code, notes, and snippets.

@cwchentw
Last active August 28, 2023 05:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save cwchentw/50b71fa9b7aab702af0003ec6002cc6e to your computer and use it in GitHub Desktop.
Save cwchentw/50b71fa9b7aab702af0003ec6002cc6e to your computer and use it in GitHub Desktop.
Taiwan Foreclosure Data Crawler as a Python script
#!/usr/bin/env python
##############################################################################
# fetchForeclosureData.py #
# #
# Requirements #
# #
# - Python 3 #
# - Selenium package for Python #
# - The web driver for Chrome #
# #
# Copyright (c) 2018-2020, Michael Chen; Apache 2.0 #
##############################################################################
import csv
import datetime
import os
import sys
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Query target: only options whose visible text contains these strings are
# picked from the county and district menus.
targetCounty = "新北市"
targetDistrict = "板橋區"

# First year of the query window, as text: the current year minus 1911, minus
# three more years.
# NOTE(review): 1911 is presumably the Gregorian -> Minguo (ROC) calendar
# offset used by the site's year menu -- confirm against the page.
now = datetime.datetime.now()
threeYearAgo = "{}".format(now.year - 1911 - 3)

# Directory that contains this script; used as the browser's download folder.
downloadPath = os.path.split(os.path.abspath(__file__))[0]
# Chrome preferences for this session:
#   (1) save downloads into `downloadPath` instead of the default folder;
#   (2) set the "popups" content setting to 1 (per the original author's note,
#       this is meant to disable popup blocking).
prefs = {
    "download.default_directory": downloadPath,
    "profile.default_content_settings": {"popups": 1},
}
options = webdriver.ChromeOptions()
options.add_experimental_option("prefs", prefs)

# Start a Chrome session that uses the preferences above.
driver = webdriver.Chrome(options=options)
_PAGE_URL = "https://pip.moi.gov.tw/V2/A/SCRA0601.aspx"


def _select_option(driver, select_name, wanted_text, settle_seconds, reclick_menu):
    """Click the first option of a <select> whose text contains *wanted_text*.

    Args:
        driver: The Selenium WebDriver controlling the page.
        select_name: Value of the ``name`` attribute of the <select> element.
        wanted_text: Substring looked for in each option's visible text.
        settle_seconds: Seconds to sleep after the option loop, so the page
            can refresh before the next step.
        reclick_menu: When true, click the <select> once more afterwards and
            sleep two seconds (the original script did this for the district
            and year menus, but not for the county menu).

    Returns:
        True if a matching option was clicked, False otherwise. A miss is
        reported on stderr and the script keeps going, as it always did, so
        the query then runs with whatever the page had preselected.
    """
    selector = 'select[name="{}"]'.format(select_name)

    # Open the menu.
    menu = driver.find_element(By.CSS_SELECTOR, selector)
    menu.click()
    time.sleep(2)  # Wait for the page to refresh.

    # The options are looked up only after the menu has been opened.
    found = False
    for option in driver.find_elements(By.CSS_SELECTOR, selector + " option"):
        if wanted_text in option.text:
            option.click()
            found = True
            break
    if not found:
        print(
            "Warning: no option containing %r in menu %s" % (wanted_text, select_name),
            file=sys.stderr,
        )
    time.sleep(settle_seconds)  # Wait for the page to refresh.

    if reclick_menu:
        menu.click()
        time.sleep(2)  # Wait for the page to refresh.
    return found


def _wait_for_file(path, timeout_seconds, poll_seconds=1):
    """Poll until *path* exists or *timeout_seconds* elapse; return whether it exists."""
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        if os.path.exists(path):
            return True
        time.sleep(poll_seconds)
    return os.path.exists(path)


# Everything that drives the browser runs inside try/finally so that the
# Chrome process is shut down even when a step raises (element not found,
# page layout changed, network error, ...).
try:
    # Go to the foreclosure house page.
    driver.get(_PAGE_URL)
    time.sleep(6)  # Wait for the page to refresh.

    # Trick to handle site redirecting: request the same page a second time.
    driver.get(_PAGE_URL)
    time.sleep(6)  # Wait for the page to refresh.

    # Select the target county.
    _select_option(
        driver,
        "ctl00$ContentPlaceHolder1$ddlCity",
        targetCounty,
        settle_seconds=4,
        reclick_menu=False,
    )

    # Select the target district, then click the district menu again.
    _select_option(
        driver,
        "ctl00$ContentPlaceHolder1$ddlTown",
        targetDistrict,
        settle_seconds=2,
        reclick_menu=True,
    )

    # Select the starting year, then click the year menu again to close it.
    _select_option(
        driver,
        "ctl00$ContentPlaceHolder1$ddlYear1",
        threeYearAgo,
        settle_seconds=2,
        reclick_menu=True,
    )

    # Submit our query.
    driver.find_element(
        By.CSS_SELECTOR, '[name="ctl00$ContentPlaceHolder1$btnQuery"]'
    ).click()
    time.sleep(4)  # Wait for the page to refresh.

    # Remove the old Excel file so the new download can take its name.
    excelPath = os.path.join(downloadPath, "%5b拍賣拍定價%5d查詢結果下載.xls")
    if os.path.exists(excelPath):
        os.remove(excelPath)

    # Download the new Excel file.
    driver.find_element(
        By.CSS_SELECTOR, "#ctl00_ContentPlaceHolder1_btnExportExcel"
    ).click()
    time.sleep(5)  # Same minimum wait as before.

    # On a slow connection five seconds may not be enough; keep waiting a
    # little longer for the file before the browser is closed.
    # NOTE(review): this assumes the download really is saved under the name
    # in excelPath (the same name the cleanup above relies on) -- confirm.
    if not _wait_for_file(excelPath, 25):
        print(
            "Warning: download not found at %s" % excelPath,
            file=sys.stderr,
        )
finally:
    # Close the browser.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment