Skip to content

Instantly share code, notes, and snippets.

@definev
Created August 9, 2021 10:04
Show Gist options
  • Save definev/8753de0ebc078d33791caae279730bea to your computer and use it in GitHub Desktop.
Save definev/8753de0ebc078d33791caae279730bea to your computer and use it in GitHub Desktop.
Crawl dữ liệu thi cấp 3 Bắc Ninh 2020
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
def getSBD(sbd):
    """Return the lookup id for candidate number *sbd*.

    The id is the literal prefix ``"02"`` followed by the candidate number
    zero-padded to at least four digits; numbers with more than four digits
    are appended unpadded.

    Args:
        sbd: Candidate sequence number, expected to be a non-negative int.

    Returns:
        The id string, e.g. ``getSBD(7) == "020007"``.
    """
    # A single format spec replaces the former if/elif ladder on the
    # magnitude of ``sbd``; the output is the same for every non-negative int.
    return f"02{sbd:04d}"
# Crawl script: for each candidate number, submit the id on the lookup page
# and copy the cells of the returned table row into diem-thi.csv.

CANDIDATE_COUNT = 30  # candidate numbers 1..CANDIDATE_COUNT are looked up

SEARCH_INPUT_SELECTOR = "#form15 > input.search_init.text_filter.form-control.wp-100"
SEARCH_BUTTON_SELECTOR = '#form15 > button'
RESULT_CELL_SELECTOR = (
    '#module1015 > div.table-responsive > table > tbody > tr > td:nth-child({column})'
)


def _read_cell(driver, column, attempts=2):
    """Return the text of result cell *column* (1-based), or None on failure.

    The element is looked up again on every attempt, so a handle that went
    stale between lookup and read is replaced by a fresh one.

    Args:
        driver: The Selenium WebDriver showing the result table.
        column: 1-based index used in the ``td:nth-child(...)`` selector.
        attempts: How many lookup-and-read attempts to make.

    Returns:
        The cell text, or None if every attempt raised a WebDriverException.
    """
    selector = RESULT_CELL_SELECTOR.format(column=column)
    for _ in range(attempts):
        try:
            return driver.find_element(By.CSS_SELECTOR, selector).text
        except WebDriverException:
            continue
    return None


chrome_options = Options()
chrome_options.add_argument("--incognito")
# NOTE(review): Chrome's --window-size switch is documented as "width,height";
# the "1920x1080" form is kept unchanged here - confirm it has the intended effect.
chrome_options.add_argument("--window-size=1920x1080")
# NOTE(review): executable_path is kept for compatibility with the Selenium
# release this script was written against; recent releases expect a Service
# object instead - confirm against the installed version.
driver = webdriver.Chrome(options=chrome_options,
                          executable_path="D:/chromedriver_win32/chromedriver.exe")
url = "http://bacninh.edu.vn/tra-cuu/bang-diem-final"
header = ['Số báo danh', 'Họ và tên', 'Ngày sinh', 'Trường', 'Điểm Khuyến khích', 'Ngữ Văn', 'Tiếng Anh', 'Toán Tự Luận',
          'Toán Trắc Nghiệm', 'Tổng toán', 'Môn Chuyên', 'Tổng Chuyên', 'Tổng đại trà', 'Đỗ Chuyên', 'Đỗ đại trà', 'Trượt']

try:
    # utf-8-sig prepends a BOM to the file; newline='' is what the csv module
    # asks for on files handed to csv.writer.  The with-block closes the file
    # even when the crawl aborts part-way.
    with open('diem-thi.csv', mode='w',
              encoding='utf-8-sig', newline='') as employee_file:
        employee_writer = csv.writer(employee_file)
        employee_writer.writerow(header)

        driver.get(url)
        time.sleep(1)  # fixed pause after loading the page

        for candidate_no in range(1, CANDIDATE_COUNT + 1):
            candidate_id = getSBD(candidate_no)

            # Clear the freshly located input *before* typing rather than
            # clearing the old handle after the submit, which may be stale
            # once the page has updated.
            sbd_field = driver.find_element(By.CSS_SELECTOR, SEARCH_INPUT_SELECTOR)
            sbd_field.clear()
            sbd_field.send_keys(candidate_id)
            driver.find_element(By.CSS_SELECTOR, SEARCH_BUTTON_SELECTOR).click()

            # One table cell per CSV column, in header order.
            row = []
            for column in range(1, len(header) + 1):
                cell_text = _read_cell(driver, column)
                if cell_text is None:
                    # Keep the row aligned with the header and report the gap.
                    cell_text = ""
                    print(f"error at {candidate_id} with field {column}")
                row.append(cell_text)
            employee_writer.writerow(row)
finally:
    # Always shut the browser down, including after an error.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment