Created
August 9, 2021 10:04
-
-
Save definev/8753de0ebc078d33791caae279730bea to your computer and use it in GitHub Desktop.
Crawl dữ liệu thi cấp 3 Bắc Ninh 2020
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
def getSBD(sbd):
    """Return the lookup ID string for candidate number *sbd*.

    The ID is the literal prefix ``"02"`` followed by the candidate number
    zero-padded to at least four digits, e.g. ``7 -> "020007"`` and
    ``1234 -> "021234"``.  Numbers wider than four digits are not truncated.

    Args:
        sbd: Non-negative integer candidate number.

    Returns:
        The padded ID string typed into the search form.
    """
    # The format spec replaces the hand-written <10 / <100 / <1000 ladder.
    return f"02{sbd:04d}"
# --- Browser configuration -------------------------------------------------
chrome_options = Options()
chrome_options.add_argument("--incognito")
# Chrome parses this switch as "width,height"; the "1920x1080" spelling is
# not recognised, so the requested size was never applied.
chrome_options.add_argument("--window-size=1920,1080")

url = "http://bacninh.edu.vn/tra-cuu/bang-diem-final"
header = ['Số báo danh', 'Họ và tên', 'Ngày sinh', 'Trường', 'Điểm Khuyến khích', 'Ngữ Văn', 'Tiếng Anh', 'Toán Tự Luận',
          'Toán Trắc Nghiệm', 'Tổng toán', 'Môn Chuyên', 'Tổng Chuyên', 'Tổng đại trà', 'Đỗ Chuyên', 'Đỗ đại trà', 'Trượt']

# CSS selectors for the search form and the single-row result table.
SBD_INPUT_SELECTOR = "#form15 > input.search_init.text_filter.form-control.wp-100"
SUBMIT_SELECTOR = "#form15 > button"
CELL_SELECTOR = ("#module1015 > div.table-responsive > table > tbody > tr > "
                 "td:nth-child({column})")


def _read_cell(browser, column):
    """Return the text of result cell *column* (1-based), or None on failure.

    The lookup is attempted twice because the element can go stale or be
    missing while the page is still updating after a search.  Only Selenium
    errors are swallowed; anything else (e.g. KeyboardInterrupt) propagates.
    """
    selector = CELL_SELECTOR.format(column=column)
    for _attempt in range(2):
        try:
            return browser.find_element(By.CSS_SELECTOR, selector).text
        except WebDriverException:
            continue
    return None


# NOTE(review): `executable_path` is kept from the original; newer Selenium
# releases expect a Service object instead — confirm the installed version.
driver = webdriver.Chrome(options=chrome_options,
                          executable_path="D:/chromedriver_win32/chromedriver.exe")
try:
    # `with` guarantees the CSV is flushed and closed even if scraping fails.
    with open('diem-thi.csv', mode='w',
              encoding='utf-8-sig', newline='') as employee_file:
        employee_writer = csv.writer(employee_file)
        employee_writer.writerow(header)

        driver.get(url)
        time.sleep(1)  # give the page a moment to finish loading

        for i in range(30):
            candidate_id = getSBD(i + 1)
            sbd_field = driver.find_element(By.CSS_SELECTOR, SBD_INPUT_SELECTOR)
            sbd_field.send_keys(candidate_id)
            driver.find_element(By.CSS_SELECTOR, SUBMIT_SELECTOR).click()
            # NOTE(review): there is no explicit wait for the result table to
            # refresh after the click — confirm rows are not read too early.

            row = []
            # One table column per CSV header entry.
            for column in range(1, len(header) + 1):
                cell_text = _read_cell(driver, column)
                if cell_text is None:
                    # Keep the row aligned with the header and report the gap.
                    cell_text = ""
                    print(f"error at {candidate_id} with field {column}")
                row.append(cell_text)

            employee_writer.writerow(row)
            sbd_field.clear()  # empty the input before the next ID is typed
finally:
    # Always shut down the browser and the chromedriver process.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment