Skip to content

Instantly share code, notes, and snippets.

@yuwen41200
Last active July 6, 2018 05:54
Show Gist options
  • Save yuwen41200/ce16bafa60e8139867a8e154a60ee26a to your computer and use it in GitHub Desktop.
Save yuwen41200/ce16bafa60e8139867a8e154a60ee26a to your computer and use it in GitHub Desktop.
Automatically download all handouts from NCTU e3.
.idea/
secret
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import shutil
from time import sleep
from urllib.parse import unquote
from selenium import webdriver
# Download every handout of every course listed under the site's past-courses
# view, one course at a time, and sort the downloaded files into one directory
# per course.
#
# The browser session is driven with fixed five-second pauses after each
# navigation step (presumably to let pages load and downloads finish --
# NOTE(review): confirm the delay is long enough for large handouts).

# Zero-based index of the course-list page currently being processed, and of
# the course within that page.
course_pg = 0
course_no = 0

# Matches the ``id="..."`` attribute of each handout link in the page source.
p = re.compile(r'id="ctl00_ContentPlaceHolder1_dgCourseHandout_.+?_lnkFile"')

# Element id of the "next page" button; the same id is used on both the course
# list and the handout list.
NEXT_BUTTON_ID = "ctl00_ContentPlaceHolder1_DataNavigator1_ctl03"

# The "secret" file holds four lines: account, password, the directory files
# are moved out of, and the directory they are moved into.
# NOTE(review): src_dir is presumably the browser's download directory --
# confirm against the local Chrome configuration.
with open("secret") as file:
    usr = file.readline().strip()
    pwd = file.readline().strip()
    src_dir = file.readline().strip()
    dest_dir = file.readline().strip()

driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get("https://dcpc.nctu.edu.tw")
    print(driver.title)

    # Log in with the credentials read from the secret file.
    driver.find_element_by_id("txtAccount").send_keys(usr)
    driver.find_element_by_id("txtPwd").send_keys(pwd)
    driver.find_element_by_id("btnLoginIn").click()
    sleep(5)
    print(driver.find_element_by_id("ctl00_lbWelcomeT").text)

    # Each iteration handles exactly one course: navigate to it, download its
    # handouts, return to the index, then move the downloaded files.
    while True:
        # Link text means "past courses".
        driver.find_element_by_link_text("歷年課程").click()
        sleep(5)

        # The course list always opens on its first page, so page forward
        # again to the page we were working on.
        for _ in range(course_pg):
            next_button = driver.find_element_by_id(NEXT_BUTTON_ID)
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if "not-allowed" in next_button.get_attribute("style"):
                raise RuntimeError(
                    "cannot return to course page %d: next button is disabled"
                    % (course_pg + 1)
                )
            next_button.click()
            sleep(5)

        # Link text means "enter course".
        course_links = driver.find_elements_by_link_text("進入課程")

        if course_no == len(course_links):
            # Every course on this page is done; advance to the next page.
            print("page", course_pg + 1, ", number", course_no)
            course_no = 0
            course_pg += 1
            next_button = driver.find_element_by_id(NEXT_BUTTON_ID)
            if "not-allowed" in next_button.get_attribute("style"):
                # No further page: all courses processed. The ``finally``
                # clause below shuts the browser down.
                raise SystemExit(0)
            next_button.click()
            sleep(5)
            course_links = driver.find_elements_by_link_text("進入課程")

        course_links[course_no].click()
        sleep(5)
        course_name = driver.find_element_by_id("ctl00_lbCurrentCourseName").text
        print(course_name)

        # Link text means "handout list".
        driver.find_element_by_link_text("教材列表").click()
        sleep(5)

        # Walk every page of the handout list.
        while True:
            html = driver.page_source
            for match in p.finditer(html):
                handout_id = match.group()
                # Strip the leading ``id="`` and the trailing quote to get the
                # bare element id.
                driver.find_element_by_id(handout_id[4:-1]).click()
                sleep(5)
                # Clicking a handout opens a new window; trigger the download
                # there, close it, and return to the main window.
                driver.switch_to.window(driver.window_handles[-1])
                driver.switch_to.frame("frmMain")
                driver.find_element_by_id("ctl00_ContentPlaceHolder1_btnUrl").click()
                sleep(5)
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

            next_button = driver.find_element_by_id(NEXT_BUTTON_ID)
            if "not-allowed" in next_button.get_attribute("style"):
                break
            next_button.click()
            sleep(5)

        driver.find_element_by_id("ctl00_btnBackIndex").click()
        sleep(5)
        course_no += 1

        # Move everything found in src_dir into a directory named after the
        # course, percent-decoding the file names on the way.
        os.makedirs(os.path.join(dest_dir, course_name), exist_ok=True)
        for filename in os.listdir(src_dir):
            shutil.move(
                os.path.join(src_dir, filename),
                os.path.join(dest_dir, course_name, unquote(filename))
            )
finally:
    # Always release the browser and its driver process, including when a
    # lookup or click above raises.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment