Last active
May 26, 2020 13:02
-
-
Save ABlueStudent/09ecaf5bcf216ebde1b30776d57f319e to your computer and use it in GitHub Desktop.
作業 寫一個把系上教授的資料通通爬下來的爬蟲
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
import csv | |
class CSIEProf:
    """Scrape faculty "card" entries from a web page and export them to CSV.

    Typical use chains the three methods:
    ``Write(Format(Extract(url)), file_name)``.
    """

    # Description labels that have a dedicated CSV column, mapped to the
    # column index inside a formatted row (columns 0 and 1 hold the name and
    # the title).
    _COLUMN_BY_LABEL = {"學歷": 2, "辦公室": 3, "分機": 4, "E-mail": 5}

    # Characters removed from every description paragraph: tab, newline,
    # ASCII space and EM SPACE (U+2003).
    _DESCRIPTION_JUNK = str.maketrans("", "", "\t\n \u2003")

    def Extract(self, url):
        """Download ``url`` and pull the raw fields out of every card.

        Args:
            url: Address of the page to scrape.

        Returns:
            A list with one ``(header, meta, descriptions)`` tuple per
            ``<div class="card">``. ``header`` is the text of the card's
            ``<h2 class="card-header">``, ``meta`` the text of its
            ``<p class="card-meta">`` (both with EM SPACE removed, and ""
            when the element is missing), and ``descriptions`` a list holding
            the whitespace-stripped text of each ``<p class="card-description">``.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        # A timeout keeps the script from hanging forever on a dead server;
        # raise_for_status() stops us from scraping an error page.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        def clean_text(element):
            # A card without the expected element yields "" instead of
            # crashing on ``None.text``.
            if element is None:
                return ""
            return element.text.replace("\u2003", "")

        records = []
        for card in soup.find_all("div", "card"):
            descriptions = [
                paragraph.text.translate(self._DESCRIPTION_JUNK)
                for paragraph in card.find_all("p", "card-description")
            ]
            records.append(
                (
                    clean_text(card.find("h2", "card-header")),
                    clean_text(card.find("p", "card-meta")),
                    descriptions,
                )
            )
        return records

    def Write(self, Data, FileName):
        """Write formatted rows to ``<FileName>.csv``.

        The file starts with a one-cell title row containing ``FileName``,
        followed by the column header row and then one row per entry.

        Args:
            Data: Iterable of rows, as produced by :meth:`Format`.
            FileName: Output path without the ``.csv`` extension; it is also
                written as the title row.

        Raises:
            UnicodeEncodeError: if a cell contains a character that cannot be
                encoded as Big5 (the file is opened with strict encoding).
        """
        # newline="" is required by the csv module so it controls line
        # endings itself.
        with open(FileName + ".csv", "w", newline="", encoding="big5") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([FileName])
            # Header: name, title, education, office, extension, e-mail.
            writer.writerow(["姓名", "級職", "學歷", "辦公室", "分機", "E-mail"])
            writer.writerows(Data)

    def Format(self, Data):
        """Turn extracted card tuples into flat CSV rows.

        Each description is expected to look like ``"label:value"``. Values of
        the four known labels go to their fixed columns; values of any other
        label are appended as extra trailing columns.

        Args:
            Data: Iterable of ``(name, title, descriptions)`` sequences, as
                returned by :meth:`Extract`.

        Returns:
            A list of rows ``[name, title, education, office, extension,
            e-mail, *extras]``; columns with no matching description are "".
        """
        final = []
        for Prof in Data:
            result = [Prof[0], Prof[1], "", "", "", ""]
            for description in Prof[2]:
                # partition() splits on the FIRST colon only, so values that
                # contain colons themselves (e.g. URLs) are kept whole, and a
                # description without any colon no longer raises IndexError.
                label, separator, value = description.partition(":")
                column = self._COLUMN_BY_LABEL.get(label)
                if column is not None:
                    result[column] = value
                elif separator:
                    result.append(value)
                elif description:
                    # Unlabelled text: keep it rather than silently drop it.
                    result.append(description)
            final.append(result)
        return final
def main():
    """Scrape every faculty listing page and save each one as its own CSV."""
    scraper = CSIEProf()
    # (listing page URL, output file name without extension), handled in order.
    targets = (
        ("https://csie.asia.edu.tw/faculty/professors", "專任教授"),
        ("https://csie.asia.edu.tw/faculty/associate-professors", "專任副教授"),
        ("https://csie.asia.edu.tw/faculty/assistant-professors", "專任助理教授"),
    )
    for page_url, csv_name in targets:
        extracted = scraper.Extract(page_url)
        rows = scraper.Format(extracted)
        scraper.Write(rows, csv_name)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment