Skip to content

Instantly share code, notes, and snippets.

@ABlueStudent
Last active May 26, 2020 13:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ABlueStudent/09ecaf5bcf216ebde1b30776d57f319e to your computer and use it in GitHub Desktop.
Save ABlueStudent/09ecaf5bcf216ebde1b30776d57f319e to your computer and use it in GitHub Desktop.
作業 寫一個把系上教授的資料通通爬下來的爬蟲
from bs4 import BeautifulSoup
import requests
import csv
class CSIEProf:
    """Scrape faculty cards from a listing page and export them as CSV.

    The intended pipeline is ``Write(Format(Extract(url)), file_name)``.
    """

    # Characters stripped from every description line before it is parsed.
    _DESCRIPTION_NOISE = str.maketrans("", "", "\t\n \u2003")

    # Description label -> column index of that field in a formatted row.
    _LABEL_COLUMNS = {"學歷": 2, "辦公室": 3, "分機": 4, "E-mail": 5}

    @staticmethod
    def _text(node):
        """Return the node's text without EM SPACE characters, or "" if absent."""
        if node is None:
            return ""
        return node.text.replace("\u2003", "")

    def Extract(self, url, timeout=30):
        """Download a faculty page and pull the raw fields out of each card.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            A list of ``(name, title, descriptions)`` tuples, one per
            ``<div class="card">``. ``descriptions`` is a list with the
            whitespace-stripped text of every ``<p class="card-description">``.

        Raises:
            requests.RequestException: On network failure, timeout, or an
                HTTP error status.
        """
        response = requests.get(url, timeout=timeout)
        # Fail loudly instead of parsing an error page into an empty CSV.
        response.raise_for_status()

        cards = BeautifulSoup(response.text, "html.parser").find_all("div", "card")
        return [
            (
                self._text(card.find("h2", "card-header")),
                self._text(card.find("p", "card-meta")),
                [
                    paragraph.text.translate(self._DESCRIPTION_NOISE)
                    for paragraph in card.find_all("p", "card-description")
                ],
            )
            for card in cards
        ]

    def Write(self, Data, FileName, encoding="big5"):
        """Write formatted rows to ``FileName + ".csv"``.

        The file starts with a one-cell title row holding ``FileName``,
        followed by the column header row and then ``Data``.

        Args:
            Data: Iterable of rows, as produced by ``Format``.
            FileName: Output path without the ``.csv`` extension.
            encoding: Text encoding of the output file.
        """
        with open(FileName + ".csv", "w", newline="", encoding=encoding) as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([FileName])
            writer.writerow(["姓名", "級職", "學歷", "辦公室", "分機", "E-mail"])
            writer.writerows(Data)

    def Format(self, Data):
        """Turn extracted tuples into fixed-column rows.

        Each row starts with six columns: name, title, education, office,
        extension, e-mail. Descriptions are expected to look like
        ``label:value``; known labels fill their column, and anything else is
        appended as an extra trailing column.

        Args:
            Data: Iterable of ``(name, title, descriptions)`` as returned by
                ``Extract``.

        Returns:
            A list of rows (lists of strings).
        """
        rows = []
        for prof in Data:
            row = [prof[0], prof[1], "", "", "", ""]
            for description in prof[2]:
                # Split on the first colon only, so values that themselves
                # contain ":" (e.g. URLs) are kept whole.
                label, separator, value = description.partition(":")
                if not separator:
                    # No label at all: keep the text rather than crash.
                    row.append(description)
                    continue
                column = self._LABEL_COLUMNS.get(label)
                if column is None:
                    row.append(value)
                else:
                    row[column] = value
            rows.append(row)
        return rows
def main():
    """Scrape each faculty listing page and save it to its own CSV file."""
    # (listing page URL, output file name without extension)
    pages = (
        ("https://csie.asia.edu.tw/faculty/professors", "專任教授"),
        ("https://csie.asia.edu.tw/faculty/associate-professors", "專任副教授"),
        ("https://csie.asia.edu.tw/faculty/assistant-professors", "專任助理教授"),
    )
    scraper = CSIEProf()
    for url, file_name in pages:
        extracted = scraper.Extract(url)
        formatted = scraper.Format(extracted)
        scraper.Write(formatted, file_name)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment