Skip to content

Instantly share code, notes, and snippets.

@souenzzo
Last active September 11, 2016 00:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save souenzzo/4ab9ffc78677a7b30a092f3c96a284a5 to your computer and use it in GitHub Desktop.
Save souenzzo/4ab9ffc78677a7b30a092f3c96a284a5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from http import client
from bs4 import BeautifulSoup
def getElems(tr):
    """Collect the ``.string`` of each child of ``tr``, skipping bare newlines.

    Args:
        tr: An iterable whose items expose a ``.string`` attribute. Within
            this file it is called with table row tags.

    Returns:
        A list holding the ``.string`` value of every item whose ``.string``
        is not exactly a single newline character, in iteration order.
        Values of ``None`` are kept.
    """
    return [cell.string for cell in tr if cell.string != "\n"]
def structuralize(table):
    """Convert a table into a list of dicts, one per body row.

    The column titles are read from the first row of ``table.thead``; each
    child of ``table.tbody`` that is not a bare newline is read the same way
    and paired with the titles.

    Args:
        table: An object exposing ``.thead.tr`` and an iterable ``.tbody``.

    Returns:
        A list of ``{title: cell}`` dicts in row order. Because the pairing
        uses ``zip``, a row with more or fewer cells than there are titles
        is truncated to the shorter of the two.
    """
    header = getElems(table.thead.tr)
    return [
        dict(zip(header, getElems(row)))
        for row in table.tbody
        if row != "\n"  # skip whitespace nodes between rows
    ]
def getInfo(soup):
    """Extract the rows of the element with id ``table_disciplinas``.

    Args:
        soup: A parsed document exposing ``find(id=...)``.

    Returns:
        Whatever ``structuralize`` returns for the element found. No check
        is made for a missing element; the lookup result is passed on as is.
    """
    return structuralize(soup.find(id="table_disciplinas"))
def nextPage(soup):
    """Tell whether the page contains a "next page" link.

    Args:
        soup: A parsed document exposing ``find_all``.

    Returns:
        ``True`` when at least one ``<a>`` element with class ``next_page``
        is found, ``False`` otherwise.
    """
    next_links = soup.find_all("a", "next_page")
    return bool(next_links)
def get_data(periodo, curso):
    """Download every result page for one period/course pair.

    Requests ``/transparencia/disciplinas_que_mais_reprovam`` on
    ``sistemas.uff.br`` page by page, starting at page 1, and stops after
    the first page that has no "next page" link.

    Args:
        periodo: Numeric period code (formatted with ``%d``). The script's
            entry point computes it as ``year * 10 + semester``.
        curso: Numeric course id (formatted with ``%d``).

    Returns:
        A dict with the keys ``"periodo"``, ``"curso"`` and
        ``"disciplinas"``; the last one is the concatenation of the rows
        returned by ``getInfo`` for every page fetched.

    Raises:
        Any exception raised by the HTTP request or by parsing a page is
        propagated; the connection is closed first.
    """
    base = '/transparencia/disciplinas_que_mais_reprovam'
    data = {
        "periodo": periodo,
        "curso": curso,
        "disciplinas": [],
    }
    conn = client.HTTPSConnection("sistemas.uff.br")
    try:
        page = 1
        while True:
            path = "%s?page=%d&periodo=%d&curso=%d" % (base, page, periodo, curso)
            conn.request("GET", path)
            # The body must be read in full before the connection can be
            # reused for the next request.
            raw = conn.getresponse().read()
            soup = BeautifulSoup(raw, "html.parser")
            data["disciplinas"].extend(getInfo(soup))
            if not nextPage(soup):
                break
            page += 1
    finally:
        # Always release the socket: this function is called once per
        # period/course pair, so a leaked connection per call adds up fast.
        conn.close()
    return data
if __name__ == "__main__":
    # Crawl every (year, semester, course id) combination and collect the
    # results that could be fetched and parsed.
    # NOTE(review): `db` is only built in memory here; nothing in this
    # script prints or saves it.
    db = {
        "nome": "disciplinas",
        "data": [],
    }
    for ano in range(1999, 2017):  # years
        for semestre in [1, 2]:  # semesters
            # Period codes look like 20152: the year followed by the semester.
            periodo = (ano * 10) + semestre
            for curso in range(200):  # course ids
                try:
                    # Use the loop's period and course; a fixed (20152, 25)
                    # would fetch the same data on every iteration.
                    x = get_data(periodo, curso)
                except Exception:
                    # Best effort: skip combinations that cannot be fetched
                    # or parsed, but let KeyboardInterrupt/SystemExit stop
                    # the crawl.
                    continue
                db["data"].append(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment