Skip to content

Instantly share code, notes, and snippets.

@Eastkap
Created January 21, 2017 12:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Eastkap/66eb8871afc5803ddb94ea0f0b10e3c1 to your computer and use it in GitHub Desktop.
Save Eastkap/66eb8871afc5803ddb94ea0f0b10e3c1 to your computer and use it in GitHub Desktop.
import sys, re, csv, requests
from bs4 import BeautifulSoup
if sys.version_info[0] == 3:
from urllib.request import urlopen
else:
# Not Python 3 - at present this most likely means Python 2.
# NOTE: this check will need updating if a Python 4 is ever released.
from urllib import urlopen
#filename = '/Users/Jacobo/pr/upmatch/a.jpg'
#with open(filename, 'wb') as file:
# file.write(urlopen('https://etu.math.upmc.fr/math/photos/3520121.jpg').read())
def get_infos(r):
    """Scrape a student's details from a directory search response.

    For every ``<td class="result">`` cell in the response, the student's
    display name and student number are read from the cell, the student's
    detail page is fetched to obtain the e-mail address, and the student's
    photo is downloaded to ``/Users/Jacobo/pr/upmatch/photos/<etu>.jpg``.

    Args:
        r: Response object exposing the page HTML as ``r.text``.

    Returns:
        dict: Keys ``'name'``, ``'etu'``, ``'mail'`` and ``'pic'`` (``1`` if
        the photo was saved, ``0`` otherwise). Empty when the page contains
        no result cell.

    Raises:
        AttributeError: If an expected marker (``Interne=``, ``,ou=``,
            ``@etu.upmc.fr">`` or ``</a>``) is missing from the markup.
        IndexError: If the detail page has fewer than six
            ``attributeDisplayer_value`` cells.
    """
    # Function-scope import so this block does not depend on a file-level
    # change. ``closing`` is used because the Python 2 ``urllib.urlopen``
    # fallback imported by this file does not return a context manager.
    from contextlib import closing

    soup = BeautifulSoup(r.text, 'html.parser')
    info = dict()
    # NOTE(review): every result cell overwrites the same dict, so only the
    # last match of the page is returned. Returning all matches would change
    # the return type and therefore the caller.
    for o in soup.findAll("td", {'class': 'result'}):
        markup = str(o)
        info['name'] = o.text.strip()  # first name followed by LAST NAME
        # Student number, read from a fixed character offset in the cell's
        # markup. NOTE(review): breaks if the directory changes its HTML.
        info['etu'] = markup[78:85]

        # The internal uid sits between 'Interne=' and ',ou=' in the markup.
        uid_start = re.search(r'Interne=', markup).span()[1]
        uid_end = re.search(r',ou=', markup).span()[0]
        adresse = ('https://www.annuaire.upmc.fr/upmc/list.upmc?method=list&dn=uidInterne='
                   + markup[uid_start:uid_end]
                   + ',ou=People&mode=display')
        with closing(urlopen(adresse)) as page:
            s = page.read()
        full = BeautifulSoup(s, 'html.parser')

        # The e-mail is taken from the sixth value cell of the detail page.
        mail = full.findAll("td", {'class': 'attributeDisplayer_value'})
        mail = str(mail[5])
        mail_start = re.search(r'@etu.upmc.fr">', mail).span()[1]
        mail_end = re.search(r'</a>', mail).span()[0]
        info['mail'] = mail[mail_start:mail_end]

        # Download the photo.
        pic = '/Users/Jacobo/pr/upmatch/photos/' + info['etu'] + '.jpg'
        try:
            # Fetch first, so a failed download does not leave an empty file.
            with closing(urlopen('https://etu.math.upmc.fr/math/photos/' + info['etu'] + '.jpg')) as photo:
                data = photo.read()
            # Write to the photo path (not the module-level CSV ``filename``).
            with open(pic, 'wb') as file:
                file.write(data)
            info['pic'] = 1
        except Exception:
            # Best effort: a missing or unreadable photo must not abort the
            # scrape. Unlike a bare ``except``, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            info['pic'] = 0
    return info
def make_csv(infos, filename):
    """Write a sequence of dicts to ``filename`` as CSV with a header row.

    The columns are the union of all keys found in ``infos``, in the order
    in which each key is first encountered, so the layout is the same on
    every run. Keys missing from a row are written as empty fields.

    Args:
        infos: Iterable of dicts, one per output row.
        filename: Path of the CSV file to create or overwrite.
    """
    # Materialise once: the rows are walked twice (header, then body), which
    # would silently produce an empty body for a one-shot iterable.
    rows = list(infos)

    fieldnames = []
    seen = set()
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)

    # The csv module does its own newline handling: it expects newline=''
    # on Python 3 and a binary-mode file on Python 2.
    if sys.version_info[0] == 3:
        csvfile = open(filename, 'w', newline='')
    else:
        csvfile = open(filename, 'wb')
    with csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
# Driver: query the UPMC directory's search form once for every three-letter
# lowercase name prefix ('aaa' .. 'zzz'), collect one info dict per query via
# get_infos(), and dump everything to a CSV file.
url = "https://www.annuaire.upmc.fr/upmc/simpleSearch.upmc"
infos = list()
filename = "/Users/Jacobo/pr/upmatch/data.csv"

letters = [chr(code) for code in range(97, 123)]  # 'a' .. 'z'

# A single session is shared by all 26**3 requests and closed at the end,
# instead of creating a fresh, unused and never-closed session on every
# iteration. NOTE(review): sharing a session also keeps any cookies the
# server sets between requests.
with requests.session() as session:
    for first in letters:
        for second in letters:
            for third in letters:
                name = first + second + third
                payload = {
                    'name': name,
                    'name_query_type': 'NAME*',
                    'surname': '',
                    'surname_query_type': 'SURNAME*',
                    'filter': 'objectClass=etudiant',
                    'number': '2500',
                    'bouton.x': '38',
                    'bouton.y': '15',
                    'inputPage': 'inputBadSearchWithoutFilters',
                }
                r = session.post(url, data=payload)
                # Extract the student details from the search response.
                infos.append(get_infos(r))

make_csv(infos, filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment