Skip to content

Instantly share code, notes, and snippets.

@Xmoe
Last active April 1, 2018 14:54
Show Gist options
  • Save Xmoe/bc9f1e10a15a1a46297f02515a9381f2 to your computer and use it in GitHub Desktop.
Save Xmoe/bc9f1e10a15a1a46297f02515a9381f2 to your computer and use it in GitHub Desktop.
Simulate the login process to eKVV in Python to obtain cookies.
import json
from urllib.parse import urljoin

import requests_html
def login(username=None, password=None):
    """
    Log in to eKVV and return a session carrying the resulting cookies.

    The login is a multi-step exchange:

    1. GET ``jsession_id_url``, which hands out a jsession cookie (it is not
       yet clear whether this cookie is actually needed).
    2. GET the login page, which redirects to itself but with additional
       header data; the final URL is the one that is POSTed to afterwards.
    3. Hidden inside the login form is a one-time secret token called "nnc"
       which has to be submitted together with the credentials.
    4. The server responds with a page called "trust"; ALL of its form data
       is POSTed to complete the login, which yields the SP_AUTH cookie.

    :param username: login name; required despite the ``None`` default
    :param password: password; required despite the ``None`` default
    :return: A session with all the necessary context (cookies etc.) to make
        further requests to the server, or ``None`` (after printing a
        message) if a credential is missing. The caller owns the returned
        session and should close it when done.
    :raises IndexError: if the login page has no element with id "nnc"
    """
    if username is None or password is None:
        print("Cannot login without credentials!")
        return None

    jsession_id_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Benvw_Loggedin.jsp"
    login_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/benvw_Login_MatrikelAct"

    # Deliberately not a ``with`` block: the session is handed to the caller,
    # so it must stay open on success and only be closed if the login fails.
    session = requests_html.HTMLSession()
    try:
        session.get(jsession_id_url)
        r = session.get(login_url)
        # after we have been redirected to the correct url, update the path
        login_url = r.url

        # find the first element with id "nnc" and retrieve its value
        token_fields = r.html.find("#nnc")
        if not token_fields:
            raise IndexError('login page has no element with id "nnc"')
        secret_token = token_fields[0].attrs["value"]

        # construct the payload for POSTing
        payload = {
            "username": username,
            "password": password,
            "nnc": secret_token,
        }
        r = session.post(login_url, data=payload)

        # get all the input fields and values which authenticate us, so we
        # can POST them
        submit_url, payload = form_crawler(r)
        session.post(submit_url, data=payload)
    except BaseException:
        # Release the connections before propagating any failure; the error
        # itself is re-raised unchanged.
        session.close()
        raise
    return session
def form_crawler(response, selector="form"):
    """
    Collect the submit URL and the input fields of a form in ``response``.

    :param response: object exposing ``html.find(selector, first=True)`` and
        a ``url`` attribute (the page the form was served from)
    :param selector: selector of the form; the first match is used
    :return: tuple ``(submit_url, input_fields)`` where ``input_fields`` maps
        each input's ``name`` to its ``value``. Inputs without a ``name`` are
        skipped and a missing ``value`` becomes the empty string.
        ``submit_url`` is the form's ``action`` resolved against
        ``response.url``; without an ``action`` it is ``response.url`` itself.
    """
    # take the first form which fits the selector
    # NOTE(review): if nothing matches, ``form`` is presumably None and the
    # next statement raises AttributeError - confirm against requests-html.
    form = response.html.find(selector, first=True)

    # construct a dictionary of {name: value} for all named inputs
    input_fields = {}
    for elem in form.find("input"):
        name = elem.attrs.get("name")
        if name is None:
            # an input without a name cannot contribute a form field
            continue
        input_fields[name] = elem.attrs.get("value", "")

    # If the form has no action, submit to the page itself; otherwise POST to
    # the action URL. urljoin covers both cases: an empty action yields
    # response.url, an absolute action is kept, a relative one is resolved.
    action = form.attrs.get("action", "")
    submit_url = urljoin(response.url, action)
    return (submit_url, input_fields)
if __name__ == "__main__":
    # The credentials file is a JSON object whose keys are passed to login()
    # as keyword arguments.
    with open("login_data.json", "r", encoding="utf-8") as file:
        credentials = json.load(file)

    session = login(**credentials)
    if session is None:
        # login() has already printed why it could not log in
        raise SystemExit(1)

    # close the session once we are done with it
    with session:
        # here you can do whatever you want
        other_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Stundenplan_Liste_Kompakt.jsp"
        r = session.get(other_url)
        print(r.text)
        print(session.cookies)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment