# Xmoe/ekVV_login.py

## ekVV_login.py
import requests_html
import json


def login(username=None, password=None):
    """
    Log in to ekvv.uni-bielefeld.de and return the authenticated session.

    :param username: user name submitted with the login form
    :param password: password submitted with the login form
    :return: A session with all the necessary context ( cookies etc ) to make further requests to the server,
             or None (after printing a message) if username or password is missing.
             The session is returned open; the caller is responsible for closing it.
    :raises IndexError: if the login page contains no element with id "nnc"

    By GETting the jsession_id_url first, we are given a jsession cookie. It is not yet clear whether it is needed.
    Afterwards we GET the login page, which redirects us to itself, but with additional header data.
    Hidden inside the login form is a one-time secret token called "nnc" which needs to be submitted as well as the credentials.
    Therefore we need to extract the value and add it to our payload. This payload is then POSTed back to the same page.
    The response from the server is a file called trust from which we have to extract ALL its form data
    and POST them to the submit URL reported by form_crawler.
    Doing so completes the login and yields us the mighty SP_AUTH cookie.
    """

    if username is None or password is None:
        print("Cannot login without credentials!")
        return None

    jsession_id_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Benvw_Loggedin.jsp"
    login_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/benvw_Login_MatrikelAct"

    # The session is deliberately NOT opened in a ``with`` block: leaving such a
    # block closes the session, and we want to hand a live session to the caller.
    session = requests_html.HTMLSession()
    try:
        # only requested for the cookie it sets; the response body is not used
        session.get(jsession_id_url)
        r = session.get(login_url)

        # after we have been redirected to the correct url, update the path
        login_url = r.url

        # find the first element with id "nnc" and retrieve its value
        token_fields = r.html.find("#nnc")
        if not token_fields:
            raise IndexError('login page contains no element with id "nnc"')
        secret_token = token_fields[0].attrs["value"]

        # construct the payload for POSTing
        payload = {"username": username,
                   "password": password,
                   "nnc": secret_token}

        r = session.post(login_url, data=payload)

        # get all the input fields and values which authenticate us, so we can POST them
        submit_url, payload = form_crawler(r)

        session.post(submit_url, data=payload)
    except BaseException:
        # do not leak the session if any step of the login fails
        session.close()
        raise

    return session


def form_crawler(response, selector="form"):
    """
    Extract the submit URL and the input data of a form contained in a response.

    :param response: response object providing ``.html.find()`` and ``.url``
    :param selector: CSS selector for the form; the first matching element is used
    :return: tuple ``(submit_url, input_fields)`` where ``input_fields`` is a
             dictionary of {name: value} for all named inputs of the form
    :raises ValueError: if no element matches the selector
    """
    # imported here so this function does not depend on the file's import block
    from urllib.parse import urljoin

    # take the first form which fits the selector
    form = response.html.find(selector, first=True)
    if form is None:
        raise ValueError(
            "no form matching selector {!r} found at {}".format(selector, response.url)
        )

    # construct a dictionary of {name: value} for all inputs inside this form;
    # inputs without a name cannot be submitted and are skipped,
    # inputs without a value are submitted as an empty string
    input_fields = {
        elem.attrs["name"]: elem.attrs.get("value", "")
        for elem in form.find("input")
        if "name" in elem.attrs
    }

    # if the form has no action attribute, set the submit_url to the page itself;
    # otherwise POST to the action URL, resolved against the page URL because
    # the action may be relative
    action = form.attrs.get("action")
    submit_url = urljoin(response.url, action) if action else response.url

    return (submit_url, input_fields)


if __name__ == "__main__":

    # the JSON object's keys are passed to login() as keyword arguments
    with open("login_data.json", "r", encoding="utf-8") as file:
        credentials = json.load(file)

    session = login(**credentials)
    if session is None:
        # login() returns None (and prints why) when credentials are missing
        raise SystemExit(1)

    # make sure the session is closed once we are done with it
    with session:
        # here you can do whatever you want
        other_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Stundenplan_Liste_Kompakt.jsp"
        r = session.get(other_url)
        print(r.text)
        print(session.cookies)
	import requests_html
	import json


	def login(username=None, password=None):
		"""
		Log in to ekvv.uni-bielefeld.de and return the authenticated session.

		:param username: user name submitted with the login form
		:param password: password submitted with the login form
		:return: A session with all the necessary context ( cookies etc ) to make further requests to the server,
		         or None (after printing a message) if username or password is missing.
		         The session is returned open; the caller is responsible for closing it.
		:raises IndexError: if the login page contains no element with id "nnc"

		By GETting the jsession_id_url first, we are given a jsession cookie. It is not yet clear whether it is needed.
		Afterwards we GET the login page, which redirects us to itself, but with additional header data.
		Hidden inside the login form is a one-time secret token called "nnc" which needs to be submitted as well as the credentials.
		Therefore we need to extract the value and add it to our payload. This payload is then POSTed back to the same page.
		The response from the server is a file called trust from which we have to extract ALL its form data
		and POST them to the submit URL reported by form_crawler.
		Doing so completes the login and yields us the mighty SP_AUTH cookie.
		"""

		if username is None or password is None:
			print("Cannot login without credentials!")
			return None

		jsession_id_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Benvw_Loggedin.jsp"
		login_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/benvw_Login_MatrikelAct"

		# The session is deliberately NOT opened in a ``with`` block: leaving such a
		# block closes the session, and we want to hand a live session to the caller.
		session = requests_html.HTMLSession()
		try:
			# only requested for the cookie it sets; the response body is not used
			session.get(jsession_id_url)
			r = session.get(login_url)

			# after we have been redirected to the correct url, update the path
			login_url = r.url

			# find the first element with id "nnc" and retrieve its value
			token_fields = r.html.find("#nnc")
			if not token_fields:
				raise IndexError('login page contains no element with id "nnc"')
			secret_token = token_fields[0].attrs["value"]

			# construct the payload for POSTing
			payload = {"username": username,
			           "password": password,
			           "nnc": secret_token}

			r = session.post(login_url, data=payload)

			# get all the input fields and values which authenticate us, so we can POST them
			submit_url, payload = form_crawler(r)

			session.post(submit_url, data=payload)
		except BaseException:
			# do not leak the session if any step of the login fails
			session.close()
			raise

		return session


	def form_crawler(response, selector="form"):
		"""
		Extract the submit URL and the input data of a form contained in a response.

		:param response: response object providing ``.html.find()`` and ``.url``
		:param selector: CSS selector for the form; the first matching element is used
		:return: tuple ``(submit_url, input_fields)`` where ``input_fields`` is a
		         dictionary of {name: value} for all named inputs of the form
		:raises ValueError: if no element matches the selector
		"""
		# imported here so this function does not depend on the file's import block
		from urllib.parse import urljoin

		# take the first form which fits the selector
		form = response.html.find(selector, first=True)
		if form is None:
			raise ValueError(
				"no form matching selector {!r} found at {}".format(selector, response.url)
			)

		# construct a dictionary of {name: value} for all inputs inside this form;
		# inputs without a name cannot be submitted and are skipped,
		# inputs without a value are submitted as an empty string
		input_fields = {
			elem.attrs["name"]: elem.attrs.get("value", "")
			for elem in form.find("input")
			if "name" in elem.attrs
		}

		# if the form has no action attribute, set the submit_url to the page itself;
		# otherwise POST to the action URL, resolved against the page URL because
		# the action may be relative
		action = form.attrs.get("action")
		submit_url = urljoin(response.url, action) if action else response.url

		return (submit_url, input_fields)


	if __name__ == "__main__":

		# the JSON object's keys are passed to login() as keyword arguments
		with open("login_data.json", "r", encoding="utf-8") as file:
			credentials = json.load(file)

		session = login(**credentials)
		if session is None:
			# login() returns None (and prints why) when credentials are missing
			raise SystemExit(1)

		# make sure the session is closed once we are done with it
		with session:
			# here you can do whatever you want
			other_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Stundenplan_Liste_Kompakt.jsp"
			r = session.get(other_url)
			print(r.text)
			print(session.cookies)