Skip to content

Instantly share code, notes, and snippets.

@shmidtelson
Created October 21, 2021 20:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shmidtelson/15aece812781d1d10046ab338ea59744 to your computer and use it in GitHub Desktop.
Save shmidtelson/15aece812781d1d10046ab338ea59744 to your computer and use it in GitHub Desktop.
Парсим занятия mkr.udau.edu.ua
import json
from pprint import pprint
import requests
import re
from bs4 import BeautifulSoup
# Scrape the class timetable for one group from mkr.udau.edu.ua.
#
# Flow: GET the form page to learn the CSRF parameter name and token, POST the
# timetable form with that token, then pull the JSON "events" array out of the
# inline JavaScript of the returned page and pretty-print it.
url = 'http://mkr.udau.edu.ua/time-table/group?type=1'

# A single session is used for both requests so that any cookies set by the
# initial GET are sent back with the POST (the CSRF token is presumably tied
# to the session cookie -- verify against the site if this ever breaks).
session = requests.Session()

# Step 1: load the form page and read the CSRF parameter name and token from
# the <meta name="csrf-param"> / <meta name="csrf-token"> tags.
first_entry_response = session.get(url, timeout=30)
first_entry_response.raise_for_status()
soup = BeautifulSoup(first_entry_response.text, 'html.parser')
csrf_param = soup.find("meta", attrs={"name": "csrf-param"})['content']
csrf_token = soup.find("meta", attrs={"name": "csrf-token"})['content']
print(f'Получили csrf_param: {csrf_param} и csrf_token: {csrf_token}')

# Step 2: form parameters for the timetable query. This dict has to be built
# only after csrf_param / csrf_token are known; building it earlier raises
# NameError because the names do not exist yet.
data = {
    csrf_param: csrf_token,
    "TimeTableForm[facultyId]": "5",
    "TimeTableForm[course]": "2",
    "TimeTableForm[groupId]": "1007",
    "date-picker": "01.10.2020 - 31.10.2022",
    "TimeTableForm[dateStart]": "01.10.2020",
    "TimeTableForm[dateEnd]": "31.10.2022",
    "TimeTableForm[indicationDays]": "5",
}

# Step 3: submit the form and parse the resulting page.
table_page = session.post(url, data=data, timeout=30)
table_page.raise_for_status()
soup = BeautifulSoup(table_page.text, 'html.parser')

# Step 4: locate the inline script that carries the lessons as `"events":[...]`.
# Scripts are scanned from last to first: the data has historically been in the
# last <script>, but searching by content keeps the parser working if the site
# appends another script after it.
events_key = re.compile(r'"events":\s*\[')
result = None
for script_tag in reversed(soup.find_all("script")):
    script_text = script_tag.text
    key_match = events_key.search(script_text)
    if key_match is None:
        continue
    # raw_decode parses exactly one JSON value starting at the '[' and ignores
    # whatever follows it, unlike a greedy regex that would run to the last ']'
    # on the line and could swallow unrelated trailing JavaScript.
    result, _ = json.JSONDecoder().raw_decode(script_text, key_match.end() - 1)
    break

if result is None:
    raise ValueError('No <script> tag containing an "events" array was found in the response')

pprint(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment