Skip to content

Instantly share code, notes, and snippets.

@ttycelery
Last active June 27, 2020 05:39
Show Gist options
  • Save ttycelery/78ca10bb63393f14fd3aa15ff634b16b to your computer and use it in GitHub Desktop.
Save ttycelery/78ca10bb63393f14fd3aa15ff634b16b to your computer and use it in GitHub Desktop.
Score scraper: e-Rapor Direktorat PSMA Kemdikbud
"""
Usage: python main.py <semester identifier> <student national id list file>
Semester identifier consists of year + period (even or odd, 1 or 2)
File list.txt contains student national identifiers, one per line
Example: python main.py 20192 list.txt
This script has these possible output formats:
TOTAL_SCORE,STUDENT_ID,NAME (not sorted, no exception)
0,STUDENT_ID,EXCEPTION (not sorted, with exception)
You can sort it manually if you want.
For example:
python main.py 20192 list.txt > out
cat out | sort
Feel free to do anything with the script.
It is unlicensed.
"""
import concurrent.futures
import re
import sys
import requests
# Base URL of the e-Rapor web application; change with app url.
MAIN_URL = 'http://localhost:5678'

# Value submitted in the login form's 'p' field by get_score().
# NOTE(review): 128 hex characters -- presumably a digest of the default
# student password; confirm against the application's login page.
DEFAULT_PASSWORD = (
    'ba3253876aed6bc22d4a6ff53d8406c6ad864195ed144ab5c87621b6c233b548'
    'baeae6956df346ec8c17f5ea10f35ee3cbc514797ed7ddd3145464e2a0bab413'
)
def main(semester_id, nisn_list):
    """Fetch every student's score concurrently and print one CSV line each.

    Args:
        semester_id: Semester identifier passed through to get_score().
        nisn_list: Iterable of student identifiers to look up.

    Output lines are printed in completion order (not input order):
        ``<total>,<nisn>,<name>`` on success,
        ``0,<nisn>,<exception message>`` when get_score() raised.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        # Map each submitted job back to the student it belongs to.
        pending = {}
        for student_id in nisn_list:
            job = pool.submit(get_score, semester_id, student_id)
            pending[job] = student_id

        for finished in concurrent.futures.as_completed(pending):
            student_id = pending[finished]
            error = finished.exception()
            if error:
                print(f'0,{student_id},{error}')
            else:
                name, score = finished.result()
                print(f'{score},{student_id},{name}')
def get_score(semester_id, nisn):
    """Log in as one student and return their name and summed score.

    Args:
        semester_id: Semester identifier sent with the login form.
        nisn: Student identifier, used as the login username.

    Returns:
        A ``(name, total_score)`` tuple. ``total_score`` is the sum of the
        third and fourth numeric cell of every group of four numeric
        ``<td>`` cells found on the final-score page (treated here as the
        knowledge and practice scores).

    Raises:
        Exception: If the login response lacks the expected menu text, the
            student name cannot be found, or the number of numeric cells is
            zero or not a multiple of four.
    """
    login_data = {
        'username': nisn,
        'password': '',
        'beban': 'Paket',
        'level': 'Siswa',
        'semester_id': semester_id,
        'p': DEFAULT_PASSWORD,
    }
    # Use the session as a context manager so its pooled connections are
    # released once this student has been processed.
    with requests.Session() as session:
        login_request = session.post(
            f'{MAIN_URL}/library/process_login.php', data=login_data)
        login_html = login_request.text
        if 'Lihat Nilai Akhir' not in login_html:
            raise Exception('Login error')

        report_request = session.get(
            f'{MAIN_URL}/raporsma/index.php?page=Siswa-Lihat-Nilai-Akhir')
        report_html = report_request.text

    # Fail with a readable message instead of an IndexError when the name
    # markup is missing from the landing page.
    name_match = re.search(r'<p><small>(.+?)</small></p>', login_html,
                           re.M | re.I)
    if name_match is None:
        raise Exception('Student name not found')
    name = name_match.group(1)

    # Raw string: \s and \d are regex escapes, not Python string escapes.
    matches = re.findall(r'<td\s.+>(\d+?)</td>', report_html, re.M | re.I)

    # Cells come in groups of four. An empty or partial group means the page
    # is not in the expected layout (the previous float-modulo check raised
    # ZeroDivisionError when fewer than four cells were found).
    if not matches or len(matches) % 4:
        raise Exception('Invalid report format')

    # Within each group of four, positions 2 and 3 hold the two scores.
    knowledge_cells = matches[2::4]
    practice_cells = matches[3::4]
    total_score = sum(
        int(knowledge) + int(practice)
        for knowledge, practice in zip(knowledge_cells, practice_cells)
    )
    return name, total_score
def read_list_file(filename):
    """Yield each non-blank line of *filename*, stripped of whitespace.

    Args:
        filename: Path of the text file to read.

    Yields:
        Every line with leading/trailing whitespace removed, skipping lines
        that are empty after stripping.
    """
    with open(filename) as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if not entry:
                continue
            yield entry
# Script entry point: expects exactly two command-line arguments, the
# semester identifier and the path of the student identifier list file.
if __name__ == '__main__':
    if len(sys.argv) == 3:
        main(sys.argv[1], read_list_file(sys.argv[2]))
    else:
        # Wrong argument count: show usage and exit with a failure status.
        print(f'usage: {sys.argv[0]} <semester_id> <nisn_list.txt>')
        sys.exit(-1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment