Skip to content

Instantly share code, notes, and snippets.

@jameelkaisar
Last active October 9, 2022 06:57
Show Gist options
  • Save jameelkaisar/7d50b714dfe3b46bba578c547a4260fe to your computer and use it in GitHub Desktop.
Save jameelkaisar/7d50b714dfe3b46bba578c547a4260fe to your computer and use it in GitHub Desktop.
NIT Srinagar Result Scraping Script (with Captcha Cracking)

NIT Srinagar Result Scraper

Installing Dependencies

sudo apt install tesseract-ocr

Installing Python Packages

pip install beautifulsoup4
pip install opencv-python
pip install pytesseract
pip install unidecode
pip install numpy

Usage

Creating Result Object

>>> r = Result()

Checking Student

>>> r.check_student("2020BITE001")
{'status': True, 'result': True}

>>> r.check_student("2020NITSGR0413")
{'status': True, 'result': True}

>>> r.check_student("2020BITE999")
{'status': True, 'result': False}

>>> r.check_student("2020NITSGR9999")
{'status': True, 'result': False}

Getting Semesters

>>> r.get_semesters("2020BITE001")
{'status': True, 'result': [('1', 'FIRST SEMESTER'), ('2', 'SECOND SEMESTER'), ('3', 'THIRD SEMESTER'), ('4', 'FOURTH SEMESTER')]}

>>> r.get_semesters("2020NITSGR0413")
{'status': True, 'result': [('1', 'FIRST SEMESTER'), ('2', 'SECOND SEMESTER'), ('3', 'THIRD SEMESTER'), ('4', 'FOURTH SEMESTER')]}

>>> r.get_semesters("2020BITE999")
{'status': True, 'message': 'Student does not exist!'}

>>> r.get_semesters("2020NITSGR9999")
{'status': True, 'message': 'Student does not exist!'}

Getting Result

>>> r.get_result("2020BITE001", 1)
{'status': True, 'result': {'Session': 'AUTUMN 2020', 'Name': 'JAMEEL KAISAR KHAN', 'Enrollment': '2020BITE001', 'Semester': 'FIRST SEMESTER', 'Degree': 'B.TECH', 'Branch': 'ITE', 'Publish Date': '12/05/2022', 'Semester Credits': '25', 'Semester Grade Points': '225', 'SGPA': '9.00', 'Cumulative Credits': '25', 'Cumulative Grade Points': '225', 'CGPA': '-', 'Result': 'PASS', 'Subjects': [{'Code': 'CHL101', 'Name': 'Chemistry Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'CHT101', 'Name': 'Engineering Chemistry', 'Grade': 'B+', 'Credits': '4'}, {'Code': 'CVT101', 'Name': 'Engineering Drawing', 'Grade': 'A', 'Credits': '4'}, {'Code': 'EEL101', 'Name': 'Basic Electrical Engineering Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'EET101', 'Name': 'Basic Electrical Engineering', 'Grade': 'A+', 'Credits': '4'}, {'Code': 'HST101', 'Name': 'Basic English and Communication Skills', 'Grade': 'A+', 'Credits': '3'}, {'Code': 'ITL101', 'Name': 'Computer Programming Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'ITT101', 'Name': 'Computer Programming', 'Grade': 'B+', 'Credits': '3'}, {'Code': 'MAT101', 'Name': 'Mathematics I', 'Grade': 'A', 'Credits': '4'}]}}

>>> r.get_result("2020NITSGR0413", "2")
{'status': True, 'result': {'Session': 'SPRING 2021', 'Name': 'JAMEEL KAISAR KHAN', 'Enrollment': '2020BITE001', 'Semester': 'SECOND SEMESTER', 'Degree': 'B.TECH', 'Branch': 'ITE', 'Publish Date': '31/05/2022', 'Semester Credits': '25', 'Semester Grade Points': '224', 'SGPA': '8.96', 'Cumulative Credits': '50', 'Cumulative Grade Points': '449', 'CGPA': '8.980', 'Result': 'PASS', 'Subjects': [{'Code': 'CHT102', 'Name': 'Environmental Studies', 'Grade': 'A', 'Credits': '3'}, {'Code': 'CVT102', 'Name': 'Engineering Mechanics', 'Grade': 'A+', 'Credits': '4'}, {'Code': 'HSL101', 'Name': 'Language Laboratory', 'Grade': 'A+', 'Credits': '1'}, {'Code': 'HST102', 'Name': 'Advanced English Comm. Skills & Organizational Behavior', 'Grade': 'A+', 'Credits': '3'}, {'Code': 'MAT102', 'Name': 'Mathematics II', 'Grade': 'B+', 'Credits': '4'}, {'Code': 'MET101', 'Name': 'Elements of Mechanical Engg.', 'Grade': 'B+', 'Credits': '3'}, {'Code': 'PHL101', 'Name': 'Physics Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'PHT101', 'Name': 'Engineering Physics', 'Grade': 'A', 'Credits': '4'}, {'Code': 'WSL101', 'Name': 'Work shop Practice', 'Grade': 'B+', 'Credits': '2'}]}}

>>> r.get_result("2020BITE001", 8)
{'status': True, 'message': 'Invalid semester or result not published yet!'}

>>> r.get_result("2020NITSGR9999", 10)
{'status': True, 'message': 'Student does not exist!'}

>>> r.get_result("2020BITE999", 1)
{'status': True, 'message': 'Student does not exist!'}

>>> r.get_result("2020NITSGR9999", 1)
{'status': True, 'message': 'Student does not exist!'}

Bulk Usage

Creating Result Object

>>> r = Result()

You can set a custom limit on the number of concurrent threads. The default max_threads value is 5.

>>> r = Result(max_threads=10)

Checking Student

>>> r.check_student_bulk(["2020BITE001", "2020NITSGR0413", "2020BITE999", "2020NITSGR9999"])
[{'status': True, 'result': True}, {'status': True, 'result': True}, {'status': True, 'result': False}, {'status': True, 'result': False}]

Getting Semesters

>>> r.get_semesters_bulk(["2020BITE001", "2020NITSGR0413", "2020BITE999", "2020NITSGR9999"])
[{'status': True, 'result': [('1', 'FIRST SEMESTER'), ('2', 'SECOND SEMESTER'), ('3', 'THIRD SEMESTER'), ('4', 'FOURTH SEMESTER')]}, {'status': True, 'result': [('1', 'FIRST SEMESTER'), ('2', 'SECOND SEMESTER'), ('3', 'THIRD SEMESTER'), ('4', 'FOURTH SEMESTER')]}, {'status': True, 'message': 'Student does not exist!'}, {'status': True, 'message': 'Student does not exist!'}]

Getting Result

>>> r.get_result_bulk([("2020BITE001", 1), ("2020NITSGR0413", "2"), ("2020BITE001", 8), ("2020NITSGR9999", 10), ("2020BITE999", 1), ("2020NITSGR9999", 1)])
[{'status': True, 'result': {'Session': 'AUTUMN 2020', 'Name': 'JAMEEL KAISAR KHAN', 'Enrollment': '2020BITE001', 'Semester': 'FIRST SEMESTER', 'Degree': 'B.TECH', 'Branch': 'ITE', 'Publish Date': '12/05/2022', 'Semester Credits': '25', 'Semester Grade Points': '225', 'SGPA': '9.00', 'Cumulative Credits': '25', 'Cumulative Grade Points': '225', 'CGPA': '-', 'Result': 'PASS', 'Subjects': [{'Code': 'CHL101', 'Name': 'Chemistry Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'CHT101', 'Name': 'Engineering Chemistry', 'Grade': 'B+', 'Credits': '4'}, {'Code': 'CVT101', 'Name': 'Engineering Drawing', 'Grade': 'A', 'Credits': '4'}, {'Code': 'EEL101', 'Name': 'Basic Electrical Engineering Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'EET101', 'Name': 'Basic Electrical Engineering', 'Grade': 'A+', 'Credits': '4'}, {'Code': 'HST101', 'Name': 'Basic English and Communication Skills', 'Grade': 'A+', 'Credits': '3'}, {'Code': 'ITL101', 'Name': 'Computer Programming Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'ITT101', 'Name': 'Computer Programming', 'Grade': 'B+', 'Credits': '3'}, {'Code': 'MAT101', 'Name': 'Mathematics I', 'Grade': 'A', 'Credits': '4'}]}}, {'status': True, 'result': {'Session': 'SPRING 2021', 'Name': 'JAMEEL KAISAR KHAN', 'Enrollment': '2020BITE001', 'Semester': 'SECOND SEMESTER', 'Degree': 'B.TECH', 'Branch': 'ITE', 'Publish Date': '26/09/2022', 'Semester Credits': '25', 'Semester Grade Points': '224', 'SGPA': '8.96', 'Cumulative Credits': '50', 'Cumulative Grade Points': '449', 'CGPA': '8.980', 'Result': 'PASS', 'Subjects': [{'Code': 'CHT102', 'Name': 'Environmental Studies', 'Grade': 'A', 'Credits': '3'}, {'Code': 'CVT102', 'Name': 'Engineering Mechanics', 'Grade': 'A+', 'Credits': '4'}, {'Code': 'HSL101', 'Name': 'Language Laboratory', 'Grade': 'A+', 'Credits': '1'}, {'Code': 'HST102', 'Name': 'Advanced English Comm. Skills & Organizational Behavior', 'Grade': 'A+', 'Credits': '3'}, {'Code': 'MAT102', 'Name': 'Mathematics II', 'Grade': 'B+', 'Credits': '4'}, {'Code': 'MET101', 'Name': 'Elements of Mechanical Engg.', 'Grade': 'B+', 'Credits': '3'}, {'Code': 'PHL101', 'Name': 'Physics Laboratory', 'Grade': 'A', 'Credits': '1'}, {'Code': 'PHT101', 'Name': 'Engineering Physics', 'Grade': 'A', 'Credits': '4'}, {'Code': 'WSL101', 'Name': 'Work shop Practice', 'Grade': 'B+', 'Credits': '2'}]}}, {'status': True, 'message': 'Invalid semester or result not published yet!'}, {'status': True, 'message': 'Student does not exist!'}, {'status': True, 'message': 'Student does not exist!'}, {'status': True, 'message': 'Student does not exist!'}]

Issues

Sometimes you may get the following output.

>>> r.get_result("2020BITE001", 1)
{'status': False, 'message': "Couldn't crack captcha, please retry!"}

This happens when the script is unable to crack the captcha. You can simply try again, or increase the number of attempts the script makes before it gives up on the captcha. The default max_tries value is 10.

>>> r = Result(max_tries=15)

Scraping Source

https://result.nitsri.ac.in/

PS

DO NOT MISUSE THIS SCRIPT

from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED
from urllib.request import urlopen, urlretrieve, Request
from urllib.parse import urlencode, urljoin
from urllib.error import HTTPError
from bs4 import BeautifulSoup
from unidecode import unidecode
from pathlib import Path
import numpy as np
import pytesseract
import random
import time
import cv2
import os
import re
class Result():
    """Scraper for the NIT Srinagar result portal.

    Every public query method returns a dict. ``{"status": True, "result": ...}``
    carries data, ``{"status": True, "message": ...}`` reports that the portal
    had nothing to return (unknown student, unpublished semester), and
    ``{"status": False, "message": ...}`` reports that the scraper itself
    failed (captcha not cracked, or a bulk request that raised).
    """

    # (result key, id of the <span> holding its value), in output order.
    _SUMMARY_FIELDS = (
        ("Session", "lblSession"),
        ("Name", "lblStudent"),
        ("Enrollment", "lblRollno"),
        ("Semester", "lblSemester"),
        ("Degree", "lbldegree"),
        ("Branch", "lblbranch"),
        ("Publish Date", "lblPublishDate"),
        ("Semester Credits", "lblearn"),
        ("Semester Grade Points", "lblgd"),
        ("SGPA", "lblSgpa"),
        ("Cumulative Credits", "lblearn1"),
        ("Cumulative Grade Points", "lblgd1"),
        ("CGPA", "lblSgpa1"),
        ("Result", "lblresult"),
    )
    # Keys paired positionally with the <td> cells of each subject row.
    _SUBJECT_KEYS = ("Code", "Name", "Grade", "Credits")

    def __init__(self, max_tries=10, max_threads=5):
        """Store the configuration and create the temporary directory.

        Args:
            max_tries: Number of captcha attempts ``get_result`` makes before
                giving up.
            max_threads: Maximum number of requests the ``*_bulk`` methods run
                concurrently.
        """
        self.LINK = "https://result.nitsri.ac.in/"
        self.MAX_TRIES = max_tries
        self.MAX_THREADS = max_threads
        # Hard-coded __VIEWSTATE value sent with the initial lookup POST.
        self.VIEWSTATE = "/wEPDwUKLTE3ODk4NzY1Mg9kFgICAQ9kFgQCCQ8QZGQWAWZkAh8PDxYCHgdWaXNpYmxlaGQWBAIPDxQrAAIPFgQeC18hRGF0YUJvdW5kZx4LXyFJdGVtQ291bnQC/////w9kZGQCEQ8PFgIeBFRleHRlZGQYAwUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgQFCmJ0bmltZ1Nob3cFEGJ0bmltZ1Nob3dSZXN1bHQFCGJ0blByaW50BQxidG5pbWdDYW5jZWwFEGx2U3ViamVjdERldGFpbHMPZ2QFCENhcHRjaGExDwUkNGRlNDVhNGMtOTIyYi00MTM1LWFkZTgtZjhkMzczNDZlZThmZNEFjPN4CBwfLWWlZjINWo62U4wgslxqQhHHOsA6Flw+"
        # Values posted as the x/y fields of the two image buttons.
        self.SHOW_X = "32"
        self.SHOW_Y = "15"
        self.RESULT_X = "40"
        self.RESULT_Y = "11"
        # Downloaded captcha images are stored here while they are being read.
        self.TEMP_PATH = ".tmp/"
        Path(self.TEMP_PATH).mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _post(self, data):
        """POST ``data`` as a form to ``self.LINK`` and return the parsed page."""
        request = Request(self.LINK, data=urlencode(data).encode())
        with urlopen(request) as response:
            # Undecodable bytes are dropped rather than raising, so a stray
            # byte in the page cannot abort a request.
            source = response.read().decode(errors='ignore')
        return BeautifulSoup(source, 'html.parser')

    def _lookup(self, student):
        """Submit the lookup form for ``student``.

        Returns:
            A ``(soup, student_id)`` tuple. ``student_id`` is the ``value`` of
            the ``hfIdno`` input, or ``None`` when that input has no value,
            which the public methods treat as "student does not exist".
        """
        soup = self._post({
            "__VIEWSTATE": self.VIEWSTATE,
            "btnimgShow.x": self.SHOW_X,
            "btnimgShow.y": self.SHOW_Y,
            "txtRegno": student,
        })
        student_id = soup.find('input', {'id': 'hfIdno'}).get('value')
        return soup, student_id

    @staticmethod
    def _semesters(soup):
        """Return ``(value, label)`` for each ``ddlSemester`` option but the first."""
        options = soup.find('select', {'id': 'ddlSemester'}).find_all('option')[1:]
        return [(option.get('value'), option.getText()) for option in options]

    def _run_bulk(self, helper, args):
        """Run ``helper(arg, index, results)`` for every item of ``args``.

        At most ``self.MAX_THREADS`` calls run at once. When ``MAX_THREADS``
        is not a positive integer the executor's default pool size is used.
        A call that raises does not abort the batch; its slot receives a
        ``{"status": False, "message": ...}`` entry describing the exception.

        Returns:
            A list of results in the same order as ``args``.
        """
        args = list(args)
        results = [0] * len(args)
        limit = self.MAX_THREADS
        if not (isinstance(limit, int) and limit > 0):
            limit = None
        with ThreadPoolExecutor(max_workers=limit) as executor:
            futures = {
                executor.submit(helper, arg, index, results): index
                for index, arg in enumerate(args)
            }
            for future, index in futures.items():
                # exception() blocks until this future has finished.
                error = future.exception()
                if error is not None:
                    results[index] = {
                        "status": False,
                        "message": f"Request failed: {type(error).__name__}: {error}",
                    }
        return results

    # ------------------------------------------------------------------
    # Student existence
    # ------------------------------------------------------------------

    def check_student(self, student):
        """Report whether the portal recognises ``student``.

        Returns:
            ``{"status": True, "result": bool}``.
        """
        _, student_id = self._lookup(student)
        return {"status": True, "result": student_id is not None}

    def check_student_bulk_helper(self, arg, i, results):
        """Store ``check_student(arg)`` at ``results[i]``."""
        results[i] = self.check_student(arg)

    def check_student_bulk(self, args):
        """Run ``check_student`` for every student in ``args`` concurrently.

        Returns:
            A list of result dicts in the same order as ``args``.
        """
        return self._run_bulk(self.check_student_bulk_helper, args)

    # ------------------------------------------------------------------
    # Semesters
    # ------------------------------------------------------------------

    def get_semesters(self, student):
        """List the semesters the portal offers for ``student``.

        Returns:
            ``{"status": True, "result": [(value, label), ...]}``, or
            ``{"status": True, "message": "Student does not exist!"}``.
        """
        soup, student_id = self._lookup(student)
        if student_id is None:
            return {"status": True, "message": "Student does not exist!"}
        return {"status": True, "result": self._semesters(soup)}

    def get_semesters_bulk_helper(self, arg, i, results):
        """Store ``get_semesters(arg)`` at ``results[i]``."""
        results[i] = self.get_semesters(arg)

    def get_semesters_bulk(self, args):
        """Run ``get_semesters`` for every student in ``args`` concurrently.

        Returns:
            A list of result dicts in the same order as ``args``.
        """
        return self._run_bulk(self.get_semesters_bulk_helper, args)

    # ------------------------------------------------------------------
    # Results
    # ------------------------------------------------------------------

    def get_result(self, student, semester):
        """Fetch the result of ``student`` for ``semester``.

        Each attempt looks the student up again, reads the captcha shown on
        that page and submits the result form. An attempt whose response has
        no ``PnlShowResult`` panel counts as a failed captcha and is retried,
        up to ``self.MAX_TRIES`` times.

        Args:
            student: Value posted as ``txtRegno``.
            semester: Semester value; converted with ``str`` before comparing
                it against the option values offered by the page.

        Returns:
            ``{"status": True, "result": {...}}`` on success,
            ``{"status": True, "message": ...}`` when the student or semester
            is not available, or ``{"status": False, "message": ...}`` when
            every attempt failed.
        """
        semester = str(semester)
        tries = 0
        while tries < self.MAX_TRIES:
            soup, student_id = self._lookup(student)
            if student_id is None:
                return {"status": True, "message": "Student does not exist!"}

            # An empty semester list fails this membership test as well.
            offered = {value for value, _label in self._semesters(soup)}
            if semester not in offered:
                return {"status": True, "message": "Invalid semester or result not published yet!"}

            viewstate = soup.find('input', {'id': '__VIEWSTATE'}).get('value')
            captcha = soup.find('img', {'width': '200'}).get('src')
            captcha_text = self.get_captcha(captcha)
            data = {
                "__VIEWSTATE": viewstate,
                "txtRegno": student,
                "hfIdno": student_id,
                "ddlSemester": semester,
                "txtCaptcha": captcha_text,
                "btnimgShowResult.x": self.RESULT_X,
                "btnimgShowResult.y": self.RESULT_Y
            }
            # NOTE(review): the original author's comment (translated) says,
            # roughly, that the submission does not work properly without
            # this pause and that the reason is unknown.
            time.sleep(5)
            html = self._post(data).find('div', {'id': 'PnlShowResult'})
            if html is None:
                tries += 1
                continue
            return {"status": True, "result": self.scrape_result(html)}
        return {"status": False, "message": "Couldn't crack captcha, please retry!"}

    def get_result_bulk_helper(self, arg, i, results):
        """Store ``get_result(*arg)`` at ``results[i]``."""
        results[i] = self.get_result(*arg)

    def get_result_bulk(self, args):
        """Run ``get_result`` for every ``(student, semester)`` pair concurrently.

        Returns:
            A list of result dicts in the same order as ``args``.
        """
        return self._run_bulk(self.get_result_bulk_helper, args)

    # ------------------------------------------------------------------
    # Captcha handling
    # ------------------------------------------------------------------

    def get_captcha(self, captcha):
        """Download the captcha image and return the recognised text.

        Args:
            captcha: ``src`` of the captcha image, resolved against
                ``self.LINK``.

        Returns:
            At most the first five characters of the recognised text.
        """
        captcha_link = urljoin(self.LINK, captcha)
        # A random suffix keeps concurrent downloads in separate files.
        suffix = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=10))
        captcha_src = str(Path(self.TEMP_PATH, f"Captcha_{suffix}.jfif"))
        try:
            urlretrieve(captcha_link, captcha_src)
            return self.crack_captcha(captcha_src)[:5]
        finally:
            # Remove the image even when the download or the OCR raised.
            try:
                os.remove(captcha_src)
            except FileNotFoundError:
                pass

    def crack_captcha(self, captcha):
        """Read the text of the captcha image stored at path ``captcha``.

        Pixels inside the HSV range (36, 25, 25)-(70, 255, 255) are kept,
        every remaining dark pixel is turned white, and the image is passed
        to Tesseract as a single text line restricted to A-Z and 0-9.

        Returns:
            The recognised text, upper-cased and stripped of non-word
            characters, or ``""`` when the image cannot be read.
        """
        img = cv2.imread(captcha)
        if img is None:
            # Unreadable image: an empty answer makes the caller retry.
            return ""
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, (36, 25, 25), (70, 255, 255))
        keep = mask > 0
        green = np.zeros_like(img, np.uint8)
        green[keep] = img[keep]
        # Whiten every pixel whose three channels are all below 100; this
        # includes the background zeroed out above.
        dark = (green < 100).all(axis=2)
        green[dark] = (255, 255, 255)
        custom_config = r'-l eng --oem 3 --psm 7 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"'
        text = pytesseract.image_to_string(green, config=custom_config)
        return re.sub(r'\W+', '', unidecode(text)).upper()

    # ------------------------------------------------------------------
    # Result parsing
    # ------------------------------------------------------------------

    def scrape_result(self, html):
        """Convert the ``PnlShowResult`` panel into a plain dict.

        Args:
            html: Parsed element containing the result spans and tables.

        Returns:
            A dict with one string per summary field plus ``"Subjects"``, a
            list of dicts keyed by ``Code``, ``Name``, ``Grade`` and
            ``Credits``.
        """
        student_result = {
            key: html.find('span', {'id': span_id}).getText()
            for key, span_id in self._SUMMARY_FIELDS
        }
        # Subjects are read from the second "table-data" table; its first
        # row is skipped.
        rows = html.find_all('table', {'class': 'table-data'})[1].find_all('tr')[1:]
        student_result["Subjects"] = [
            dict(zip(self._SUBJECT_KEYS, (cell.getText().strip() for cell in row.find_all('td'))))
            for row in rows
        ]
        return student_result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment