Skip to content

Instantly share code, notes, and snippets.

Created December 10, 2023 14:00
Show Gist options
  • Save ShivangamSoni/dd2ee5d20348054f9440e099c6f53c7d to your computer and use it in GitHub Desktop.
Save ShivangamSoni/dd2ee5d20348054f9440e099c6f53c7d to your computer and use it in GitHub Desktop.
Extract JavaTpoint MCQs into an Excel file (Dependencies: requests, beautifulsoup4, lxml, xlsxwriter)
import requests
from bs4 import BeautifulSoup
import xlsxwriter
import time
from datetime import datetime
def main():
    """Interactively scrape one MCQ page and export it to an Excel file.

    Prompts for the page URL and the output file name, then runs four timed
    stages: download, parse, extract and write. Progress and the time taken
    by each stage are printed to stdout.

    Raises:
        requests.RequestException: If the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
    """
    url = input("Enter a JavaPoint MCQ URL: ").strip()
    xlsx_name = input("Enter a Name for the Excel File: ").strip()

    print("\n\nRequesting Web Page")
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response, exec_time = execute_with_time(requests.get, url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty spreadsheet.
    response.raise_for_status()
    html_text = response.text
    print(f"Web Page Requested ({exec_time})")

    print("\n\nParsing Web Page")
    # NOTE: the 'lxml' parser needs the third-party lxml package installed.
    soup, exec_time = execute_with_time(BeautifulSoup, html_text, 'lxml')
    print(f"Web Page Parsed ({exec_time})")

    print("\n\nExtracting MCQs")
    data, exec_time = execute_with_time(extract_data, soup)
    print(f"MCQs Extracted ({exec_time})")

    print("\n\nGenerating Excel")
    # generate_excel has no meaningful return value; only the timing is used.
    _, exec_time = execute_with_time(generate_excel, data, xlsx_name)
    print(f"Excel Generated ({exec_time})")
def extract_data(soup):
    """Collect the question, option-list and answer elements from a parsed page.

    Args:
        soup: A parsed document exposing ``find_all(name, class_=...)``
            (a ``BeautifulSoup`` object when called from ``main``).

    Returns:
        A 3-tuple ``(questions, options, answers)`` holding, in document
        order, every ``<p class="pq">``, every ``<ol class="pointsa">`` and
        every ``<div class="testanswer">`` found in ``soup``. The three
        collections are matched up purely by position by the caller.
    """
    questions = soup.find_all('p', class_="pq")
    options = soup.find_all('ol', class_="pointsa")
    answers = soup.find_all('div', class_="testanswer")
    return questions, options, answers
def generate_excel(data, title):
    """Write the extracted MCQs to ``<title>.xlsx``, one question per row.

    Args:
        data: Three parallel sequences ``(questions, options, answers)`` of
            parsed elements, as returned by ``extract_data``. They are paired
            by position with ``zip``, so surplus items in a longer sequence
            are ignored.
        title: Output file name without the ``.xlsx`` extension.

    Returns:
        None. The workbook is written to disk as a side effect.
    """
    headers = (
        "Question",
        "Option A",
        "Option B",
        "Option C",
        "Option D",
        "Answer",
        "Explanation",
    )
    answer_column = headers.index("Answer")

    # XlsxWriter only writes the file when the workbook is closed; the
    # context manager guarantees close() even if a row fails to serialise.
    with xlsxwriter.Workbook(f"{title}.xlsx") as workbook:
        worksheet = workbook.add_worksheet()

        for column, header in enumerate(headers):
            worksheet.write(0, column, header)

        for row, (q, o, a) in enumerate(zip(*data), start=1):
            worksheet.write(row, 0, q.text.strip())

            # Options are labelled A), B), C), ... in the order they appear.
            column = 1
            for i, opt in enumerate(o.find_all("li")):
                worksheet.write(row, column, f"{chr(i + 65)}) {opt.text.strip()}")
                column += 1

            # With fewer than four options, skip ahead so the answer and
            # explanation still land under their own headers. With more than
            # four, keep writing after the last option as before.
            column = max(column, answer_column)

            # The first <p> is treated as the answer and the second as the
            # explanation; either may be absent, in which case the cell is
            # left empty instead of aborting the whole export.
            paragraphs = a.find_all("p")
            answer_text = paragraphs[0].text.strip() if paragraphs else ""
            explanation_text = (
                paragraphs[1].text.strip() if len(paragraphs) > 1 else ""
            )
            worksheet.write(row, column, answer_text)
            worksheet.write(row, column + 1, explanation_text)
def execute_with_time(func, *args, **kwargs):
    """Call ``func`` and report how long the call took.

    Args:
        func: The callable to run.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A tuple ``(result, elapsed)`` where ``result`` is whatever ``func``
        returned and ``elapsed`` is the duration formatted as ``MM:SS:mmm``
        (zero-padded minutes, seconds and milliseconds).

    Raises:
        Whatever ``func`` raises; exceptions are not intercepted.
    """
    # perf_counter is a monotonic, high-resolution clock, so the measurement
    # is not disturbed by system clock adjustments.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time

    # Work in whole milliseconds so rounding can never yield "60" seconds
    # or a fractional value like "0.234" in the last field.
    total_ms = round(elapsed_time * 1000)
    minutes, remainder_ms = divmod(total_ms, 60_000)
    seconds, milliseconds = divmod(remainder_ms, 1000)
    return result, f"{minutes:02}:{seconds:02}:{milliseconds:03}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment