Created
December 10, 2023 14:00
-
-
Save ShivangamSoni/dd2ee5d20348054f9440e099c6f53c7d to your computer and use it in GitHub Desktop.
Extract JavaPoint MCQs into an Excel file (Dependencies: requests, beautifulsoup4, lxml, xlsxwriter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import xlsxwriter | |
import time | |
from datetime import datetime | |
def main():
    """Interactively scrape an MCQ page and export it to an Excel file.

    Prompts for the page URL and the output file name, then runs four timed
    stages: download, parse, extract, and write. Progress and the duration of
    each stage are printed to stdout.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = input("Enter a JavaPoint MCQ URL: ").strip()
    xlsx_name = input("Enter a Name for the Excel File: ").strip()

    print("\n\nRequesting Web Page")
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response, exec_time = execute_with_time(requests.get, url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error page as MCQs.
    response.raise_for_status()
    html_text = response.text
    print(f"Web Page Requested ({exec_time})")

    print("\n\nParsing Web Page")
    soup, exec_time = execute_with_time(BeautifulSoup, html_text, 'lxml')
    print(f"Web Page Parsed ({exec_time})")

    print("\n\nExtracting MCQs")
    data, exec_time = execute_with_time(extract_data, soup)
    print(f"MCQs Extracted ({exec_time})")

    print("\n\nGenerating Excel")
    # generate_excel returns nothing useful; only the timing is needed here.
    _, exec_time = execute_with_time(generate_excel, data, xlsx_name)
    print(f"Excel Generated ({exec_time})")
def extract_data(soup):
    """Collect the question, option-list and answer elements from a parsed page.

    Args:
        soup: Parsed document exposing ``find_all(name, class_=...)``.

    Returns:
        A 3-tuple ``(questions, options, answers)`` holding the results of
        looking up ``<p class="pq">``, ``<ol class="pointsa">`` and
        ``<div class="testanswer">`` respectively, in that order.
    """
    # (tag name, CSS class) for questions, option lists and answer blocks.
    selectors = (
        ("p", "pq"),
        ("ol", "pointsa"),
        ("div", "testanswer"),
    )
    return tuple(
        soup.find_all(tag, class_=css_class) for tag, css_class in selectors
    )
def generate_excel(data, title):
    """Write the extracted MCQs to ``<title>.xlsx``, one question per row.

    Layout: column 0 holds the question, followed by one column per option
    (at least four: "Option A" .. "Option D"), then "Answer" and
    "Explanation". The Answer/Explanation columns are fixed for the whole
    sheet, so a question with fewer options leaves blank option cells instead
    of shifting its answer under the wrong header.

    Args:
        data: Three parallel sequences ``(questions, options, answers)`` of
            parsed elements, as returned by ``extract_data``. Each option
            element is searched for ``<li>`` children and each answer element
            for ``<p>`` children (first = answer, second = explanation).
        title: Output file name without the ``.xlsx`` extension.
    """
    # Extract all text first so the number of option columns is known before
    # the header row is written.
    rows = []
    for question, option_list, answer_block in zip(*data):
        option_texts = [item.text.strip() for item in option_list.find_all("li")]
        paragraphs = [p.text.strip() for p in answer_block.find_all("p")]
        # Pad so an answer block with fewer than two <p> elements yields blank
        # cells rather than an unpacking error.
        answer, explanation = (paragraphs + ["", ""])[:2]
        rows.append((question.text.strip(), option_texts, answer, explanation))

    # Never fewer than the four option columns the sheet has always had.
    option_columns = max([4] + [len(option_texts) for _, option_texts, _, _ in rows])
    answer_column = 1 + option_columns
    explanation_column = answer_column + 1

    # The context manager closes (and thereby saves) the workbook even if a
    # write fails part-way through.
    with xlsxwriter.Workbook(f"{title}.xlsx") as workbook:
        worksheet = workbook.add_worksheet()

        worksheet.write(0, 0, "Question")
        for index in range(option_columns):
            worksheet.write(0, 1 + index, f"Option {chr(ord('A') + index)}")
        worksheet.write(0, answer_column, "Answer")
        worksheet.write(0, explanation_column, "Explanation")

        for row, (question_text, option_texts, answer, explanation) in enumerate(rows, start=1):
            # write_string keeps scraped text literal; plain write() would
            # treat text starting with "=" as a formula.
            worksheet.write_string(row, 0, question_text)
            for index, option_text in enumerate(option_texts):
                worksheet.write_string(
                    row, 1 + index, f"{chr(ord('A') + index)}) {option_text}"
                )
            worksheet.write_string(row, answer_column, answer)
            worksheet.write_string(row, explanation_column, explanation)
def execute_with_time(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and measure how long the call takes.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A tuple ``(result, elapsed)`` where ``result`` is whatever ``func``
        returned and ``elapsed`` is the duration formatted as ``MM:SS.mmm``
        (zero-padded minutes, seconds and milliseconds).

    Any exception raised by ``func`` propagates unchanged.
    """
    # perf_counter is monotonic, so the interval cannot go negative or jump
    # when the system clock is adjusted.
    start = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - start

    # Work in whole milliseconds so rounding can never produce a "1000 ms"
    # component; any carry is absorbed by the divmod chain.
    total_ms = round(elapsed * 1000)
    minutes, remainder_ms = divmod(total_ms, 60 * 1000)
    seconds, milliseconds = divmod(remainder_ms, 1000)
    return result, f"{minutes:02}:{seconds:02}.{milliseconds:03}"
# Run the interactive scraper only when executed as a script, so importing
# this module (e.g. to reuse its functions) does not prompt for input.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment