Skip to content

Instantly share code, notes, and snippets.

@nfmcclure
Last active June 7, 2022 14:26
Show Gist options
  • Save nfmcclure/eb79164332c0d052d67e2604d29bd4b7 to your computer and use it in GitHub Desktop.
Save nfmcclure/eb79164332c0d052d67e2604d29bd4b7 to your computer and use it in GitHub Desktop.
webscrape some test results
import json
import time
import requests
from bs4 import BeautifulSoup
def main():
    """Scrape results from result.sebaonline.org and save them to ``results.json``.

    Every CNT code from 0001 to 0899 is combined with every roll number from
    0001 to 0999 (both zero-padded to four digits) and requested one at a
    time, sleeping before each request. Scanning of a CNT code stops at the
    first roll that yields a non-200 status, a body that is not valid JSON,
    or an empty JSON payload; the crawl then continues with the next CNT code.

    The collected data has the shape ``{cnt: {roll: hits}}`` where ``hits`` is
    ``payload['hits']['hits']`` of the decoded response. Whatever has been
    collected is written to ``results.json`` even if the run is interrupted
    or fails part-way, so a long crawl is not lost.

    Raises:
        requests.RequestException: If a request fails or exceeds the timeout.
        KeyError: If a non-empty payload has no ``['hits']['hits']`` entry.
    """
    # TODO: the option codes 'B22' and 'BM22' are not sent anywhere yet; it is
    # still undetermined where the site expects them.

    # Be nice to the server: pause (in seconds) before each request.
    wait_time = 0.5
    # Upper bound (in seconds) on a single request, so that one stalled
    # connection cannot hang the whole crawl.
    request_timeout = 30

    # Browser-like request headers.
    header = {
        "Accept": (
            "text/html,application/xhtml+xml,application/xml;q=0.9,"
            "image/avif,image/webp,image/apng,*/*;q=0.8,"
            "application/signed-exchange;v=b3;q=0.9"
        ),
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
        # The value previously carried a stray trailing comma ("keep-alive,").
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/102.0.0.0 Safari/537.36"
        ),
    }

    # NOTE(review): the '-' right after 'CNT_CODE=' is kept exactly as it was;
    # confirm whether one of the unused option codes was meant to precede it.
    url_template = 'https://result.sebaonline.org/result?CNT_CODE=-{}&ROLLNO={}'

    cnt_codes = ['{:04d}'.format(n) for n in range(1, 900)]
    roll_numbers = ['{:04d}'.format(n) for n in range(1, 1000)]

    final_results = {}
    try:
        for cnt in cnt_codes:
            for roll in roll_numbers:
                time.sleep(wait_time)
                print('Processing CNT = {}, ROLL = {}'.format(cnt, roll))

                url = url_template.format(cnt, roll)
                response = requests.get(
                    url, headers=header, timeout=request_timeout
                )
                if response.status_code != 200:
                    # No more rolls for this CNT; proceed to the next CNT.
                    break

                content = BeautifulSoup(response.content, 'html.parser')
                try:
                    result_data = json.loads(content.text)
                except ValueError:
                    # A 200 response whose body is not JSON carries no result;
                    # treat it like an empty payload instead of crashing.
                    break
                if not result_data:
                    break

                final_results.setdefault(cnt, {})[roll] = (
                    result_data['hits']['hits']
                )
        print('DONE! Writing to result file.')
    finally:
        # Always persist what was gathered, including after an error or
        # Ctrl-C, because a full crawl takes a very long time.
        with open('results.json', 'w') as f:
            json.dump(final_results, f)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment