Skip to content

Instantly share code, notes, and snippets.

@lgommans
Last active July 2, 2023 23:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save lgommans/1b766fd7a24b85f56be13b37c4400218 to your computer and use it in GitHub Desktop.
Save lgommans/1b766fd7a24b85f56be13b37c4400218 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Path of the CSV file that the downloaded questions are written to.
triviafile = 'trivia.csv'
# Base URL of the trivia API; the endpoint file names are appended to it.
apiurl = 'https://opentdb.com/'
# Seconds to pause between two consecutive question requests.
sleep_between_requests = 0.1
# Number of questions asked for per request; the download loop lowers this
# value once fewer questions than this are expected to remain.
questionsPerRequest = 50
import requests, os, hashlib, json, sys, csv, urllib, time
# Import the submodule explicitly: a bare ``import urllib`` does not load
# ``urllib.parse`` by itself, so ``urllib.parse.unquote`` only resolves when
# some other module happens to have imported it already.
from urllib.parse import unquote
def cget(url, cachedir = b'cache'):
    ''' Cached GET request.

    The response body is stored in cachedir in a file named "c-" followed by
    the MD5 hex digest of the URL. Every cached URL is also listed, one per
    line, in a file called "index" in the same directory.

    The index is loaded into the module-level list _cache on the first call
    only, and stays open for appending through _cacheindex. Passing a
    different cachedir on a later call therefore still uses the index of the
    first one.

    Parameters:
        url      -- the URL to fetch; must be a str.
        cachedir -- the cache directory as bytes; created when missing.

    Returns the response text, or False when the server answers with a
    status code other than 200 (such responses are not cached).
    '''
    global _cache, _cacheindex

    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)

    if '_cache' not in globals():
        indexfile = os.path.join(cachedir, b'index')
        if os.path.isfile(indexfile):
            with open(indexfile, encoding='utf-8') as f:
                # Drop empty entries such as the one after the final newline.
                _cache = [line for line in f.read().replace('\r', '').split('\n') if line]
        else:
            # Must be a list: new URLs are added with append() further down.
            _cache = []

        _cacheindex = open(indexfile, 'a', encoding='utf-8')

    h = b'c-' + bytes(hashlib.new('md5', bytes(url, 'utf-8')).hexdigest(), 'utf-8')
    fname = os.path.join(cachedir, h)

    if url in _cache:
        try:
            # Read with the same encoding the file was written with.
            with open(fname, encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            # The index lists the URL but the file is gone: fetch it again.
            pass

    print('Performing GET for {}'.format(url))
    response = requests.get(url)
    if response.status_code != 200:
        return False

    with open(fname, 'w', encoding='utf-8') as f:
        f.write(response.text)

    if url not in _cache:
        _cache.append(url)
        _cacheindex.write(url + '\n')
        # Flush right away so the index matches the files on disk even when
        # the script is interrupted before the interpreter closes the file.
        _cacheindex.flush()

    return response.text
# Answer a help request before doing anything else, so that asking for the
# usage text neither creates the cache directory nor contacts the API.
if '-h' in sys.argv or '--help' in sys.argv:
    print('''Usage:
getanswers.py
It will create a directory called "cache" and a file called "trivia.csv".
The file trivia.csv has its own headers, so the fields should be clear.
The first answer, 'Answer 1', is the correct answer to the question.
Answers 3 and 4 are empty if the type is 'boolean' (true/false question).
''')
    sys.exit(1)

# Example (truncated) response of api_count_global.php:
#{'overall': {'total_num_of_questions': 6098, 'total_num_of_pending_questions': 1710, 'total_num_of_verified_questions': 3031, 'total_num_of_rejected_questions': 1357}, 'categories': {'9': {'total_num_of_questions': 822, 'total_num_of_pending_questions': 307, 'total_num_of_verified_questions': 208, 'total_num_of_rejected_questions': 307}, '10': {'total_num_of_questions': 134, 'total_num_of_pending_questions': 50, 'total_num_of_verified_questions': 69, 'total_num_of_rejected_questions': 15}, '11': {'total_num_of_questions': 296, 'total_num_of_pending_questions': 83, 'total_num_of_verified_questions': 179, 'total_num_of_rejected_questions': 34}, '12': {'total_num_of_questions': 459, 'total_num_of_pending_questions': 130, 'total_num_of_verified_questions': 270, 'total_num_of_rejected_questions': 59}, '13': {'total_num_of_questions': 37, 'total_num_of_pending_questions': 8, 'total_num_of_verified_questions': 22, 'total_num_of_rejected_questions': 7},
counts_text = cget('{}api_count_global.php'.format(apiurl))
if counts_text is False:
    # cget() returns False for any non-200 answer; json.loads(False) would
    # only produce an unhelpful TypeError.
    print('Error: could not retrieve the question counts from {}'.format(apiurl))
    sys.exit(2)
counts = json.loads(counts_text)
# Number of questions the download is expected to yield in total.
totalrows = int(counts['overall']['total_num_of_verified_questions'])

# Example response of api_category.php:
# {'trivia_categories': [{'id': 9, 'name': 'General Knowledge'}, {'id': 10, 'name': 'Entertainment: Books'}, {'id': 11, 'name': 'Entertainment: Film'}, {'id': 12, 'name': 'Entertainment: Music'}, {'id': 13, 'name': 'Entertainment: Musicals & Theatres'}, {'id': 14, 'name': 'Entertainment: Television'}, {'id': 15, 'name': 'Entertainment: Video Games'}, {'id': 16, 'name': 'Entertainment: Board Games'}, {'id': 17, 'name': 'Science & Nature'}, {'id': 18, 'name': 'Science: Computers'}, {'id': 19, 'name': 'Science: Mathematics'}, {'id': 20, 'name': 'Mythology'}, {'id': 21, 'name': 'Sports'}, {'id': 22, 'name': 'Geography'}, {'id': 23, 'name': 'History'}, {'id': 24, 'name': 'Politics'}, {'id': 25, 'name': 'Art'}, {'id': 26, 'name': 'Celebrities'}, {'id': 27, 'name': 'Animals'}, {'id': 28, 'name': 'Vehicles'}, {'id': 29, 'name': 'Entertainment: Comics'}, {'id': 30, 'name': 'Science: Gadgets'}, {'id': 31, 'name': 'Entertainment: Japanese Anime & Manga'}, {'id': 32, 'name': 'Entertainment: Cartoon & Animations'}]}
categories_text = cget('{}api_category.php'.format(apiurl))
if categories_text is False:
    print('Error: could not retrieve the category list from {}'.format(apiurl))
    sys.exit(2)
tmpcats = json.loads(categories_text)
# Map each category id to its display name.
cats = {entry['id']: entry['name'] for entry in tmpcats['trivia_categories']}
del tmpcats
# Download all questions and write them to the CSV file.
#
# The file is opened in a ``with`` block so that it is flushed and closed on
# every way out of the loop, including sys.exit() and uncaught exceptions.
with open(triviafile, 'w', newline='', encoding='utf-8') as csvfile:
    out = csv.writer(csvfile)
    out.writerow(['Category', 'Type', 'Difficulty', 'Question', 'Answer 1', 'Answer 2', 'Answer 3', 'Answer 4'])

    # Request a session token from the configured API host (not a hard-coded
    # one). The token is sent along with every question request below.
    token = requests.get('{}api_token.php?command=request'.format(apiurl)).json()['token']

    donerows = 0
    last = False
    while True:
        remaining = totalrows - donerows
        if remaining <= 0:
            # The expected total was reached but the API has not said "done"
            # yet: keep asking for single questions until it does.
            questionsPerRequest = 1
            print('Should be done, adjusting questions per request to 1')
        elif remaining < questionsPerRequest:
            questionsPerRequest = remaining
            print('Should be done soon, adjusting questions per request to {}'.format(questionsPerRequest))

        request = requests.get('{}api.php?amount={}&token={}&encode=url3986'.format(apiurl, questionsPerRequest, token)).json()
        if request['response_code'] == 4:
            # Response code 4 is treated as the signal that nothing is left.
            last = True
        elif request['response_code'] != 0:
            print('Error: response code {}. Full response:\n{}'.format(request['response_code'], json.dumps(request)))
            sys.exit(2)

        # Every field arrives percent-encoded (encode=url3986), so decode each
        # one. The correct answer always goes first, followed by the wrong ones.
        rows = [
            [
                unquote(question['category']),
                unquote(question['type']),
                unquote(question['difficulty']),
                unquote(question['question']),
                unquote(question['correct_answer']),
            ] + [unquote(wrong) for wrong in question['incorrect_answers']]
            for question in request.get('results', [])
        ]
        out.writerows(rows)
        donerows += len(rows)

        # Avoid a ZeroDivisionError when the API reports zero verified questions.
        percent = round(donerows / totalrows * 100, 2) if totalrows else 100.0
        print('Did {}/{} ({}%)...'.format(donerows, totalrows, percent))

        if last:
            print('The API indicated that we are done.')
            break

        time.sleep(sleep_between_requests)
#{'response_code': 0, 'results': [{'category': 'Science%20%26%20Nature', 'type': 'boolean', 'difficulty': 'easy', 'question': 'Igneous%20rocks%20are%20formed%20by%20excessive%20heat%20and%20pressure.', 'correct_answer': 'False', 'incorrect_answers': ['True']}, {'category': 'Entertainment%3A%20Cartoon%20%26%20Animations', 'type': 'multiple', 'difficulty': 'easy', 'question': 'In%20South%20Park%2C%20what%20is%20Stan%27s%20surname%3F', 'correct_answer': 'Marsh', 'incorrect_answers': ['Stotch', 'Broflovski', 'Tweak']}, etc
@lgommans
Copy link
Author

Added encoding='utf-8' as a fix identified by Lukáš Bláha. Thanks, Lukáš!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment