-
-
Save adeak/acbbc406feea98b2423eca2e9954a0fa to your computer and use it in GitHub Desktop.
hunt for pythoff candidate answers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Hunt for "pythoff" candidate answers in previously downloaded batches.

Assumes batch files named ``user<ID>_answers_batch_<N>.json`` as generated by
grab_all_answers.py, and loops over every available user ID. For each user
two files are written:

* ``user<ID>_suspect_pythoff_answer_ids.json`` -- JSON list of answer IDs
* ``user<ID>_suspect_pythoff_answer_links`` -- one answer link per line
"""
import json
import pathlib
import re

# A line is suspect when it contains ``print`` followed by a space, or
# ``xrange`` (presumably Python 2 style code). Compiled once, used per line.
SUSPECT_PATTERN = re.compile(r'\b(print |xrange)')

BATCH_GLOB = 'user*_answers_batch_*.json'


def find_suspect_lines(body):
    """Return the lines of *body* that match ``SUSPECT_PATTERN``."""
    return [line for line in body.splitlines() if SUSPECT_PATTERN.search(line)]


def collect_suspect_ids(directory='.', verbose=False):
    """Scan every batch file in *directory* for suspect answers.

    Args:
        directory: Folder containing the ``user*_answers_batch_*.json`` files.
        verbose: When true, print the suspicious lines of each hit on the fly.

    Returns:
        A dict mapping each user prefix (e.g. ``'user1234'``) to the list of
        suspect answer IDs found in that user's batches. A user whose batches
        contain no suspects maps to an empty list; no batch files at all
        yields an empty dict.
    """
    suspects = {}
    for file in sorted(pathlib.Path(directory).glob(BATCH_GLOB)):
        # Keep one list per user so IDs of different users are never mixed.
        userprefix = file.name.split('_', 1)[0]  # user1234
        suspect_ids = suspects.setdefault(userprefix, [])

        with open(file) as f:
            dat = json.load(f)

        # json has ['items'] -> list of answers -> ['body'], ['answer_id'] etc.
        for answer in dat['items']:
            suspect_lines = find_suspect_lines(answer['body'])
            if not suspect_lines:
                continue
            answer_id = answer['answer_id']
            suspect_ids.append(answer_id)

            if verbose:
                print(f'answer {answer_id}:')
                print(*suspect_lines, sep='\n')
                print()
    return suspects


def export_suspects(suspects, directory='.'):
    """Write the ID list and the link list of every user into *directory*.

    Args:
        suspects: Mapping of user prefix to suspect answer IDs, as returned
            by ``collect_suspect_ids``.
        directory: Folder the output files are written to.
    """
    target = pathlib.Path(directory)
    for userprefix, suspect_ids in suspects.items():
        # export suspect ids into a file
        with open(target / f'{userprefix}_suspect_pythoff_answer_ids.json', 'wt') as f:
            json.dump(suspect_ids, f)

        # write suspect links into a file
        with open(target / f'{userprefix}_suspect_pythoff_answer_links', 'wt') as f:
            for suspect_id in suspect_ids:
                print(f'https://stackoverflow.com/a/{suspect_id}', file=f)


def main():
    """Process the batch files in the current directory."""
    export_suspects(collect_suspect_ids())


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Download every answer of one Stack Exchange user and save the raw batches.

Answers are requested in pages of 100 and each page's raw response text is
written to ``user<ID>_answers_batch_<N>.json``.
"""
from itertools import count
import json
import time

import requests

user = 5067311  # user ID
site = 'stackoverflow'

# grab answer text in batches of 100, use custom filter to get body in response
# filter generated at https://api.stackexchange.com/docs/answers-by-ids
answer_filter = '!)s7PIKB-1AU8aHrIGJeP'  # grab answer_id, question_id and body for answers


def fetch_answer_batches(user, site, answer_filter, timeout=30):
    """Fetch all answer pages of *user* on *site*.

    Args:
        user: Numeric user ID.
        site: API site name, e.g. ``'stackoverflow'``.
        answer_filter: API filter string selecting the returned fields.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the raw response text of every fetched page, in page order.

    Raises:
        RuntimeError: If a request returns a status code other than 200.
    """
    batches = []
    postcount = 0
    for page in count(1):
        resp = requests.get(
            f'https://api.stackexchange.com/2.2/users/{user}/answers',
            params={
                'order': 'desc',
                'sort': 'activity',
                'site': site,
                'pagesize': 100,
                'page': page,
                'filter': answer_filter,
            },
            timeout=timeout,
        )
        if resp.status_code != 200:
            raise RuntimeError(
                f'Something went wrong, oops! Response code is {resp.status_code}, reason {resp.reason}.'
            )

        batches.append(resp.text)
        dat = resp.json()
        postcount += len(dat['items'])

        if not dat['has_more']:
            print(f'All {postcount} answers found.')
            break
        if dat['quota_remaining'] == 0:
            print(f'Ran out of quota after {postcount} answers...last page was {page}. Moving on.')
            break

        # The API may ask clients to wait before the next call to the same
        # method; honour that if the field is part of the response.
        # NOTE(review): whether the custom filter includes `backoff` is not
        # visible here -- `.get` keeps this a no-op when it is absent.
        backoff = dat.get('backoff')
        if backoff:
            time.sleep(backoff)
    return batches


def save_batches(user, batches):
    """Save all the batches for future use, one numbered file per page."""
    for batch, text in enumerate(batches, start=1):
        with open(f'user{user}_answers_batch_{batch}.json', 'wt') as f:
            f.write(text)


def main():
    """Download and store the answers of the configured user."""
    save_batches(user, fetch_answer_batches(user, site, answer_filter))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment