Skip to content

Instantly share code, notes, and snippets.

@adeak
Last active October 2, 2020 20:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adeak/acbbc406feea98b2423eca2e9954a0fa to your computer and use it in GitHub Desktop.
Save adeak/acbbc406feea98b2423eca2e9954a0fa to your computer and use it in GitHub Desktop.
hunt for pythoff candidate answers
import pathlib
import json
import re
# Assumes the batch files were generated by grab_all_answers.py; every user ID
# that has matching files in the scanned directory is processed separately.
# Each JSON file has ['items'] -> list of answers -> ['body'], ['answer_id'] etc.

# A line is suspect when it contains, at a word boundary, either `print `
# (print followed by a space) or `xrange`.  Compiled once, reused for every line.
SUSPECT_PATTERN = re.compile(r'\b(print |xrange)')


def find_suspect_lines(body):
    """Return the lines of the answer text `body` that match SUSPECT_PATTERN."""
    return [line for line in body.splitlines() if SUSPECT_PATTERN.search(line)]


def collect_suspect_ids(directory='.', verbose=False):
    """Scan the answer batch files in `directory` and group suspect answers by user.

    Files are matched with the glob 'user*_answers_batch_*.json' and visited in
    sorted order.  The user prefix is the part of the file name before the
    first underscore (e.g. 'user1234').

    Returns a dict mapping each user prefix to the list of 'answer_id' values
    whose 'body' has at least one suspect line.  A user whose files contain no
    suspect answer still gets an (empty) entry.  With `verbose` true the
    suspect lines of every flagged answer are printed as they are found.
    """
    suspects = {}
    for path in sorted(pathlib.Path(directory).glob('user*_answers_batch_*.json')):
        userprefix = path.name.split('_', 1)[0]  # user1234
        # Keep one list per user so that IDs of different users never mix.
        ids = suspects.setdefault(userprefix, [])
        with open(path) as f:
            dat = json.load(f)
        for answer in dat['items']:
            suspect_lines = find_suspect_lines(answer['body'])
            if not suspect_lines:
                continue
            answer_id = answer['answer_id']
            ids.append(answer_id)
            if verbose:
                # on the fly output of suspicious lines
                print(f'answer {answer_id}:')
                print(*suspect_lines, sep='\n')
                print()
    return suspects


def export_suspects(suspects, directory='.'):
    """Write two files per user into `directory`.

    '<userprefix>_suspect_pythoff_answer_ids.json' holds the ID list as JSON;
    '<userprefix>_suspect_pythoff_answer_links' holds one
    'https://stackoverflow.com/a/<id>' link per line.
    """
    out_dir = pathlib.Path(directory)
    for userprefix, ids in suspects.items():
        # export suspect ids into a file
        with open(out_dir / f'{userprefix}_suspect_pythoff_answer_ids.json', 'wt') as f:
            json.dump(ids, f)
        # write suspect links into a file
        with open(out_dir / f'{userprefix}_suspect_pythoff_answer_links', 'wt') as f:
            for suspect_id in ids:
                print(f'https://stackoverflow.com/a/{suspect_id}', file=f)


def hunt_pythoff_suspects(directory='.', verbose=False):
    """Collect the suspect answer IDs found in `directory` and export them there.

    Does nothing (and returns an empty dict) when no batch file is found.
    Returns the same mapping as collect_suspect_ids().
    """
    suspects = collect_suspect_ids(directory, verbose=verbose)
    export_suspects(suspects, directory)
    return suspects


if __name__ == '__main__':
    hunt_pythoff_suspects()
from itertools import count
import json
import requests
user = 5067311  # user ID
site = 'stackoverflow'
# custom filter to get the answer body in the response
# filter generated at https://api.stackexchange.com/docs/answers-by-ids
answer_filter = '!)s7PIKB-1AU8aHrIGJeP'  # grab answer_id, question_id and body for answers


def build_answers_url(user, site, page, answer_filter):
    """Return the API URL for one page (up to 100 items) of `user`'s answers on `site`."""
    return (
        f'https://api.stackexchange.com/2.2/users/{user}/answers'
        f'?order=desc&sort=activity&site={site}&pagesize=100&page={page}&filter={answer_filter}'
    )


def batch_filename(user, batch):
    """Return the name of the file that stores batch number `batch` of `user`'s answers."""
    return f'user{user}_answers_batch_{batch}.json'


def grab_all_answers(user, site, answer_filter):
    """Download all answers of `user` in batches of 100 and save each batch to disk.

    Every successfully fetched page is written right away to
    batch_filename(user, page) in the working directory, so batches that were
    already downloaded survive a later failure.  Paging stops when the response
    reports no more pages ('has_more' false) or a 'quota_remaining' of 0.

    Returns the total number of answers fetched.
    Raises RuntimeError when a request does not return status code 200.
    """
    import time  # only needed to honour a backoff request from the API

    postcount = 0
    for page in count(1):
        resp = requests.get(build_answers_url(user, site, page, answer_filter))
        if resp.status_code != 200:
            raise RuntimeError(
                f'Something went wrong, oops! Response code is {resp.status_code}, reason {resp.reason}.'
            )
        dat = resp.json()

        # save the batch for future use
        with open(batch_filename(user, page), 'wt') as f:
            f.write(resp.text)

        postcount += len(dat['items'])
        if not dat['has_more']:
            print(f'All {postcount} answers found.')
            break
        if dat['quota_remaining'] == 0:
            print(f'Ran out of quota after {postcount} answers...last page was {page}. Moving on.')
            break

        # The response may carry a 'backoff' value (seconds to wait before the
        # next request); it is absent from most responses, hence .get().
        backoff = dat.get('backoff')
        if backoff:
            time.sleep(backoff)
    return postcount


if __name__ == '__main__':
    grab_all_answers(user, site, answer_filter)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment