# adeak/find_pythoff_candidates.py (secret gist)

## find_pythoff_candidates.py
import pathlib
import json
import re

# Scans the answer batches saved by grab_all_answers.py (files named
# user<ID>_answers_batch_<N>.json) for answers that look like Python 2 code
# ("pythoff" suspects) and writes the findings to one pair of files per user.
# Each batch holds one API page as JSON: ['items'] is a list of answers, and
# every answer carries at least ['answer_id'] and ['body'].

# A line is suspicious when it contains "print " (the print statement, i.e.
# print followed by a space) or "xrange", starting at a word boundary.
# Compiled once here instead of once per scanned line.
SUSPECT_PATTERN = re.compile(r'\b(print |xrange)')


def find_suspect_lines(body):
    """Return the lines of the answer text `body` that match SUSPECT_PATTERN."""
    return [line for line in body.splitlines() if SUSPECT_PATTERN.search(line)]


def collect_suspect_ids(directory='.', verbose=False):
    """Collect the IDs of suspicious answers from every batch file.

    Args:
        directory: folder that is searched for user*_answers_batch_*.json.
        verbose: when true, print the suspicious lines of each hit on the fly.

    Returns:
        A dict mapping each user prefix (e.g. 'user1234') to the list of
        suspect answer IDs found in that user's batches. A user whose batches
        contain no suspects maps to an empty list; the dict is empty when no
        batch file exists.
    """
    suspects_by_user = {}
    for file in sorted(pathlib.Path(directory).glob('user*_answers_batch_*.json')):
        with open(file) as f:
            dat = json.load(f)

        # Group by user so that batches of several users in one folder are
        # not merged into the files of whichever user happens to sort last.
        userprefix = file.name.split('_', 1)[0]  # user1234
        suspect_ids = suspects_by_user.setdefault(userprefix, [])

        for answer in dat['items']:
            suspect_lines = find_suspect_lines(answer['body'])
            if not suspect_lines:
                continue
            answer_id = answer['answer_id']
            suspect_ids.append(answer_id)

            if verbose:
                print(f'answer {answer_id}:')
                print(*suspect_lines, sep='\n')
                print()
    return suspects_by_user


def write_suspect_files(suspects_by_user, directory='.'):
    """Write the per-user result files into `directory`.

    For every user prefix two files are created:
    <prefix>_suspect_pythoff_answer_ids.json (JSON list of answer IDs) and
    <prefix>_suspect_pythoff_answer_links (one answer URL per line).
    """
    out_dir = pathlib.Path(directory)
    for userprefix, suspect_ids in suspects_by_user.items():
        # export suspect ids into a file
        with open(out_dir / f'{userprefix}_suspect_pythoff_answer_ids.json', 'wt') as f:
            json.dump(suspect_ids, f)

        # write suspect links into a file
        with open(out_dir / f'{userprefix}_suspect_pythoff_answer_links', 'wt') as f:
            for suspect_id in suspect_ids:
                print(f'https://stackoverflow.com/a/{suspect_id}', file=f)


if __name__ == '__main__':
    # pass verbose=True to collect_suspect_ids() to see the offending lines
    write_suspect_files(collect_suspect_ids())

## grab_all_answers.py
from itertools import count
import json
import requests

user = 5067311  # user ID
site = 'stackoverflow'

# Custom Stack Exchange API filter that makes the response include answer_id,
# question_id and body for each answer.
# Generated with the filter editor at https://api.stackexchange.com/docs/answers-by-ids
answer_filter = '!)s7PIKB-1AU8aHrIGJeP'

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever


def check_response(resp):
    """Raise RuntimeError unless `resp` has HTTP status 200.

    `resp` only needs the attributes `status_code` and `reason`.
    """
    if resp.status_code != 200:
        raise RuntimeError(
            f'Something went wrong, oops! Response code is {resp.status_code}, reason {resp.reason}.'
        )


def batch_filename(user, batch):
    """Return the file name under which batch number `batch` of `user` is stored."""
    return f'user{user}_answers_batch_{batch}.json'


def grab_all_answers(user, site, answer_filter):
    """Download the answers of `user` on `site` in pages of 100.

    The raw text of every page is written to batch_filename(user, page) in the
    current directory. Paging stops when the API reports no more pages or
    when the request quota is used up.

    Args:
        user: numeric user ID.
        site: API site name, e.g. 'stackoverflow'.
        answer_filter: API filter string selecting the returned fields.

    Returns:
        The number of answers downloaded.

    Raises:
        RuntimeError: if a request comes back with a status other than 200.
    """
    url = f'https://api.stackexchange.com/2.2/users/{user}/answers'
    postcount = 0
    for page in count(1):
        # let requests build the query string so the filter gets URL-encoded
        resp = requests.get(
            url,
            params={
                'order': 'desc',
                'sort': 'activity',
                'site': site,
                'pagesize': 100,
                'page': page,
                'filter': answer_filter,
            },
            timeout=REQUEST_TIMEOUT,
        )
        check_response(resp)

        # save the batch right away: the quota for it is already spent, so a
        # failure on a later page must not throw the earlier pages away
        with open(batch_filename(user, page), 'wt') as f:
            f.write(resp.text)

        dat = resp.json()
        postcount += len(dat['items'])
        if not dat['has_more']:
            print(f'All {postcount} answers found.')
            break
        if dat['quota_remaining'] == 0:
            print(f'Ran out of quota after {postcount} answers...last page was {page}. Moving on.')
            break
    return postcount


if __name__ == '__main__':
    grab_all_answers(user, site, answer_filter)
	import pathlib
	import json
	import re

	# Scans the answer batches saved by grab_all_answers.py (files named
	# user<ID>_answers_batch_<N>.json) for answers that look like Python 2 code
	# ("pythoff" suspects) and writes the findings to one pair of files per user.
	# Each batch holds one API page as JSON: ['items'] is a list of answers, and
	# every answer carries at least ['answer_id'] and ['body'].

	# A line is suspicious when it contains "print " (the print statement, i.e.
	# print followed by a space) or "xrange", starting at a word boundary.
	# Compiled once here instead of once per scanned line.
	SUSPECT_PATTERN = re.compile(r'\b(print |xrange)')


	def find_suspect_lines(body):
	    """Return the lines of the answer text `body` that match SUSPECT_PATTERN."""
	    return [line for line in body.splitlines() if SUSPECT_PATTERN.search(line)]


	def collect_suspect_ids(directory='.', verbose=False):
	    """Collect the IDs of suspicious answers from every batch file.

	    Args:
	        directory: folder that is searched for user*_answers_batch_*.json.
	        verbose: when true, print the suspicious lines of each hit on the fly.

	    Returns:
	        A dict mapping each user prefix (e.g. 'user1234') to the list of
	        suspect answer IDs found in that user's batches. A user whose batches
	        contain no suspects maps to an empty list; the dict is empty when no
	        batch file exists.
	    """
	    suspects_by_user = {}
	    for file in sorted(pathlib.Path(directory).glob('user*_answers_batch_*.json')):
	        with open(file) as f:
	            dat = json.load(f)

	        # Group by user so that batches of several users in one folder are
	        # not merged into the files of whichever user happens to sort last.
	        userprefix = file.name.split('_', 1)[0]  # user1234
	        suspect_ids = suspects_by_user.setdefault(userprefix, [])

	        for answer in dat['items']:
	            suspect_lines = find_suspect_lines(answer['body'])
	            if not suspect_lines:
	                continue
	            answer_id = answer['answer_id']
	            suspect_ids.append(answer_id)

	            if verbose:
	                print(f'answer {answer_id}:')
	                print(*suspect_lines, sep='\n')
	                print()
	    return suspects_by_user


	def write_suspect_files(suspects_by_user, directory='.'):
	    """Write the per-user result files into `directory`.

	    For every user prefix two files are created:
	    <prefix>_suspect_pythoff_answer_ids.json (JSON list of answer IDs) and
	    <prefix>_suspect_pythoff_answer_links (one answer URL per line).
	    """
	    out_dir = pathlib.Path(directory)
	    for userprefix, suspect_ids in suspects_by_user.items():
	        # export suspect ids into a file
	        with open(out_dir / f'{userprefix}_suspect_pythoff_answer_ids.json', 'wt') as f:
	            json.dump(suspect_ids, f)

	        # write suspect links into a file
	        with open(out_dir / f'{userprefix}_suspect_pythoff_answer_links', 'wt') as f:
	            for suspect_id in suspect_ids:
	                print(f'https://stackoverflow.com/a/{suspect_id}', file=f)


	if __name__ == '__main__':
	    # pass verbose=True to collect_suspect_ids() to see the offending lines
	    write_suspect_files(collect_suspect_ids())
	from itertools import count
	import json
	import requests

	user = 5067311  # user ID
	site = 'stackoverflow'

	# Custom Stack Exchange API filter that makes the response include answer_id,
	# question_id and body for each answer.
	# Generated with the filter editor at https://api.stackexchange.com/docs/answers-by-ids
	answer_filter = '!)s7PIKB-1AU8aHrIGJeP'

	REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever


	def check_response(resp):
	    """Raise RuntimeError unless `resp` has HTTP status 200.

	    `resp` only needs the attributes `status_code` and `reason`.
	    """
	    if resp.status_code != 200:
	        raise RuntimeError(
	            f'Something went wrong, oops! Response code is {resp.status_code}, reason {resp.reason}.'
	        )


	def batch_filename(user, batch):
	    """Return the file name under which batch number `batch` of `user` is stored."""
	    return f'user{user}_answers_batch_{batch}.json'


	def grab_all_answers(user, site, answer_filter):
	    """Download the answers of `user` on `site` in pages of 100.

	    The raw text of every page is written to batch_filename(user, page) in the
	    current directory. Paging stops when the API reports no more pages or
	    when the request quota is used up.

	    Args:
	        user: numeric user ID.
	        site: API site name, e.g. 'stackoverflow'.
	        answer_filter: API filter string selecting the returned fields.

	    Returns:
	        The number of answers downloaded.

	    Raises:
	        RuntimeError: if a request comes back with a status other than 200.
	    """
	    url = f'https://api.stackexchange.com/2.2/users/{user}/answers'
	    postcount = 0
	    for page in count(1):
	        # let requests build the query string so the filter gets URL-encoded
	        resp = requests.get(
	            url,
	            params={
	                'order': 'desc',
	                'sort': 'activity',
	                'site': site,
	                'pagesize': 100,
	                'page': page,
	                'filter': answer_filter,
	            },
	            timeout=REQUEST_TIMEOUT,
	        )
	        check_response(resp)

	        # save the batch right away: the quota for it is already spent, so a
	        # failure on a later page must not throw the earlier pages away
	        with open(batch_filename(user, page), 'wt') as f:
	            f.write(resp.text)

	        dat = resp.json()
	        postcount += len(dat['items'])
	        if not dat['has_more']:
	            print(f'All {postcount} answers found.')
	            break
	        if dat['quota_remaining'] == 0:
	            print(f'Ran out of quota after {postcount} answers...last page was {page}. Moving on.')
	            break
	    return postcount


	if __name__ == '__main__':
	    grab_all_answers(user, site, answer_filter)