aperson/sidebar_updater.py

## sidebar_updater.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
import urllib.request
import time
from collections import defaultdict

def _isVideo(submission):
    '''Returns video author name if this is a video'''
    if submission['domain'] in ('m.youtube.com', 'youtube.com', 'youtu.be'):
        if 'media' in submission:
            if submission['media'] is not None:
                if 'oembed' in submission['media']:
                    if 'author_name' in submission['media']['oembed']:
                        if submission['media']['oembed']['author_name'] is not None:
                            return submission['media']['oembed']['author_name'].replace(
                                ' ', '').lower()
        if '/user' in submission['url']:
            return re.findall(r'''user/(.*)(?:\?|/|$)''', submission['url'])[0].lower()

def _checkProfile(user):
    '''Print a self-promotion report for a reddit user.

    Fetches up to 100 of the user's newest comments and up to 100 of their
    newest submissions, then derives:
        * video_percent: among the submissions recognised as videos, the share
          belonging to the single most frequently linked video author
        * spammer_value: (video submissions + comments made on those video
          submissions) divided by the number of fetched comments and
          submissions
    The user is flagged as a spammer when video_percent and spammer_value are
    both above 85% and at least 3 video submissions were found.  All figures
    are printed to stdout.

    user: reddit account name, interpolated into the profile urls.

    Returns False when reddit answers with an HTTP error (taken to mean a
    shadowbanned or deleted account).  Otherwise returns None after printing
    the report.
    '''
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-agent', 'spam.py')]
    listings = {}
    try:
        for listing in ('comments', 'submitted'):
            url = 'http://www.reddit.com/user/{}/{}/.json?limit=100&sort=new'.format(
                user, listing)
            with opener.open(url) as w:
                listings[listing] = json.loads(
                    w.read().decode('utf-8'))['data']['children']
            # Pause after the response is closed so the connection is not held
            # open while waiting (presumably spacing requests for reddit's rate
            # limit).
            time.sleep(2)
    except urllib.error.HTTPError:
        # This is a hack to get around shadowbanned or deleted users
        # NOTE(review): `p` is not defined in the visible part of this file;
        # presumably a print/log helper defined elsewhere -- confirm.
        p("Could not parse /u/{}, probably shadowbanned or deleted".format(user))
        return False
    comments = listings['comments']
    submitted = listings['submitted']

    video_count = defaultdict(int)  # video author -> number of submissions
    video_submissions = set()       # fullnames of the video submissions
    for item in submitted:
        data = item['data']
        video_author = _isVideo(data)
        if video_author:
            video_count[video_author] += 1
            video_submissions.add(data['name'])
    comments_on_self = sum(
        1 for item in comments if item['data']['link_id'] in video_submissions)

    total_videos = sum(video_count.values())
    if total_videos:
        video_percent = max(video_count.values()) / total_videos
    else:
        video_percent = 0

    # An empty profile (no comments and no submissions) would otherwise divide
    # by zero; it cannot be a spammer, so score it 0.
    total_items = len(comments) + len(submitted)
    if total_items:
        spammer_value = (total_videos + comments_on_self) / total_items
    else:
        spammer_value = 0

    is_spammer = video_percent > .85 and total_videos >= 3 and spammer_value > .85

    # The spammer value field previously had its closing brace misplaced
    # ('{:.2% (... 85}'), which made str.format raise ValueError.
    print(
        "user: {}\n"
        "number of video authors: {}\n"
        "highest % of video authors: {:.2%} (needs to be greater than 85)\n"
        "video count: {} (needs to be at least 3)\n"
        "number of comments on own submissions: {}\n"
        "number of comments on submissions: {}\n"
        "number of submissions: {}\n"
        "spammer value: {:.2%} (needs to be greater than 85)\n"
        "is spammer: {}\n\n".format(
            user, len(video_count), video_percent, total_videos,
            comments_on_self, len(comments), len(submitted), spammer_value,
            is_spammer))
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	import json
	import re
	import urllib.request
	import time
	from collections import defaultdict

	def _isVideo(submission):
	'''Returns video author name if this is a video'''
	if submission['domain'] in ('m.youtube.com', 'youtube.com', 'youtu.be'):
	if 'media' in submission:
	if submission['media'] is not None:
	if 'oembed' in submission['media']:
	if 'author_name' in submission['media']['oembed']:
	if submission['media']['oembed']['author_name'] is not None:
	return submission['media']['oembed']['author_name'].replace(
	' ', '').lower()
	if '/user' in submission['url']:
	return re.findall(r'''user/(.*)(?:\?\|/\|$)''', submission['url'])[0].lower()

	def _checkProfile(user):
		'''Print a self-promotion report for a reddit user.

		Fetches up to 100 of the user's newest comments and up to 100 of their
		newest submissions, then derives:
			* video_percent: among the submissions recognised as videos, the share
			  belonging to the single most frequently linked video author
			* spammer_value: (video submissions + comments made on those video
			  submissions) divided by the number of fetched comments and
			  submissions
		The user is flagged as a spammer when video_percent and spammer_value are
		both above 85% and at least 3 video submissions were found.  All figures
		are printed to stdout.

		user: reddit account name, interpolated into the profile urls.

		Returns False when reddit answers with an HTTP error (taken to mean a
		shadowbanned or deleted account).  Otherwise returns None after printing
		the report.
		'''
		opener = urllib.request.build_opener()
		opener.addheaders = [('User-agent', 'spam.py')]
		listings = {}
		try:
			for listing in ('comments', 'submitted'):
				url = 'http://www.reddit.com/user/{}/{}/.json?limit=100&sort=new'.format(
					user, listing)
				with opener.open(url) as w:
					listings[listing] = json.loads(
						w.read().decode('utf-8'))['data']['children']
				# Pause after the response is closed so the connection is not held
				# open while waiting (presumably spacing requests for reddit's rate
				# limit).
				time.sleep(2)
		except urllib.error.HTTPError:
			# This is a hack to get around shadowbanned or deleted users
			# NOTE(review): `p` is not defined in the visible part of this file;
			# presumably a print/log helper defined elsewhere -- confirm.
			p("Could not parse /u/{}, probably shadowbanned or deleted".format(user))
			return False
		comments = listings['comments']
		submitted = listings['submitted']

		video_count = defaultdict(int)  # video author -> number of submissions
		video_submissions = set()       # fullnames of the video submissions
		for item in submitted:
			data = item['data']
			video_author = _isVideo(data)
			if video_author:
				video_count[video_author] += 1
				video_submissions.add(data['name'])
		comments_on_self = sum(
			1 for item in comments if item['data']['link_id'] in video_submissions)

		total_videos = sum(video_count.values())
		if total_videos:
			video_percent = max(video_count.values()) / total_videos
		else:
			video_percent = 0

		# An empty profile (no comments and no submissions) would otherwise divide
		# by zero; it cannot be a spammer, so score it 0.
		total_items = len(comments) + len(submitted)
		if total_items:
			spammer_value = (total_videos + comments_on_self) / total_items
		else:
			spammer_value = 0

		is_spammer = video_percent > .85 and total_videos >= 3 and spammer_value > .85

		# The spammer value field previously had its closing brace misplaced
		# ('{:.2% (... 85}'), which made str.format raise ValueError.
		print(
			"user: {}\n"
			"number of video authors: {}\n"
			"highest % of video authors: {:.2%} (needs to be greater than 85)\n"
			"video count: {} (needs to be at least 3)\n"
			"number of comments on own submissions: {}\n"
			"number of comments on submissions: {}\n"
			"number of submissions: {}\n"
			"spammer value: {:.2%} (needs to be greater than 85)\n"
			"is spammer: {}\n\n".format(
				user, len(video_count), video_percent, total_videos,
				comments_on_self, len(comments), len(submitted), spammer_value,
				is_spammer))