Skip to content

Instantly share code, notes, and snippets.

@mshock
Last active March 26, 2018 18:35
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mshock/a70e3bd234ff9dc4d11c208e96aa0940 to your computer and use it in GitHub Desktop.
Save mshock/a70e3bd234ff9dc4d11c208e96aa0940 to your computer and use it in GitHub Desktop.
scrape user flair to sqlite db from posts within a subreddit
[reddit]
client_id = XXX
client_secret = XXX
unique_key = XXX
#! python
import praw
import sqlite3
import time
import configparser
import sys
from colors import green, red, yellow
config = configparser.ConfigParser()
config.read('flair_parse.cfg')
print('configs parsed!')
subreddit = 'overwatch'
scan_type = sys.argv[1] if len(sys.argv) > 1 else 'new'
after = ''
page_limit = 10
process_dups = True
user_auth = False
drop_tables = False
conn = sqlite3.connect(subreddit + '.db')
print("db connection successful")
if (drop_tables):
conn.execute("drop table if exists user")
conn.execute("drop table if exists flair;")
conn.execute("drop table if exists page;")
conn.execute("drop table if exists change;")
conn.execute("drop table if exists ticker;")
conn.execute('''create table if not exists user
(id integer primary key autoincrement,
name text not null,
flair_id int,
foreign key(flair_id) references flair(id),
unique(name)
)
''')
conn.execute('''create table if not exists flair
(id integer primary key autoincrement,
name text not null,
unique(name)
)
''')
conn.execute('''create table if not exists page
(id integer primary key autoincrement,
name text not null,
unique(name)
)
''')
conn.execute('''create table if not exists change
(id integer primary key autoincrement,
from_flair int not null,
to_flair int not null,
count int default 0,
unique (from_flair, to_flair)
)
''')
conn.execute('''create table if not exists ticker
(id integer primary key autoincrement,
flair_id int not null,
prev_id int
)
''')
print('db tables initialized')
r = praw.Reddit("flair parser by u/mschock")
sub = r.get_subreddit(subreddit)
if (user_auth):
r.set_oauth_app_info(client_id=config.get('reddit', 'client_id'), client_secret=config.get('reddit', 'client_secret'), redirect_uri='http://127.0.0.1:65010/authorize_callback')
url = r.get_authorize_url(config.get('reddit', 'unique_key'), 'read', True)
import webbrowser
webbrowser.open(url)
access_key = input("enter access key: ")
access_information = r.get_access_information(access_key.rstrip())
r.set_access_credentials(**access_information)
print("user identity set!")
# access_information = r.get_access_information('leL3zVShuR2K-Juv68KTtZ4JNtA')
# r.set_access_credentials(**access_information)
# r.refresh_access_information(access_information['refresh_token'])
# authenticated_user = r.get_me()
# print(authenticated_user.name, authenticated_user.link_karma)
else:
print("using anonymous identity")
p = {'after': after}
num_users = conn.execute("select count(*) from user").fetchone()[0]
running = True
first = True
prev_after = ''
current_get_type = []
current_get_name= ['new','top','rising','hot']
while True:
try:
while running:
start_time = time.time()
print("retrieving {0} submissions...".format(str(page_limit)))
print("{0} @ {1}.{2}".format(subreddit, ('start' if after == '' else after), (scan_type + ' : ' + current_get_name[0] if scan_type == 'auto' else scan_type)))
if first:
current_get_type = [sub.get_new, sub.get_hot, sub.get_rising, sub.get_top]
get_current = current_get_type[0]
if scan_type == 'new':
content = sub.get_new(limit=page_limit,params=p)
elif scan_type == 'top':
content = sub.get_top(limit=page_limit,params=p)
elif scan_type == 'rising':
content = sub.get_rising(limit=page_limit,params=p)
elif scan_type == 'hot':
content = sub.get_hot(limit=page_limit,params=p)
elif scan_type == 'auto':
content = get_current(limit=page_limit, params=p)
else:
print("unknown scan type arg: " + scan_type)
sys.exit(1)
print("loaded!\n")
users_updated = 0
num_subs = 0
for submission in content:
num_subs += 1
after = submission.name
# pull save user to function so that don't have to repeat updated users code etc.
prev_after = after
p['after'] = after
process_prefix = 'NEW'
if (conn.execute("select count(*) from page where name = '{0}'".format(after)).fetchone()[0] > 0):
if (process_dups):
process_prefix = "DUP"
else:
print("skipping duplicate: {0}".format(after))
continue
conn.execute("insert or ignore into page(name) values ('{0}')".format(after))
comments = praw.helpers.flatten_tree(submission.comments)
print("[{0}] parsing page: {1} - ({2}) <{3}> {4}".format(process_prefix, after, submission.score, len(comments), submission.title[:70].encode('ascii', 'replace')))
if (hasattr(submission,'author_flair_css_class') and submission.author_flair_css_class is not None):
conn.execute("insert or ignore into flair(name) values ('{0}')".format(submission.author_flair_css_class))
(flair_id, flair_name) = conn.execute("select id, name from flair where name = '{0}'".format(submission.author_flair_css_class)).fetchone()
flair_prev = conn.execute("select flair_id from user where name = '{0}'".format(submission.author)).fetchone()
flair_change = False
recently_flaired = False
if flair_prev is not None:
flair_prev = flair_prev[0]
if flair_prev != flair_id:
users_updated += 1
flair_change = True
conn.execute("insert or ignore into change (from_flair, to_flair) values ({0}, {1})".format(flair_prev, flair_id))
conn.execute("update change set count = count + 1 where from_flair = {0} and to_flair = {1};".format(flair_prev, flair_id))
else:
flair_prev = 0
else:
flair_prev = 0
recently_flaired = True
conn.execute("insert or replace into user(name, flair_id) values ('{0}', {1})".format(submission.author, flair_id))
conn.commit()
if recently_flaired or flair_change:
conn.execute("insert into ticker(flair_id, prev_id) values ({}, {})".format(flair_id, flair_prev))
conn.commit()
for comment in comments:
if (hasattr(comment,'author_flair_css_class') and comment.author_flair_css_class is not None):
conn.execute("insert or ignore into flair(name) values ('{0}')".format(comment.author_flair_css_class))
(flair_id, flair_name) = conn.execute("select id, name from flair where name = '{0}'".format(comment.author_flair_css_class)).fetchone()
flair_prev = conn.execute("select flair_id from user where name = '{0}'".format(comment.author)).fetchone()
flair_change = False
if flair_prev is not None:
flair_prev = flair_prev[0]
if flair_prev != flair_id:
users_updated += 1
flair_change = True
conn.execute("insert or ignore into change (from_flair, to_flair) values ({0}, {1})".format(flair_prev, flair_id))
conn.execute("update change set count = count + 1 where from_flair = {0} and to_flair = {1};".format(flair_prev, flair_id))
else:
flair_prev = 0
else:
flair_prev = 0
recently_flaired = True
conn.execute("insert or replace into user(name, flair_id) values ('{0}', {1})".format(comment.author, flair_id))
conn.commit()
if recently_flaired or flair_change:
conn.execute("insert into ticker(flair_id, prev_id) values ({}, {})".format(flair_id, flair_prev))
conn.commit()
if num_subs == 0:
print('post limit reached, switching sort type and restarting...')
current_get_type = current_get_type[-1:] + current_get_type[:-1]
current_get_name = current_get_name[-1:] + current_get_name[:-1]
get_current = current_get_type[0]
after = ''
p['after'] = ''
continue
sleep_time = time.time() - start_time
sleep_time = 30 - sleep_time
num_users_new = conn.execute("select count(*) from user").fetchone()[0]
user_diff = num_users_new - num_users
num_users = num_users_new
print(green("\n(+{}) users added".format(user_diff)))
print(yellow("(+{}) users updated".format(users_updated)))
print(red("[{}] total users\n".format(num_users)))
if(sleep_time > 0):
print("sleeping: %f\n" % (sleep_time))
time.sleep(sleep_time)
first = False
except HTTPError as e:
print("HTTP Error: " + str(e.code))
print("sleeping 30 seconds...")
time.sleep(30)
continue
break
#! python
import sqlite3
import sys
import time
import os
clear = lambda: os.system('cls') if os.name == 'nt' else os.system('clear')
from colors import red, green, yellow, magenta, cyan
conn = sqlite3.connect('overwatch.db')
print("\ndb connection successful\n")
first = True
hero_ranks = {}
hero_change = {}
hero_change2 = {}
hero_counts = {}
while True:
clear()
print("\n\n")
counts = conn.execute('select b.name, count(a.flair_id) from user a, flair b on a.flair_id = b.id group by b.name').fetchall()
heroes = dict(zip('Bastion DVa Genji Hanzo Junkrat Lucio Mccree Mei Mercy Pharah Reaper Reinhardt Roadhog Soldier76 Symmetra Torbjorn Tracer Widowmaker Winston Zarya Zenyatta'.split(), [0] * 21))
css_lookup = dict(zip('DVa Symmetra Mercy Mei Lucio Winston Junkrat Roadhog Zarya Reaper Soldier76 Tracer Pharah Genji Reinhardt Mccree Widowmaker Bastion Zenyatta Torbjorn Hanzo'.split(),
'R19 R10 R13 R18 R15 R06 R17 R16 R08 R05 R02 R14 R07 R20 R09 R04 R01 R11 R00 R03 R12'.split()))
total_flaired = 0
total = 0
for hero in heroes:
for flair_css, count in counts:
if hero in flair_css or css_lookup[hero] in flair_css:
heroes[hero] += count
total_flaired += count
for i, (hero, hero_count) in enumerate(sorted(heroes.items(), key=lambda tup: tup[1], reverse=True)):
if first:
hero_ranks[hero] = i
hero_change[hero] = ''
hero_change2[hero] = ''
hero_counts[hero] = hero_count
if hero_counts[hero] < hero_count:
hero_change2[hero] = '(+{})'.format(hero_count - hero_counts[hero])
elif hero_counts[hero] > hero_count:
hero_change2[hero] = '(-{})'.format(hero_counts[hero] - hero_count)
if hero_ranks[hero] < i:
hero_change[hero] = '---'
elif hero_ranks[hero] > i:
hero_change[hero] = '+++'
for hero_check in heroes:
if hero == hero_check:
continue
if heroes[hero] == heroes[hero_check]:
hero_change[hero] = ''
color = red
if i <= 4:
color = green
elif i < 10:
color = yellow
elif i < 15:
color = magenta
print(color("\t[{0}]\t{1:<10} : {2:<5} - {3:>5.2f}% {4:<5} {5:<3}".format(
i+1,
hero,
hero_count,
(hero_count / float(total_flaired)) * 100,
hero_change2[hero],
hero_change[hero]
)))
hero_ranks[hero] = i
total_flaired = conn.execute('select count(1) from user where flair_id != 1').fetchone()[0];
total_users = conn.execute('select count(1) from user').fetchone()[0];
print(cyan("\n\t({0} / {1}) - {2:.2f}% users flaired".format(total_flaired, total_users, (total_flaired / float(total_users)) * 100)))
first = False
time.sleep(15)
#! python
import sqlite3
from colors import red, green, yellow, magenta, cyan
import time
import os
clear = lambda: os.system('cls') if os.name == 'nt' else os.system('clear')
sleep_dur = 5
num_rows = 25
def get_name(css):
for hero in heroes:
if hero in css or css_lookup[hero] in css:
return hero
return "default"
heroes = dict(zip('Bastion DVa Genji Hanzo Junkrat Lucio Mccree Mei Mercy Pharah Reaper Reinhardt Roadhog Soldier76 Symmetra Torbjorn Tracer Widowmaker Winston Zarya Zenyatta'.split(), [0] * 21))
css_lookup = dict(zip('DVa Symmetra Mercy Mei Lucio Winston Junkrat Roadhog Zarya Reaper Soldier76 Tracer Pharah Genji Reinhardt Mccree Widowmaker Bastion Zenyatta Torbjorn Hanzo'.split(),
'R19 R10 R13 R18 R15 R06 R17 R16 R08 R05 R02 R14 R07 R20 R09 R04 R01 R11 R00 R03 R12'.split()))
conn = sqlite3.connect('overwatch.db')
while True:
clear()
print("\n")
ticker = conn.execute("select * from ticker order by id desc limit {};".format(num_rows)).fetchall()
for (id, to_flair, from_flair) in ticker:
to_name = conn.execute("select name from flair where id = {}".format(to_flair)).fetchone()[0]
to_name = get_name(to_name)
if from_flair != 0:
from_name = conn.execute("select name from flair where id = {}".format(from_flair)).fetchone()[0]
from_name = get_name(from_name)
if to_flair == 1:
print(yellow(" [UPD]\t{0:<10} -> {1:<10}".format(to_name, from_name)))
else:
print(red(" [UPD]\t{0:<10} -> {1:<10}".format(to_name, from_name)))
else:
if to_flair == 1:
print(magenta(" [NEW]\t{0:<10}".format(to_name)))
else:
print(green(" [NEW]\t{0:<10}".format(to_name)))
time.sleep(sleep_dur)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment