Skip to content

Instantly share code, notes, and snippets.

@hayzem
Forked from Two9A/decronym.php
Created October 12, 2017 09:50
Show Gist options
  • Save hayzem/e78371a8277f15aeb8fe34ff0cb5cb45 to your computer and use it in GitHub Desktop.
Save hayzem/e78371a8277f15aeb8fe34ff0cb5cb45 to your computer and use it in GitHub Desktop.
Decronym: A simple Reddit bot
<?php
/**
* Dirty, dirty Reddit bot: Decronym
*/
class Reddit {
// Bot account credentials; sent as the username/password fields of the
// password grant in _ensure_loggedin(). USERNAME is also used by Decronym to
// skip the bot's own comments.
const USERNAME = 'Decronym';
const PASSWORD = '***';
// OAuth client id/secret; passed as the HTTP basic-auth pair when requesting
// an access token.
const CLIENTID = '***';
const SECRET = '***';
// Token endpoint, and the base URL every API call in this class is built on.
const ACCESS_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token';
const API_BASE_URL = 'https://oauth.reddit.com';
// Type prefixes for the "<type>_<base36 id>" strings built by _to_thingid().
// THING_SUBREDDIT is not referenced anywhere else in this file.
const THING_COMMENT = 't1';
const THING_THREAD = 't3';
const THING_SUBREDDIT = 't5';
// Cached bearer token; stays unset until _ensure_loggedin() obtains one and
// is reset to null by _send() when the server rejects it.
private static $token;
/**
 * Request /api/v1/me for the authenticated account.
 *
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function me() {
    $endpoint = self::API_BASE_URL.'/api/v1/me';
    return self::_send($endpoint);
}
/**
 * Request the newest comments of a subreddit (up to 100 per call).
 *
 * @param string $subreddit Subreddit name, inserted into the URL path as-is.
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function fetch_comments($subreddit) {
    // 'cb' carries the current time, so the query string differs per call.
    $query = http_build_query(array(
        'cb' => time(),
        'sort' => 'new',
        'limit' => '100'
    ));
    $url = self::API_BASE_URL.'/r/'.$subreddit.'/comments/?'.$query;
    return self::_send($url);
}
/**
 * Request /api/info for a set of threads.
 *
 * @param array $threadids Decimal thread ids.
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function fetch_threads($threadids) {
    // Convert every decimal id to its "t3_<base36>" form.
    $fullnames = array();
    foreach ($threadids as $decimal_id) {
        $fullnames[] = self::_to_thingid(self::THING_THREAD, $decimal_id);
    }
    $query = http_build_query(array(
        'id' => implode(',', $fullnames),
    ));
    return self::_send(self::API_BASE_URL.'/api/info?'.$query);
}
/**
 * Post a top-level comment on a thread.
 *
 * @param int|string $threadid Decimal thread id.
 * @param string     $body     Comment text.
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function comment($threadid, $body) {
    $fields = array(
        'api_type' => 'json',
        'thing_id' => self::_to_thingid(self::THING_THREAD, $threadid),
        'text' => $body
    );
    return self::_send(self::API_BASE_URL.'/api/comment', $fields);
}
/**
 * Replace the text of an existing comment.
 *
 * @param int|string $commentid Decimal comment id.
 * @param string     $body      New comment text.
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function edit($commentid, $body) {
    $fields = array(
        'api_type' => 'json',
        'thing_id' => self::_to_thingid(self::THING_COMMENT, $commentid),
        'text' => $body
    );
    return self::_send(self::API_BASE_URL.'/api/editusertext', $fields);
}
/**
 * Request the unread messages of the authenticated account.
 *
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function inbox() {
    $endpoint = self::API_BASE_URL.'/message/unread';
    return self::_send($endpoint);
}
/**
 * Mark an inbox item as read.
 *
 * @param string $thingid Full thing id of the message, passed through as-is.
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function mark_read($thingid) {
    $fields = array('id' => $thingid);
    return self::_send(self::API_BASE_URL.'/api/read_message', $fields);
}
/**
 * Send a private message from the bot account.
 *
 * @param string $to      Recipient user name.
 * @param string $subject Message subject.
 * @param string $text    Message body.
 * @return array Result of _send(): 'headers' plus the JSON-decoded 'body'.
 */
public static function send_message($to, $subject, $text) {
    $fields = array(
        'api_type' => 'json',
        'from_sr' => '',
        'to' => $to,
        'subject' => $subject,
        'text' => $text
    );
    return self::_send(self::API_BASE_URL.'/api/compose', $fields);
}
/**
 * Build a "<type>_<base36 id>" thing id from a type prefix and a decimal id.
 *
 * @param string     $type Type prefix, e.g. one of the THING_* constants.
 * @param int|string $id   Decimal id.
 * @return string
 */
public static function _to_thingid($type, $id) {
    $base36 = base_convert($id, 10, 36);
    return sprintf('%s_%s', $type, $base36);
}
/**
 * Split a "<type>_<base36 id>" thing id into its type and decimal id.
 *
 * @param string $thingid Thing id such as "t3_10".
 * @return array 'type' (prefix before the underscore) and 'id' (decimal string).
 */
public static function _from_thingid($thingid) {
    $parts = explode('_', $thingid);
    $decimal = base_convert($parts[1], 36, 10);
    return array(
        'type' => $parts[0],
        'id' => $decimal
    );
}
/**
 * Convert a decimal id to its base-36 string form.
 *
 * @param int|string $id Decimal id.
 * @return string
 */
public static function _to_id($id) {
    $base36 = base_convert($id, 10, 36);
    return $base36;
}
/**
 * Convert a base-36 id to its decimal string form.
 *
 * @param string $id Base-36 id.
 * @return string
 */
public static function _from_id($id) {
    $decimal = base_convert($id, 36, 10);
    return $decimal;
}
/**
 * Perform an authenticated API request and validate the decoded response.
 *
 * If the server answers with a 401 / invalid_grant error the cached token is
 * dropped and the request is repeated exactly once with a fresh token. The
 * previous implementation recursed without a bound, so a persistently
 * rejected token never terminated.
 *
 * @param string $url   Full request URL.
 * @param array  $data  POST fields; an empty array results in a GET.
 * @param bool   $retry Whether a re-authentication attempt is still allowed.
 * @return array 'headers' and the JSON-decoded 'body' as produced by _curl().
 * @throws Exception On a top-level 'error' field or a non-empty json.errors list.
 */
private static function _send($url, $data = array(), $retry = true) {
    self::_ensure_loggedin();
    $result = self::_curl($url, $data, null, array(
        'Authorization' => 'bearer '.self::$token
    ));
    $body = $result['body'];
    if (is_array($body) && !empty($body['error'])) {
        $error = $body['error'];
        // Normalise to a string so the comparison below is strict.
        $code = is_scalar($error) ? (string)$error : json_encode($error);
        if ($retry && ($code === '401' || $code === 'invalid_grant')) {
            // Token rejected: forget it; the nested call logs in again.
            self::$token = null;
            return self::_send($url, $data, false);
        }
        throw new Exception('Server error: '.$code);
    }
    if (is_array($body) && isset($body['json']['errors']) && count($body['json']['errors'])) {
        throw new Exception(json_encode($body['json']['errors']));
    }
    return $result;
}
/**
 * Obtain and cache a bearer token unless one is already cached.
 *
 * Uses the password grant: the bot's user name/password go in the POST body
 * and the client id/secret are sent as HTTP basic auth.
 *
 * @return void
 * @throws Exception When the token endpoint reports an error or the response
 *                   contains no access token (e.g. an undecodable body).
 */
private static function _ensure_loggedin() {
    if (isset(self::$token)) {
        return;
    }
    $login = self::_curl(
        self::ACCESS_TOKEN_URL,
        array(
            'grant_type' => 'password',
            'username' => self::USERNAME,
            'password' => self::PASSWORD
        ),
        array(
            'username' => self::CLIENTID,
            'password' => self::SECRET
        )
    );
    $body = $login['body'];
    if (is_array($body) && !empty($body['error'])) {
        $error = is_scalar($body['error']) ? $body['error'] : json_encode($body['error']);
        throw new Exception('Authentication failed: '.$error);
    }
    // Without this check a missing token was stored as null and every later
    // request went out with an empty bearer token.
    if (!is_array($body) || empty($body['access_token'])) {
        throw new Exception('Authentication failed: no access token in response');
    }
    self::$token = $body['access_token'];
}
/**
 * Execute one HTTP request with curl and return parsed headers plus the
 * JSON-decoded body.
 *
 * Changes from the previous version:
 *  - TLS certificate and host verification are enabled; credentials and
 *    bearer tokens travel over this connection.
 *  - The protocol version is no longer pinned (the old value 4 selected
 *    TLS 1.0); curl negotiates the version itself.
 *  - A transport failure raises an Exception instead of being parsed as an
 *    empty response.
 *  - Header lines are split on the first colon only, so values containing
 *    colons (dates, URLs) are kept whole; lines without a colon are skipped.
 *
 * @param string     $url     Request URL.
 * @param array|null $data    POST fields; when empty the request is a GET.
 * @param array|null $auth    Optional 'username'/'password' for HTTP basic auth.
 * @param array      $headers Extra request headers as name => value.
 * @return array 'headers' (name => value, plus '_code' holding the first
 *               status line) and 'body' (json_decode result, null if the
 *               body is not valid JSON).
 * @throws Exception When curl cannot complete the transfer.
 */
private static function _curl($url, $data = null, $auth = null, $headers = array()) {
    // Sent as an "Expect:" header without a value — presumably to stop curl
    // from adding "Expect: 100-continue" on POST requests.
    $headers['Expect'] = '';
    $curlheaders = array();
    foreach ($headers as $k => $v) {
        $curlheaders[] = "{$k}: {$v}";
    }
    $params = array(
        CURLOPT_URL => $url,
        CURLOPT_HEADER => true,
        CURLOPT_VERBOSE => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_USERAGENT => 'Decronym/0.01',
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_HTTPHEADER => $curlheaders
    );
    if ($data) {
        $params += array(
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => http_build_query($data),
            CURLOPT_CUSTOMREQUEST => 'POST'
        );
    }
    if ($auth) {
        $params += array(
            CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
            CURLOPT_USERPWD => "{$auth['username']}:{$auth['password']}"
        );
    }
    $c = curl_init();
    curl_setopt_array($c, $params);
    $r = curl_exec($c);
    if ($r === false) {
        // Read the error before the handle is closed.
        $error = curl_error($c);
        curl_close($c);
        throw new Exception('HTTP request failed: '.$error);
    }
    $headersize = curl_getinfo($c, CURLINFO_HEADER_SIZE);
    curl_close($c);
    $parsed = array();
    $header = substr($r, 0, $headersize);
    $body = substr($r, $headersize);
    foreach (explode("\r\n", $header) as $i => $line) {
        if ($i === 0) {
            $parsed['_code'] = $line;
            continue;
        }
        // Blank separators and the status lines of followed redirects have
        // no "name: value" shape.
        if (strpos($line, ':') === false) {
            continue;
        }
        list($k, $v) = explode(':', $line, 2);
        $parsed[trim($k)] = trim($v);
    }
    return array(
        'headers' => $parsed,
        'body' => json_decode(trim((string)$body), true)
    );
}
/**
 * Append one line tagged "API" to /tmp/decronym.log.
 *
 * Written with file_put_contents() so that an unwritable log file only
 * produces a warning; the earlier fopen()/fprintf() pair passed false to
 * fprintf() in that case.
 *
 * @param string $str Message to log.
 * @return void
 */
private static function _log($str) {
    $line = sprintf("[%s] [%s] %s\n", date('YmdHis'), 'API', $str);
    file_put_contents('/tmp/decronym.log', $line, FILE_APPEND | LOCK_EX);
}
}
class Decronym {
// MySQL connection settings used by _ensure_connection().
const DB_HOST = 'localhost';
const DB_NAME = 'decronym';
const DB_USER = '***';
const DB_PASS = '***';
// Recipient used by forward_inbox() when no explicit $to is given.
const FORWARD_USER = 'NotDecronym';
// Values compared against acronyms.acronym_type; _build_body_for_thread()
// uses them to pick the table header for each group of acronyms.
const ACRONYM_STANDARD = 0;
const ACRONYM_JARGON = 1;
const ACRONYM_EVENT = 2;
// PDO handle, created on first use by _ensure_connection().
private static $dbc;
// Row of the subreddits table selected by _ensure_unchecked_subreddit();
// unset until that method has run.
private static $subreddit;
// Authors whose comments parse_comments() does not scan for acronyms.
private static $ignored_authors = array(
'TweetPoster',
'TweetsInCommentsBot',
);
/**
 * Fetch the subreddit's newest comments and keep those created or edited at
 * or after the subreddit's previous check time, excluding the bot's own.
 *
 * A response without a data.children list (for example an undecodable body)
 * now yields an empty result instead of iterating over null.
 *
 * @return array List of arrays with 'id' (decimal comment id), 'ts'
 *               (created_utc as int), 'body', 'author' and 'thread'
 *               (result of Reddit::_from_thingid() on the comment's link_id).
 */
public static function read_comments_since_last_check() {
    self::_ensure_unchecked_subreddit();
    $ts = self::$subreddit['last_check_ts'];
    $response = Reddit::fetch_comments(self::$subreddit['subreddit_name']);
    $comments = array();
    if (isset($response['body']['data']['children']) && is_array($response['body']['data']['children'])) {
        $comments = $response['body']['data']['children'];
    }
    $ret = array();
    foreach ($comments as $comment) {
        $data = $comment['data'];
        $edited_since = !empty($data['edited']) && $data['edited'] >= $ts;
        if (!$edited_since && !($data['created_utc'] >= $ts)) {
            continue;
        }
        // We don't want comments written by the bot, that just gets silly
        if ($data['author'] === Reddit::USERNAME) {
            continue;
        }
        $ret[] = array(
            'id' => Reddit::_from_id($data['id']),
            'ts' => (int)$data['created_utc'],
            'body' => $data['body'],
            'author' => $data['author'],
            'thread' => Reddit::_from_thingid($data['link_id'])
        );
    }
    self::_log(count($ret).' new comments');
    return $ret;
}
/**
 * Look up title, comment count, score, creation time and sticky flag for a
 * set of threads.
 *
 * The 'stickied' flag is included because post_or_edit() reads it; without
 * it the sticky_min_score threshold could never apply. An empty id list
 * returns immediately without an API call.
 *
 * @param array $thread_ids Decimal thread ids.
 * @return array Map of decimal thread id => array with 'title',
 *               'num_comments', 'score', 'created_utc' and 'stickied'.
 *               Threads missing from the API response are absent.
 */
public static function read_thread_info($thread_ids) {
    $counts = array();
    if (!$thread_ids) {
        return $counts;
    }
    $data = Reddit::fetch_threads($thread_ids);
    if (isset($data['body']['data']['children']) && is_array($data['body']['data']['children'])) {
        foreach ($data['body']['data']['children'] as $child) {
            $threadid = Reddit::_from_thingid($child['data']['name']);
            $counts[$threadid['id']] = array(
                'title' => $child['data']['title'],
                'num_comments' => $child['data']['num_comments'],
                'score' => $child['data']['score'],
                'created_utc' => $child['data']['created_utc'],
                'stickied' => !empty($child['data']['stickied'])
            );
        }
    }
    return $counts;
}
/**
 * Scan comments for known acronyms, record each hit against the comment's
 * thread, and report which threads were touched.
 *
 * Matching is skipped when the subreddit has no acronyms: the alternation
 * would otherwise be empty and match the empty string at every word
 * boundary. A match with no entry under exactly that key (possible when an
 * acronym_regex fragment is a real pattern rather than a literal) is
 * ignored rather than stored with a null acronym id.
 *
 * @param array $comments Items as returned by read_comments_since_last_check().
 * @return array Decimal ids of every thread that had a comment from a
 *               non-ignored author, whether or not an acronym was found.
 */
public static function parse_comments($comments) {
    self::_ensure_unchecked_subreddit();
    $acronyms = self::_fetch_acronyms();
    $threads = array();
    $regex = count($acronyms)
        ? '#\b('.join('|', array_keys($acronyms)).')\b#'
        : null;
    foreach ($comments as $comment) {
        self::_log(
            '/r/'.
            self::$subreddit['subreddit_name'].'/comments/'.
            Reddit::_to_id($comment['thread']['id']).'//'.
            Reddit::_to_id($comment['id'])
        );
        if (in_array($comment['author'], self::$ignored_authors, true)) {
            continue;
        }
        if ($regex !== null && preg_match_all($regex, $comment['body'], $matches)) {
            self::_log('Found acronyms: '.join(', ', $matches[1]));
            foreach ($matches[1] as $match) {
                if (!isset($acronyms[$match])) {
                    continue;
                }
                self::_add_acronym_to_thread(
                    $comment['thread']['id'],
                    $acronyms[$match]['acronym_id'],
                    $comment['id'],
                    $comment['ts']
                );
            }
        }
        // Always signal that this thread was updated
        $threads[$comment['thread']['id']] = true;
    }
    return array_keys($threads);
}
/**
 * Post the bot's acronym table on a thread, or update the existing one, if
 * the thread passes the subreddit's thresholds.
 *
 * The thread is skipped when thread info is missing, when it has too few
 * comments, when it is stickied and below sticky_min_score, when it is below
 * min_score, or when it is younger than max_thread_wait seconds.
 *
 * @param int|string $thread_id   Decimal thread id.
 * @param array|null $thread_info Entry from read_thread_info() for this
 *                                thread; null or non-array input is refused.
 * @return void
 */
public static function post_or_edit($thread_id, $thread_info) {
    // The thresholds below come from the selected subreddit row.
    self::_ensure_unchecked_subreddit();
    if (!is_array($thread_info)) {
        self::_log('Refusing to write comment: no thread info');
        return;
    }
    if ($thread_info['num_comments'] < self::$subreddit['min_comments']) {
        self::_log('Refusing to write comment: below min_comments');
        return;
    }
    // 'stickied' may be absent from the info array; treat that as not stickied.
    if (!empty($thread_info['stickied']) && $thread_info['score'] < self::$subreddit['sticky_min_score']) {
        self::_log('Refusing to write comment: below sticky_min_score');
        return;
    }
    if ($thread_info['score'] < self::$subreddit['min_score']) {
        self::_log('Refusing to write comment: below min_score');
        return;
    }
    if (time() < ($thread_info['created_utc'] + self::$subreddit['max_thread_wait'])) {
        self::_log('Refusing to write comment: thread too new');
        return;
    }
    $thread = self::_fetch_thread($thread_id);
    $body = self::_build_body_for_thread($thread_id, $thread_info);
    if (!$body) {
        return;
    }
    if ($thread && $thread['bot_comment_id']) {
        self::_log('Updating comment for thread '.Reddit::_to_id($thread_id));
        Reddit::edit($thread['bot_comment_id'], $body);
        return;
    }
    self::_log('Writing comment for thread '.Reddit::_to_id($thread_id));
    $ret = Reddit::comment($thread_id, $body);
    if (isset($ret['body']['json']['data']['things'][0]['data']['name'])) {
        // Remember the new comment so later runs edit it instead of reposting.
        $comment = Reddit::_from_thingid($ret['body']['json']['data']['things'][0]['data']['name']);
        self::_add_thread($thread_id, $comment['id']);
    } else {
        self::_log('Comment response carried no comment name; thread not recorded');
    }
}
/**
 * Relay every unread inbox message to another user as a private message.
 *
 * Each message is marked read before it is forwarded, so a send that fails
 * leaves the message marked read.
 *
 * @param string|null $to Recipient; FORWARD_USER when empty.
 * @return void
 */
public static function forward_inbox($to = null) {
    $recipient = $to ? $to : self::FORWARD_USER;
    $inbox = Reddit::inbox();
    if (!isset($inbox['body']['data']['children'])) {
        return;
    }
    foreach ($inbox['body']['data']['children'] as $item) {
        $message = $item['data'];
        self::_log('Forwarding inbox message from '.$message['author']);
        Reddit::mark_read($message['name']);
        $lines = array(
            'From: '.$message['author'],
            'Date: '.date('Y-m-d H:i:s', $message['created_utc']),
            'Subject: '.$message['subject'],
            'Context: '.$message['context'],
            '',
            $message['body']
        );
        Reddit::send_message($recipient, 'Forwarded from Decronym', join(" \n", $lines));
    }
}
/**
 * Load the selected subreddit's acronyms, keyed by each "|"-separated
 * fragment of their acronym_regex column.
 *
 * Rows arrive ordered by acronym_prio DESC, acronym_key ASC. When two rows
 * share a fragment, the later row replaces the earlier one under that key.
 * NOTE(review): that means the lower-priority row wins — confirm intended.
 *
 * @return array Map of regex fragment => acronym row.
 */
private static function _fetch_acronyms() {
    self::_ensure_unchecked_subreddit();
    $query = self::$dbc->prepare('SELECT acronym_id, acronym_key, acronym_regex, acronym_value, acronym_type FROM acronyms WHERE subreddit_id = :id ORDER BY acronym_prio DESC, acronym_key ASC');
    $query->bindValue(':id', self::$subreddit['subreddit_id']);
    $query->execute();
    $by_fragment = array();
    foreach ($query->fetchAll() as $acronym) {
        $fragments = explode('|', $acronym['acronym_regex']);
        foreach ($fragments as $fragment) {
            $by_fragment[$fragment] = $acronym;
        }
    }
    return $by_fragment;
}
/**
 * Load the bot's record for a thread in the selected subreddit, together
 * with the number of distinct acronyms recorded for it.
 *
 * @param int|string $thread_id Decimal thread id.
 * @return array|false Row with 'bot_comment_id', 'first_check_ts' and
 *                     'acronym_count', or false when the query returns no row.
 */
private static function _fetch_thread($thread_id) {
    self::_ensure_unchecked_subreddit();
    $query = self::$dbc->prepare('SELECT t.bot_comment_id, t.first_check_ts, COUNT(DISTINCT ta.acronym_id) AS acronym_count FROM threads t LEFT JOIN thread_acronyms ta ON t.thread_id = ta.thread_id WHERE t.subreddit_id = :subreddit AND t.thread_id = :thread');
    $query->bindValue(':subreddit', self::$subreddit['subreddit_id']);
    $query->bindValue(':thread', $thread_id);
    $query->execute();
    $rows = $query->fetchAll();
    return count($rows) ? $rows[0] : false;
}
/**
 * Find the thread of the selected subreddit with the most distinct acronyms
 * among threads that have at least one acronym whose comment_ts falls on the
 * database's current date.
 *
 * @return array|false Row with 'thread_id' and 'acronym_count', or false
 *                     when no thread qualifies.
 */
private static function _fetch_busiest_thread() {
    self::_ensure_unchecked_subreddit();
    $query = self::$dbc->prepare('SELECT ta.thread_id, COUNT(DISTINCT ta.acronym_id) AS acronym_count FROM thread_acronyms ta LEFT JOIN threads t ON ta.thread_id = t.thread_id WHERE t.subreddit_id = :sub AND ta.thread_id IN (SELECT tt.thread_id FROM thread_acronyms tt WHERE DATE(FROM_UNIXTIME(tt.comment_ts)) = CURRENT_DATE()) GROUP BY ta.thread_id ORDER BY acronym_count DESC LIMIT 1');
    $query->bindValue(':sub', self::$subreddit['subreddit_id']);
    $query->execute();
    $busiest = $query->fetch();
    return $busiest;
}
/**
 * Collect every acronym to list for a thread, grouped by acronym type.
 *
 * Starts from the acronyms recorded for the thread, adds those found in the
 * thread title, then repeatedly adds acronyms that appear inside the
 * definitions already collected until a pass adds nothing new.
 *
 * Fixes over the previous version:
 *  - The "did this pass add anything" test used array_reduce() with count()
 *    as reducer. count() receives (carry, item), so it never summed the
 *    group sizes (and is a TypeError on PHP 8). The total is now computed
 *    explicitly, which makes the expansion loop work as intended.
 *  - With no acronyms defined the regex alternation was empty and matched
 *    empty strings; matching is now skipped in that case.
 *  - Matches without an entry under exactly that key are ignored.
 *  - Acronyms added from the title or from definitions get an explicit
 *    'comment_id' => null so consumers can read the key safely.
 *
 * @param int|string $thread_id   Decimal thread id.
 * @param array|null $thread_info Optional thread info; only 'title' is read.
 * @return array Map of acronym_type => (acronym_key => row), each group
 *               sorted by key.
 */
private static function _fetch_acronyms_for_thread($thread_id, $thread_info = null) {
    $st = self::$dbc->prepare('SELECT a.acronym_id, a.acronym_key, a.acronym_value, a.acronym_type, ta.comment_id FROM thread_acronyms ta LEFT JOIN acronyms a ON ta.acronym_id = a.acronym_id WHERE ta.thread_id = :id ORDER BY a.acronym_type, a.acronym_key ASC');
    $st->bindValue(':id', $thread_id);
    $st->execute();
    $acronyms = array();
    foreach ($st->fetchAll() as $row) {
        $acronyms[$row['acronym_type']][$row['acronym_key']] = $row;
    }
    $all_acronyms = self::_fetch_acronyms();
    if (count($all_acronyms)) {
        $regex = '#\b('.join('|', array_keys($all_acronyms)).')\b#';
        if (isset($thread_info['title'])) {
            $acronyms = self::_merge_matched_acronyms(
                $acronyms,
                $all_acronyms,
                $regex,
                $thread_info['title'],
                'Found acronyms in title: '
            );
        }
        do {
            $count_before = array_sum(array_map('count', $acronyms));
            // The loops walk the groups as they were when the pass started;
            // rows added during the pass are scanned by the next pass.
            foreach ($acronyms as $rows) {
                foreach ($rows as $row) {
                    $acronyms = self::_merge_matched_acronyms(
                        $acronyms,
                        $all_acronyms,
                        $regex,
                        $row['acronym_value'],
                        'Found dereferenced acronyms: '
                    );
                }
            }
        } while (array_sum(array_map('count', $acronyms)) > $count_before);
    }
    foreach ($acronyms as $type => $rows) {
        ksort($acronyms[$type]);
    }
    return $acronyms;
}

/**
 * Add to $acronyms every known acronym that $regex finds in $text and that
 * is not already present under its type and key.
 *
 * @param array  $acronyms     Current map of type => (key => row).
 * @param array  $all_acronyms Map of regex fragment => acronym row.
 * @param string $regex        Pattern whose first group captures the fragment.
 * @param string $text         Text to scan.
 * @param string $log_prefix   Prefix of the log line written when matches exist.
 * @return array The updated map.
 */
private static function _merge_matched_acronyms($acronyms, $all_acronyms, $regex, $text, $log_prefix) {
    if (!preg_match_all($regex, $text, $matches)) {
        return $acronyms;
    }
    self::_log($log_prefix.join(', ', $matches[1]));
    foreach ($matches[1] as $match) {
        if (!isset($all_acronyms[$match])) {
            continue;
        }
        $found = $all_acronyms[$match];
        if (!isset($acronyms[$found['acronym_type']][$found['acronym_key']])) {
            // These rows come from the acronyms table and have no comment.
            $found['comment_id'] = null;
            $acronyms[$found['acronym_type']][$found['acronym_key']] = $found;
        }
    }
    return $acronyms;
}
/**
 * Render the markdown comment listing a thread's acronyms.
 *
 * Fixes over the previous version:
 *  - A thread with no ACRONYM_STANDARD group no longer calls count() on an
 *    undefined index.
 *  - The "first saw this thread" time is formatted with gmdate(), matching
 *    the "UTC" label in the text (date() uses the server's time zone).
 *  - Rows without a 'comment_id' key are rendered without a link.
 *  - When there is no busiest thread for today, the footer states only this
 *    thread's count instead of linking to a non-existent thread.
 *
 * @param int|string $thread_id   Decimal thread id.
 * @param array|null $thread_info Optional thread info, passed on to
 *                                _fetch_acronyms_for_thread().
 * @return string|false Comment body, or false when fewer than two standard
 *                      acronyms are available.
 */
private static function _build_body_for_thread($thread_id, $thread_info = null) {
    $thread = self::_fetch_thread($thread_id);
    $busiest_thread = self::_fetch_busiest_thread();
    $acronyms = self::_fetch_acronyms_for_thread($thread_id, $thread_info);
    $standard_count = isset($acronyms[self::ACRONYM_STANDARD])
        ? count($acronyms[self::ACRONYM_STANDARD])
        : 0;
    if ($standard_count < 2) {
        self::_log('Failed to generate text: less than 2 acronyms');
        return false;
    }
    $text = array();
    $first_check = ($thread && $thread['first_check_ts']) ? (int)$thread['first_check_ts'] : time();
    $text[] = "###&#009;";
    $text[] = "######&#009;";
    $text[] = "####&#009;";
    $text[] = '';
    $text[] = "Acronyms, initialisms, abbreviations, contractions, and other phrases which expand to something larger, that I've seen in this thread:";
    $table_headers = array(
        self::ACRONYM_STANDARD => '|Fewer Letters|More Letters|',
        self::ACRONYM_JARGON => '|Jargon|Definition|',
        self::ACRONYM_EVENT => '|Event|Date|Description|'
    );
    foreach ($acronyms as $table => $rows) {
        $text[] = '';
        // An unknown type gets an empty header line, as before.
        $text[] = isset($table_headers[$table]) ? $table_headers[$table] : '';
        $text[] = '|-------|---------|---|';
        foreach ($rows as $key => $row) {
            if (!empty($row['comment_id'])) {
                $key_text = sprintf(
                    '[%s](/r/%s/comments/%s//%s "Last usage")',
                    $key,
                    self::$subreddit['subreddit_name'],
                    Reddit::_to_id($thread_id),
                    Reddit::_to_id($row['comment_id'])
                );
            } else {
                $key_text = $key;
            }
            $text[] = sprintf('|%s|%s|', $key_text, $row['acronym_value']);
        }
    }
    $text[] = '';
    $text[] = '----------------';
    if (self::$subreddit['footer_text']) {
        $text[] = '^(' . self::$subreddit['footer_text'] . ') ';
    }
    $text[] = "^(I'm a bot, and I first saw this thread at ".gmdate('jS M Y, H:i \U\T\C', $first_check).".) ";
    $acronym_count = $thread ? $thread['acronym_count'] : 0;
    if ($acronym_count) {
        if (!$busiest_thread) {
            $text[] = "^(I've seen ".$acronym_count." acronyms in this thread.) ";
        } else if ($thread_id == $busiest_thread['thread_id']) {
            // Loose comparison on purpose: ids may be ints or numeric strings.
            $text[] = "^(I've seen ".$acronym_count." acronyms in this thread, which is the most I've seen in a thread so far today.) ";
        } else {
            $text[] = "^(I've seen ".$acronym_count." acronyms in this thread; )[^the ^most ^compressed ^thread ^commented ^on ^today](/r/".self::$subreddit['subreddit_name']."/comments/".Reddit::_to_id($busiest_thread['thread_id']).")^( has ".$busiest_thread['acronym_count']." acronyms.) ";
        }
    }
    $text[] = "[^\[Acronym ^lists\]](http://decronym.xyz/) [^\[Contact ^creator\]](https://reddit.com/message/compose?to=OrangeredStilton&subject=Hey,+your+acronym+bot+sucks) [^\[PHP ^source ^code\]](https://gist.github.com/Two9A/1d976f9b7441694162c8)";
    return join("\n", $text);
}
/**
 * Record that an acronym was used in a comment of a thread.
 *
 * First replaces the (thread, acronym, comment) row, then stamps the
 * comment's timestamp on every thread_acronyms row for that comment.
 *
 * @param int|string $thread_id  Decimal thread id.
 * @param int|string $acronym_id Acronym id.
 * @param int|string $comment_id Decimal comment id.
 * @param int        $comment_ts Comment creation time (Unix timestamp).
 * @return void
 */
private static function _add_acronym_to_thread($thread_id, $acronym_id, $comment_id, $comment_ts) {
    $replace = self::$dbc->prepare('REPLACE INTO thread_acronyms(thread_id, acronym_id, comment_id) VALUES(:thread, :acronym, :comment)');
    $replace->execute(array(
        ':thread' => $thread_id,
        ':acronym' => $acronym_id,
        ':comment' => $comment_id
    ));
    $stamp = self::$dbc->prepare('UPDATE thread_acronyms SET comment_ts=:ts WHERE comment_id=:comment');
    $stamp->execute(array(
        ':ts' => $comment_ts,
        ':comment' => $comment_id
    ));
}
/**
 * Insert a threads row linking a thread to the bot's comment on it, with
 * the current time as first_check_ts.
 *
 * @param int|string $thread_id  Decimal thread id.
 * @param int|string $comment_id Decimal id of the bot's comment.
 * @return void
 */
private static function _add_thread($thread_id, $comment_id) {
    $insert = self::$dbc->prepare('INSERT INTO threads(thread_id, subreddit_id, bot_comment_id, first_check_ts) VALUES(:thread, :subreddit, :comment, :ts)');
    $insert->execute(array(
        ':thread' => $thread_id,
        ':subreddit' => self::$subreddit['subreddit_id'],
        ':comment' => $comment_id,
        ':ts' => time()
    ));
}
/**
 * Select the subreddit to process in this run, once.
 *
 * Picks the enabled subreddit with the oldest last_check_ts that is earlier
 * than now, stamps it with the current time, and caches the row as it was
 * before the stamp (so last_check_ts holds the previous check time).
 *
 * Previously, when no row qualified, the UPDATE ran with a null id and the
 * run continued with no subreddit at all; that case now raises.
 *
 * @return void
 * @throws Exception When no enabled subreddit is due for a check.
 */
private static function _ensure_unchecked_subreddit() {
    self::_ensure_connection();
    if (self::$subreddit) {
        return;
    }
    $time = time();
    $st = self::$dbc->prepare('SELECT subreddit_id, subreddit_name, last_check_ts, min_comments, min_score, sticky_min_score, max_thread_wait, footer_text FROM subreddits WHERE enabled = 1 AND last_check_ts < :ts ORDER BY last_check_ts LIMIT 1');
    $st->bindValue(':ts', $time);
    $st->execute();
    $row = $st->fetch();
    if (!$row) {
        throw new Exception('No enabled subreddit is due for a check');
    }
    $update_st = self::$dbc->prepare('UPDATE subreddits SET last_check_ts = :ts WHERE subreddit_id = :id');
    $update_st->bindValue(':ts', $time);
    $update_st->bindValue(':id', $row['subreddit_id']);
    $update_st->execute();
    self::$subreddit = $row;
}
/**
 * Open the MySQL connection on first use.
 *
 * The handle throws exceptions on errors and fetches rows as associative
 * arrays.
 *
 * @return void
 */
private static function _ensure_connection() {
    if (self::$dbc) {
        return;
    }
    $dsn = sprintf('mysql:host=%s;dbname=%s', self::DB_HOST, self::DB_NAME);
    self::$dbc = new PDO($dsn, self::DB_USER, self::DB_PASS);
    self::$dbc->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    self::$dbc->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC);
}
/**
 * Append one line, tagged with the selected subreddit's name, to
 * /tmp/decronym.log.
 *
 * The tag is empty while no subreddit has been selected (forward_inbox()
 * logs before one is chosen); the name is now read only when present.
 * Writing goes through file_put_contents() so an unwritable log file yields
 * a warning rather than passing false to fprintf().
 *
 * @param string $str Message to log.
 * @return void
 */
private static function _log($str) {
    $tag = (is_array(self::$subreddit) && isset(self::$subreddit['subreddit_name']))
        ? self::$subreddit['subreddit_name']
        : '';
    $line = sprintf("[%s] [%s] %s\n", date('YmdHis'), $tag, $str);
    file_put_contents('/tmp/decronym.log', $line, FILE_APPEND | LOCK_EX);
}
}
// One run: relay unread inbox messages, scan one subreddit's new comments
// for acronyms, then post or update the bot's comment on each touched thread.
Decronym::forward_inbox();
$threads = Decronym::parse_comments(Decronym::read_comments_since_last_check());
$comment_counts = Decronym::read_thread_info($threads);
foreach ($threads as $thread_id) {
    // A thread missing from the info response cannot be checked against the
    // thresholds; skip it rather than pass an undefined entry.
    if (!isset($comment_counts[$thread_id])) {
        continue;
    }
    // Pause between threads to space out API writes.
    sleep(1);
    Decronym::post_or_edit($thread_id, $comment_counts[$thread_id]);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment