Last active
April 9, 2024 23:25
-
-
Save Two9A/1d976f9b7441694162c8 to your computer and use it in GitHub Desktop.
Decronym: A simple Reddit bot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Decronym: a quick-and-dirty Reddit bot.
 */
/**
 * Minimal static client for the Reddit OAuth API used by the bot.
 *
 * All requests go through _send(), which lazily obtains a bearer token via
 * the "password" grant and retries once when the server reports that the
 * token is no longer valid. Every public fetch/post method returns the array
 * produced by _curl(): array('headers' => array, 'body' => decoded JSON).
 */
class Reddit {
    const USERNAME = 'Decronym';
    const PASSWORD = '***';
    const CLIENTID = '***';
    const SECRET = '***';
    const ACCESS_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token';
    const API_BASE_URL = 'https://oauth.reddit.com';
    // Type prefixes used when building "thing" names such as t3_abc123
    const THING_COMMENT = 't1';
    const THING_THREAD = 't3';
    const THING_SUBREDDIT = 't5';

    /** @var string|null Bearer token for the current run; null until logged in */
    private static $token;

    /**
     * Fetch the identity of the authenticated account.
     *
     * @return array Response array from _send()
     */
    public static function me() {
        return self::_send(self::API_BASE_URL.'/api/v1/me');
    }

    /**
     * Fetch up to 100 of the newest comments in a subreddit.
     *
     * @param string $subreddit Subreddit name, without the /r/ prefix
     * @return array Response array from _send()
     */
    public static function fetch_comments($subreddit) {
        $query = http_build_query(array(
            // Changes on every call; presumably a cache-buster
            'cb' => time(),
            'sort' => 'new',
            'limit' => '100'
        ));
        return self::_send(self::API_BASE_URL.'/r/'.$subreddit.'/comments/?'.$query);
    }

    /**
     * Fetch information about a set of threads in a single /api/info call.
     *
     * @param array $threadids Decimal thread ids
     * @return array Response array from _send()
     */
    public static function fetch_threads($threadids) {
        $names = array();
        foreach ($threadids as $threadid) {
            $names[] = self::_to_thingid(self::THING_THREAD, $threadid);
        }
        $query = http_build_query(array(
            'id' => join(',', $names),
        ));
        return self::_send(self::API_BASE_URL.'/api/info?'.$query);
    }

    /**
     * Post a new top-level comment on a thread.
     *
     * @param int|string $threadid Decimal thread id
     * @param string $body Comment text
     * @return array Response array from _send()
     * @throws Exception When the API reports an error
     */
    public static function comment($threadid, $body) {
        return self::_send(
            self::API_BASE_URL.'/api/comment',
            array(
                'api_type' => 'json',
                'thing_id' => self::_to_thingid(self::THING_THREAD, $threadid),
                'text' => $body
            )
        );
    }

    /**
     * Replace the text of an existing comment.
     *
     * @param int|string $commentid Decimal comment id
     * @param string $body New comment text
     * @return array Response array from _send()
     * @throws Exception When the API reports an error
     */
    public static function edit($commentid, $body) {
        return self::_send(
            self::API_BASE_URL.'/api/editusertext',
            array(
                'api_type' => 'json',
                'thing_id' => self::_to_thingid(self::THING_COMMENT, $commentid),
                'text' => $body
            )
        );
    }

    /**
     * Fetch the unread messages of the authenticated account.
     *
     * @return array Response array from _send()
     */
    public static function inbox() {
        return self::_send(self::API_BASE_URL.'/message/unread');
    }

    /**
     * Mark a message as read.
     *
     * @param string $thingid Full thing name of the message, passed through unchanged
     * @return array Response array from _send()
     */
    public static function mark_read($thingid) {
        return self::_send(
            self::API_BASE_URL.'/api/read_message',
            array(
                'id' => $thingid
            )
        );
    }

    /**
     * Send a private message.
     *
     * @param string $to Recipient user name
     * @param string $subject Message subject
     * @param string $text Message body
     * @return array Response array from _send()
     * @throws Exception When the API reports an error
     */
    public static function send_message($to, $subject, $text) {
        return self::_send(
            self::API_BASE_URL.'/api/compose',
            array(
                'api_type' => 'json',
                'from_sr' => '',
                'to' => $to,
                'subject' => $subject,
                'text' => $text
            )
        );
    }

    /**
     * Build a thing name ("<type>_<base36 id>") from a type prefix and a decimal id.
     *
     * @param string $type One of the THING_* constants
     * @param int|string $id Decimal id
     * @return string
     */
    public static function _to_thingid($type, $id) {
        return $type.'_'.base_convert($id, 10, 36);
    }

    /**
     * Split a thing name into its type prefix and decimal id.
     *
     * @param string $thingid Thing name such as "t3_abc"
     * @return array array('type' => string, 'id' => decimal id as a string)
     */
    public static function _from_thingid($thingid) {
        $parts = explode('_', $thingid);
        // A name without an underscore yields an empty id instead of a notice
        $id = isset($parts[1]) ? $parts[1] : '';
        return array(
            'type' => $parts[0],
            'id' => base_convert($id, 36, 10)
        );
    }

    /**
     * Convert a decimal id to base 36.
     *
     * @param int|string $id
     * @return string
     */
    public static function _to_id($id) {
        return base_convert($id, 10, 36);
    }

    /**
     * Convert a base-36 id to its decimal representation.
     *
     * @param string $id
     * @return string
     */
    public static function _from_id($id) {
        return base_convert($id, 36, 10);
    }

    /**
     * Perform an authenticated API request.
     *
     * @param string $url Full request URL
     * @param array $data POST fields; an empty array results in a GET
     * @param bool $retry Whether one re-login and retry is still allowed
     * @return array array('headers' => array, 'body' => decoded JSON)
     * @throws Exception When the API reports an error or the transfer fails
     */
    private static function _send($url, $data = array(), $retry = true) {
        self::_ensure_loggedin();
        $result = self::_curl($url, $data, null, array(
            'Authorization' => 'bearer '.self::$token
        ));
        $body = $result['body'];

        if (is_array($body) && !empty($body['error'])) {
            $error = $body['error'];
            $token_rejected = in_array((string)$error, array('401', 'invalid_grant'), true);
            if ($token_rejected && $retry) {
                // Drop the token so the retry logs in again. Only one retry is
                // allowed, so a persistent rejection throws instead of recursing forever.
                self::$token = null;
                return self::_send($url, $data, false);
            }
            throw new Exception('Server error: '.$error);
        }

        if (is_array($body) && isset($body['json']['errors']) && count($body['json']['errors'])) {
            throw new Exception(json_encode($body['json']['errors']));
        }
        return $result;
    }

    /**
     * Obtain a bearer token with the password grant unless one is already held.
     *
     * @throws Exception When the token endpoint reports an error or returns no token
     */
    private static function _ensure_loggedin() {
        if (isset(self::$token)) {
            return;
        }
        $login = self::_curl(
            self::ACCESS_TOKEN_URL,
            array(
                'grant_type' => 'password',
                'username' => self::USERNAME,
                'password' => self::PASSWORD
            ),
            array(
                'username' => self::CLIENTID,
                'password' => self::SECRET
            )
        );
        if (isset($login['body']['error']) && $login['body']['error']) {
            throw new Exception('Authentication failed: '.$login['body']['error']);
        }
        if (!isset($login['body']['access_token'])) {
            // Without this check a malformed response would leave the token null
            // and every later request would go out with an empty bearer token.
            throw new Exception('Authentication failed: no access token in response');
        }
        self::$token = $login['body']['access_token'];
    }

    /**
     * Execute a single HTTP request with cURL.
     *
     * @param string $url Request URL
     * @param array|null $data POST fields; when empty the request is a GET
     * @param array|null $auth Optional array('username' =>, 'password' =>) for HTTP Basic auth
     * @param array $headers Extra request headers as name => value
     * @return array array('headers' => parsed response headers, 'body' => decoded JSON or null)
     * @throws Exception When the transfer itself fails
     */
    private static function _curl($url, $data = null, $auth = null, $headers = array()) {
        // Sent with an empty value; presumably to suppress cURL's automatic
        // "Expect: 100-continue" on POST requests
        $headers['Expect'] = '';
        $curlheaders = array();
        foreach ($headers as $k => $v) {
            $curlheaders[] = "{$k}: {$v}";
        }

        $c = curl_init();
        $params = array(
            CURLOPT_URL => $url,
            CURLOPT_HEADER => true,
            CURLOPT_VERBOSE => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_USERAGENT => 'Decronym/0.01',
            // Certificate and host name verification stay on: these requests
            // carry the account password, the client secret and the bearer token.
            // The TLS version is left to libcurl to negotiate.
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_HTTPHEADER => $curlheaders
        );
        if ($data) {
            $params += array(
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => http_build_query($data),
                CURLOPT_CUSTOMREQUEST => 'POST'
            );
        }
        if ($auth) {
            $params += array(
                CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
                CURLOPT_USERPWD => "{$auth['username']}:{$auth['password']}"
            );
        }
        curl_setopt_array($c, $params);

        $r = curl_exec($c);
        if ($r === false) {
            $error = curl_error($c);
            curl_close($c);
            throw new Exception('HTTP request failed: '.$error);
        }
        $headersize = curl_getinfo($c, CURLINFO_HEADER_SIZE);
        curl_close($c);

        return array(
            'headers' => self::_parse_headers(substr($r, 0, $headersize)),
            'body' => json_decode(trim(substr($r, $headersize)), true)
        );
    }

    /**
     * Parse a raw response header section into a name => value array.
     *
     * When redirects were followed the section holds one header set per hop;
     * later values overwrite earlier ones and '_code' ends up holding the
     * status line of the final response.
     *
     * @param string $raw Raw header text with CRLF line endings
     * @return array Header map plus '_code' => status line
     */
    private static function _parse_headers($raw) {
        $headers = array('_code' => '');
        foreach (explode("\r\n", $raw) as $line) {
            if (trim($line) === '') {
                continue;
            }
            if (strncmp($line, 'HTTP/', 5) === 0) {
                $headers['_code'] = $line;
                continue;
            }
            if (strpos($line, ':') === false) {
                // Not a "name: value" line; nothing to record
                continue;
            }
            // Limit of 2 keeps values that contain colons (URLs, dates) intact
            list($k, $v) = explode(':', $line, 2);
            $headers[trim($k)] = trim($v);
        }
        return $headers;
    }

    /**
     * Append a timestamped line tagged "API" to /tmp/decronym.log.
     *
     * @param string $str Message to log
     */
    private static function _log($str) {
        $line = sprintf("[%s] [%s] %s\n", date('YmdHis'), 'API', $str);
        // file_put_contents returns false on failure rather than handing a
        // failed handle to fprintf
        file_put_contents('/tmp/decronym.log', $line, FILE_APPEND | LOCK_EX);
    }
}
/**
 * The bot itself: reads new comments from one subreddit per run, records
 * which known acronyms appear in which threads, and posts or edits a summary
 * comment on each affected thread.
 *
 * State lives in a MySQL database (tables subreddits, acronyms, threads and
 * thread_acronyms are referenced by the queries below). The subreddit to work
 * on is chosen once per run by _ensure_unchecked_subreddit().
 */
class Decronym {
    const DB_HOST = 'localhost';
    const DB_NAME = 'decronym';
    const DB_USER = '***';
    const DB_PASS = '***';
    // Account that receives forwarded inbox messages by default
    const FORWARD_USER = 'NotDecronym';
    // Values of acronyms.acronym_type; each type is rendered as its own table
    const ACRONYM_STANDARD = 0;
    const ACRONYM_JARGON = 1;
    const ACRONYM_EVENT = 2;

    /** @var PDO|null Lazily opened database connection */
    private static $dbc;
    /** @var array|false|null Row of the subreddit selected for this run */
    private static $subreddit;
    /** @var array Comment authors whose comments are never scanned */
    private static $ignored_authors = array(
        'TweetPoster',
        'TweetsInCommentsBot',
    );

    /**
     * Fetch the newest comments of the selected subreddit and keep those
     * created or edited since its previous check, excluding the bot's own.
     *
     * @return array List of array('id', 'ts', 'body', 'author', 'thread'),
     *               with ids converted to decimal
     */
    public static function read_comments_since_last_check() {
        if (!self::_ensure_unchecked_subreddit()) {
            self::_log('No enabled subreddit to check');
            return array();
        }
        $ts = self::$subreddit['last_check_ts'];
        $comments_return = Reddit::fetch_comments(self::$subreddit['subreddit_name']);
        $comments = array();
        if (isset($comments_return['body']['data']['children'])
            && is_array($comments_return['body']['data']['children'])) {
            $comments = $comments_return['body']['data']['children'];
        }

        $ret = array();
        foreach ($comments as $comment) {
            $data = $comment['data'];
            $is_new = $data['created_utc'] >= $ts;
            $is_edited = !empty($data['edited']) && $data['edited'] >= $ts;
            if (!$is_new && !$is_edited) {
                continue;
            }
            // We don't want comments written by the bot, that just gets silly
            if ($data['author'] === Reddit::USERNAME) {
                continue;
            }
            $ret[] = array(
                'id' => Reddit::_from_id($data['id']),
                'ts' => (int)$data['created_utc'],
                'body' => $data['body'],
                'author' => $data['author'],
                'thread' => Reddit::_from_thingid($data['link_id'])
            );
        }
        self::_log(count($ret).' new comments');
        return $ret;
    }

    /**
     * Look up title, comment count, score, creation time and sticky flag for
     * a set of threads.
     *
     * @param array $thread_ids Decimal thread ids
     * @return array Map of decimal thread id => array('title', 'num_comments',
     *               'score', 'created_utc', 'stickied'); threads the API does
     *               not return are absent
     */
    public static function read_thread_info($thread_ids) {
        $counts = array();
        if (!count($thread_ids)) {
            // Nothing to look up; avoid an API call with an empty id list
            return $counts;
        }
        $data = Reddit::fetch_threads($thread_ids);
        if (isset($data['body']['data']['children'])) {
            foreach ($data['body']['data']['children'] as $child) {
                $threadid = Reddit::_from_thingid($child['data']['name']);
                $counts[$threadid['id']] = array(
                    'title' => $child['data']['title'],
                    'num_comments' => $child['data']['num_comments'],
                    'score' => $child['data']['score'],
                    'created_utc' => $child['data']['created_utc'],
                    // post_or_edit() reads this flag for its sticky score check
                    'stickied' => !empty($child['data']['stickied'])
                );
            }
        }
        return $counts;
    }

    /**
     * Scan comments for known acronyms and record each hit against its thread.
     *
     * @param array $comments Output of read_comments_since_last_check()
     * @return array Decimal ids of every thread that had a scanned comment,
     *               whether or not an acronym was found in it
     */
    public static function parse_comments($comments) {
        if (!self::_ensure_unchecked_subreddit()) {
            return array();
        }
        $acronyms = self::_fetch_acronyms();
        $regex = self::_build_acronym_regex($acronyms);
        $threads = array();

        foreach ($comments as $comment) {
            self::_log(
                '/r/'.
                self::$subreddit['subreddit_name'].'/comments/'.
                Reddit::_to_id($comment['thread']['id']).'//'.
                Reddit::_to_id($comment['id'])
            );
            if (in_array($comment['author'], self::$ignored_authors, true)) {
                continue;
            }
            if ($regex !== false && preg_match_all($regex, $comment['body'], $matches)) {
                self::_log('Found acronyms: '.join(', ', $matches[1]));
                foreach ($matches[1] as $match) {
                    // The matched text is only a valid key when the pattern
                    // alternative was a literal; skip anything else
                    if (!isset($acronyms[$match])) {
                        continue;
                    }
                    self::_add_acronym_to_thread(
                        $comment['thread']['id'],
                        $acronyms[$match]['acronym_id'],
                        $comment['id'],
                        $comment['ts']
                    );
                }
            }
            // Always signal that this thread was updated
            $threads[$comment['thread']['id']] = true;
        }
        return array_keys($threads);
    }

    /**
     * Post the acronym summary on a thread, or edit the bot's existing one,
     * provided the thread passes the subreddit's thresholds.
     *
     * @param int|string $thread_id Decimal thread id
     * @param array $thread_info Entry from read_thread_info() for this thread
     * @return void
     */
    public static function post_or_edit($thread_id, $thread_info) {
        if (!self::_ensure_unchecked_subreddit()) {
            return;
        }
        if (!is_array($thread_info)) {
            self::_log('Refusing to write comment: no thread info');
            return;
        }
        if ($thread_info['num_comments'] < self::$subreddit['min_comments']) {
            self::_log('Refusing to write comment: below min_comments');
            return;
        }
        if (!empty($thread_info['stickied']) && $thread_info['score'] < self::$subreddit['sticky_min_score']) {
            self::_log('Refusing to write comment: below sticky_min_score');
            return;
        }
        if ($thread_info['score'] < self::$subreddit['min_score']) {
            self::_log('Refusing to write comment: below min_score');
            return;
        }
        if (time() < ($thread_info['created_utc'] + self::$subreddit['max_thread_wait'])) {
            self::_log('Refusing to write comment: thread too new');
            return;
        }

        $thread = self::_fetch_thread($thread_id);
        $body = self::_build_body_for_thread($thread_id, $thread_info);
        if (!$body) {
            return;
        }
        if ($thread && $thread['bot_comment_id']) {
            self::_log('Updating comment for thread '.Reddit::_to_id($thread_id));
            Reddit::edit($thread['bot_comment_id'], $body);
            return;
        }

        self::_log('Writing comment for thread '.Reddit::_to_id($thread_id));
        $ret = Reddit::comment($thread_id, $body);
        if (isset($ret['body']['json']['data']['things'][0]['data']['name'])) {
            // Remember the new comment so later runs edit it instead of reposting
            $comment = Reddit::_from_thingid($ret['body']['json']['data']['things'][0]['data']['name']);
            self::_add_thread($thread_id, $comment['id']);
        }
    }

    /**
     * Forward every unread inbox message to another user and mark it read.
     *
     * @param string|null $to Recipient; defaults to FORWARD_USER
     * @return void
     * @throws Exception When a Reddit API call reports an error
     */
    public static function forward_inbox($to = null) {
        if (!$to) {
            $to = self::FORWARD_USER;
        }
        $inbox = Reddit::inbox();
        if (!isset($inbox['body']['data']['children'])) {
            return;
        }
        foreach ($inbox['body']['data']['children'] as $msg) {
            $data = $msg['data'];
            self::_log('Forwarding inbox message from '.$data['author']);
            Reddit::send_message(
                $to,
                'Forwarded from Decronym',
                join(" \n", array(
                    'From: '.$data['author'],
                    'Date: '.date('Y-m-d H:i:s', (int)$data['created_utc']),
                    'Subject: '.(isset($data['subject']) ? $data['subject'] : ''),
                    'Context: '.(isset($data['context']) ? $data['context'] : ''),
                    '',
                    $data['body']
                ))
            );
            // Mark as read only after the forward succeeded, so a failed send
            // leaves the message unread for the next run instead of losing it
            Reddit::mark_read($data['name']);
        }
    }

    /**
     * Load the acronyms of the selected subreddit.
     *
     * @return array Map of pattern alternative => acronym row; acronym_regex
     *               is split on '|' and every alternative maps to its row
     */
    private static function _fetch_acronyms() {
        self::_ensure_unchecked_subreddit();
        $st = self::$dbc->prepare('SELECT acronym_id, acronym_key, acronym_regex, acronym_value, acronym_type FROM acronyms WHERE subreddit_id = :id ORDER BY acronym_prio DESC, acronym_key ASC');
        $st->bindValue(':id', self::$subreddit['subreddit_id']);
        $st->execute();
        $acronyms = array();
        foreach ($st->fetchAll() as $row) {
            foreach (explode('|', $row['acronym_regex']) as $k) {
                if ($k === '') {
                    // An empty alternative would make the combined pattern
                    // match the empty string at every word boundary
                    continue;
                }
                $acronyms[$k] = $row;
            }
        }
        return $acronyms;
    }

    /**
     * Combine all acronym pattern alternatives into one word-bounded regex.
     *
     * @param array $acronyms Output of _fetch_acronyms()
     * @return string|false The pattern, or false when there are no acronyms
     */
    private static function _build_acronym_regex($acronyms) {
        if (!count($acronyms)) {
            return false;
        }
        return '#\b('.join('|', array_keys($acronyms)).')\b#';
    }

    /**
     * Find the acronym rows whose patterns occur in a piece of text.
     *
     * @param string $regex Pattern from _build_acronym_regex()
     * @param array $all_acronyms Output of _fetch_acronyms()
     * @param string $text Text to scan
     * @param string $log_prefix Prefix of the log line written when something matches
     * @return array List of acronym rows, one per match that maps to a known key
     */
    private static function _match_acronyms($regex, $all_acronyms, $text, $log_prefix) {
        $found = array();
        if (!preg_match_all($regex, (string)$text, $matches)) {
            return $found;
        }
        self::_log($log_prefix.join(', ', $matches[1]));
        foreach ($matches[1] as $match) {
            if (isset($all_acronyms[$match])) {
                $found[] = $all_acronyms[$match];
            }
        }
        return $found;
    }

    /**
     * Load the stored record of a thread together with its distinct acronym count.
     *
     * @param int|string $thread_id Decimal thread id
     * @return array|false Row with bot_comment_id, first_check_ts and
     *                     acronym_count, or false when the query returns no row
     */
    private static function _fetch_thread($thread_id) {
        self::_ensure_unchecked_subreddit();
        $st = self::$dbc->prepare('SELECT t.bot_comment_id, t.first_check_ts, COUNT(DISTINCT ta.acronym_id) AS acronym_count FROM threads t LEFT JOIN thread_acronyms ta ON t.thread_id = ta.thread_id WHERE t.subreddit_id = :subreddit AND t.thread_id = :thread');
        $st->bindValue(':subreddit', self::$subreddit['subreddit_id']);
        $st->bindValue(':thread', $thread_id);
        $st->execute();
        $rows = $st->fetchAll();
        if (count($rows)) {
            return $rows[0];
        }
        return false;
    }

    /**
     * Find the thread with the most distinct acronyms among threads that
     * received an acronym-bearing comment today.
     *
     * @return array|false Row with thread_id and acronym_count, or false when
     *                     no thread qualifies
     */
    private static function _fetch_busiest_thread() {
        self::_ensure_unchecked_subreddit();
        // TODO: Fetch from thread_acronyms where comment_ts is in today
        $st = self::$dbc->prepare('SELECT ta.thread_id, COUNT(DISTINCT ta.acronym_id) AS acronym_count FROM thread_acronyms ta LEFT JOIN threads t ON ta.thread_id = t.thread_id WHERE t.subreddit_id = :sub AND ta.thread_id IN (SELECT tt.thread_id FROM thread_acronyms tt WHERE DATE(FROM_UNIXTIME(tt.comment_ts)) = CURRENT_DATE()) GROUP BY ta.thread_id ORDER BY acronym_count DESC LIMIT 1');
        $st->bindValue(':sub', self::$subreddit['subreddit_id']);
        $st->execute();
        return $st->fetch();
    }

    /**
     * Collect every acronym to show for a thread: those recorded from its
     * comments, those found in its title, and those referenced (directly or
     * transitively) by the definitions of acronyms already collected.
     *
     * @param int|string $thread_id Decimal thread id
     * @param array|null $thread_info Entry from read_thread_info(); only 'title' is read
     * @return array Map of acronym_type => (acronym_key => row), keys sorted
     *               within each type. Only rows recorded from comments carry
     *               a 'comment_id'.
     */
    private static function _fetch_acronyms_for_thread($thread_id, $thread_info = null) {
        $st = self::$dbc->prepare('SELECT a.acronym_id, a.acronym_key, a.acronym_value, a.acronym_type, ta.comment_id FROM thread_acronyms ta LEFT JOIN acronyms a ON ta.acronym_id = a.acronym_id WHERE ta.thread_id = :id ORDER BY a.acronym_type, a.acronym_key ASC');
        $st->bindValue(':id', $thread_id);
        $st->execute();
        $acronyms = array();
        foreach ($st->fetchAll() as $row) {
            $acronyms[$row['acronym_type']][$row['acronym_key']] = $row;
        }

        $all_acronyms = self::_fetch_acronyms();
        $regex = self::_build_acronym_regex($all_acronyms);
        if ($regex !== false) {
            // Rows whose definitions still need scanning for further acronyms
            $pending = array();
            foreach ($acronyms as $rows) {
                foreach ($rows as $row) {
                    $pending[] = $row;
                }
            }

            if (isset($thread_info['title'])) {
                $in_title = self::_match_acronyms($regex, $all_acronyms, $thread_info['title'], 'Found acronyms in title: ');
                foreach ($in_title as $new_acronym) {
                    if (!isset($acronyms[$new_acronym['acronym_type']][$new_acronym['acronym_key']])) {
                        $acronyms[$new_acronym['acronym_type']][$new_acronym['acronym_key']] = $new_acronym;
                        $pending[] = $new_acronym;
                    }
                }
            }

            // Expand to a fixed point: each newly added acronym is queued so its
            // own definition gets scanned too. This terminates because a key is
            // queued at most once and the set of known acronyms is finite.
            while (count($pending)) {
                $row = array_shift($pending);
                $referenced = self::_match_acronyms($regex, $all_acronyms, $row['acronym_value'], 'Found dereferenced acronyms: ');
                foreach ($referenced as $new_acronym) {
                    if (!isset($acronyms[$new_acronym['acronym_type']][$new_acronym['acronym_key']])) {
                        $acronyms[$new_acronym['acronym_type']][$new_acronym['acronym_key']] = $new_acronym;
                        $pending[] = $new_acronym;
                    }
                }
            }
        }

        foreach ($acronyms as $type => $rows) {
            ksort($acronyms[$type]);
        }
        return $acronyms;
    }

    /**
     * Render the Markdown body of the bot's comment for a thread.
     *
     * @param int|string $thread_id Decimal thread id
     * @param array|null $thread_info Entry from read_thread_info()
     * @return string|false Comment text, or false when fewer than two
     *                      standard acronyms are known for the thread
     */
    private static function _build_body_for_thread($thread_id, $thread_info = null) {
        $thread = self::_fetch_thread($thread_id);
        $busiest_thread = self::_fetch_busiest_thread();
        $acronyms = self::_fetch_acronyms_for_thread($thread_id, $thread_info);
        if (!isset($acronyms[self::ACRONYM_STANDARD]) || count($acronyms[self::ACRONYM_STANDARD]) < 2) {
            self::_log('Failed to generate text: less than 2 acronyms');
            return false;
        }

        $text = array();
        $first_check = ($thread && $thread['first_check_ts']) ? $thread['first_check_ts'] : time();
        $text[] = "###	";
        $text[] = "######	";
        $text[] = "####	";
        $text[] = '';
        $text[] = "Acronyms, initialisms, abbreviations, contractions, and other phrases which expand to something larger, that I've seen in this thread:";

        $table_headers = array(
            self::ACRONYM_STANDARD => '|Fewer Letters|More Letters|',
            self::ACRONYM_JARGON => '|Jargon|Definition|',
            self::ACRONYM_EVENT => '|Event|Date|Description|'
        );
        foreach ($acronyms as $table => $rows) {
            $text[] = '';
            $text[] = isset($table_headers[$table]) ? $table_headers[$table] : '';
            $text[] = '|-------|---------|---|';
            foreach ($rows as $key => $row) {
                // Acronyms added from the title or from other definitions have
                // no comment to link to
                if (!empty($row['comment_id'])) {
                    $key_text = sprintf(
                        '[%s](/r/%s/comments/%s//%s "Last usage")',
                        $key,
                        self::$subreddit['subreddit_name'],
                        Reddit::_to_id($thread_id),
                        Reddit::_to_id($row['comment_id'])
                    );
                } else {
                    $key_text = $key;
                }
                $text[] = sprintf('|%s|%s|', $key_text, $row['acronym_value']);
            }
        }

        $text[] = '';
        $text[] = '----------------';
        if (self::$subreddit['footer_text']) {
            $text[] = '^(' . self::$subreddit['footer_text'] . ') ';
        }
        // gmdate, because the text labels the time as UTC regardless of the
        // server's configured timezone
        $text[] = "^(I'm a bot, and I first saw this thread at ".gmdate('jS M Y, H:i \U\T\C', (int)$first_check).".) ";

        $seen = $thread ? $thread['acronym_count'] : 0;
        if ($seen) {
            if (!$busiest_thread) {
                // No thread has acronym activity today, so there is nothing to compare against
                $text[] = "^(I've seen ".$seen." acronyms in this thread.) ";
            } else if ((string)$thread_id === (string)$busiest_thread['thread_id']) {
                $text[] = "^(I've seen ".$seen." acronyms in this thread, which is the most I've seen in a thread so far today.) ";
            } else {
                $text[] = "^(I've seen ".$seen." acronyms in this thread; )[^the ^most ^compressed ^thread ^commented ^on ^today](/r/".self::$subreddit['subreddit_name']."/comments/".Reddit::_to_id($busiest_thread['thread_id']).")^( has ".$busiest_thread['acronym_count']." acronyms.) ";
            }
        }
        $text[] = "[^\[Acronym ^lists\]](http://decronym.xyz/) [^\[Contact ^creator\]](https://reddit.com/message/compose?to=OrangeredStilton&subject=Hey,+your+acronym+bot+sucks) [^\[PHP ^source ^code\]](https://gist.github.com/Two9A/1d976f9b7441694162c8)";
        return join("\n", $text);
    }

    /**
     * Record that an acronym was used in a comment of a thread, then stamp
     * the comment time on every row stored for that comment.
     *
     * @param int|string $thread_id Decimal thread id
     * @param int|string $acronym_id Acronym row id
     * @param int|string $comment_id Decimal comment id
     * @param int $comment_ts Comment creation time (Unix timestamp)
     */
    private static function _add_acronym_to_thread($thread_id, $acronym_id, $comment_id, $comment_ts) {
        $st = self::$dbc->prepare('REPLACE INTO thread_acronyms(thread_id, acronym_id, comment_id) VALUES(:thread, :acronym, :comment)');
        $st->bindValue(':thread', $thread_id);
        $st->bindValue(':acronym', $acronym_id);
        $st->bindValue(':comment', $comment_id);
        $st->execute();

        $tst = self::$dbc->prepare('UPDATE thread_acronyms SET comment_ts=:ts WHERE comment_id=:comment');
        $tst->bindValue(':ts', $comment_ts);
        $tst->bindValue(':comment', $comment_id);
        $tst->execute();
    }

    /**
     * Store the bot's comment id for a thread, with the current time as the
     * moment the thread was first seen.
     *
     * @param int|string $thread_id Decimal thread id
     * @param int|string $comment_id Decimal id of the bot's comment
     */
    private static function _add_thread($thread_id, $comment_id) {
        $st = self::$dbc->prepare('INSERT INTO threads(thread_id, subreddit_id, bot_comment_id, first_check_ts) VALUES(:thread, :subreddit, :comment, :ts)');
        $st->bindValue(':thread', $thread_id);
        $st->bindValue(':subreddit', self::$subreddit['subreddit_id']);
        $st->bindValue(':comment', $comment_id);
        $st->bindValue(':ts', time());
        $st->execute();
    }

    /**
     * Select the subreddit to work on for this run, if not already selected:
     * the enabled subreddit with the oldest last_check_ts. Its last_check_ts
     * is advanced to now in the database, while the in-memory row keeps the
     * previous value so callers can filter on it.
     *
     * @return bool True when a subreddit is selected, false when none is available
     */
    private static function _ensure_unchecked_subreddit() {
        self::_ensure_connection();
        if (!self::$subreddit) {
            $time = time();
            $st = self::$dbc->prepare('SELECT subreddit_id, subreddit_name, last_check_ts, min_comments, min_score, sticky_min_score, max_thread_wait, footer_text FROM subreddits WHERE enabled = 1 AND last_check_ts < :ts ORDER BY last_check_ts LIMIT 1');
            $st->bindValue(':ts', $time);
            $st->execute();
            $row = $st->fetch();
            if ($row) {
                // Only touch the table when there is a row to update
                $update_st = self::$dbc->prepare('UPDATE subreddits SET last_check_ts = :ts WHERE subreddit_id = :id');
                $update_st->bindValue(':ts', $time);
                $update_st->bindValue(':id', $row['subreddit_id']);
                $update_st->execute();
            }
            self::$subreddit = $row;
        }
        return (bool)self::$subreddit;
    }

    /**
     * Open the database connection on first use; errors raise exceptions and
     * rows are fetched as associative arrays.
     */
    private static function _ensure_connection() {
        if (!self::$dbc) {
            self::$dbc = new PDO(
                sprintf('mysql:host=%s;dbname=%s', self::DB_HOST, self::DB_NAME),
                self::DB_USER,
                self::DB_PASS
            );
            self::$dbc->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
            self::$dbc->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC);
        }
    }

    /**
     * Append a timestamped line, tagged with the selected subreddit's name,
     * to /tmp/decronym.log.
     *
     * @param string $str Message to log
     */
    private static function _log($str) {
        // No subreddit is selected yet when forward_inbox() logs; the tag is
        // then left empty
        $tag = isset(self::$subreddit['subreddit_name']) ? self::$subreddit['subreddit_name'] : '';
        $line = sprintf("[%s] [%s] %s\n", date('YmdHis'), $tag, $str);
        file_put_contents('/tmp/decronym.log', $line, FILE_APPEND | LOCK_EX);
    }
}
// Driver for a single bot run: forward the inbox, scan new comments for
// acronyms, then post or refresh the summary comment on every touched thread.
Decronym::forward_inbox();
$threads = Decronym::parse_comments(Decronym::read_comments_since_last_check());
$comment_counts = Decronym::read_thread_info($threads);
foreach ($threads as $thread_id) {
    if (!isset($comment_counts[$thread_id])) {
        // The info lookup returned nothing for this thread, so there is
        // nothing to evaluate the posting thresholds against
        continue;
    }
    // Pause between threads; presumably to stay within Reddit's rate limits
    sleep(1);
    Decronym::post_or_edit($thread_id, $comment_counts[$thread_id]);
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Could you add CSAM to the acronym database? It's coming up a lot on Lemmy lately, and it's not obvious what it means. Thank you!!