Created
March 17, 2017 23:21
-
-
Save hinchley/05c2872620b5618389d130107e084d6b to your computer and use it in GitHub Desktop.
Archive your twitter timeline into a sqlite database using PHP.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
# define api keys. | |
define('CONSUMER_KEY', 'XXX'); | |
define('CONSUMER_SECRET', 'XXX'); | |
define('OAUTH_TOKEN', 'XXX'); | |
define('OAUTH_SECRET', 'XXX'); | |
define('USER_TIMELINE', 'https://api.twitter.com/1.1/statuses/user_timeline.json'); | |
# tweets to retrieve per request. | |
# max value is 200. | |
define('PAGESIZE', 200); | |
# establish db connection. | |
$db = new PDO('sqlite:./db.sqlite3'); | |
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); | |
# create the db table if it doesn't exist. | |
$sql = <<<sql | |
CREATE TABLE IF NOT EXISTS `tweets` ( | |
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, | |
`guid` TEXT NOT NULL, | |
`json` TEXT NOT NULL, | |
`text` TEXT NOT NULL, | |
`isreply` BOOLEAN NOT NULL, | |
`replyguid` TEXT, | |
`replyuser` TEXT, | |
`isretweet` BOOLEAN NOT NULL, | |
`retweetguid` TEXT, | |
`retweetuser` TEXT, | |
`timestamp` TEXT NOT NULL | |
); | |
sql; | |
$db->exec($sql); | |
# recursively sort an array. | |
function rksort(&$a) { | |
foreach ($a as &$v) if (is_array($v)) rksort($v); | |
return ksort($a); | |
} | |
# get the highest tweet id. | |
function since($db) { | |
$q = 'SELECT guid FROM tweets ORDER BY guid + 0 DESC LIMIT 1'; | |
return $db->query($q)->fetchcolumn() ?: null; | |
} | |
# prepare query string. | |
function query($max, $since = null) { | |
$query = [ | |
'count' => PAGESIZE, | |
'tweet_mode' => 'extended', | |
'trim_user' => 'true', | |
'include_rts' => 'true', | |
'exclude_replies' => 'false' | |
]; | |
if ($max !== null) { $query['max_id'] = $max; } | |
if ($since !== null) { $query['since_id'] = $since; } | |
return array_map('rawurlencode', $query); | |
} | |
# prepare oauth request. | |
function auth($query) { | |
$oauth = [ | |
'oauth_consumer_key' => CONSUMER_KEY, | |
'oauth_nonce' => md5(mt_rand()), | |
'oauth_signature_method' => 'HMAC-SHA1', | |
'oauth_timestamp' => time(), | |
'oauth_token' => OAUTH_TOKEN, | |
'oauth_version' => '1.0']; | |
$oauth = array_map('rawurlencode', $oauth); | |
$array = array_merge($oauth, $query); | |
asort($array); | |
ksort($array); | |
$q = urldecode(http_build_query($array, '', '&')); | |
$req = 'GET&' . rawurlencode(USER_TIMELINE) . '&' . rawurlencode($q); | |
$key = rawurlencode(CONSUMER_SECRET) . '&' . rawurlencode(OAUTH_SECRET); | |
$sig = rawurlencode(base64_encode(hash_hmac('sha1', $req, $key, true))); | |
$oauth['oauth_signature'] = $sig; | |
ksort($oauth); | |
$oauth = array_map(function($s) { | |
return '"' . $s . '"'; | |
}, $oauth); | |
return 'OAuth ' . urldecode(http_build_query($oauth, '', ', ')); | |
} | |
# request block of tweets. | |
function tweets($max, $since) { | |
$query = query($max, $since); | |
$token = auth($query); | |
$url = USER_TIMELINE . '?' . http_build_query($query); | |
$options = [ | |
CURLOPT_HTTPHEADER => ["Authorization: $token"], | |
CURLOPT_HEADER => false, | |
CURLOPT_URL => $url, | |
CURLOPT_RETURNTRANSFER => true, | |
CURLOPT_SSL_VERIFYPEER => false]; | |
$feed = curl_init(); | |
curl_setopt_array($feed, $options); | |
$json = curl_exec($feed); | |
if (curl_error($feed)) echo 'Error: ' . curl_error($feed); | |
curl_close($feed); | |
return json_decode($json, true); | |
} | |
# request all available tweets. | |
function archive($since, $max = null) { | |
$archive = []; do { | |
$tweets = tweets($max, $since); | |
if (count($tweets) == 0) break; | |
$archive = array_merge($archive, $tweets); | |
$max = end($tweets)['id_str'] - 1; | |
} while (true); | |
return $archive; | |
} | |
# sort tweets from oldest to newest. | |
$tweets = array_reverse(archive(since($db))); | |
# sort properties of each tweet. | |
rksort($tweets); | |
# insert tweets into database. | |
foreach ($tweets as $tweet) { | |
$q = 'INSERT INTO tweets ' . | |
'(guid, json, text, isreply, replyguid, replyuser, ' . | |
'isretweet, retweetguid, retweetuser, timestamp) ' . | |
'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'; | |
$s = $db->prepare($q); | |
$s->execute([ | |
$tweet['id_str'], | |
serialize($tweet), | |
$tweet['full_text'], | |
$tweet['in_reply_to_status_id_str'] !== null, | |
$tweet['in_reply_to_status_id_str'], | |
$tweet['in_reply_to_user_id_str'], | |
$retweeted = $tweet['retweeted'], | |
$retweeted ? $tweet['retweeted_status']['id_str'] : null, | |
$retweeted ? $tweet['retweeted_status']['user']['id_str'] : null, | |
$tweet['created_at']]); | |
} | |
echo $db->query('select count(*) from tweets')->fetchColumn(); |
The code runs against the Twitter account defined by the API keys. Try this link to get started: https://dev.twitter.com/apps/new
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello friend I would like to know where I put the id of twitter to show please thanks for the reply