Skip to content

Instantly share code, notes, and snippets.

@elazar
Created May 27, 2022 13:15
Show Gist options
  • Save elazar/0911faf8fb72de9fb4f19645f848726d to your computer and use it in GitHub Desktop.
Save elazar/0911faf8fb72de9fb4f19645f848726d to your computer and use it in GitHub Desktop.
Twitter Follower Crawler
<?php
// Bearer token sent in the Authorization header of every API request made by
// sendRequest(). Replace the placeholder with a real token; see
// https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api
define('TWITTER_BEARER_TOKEN', '...');
// Timezone applied by date() when the rate-limit wait message is printed.
date_default_timezone_set('America/Chicago');
/**
 * Send an authenticated GET request to the Twitter API v2 and decode the JSON body.
 *
 * When the response headers report that no requests remain in the current
 * rate-limit window, this function blocks until the advertised reset time
 * before returning, so that the caller's next request is not rejected.
 *
 * @param string $path Request path, including any query string, appended to the API base URL.
 *
 * @return mixed The decoded JSON body (JSON objects become stdClass instances),
 *               or null when the request failed or the body was not valid JSON.
 */
function sendRequest($path)
{
    $context = stream_context_create([
        'http' => [
            'header' => 'Authorization: Bearer ' . TWITTER_BEARER_TOKEN,
            // Return the body of 4xx/5xx responses too, so that callers can
            // inspect the error payload instead of receiving nothing.
            'ignore_errors' => true,
        ],
    ]);
    $url = 'https://api.twitter.com/2' . $path;
    $response = @file_get_contents($url, false, $context);

    // $http_response_header is only populated when an HTTP response was
    // actually received (e.g. not after a DNS or connection failure).
    $rawHeaders = $http_response_header ?? [];

    // Index headers by lower-cased name: HTTP header names are case-insensitive.
    $headers = [];
    foreach (array_slice($rawHeaders, 1) as $header) {
        // Skip lines that are not "Name: value" pairs, such as the extra
        // status lines that appear when a redirect was followed.
        if (preg_match('/^([^:]+):\s*(.*)$/', $header, $match) === 1) {
            $headers[strtolower($match[1])] = trim($match[2]);
        }
    }

    $remaining = $headers['x-rate-limit-remaining'] ?? null;
    if ($remaining === '0' && isset($headers['x-rate-limit-reset'])) {
        $end = (int) $headers['x-rate-limit-reset'];
        // The reset time may already have passed; sleep() rejects negative values.
        $wait = max(0, $end - time());
        echo 'Waiting until ', date('h:i:s A', $end), PHP_EOL;
        sleep($wait);
    }

    if ($response === false) {
        return null;
    }

    return json_decode($response);
}
/**
 * Reduce a user object returned by the API to the fields stored in the users table.
 *
 * @param object $apiUser User object exposing `id`, `username` and
 *                        `public_metrics->followers_count`.
 *
 * @return object stdClass with `id`, `username` and `followers_count` properties.
 */
function convertApiUserToDbUser(object $apiUser)
{
    $dbUser = new stdClass();
    $dbUser->id = $apiUser->id;
    $dbUser->username = $apiUser->username;
    $dbUser->followers_count = $apiUser->public_metrics->followers_count;

    return $dbUser;
}
/**
 * Look up a single user by username, including the public metrics.
 *
 * @param string $username Username (handle) to look up.
 *
 * @return object stdClass with `id`, `username` and `followers_count` properties.
 *
 * @throws RuntimeException When the API response contains no user data
 *                          (unknown user, failed request, error response).
 */
function getUserByUsername($username)
{
    // Encode the username so that unexpected characters cannot alter the request path.
    $path = '/users/by/username/' . rawurlencode($username) . '?user.fields=public_metrics';
    $decoded = sendRequest($path);

    if (!isset($decoded->data) || !is_object($decoded->data)) {
        // Prefer the API's own explanation when the response carries one.
        $detail = $decoded->errors[0]->detail ?? 'no user data in the API response';
        throw new RuntimeException("Unable to fetch user \"$username\": $detail");
    }

    return convertApiUserToDbUser($decoded->data);
}
/**
 * Lazily fetch the followers of a user, following pagination until exhausted.
 *
 * Followers are requested in pages of up to 1000 and yielded one at a time.
 * Iteration stops early, after printing a message, when a request fails or
 * the API reports errors.
 *
 * @param string $userId ID of the user whose followers are fetched.
 *
 * @return Generator Yields one stdClass per follower with `id`, `username`
 *                   and `followers_count` properties.
 */
function getUserFollowers($userId)
{
    $basePath = "/users/$userId/followers?max_results=1000&user.fields=public_metrics";
    $requests = 0;
    $nextToken = null;

    do {
        echo 'Sending request ', ++$requests, PHP_EOL;

        $path = $basePath;
        if ($nextToken !== null) {
            $path .= "&pagination_token=$nextToken";
        }

        $decoded = sendRequest($path);
        if (!is_object($decoded)) {
            echo 'Error: request failed or returned an invalid response', PHP_EOL;
            break;
        }
        if (isset($decoded->errors)) {
            echo 'Error: ', $decoded->errors[0]->detail ?? 'unknown error', PHP_EOL;
            break;
        }

        // A page may carry no `data` member at all (e.g. a user without followers).
        foreach ($decoded->data ?? [] as $follower) {
            yield convertApiUserToDbUser($follower);
        }

        $nextToken = $decoded->meta->next_token ?? null;
    } while ($nextToken);
}
// Open the SQLite database stored next to this script.
$databaseFile = __DIR__ . '/TwitterCrawler.sq3';
$pdo = new PDO('sqlite:' . $databaseFile);

// Make sure both tables exist: `users` holds one row per known account and
// `followers` holds one row per (followee, follower) relationship.
$schemaSql = '
CREATE TABLE IF NOT EXISTS users (
id TEXT PRIMARY KEY,
username TEXT UNIQUE,
followers_count INT
);
CREATE TABLE IF NOT EXISTS followers (
followee_user_id TEXT,
follower_user_id TEXT,
PRIMARY KEY (followee_user_id, follower_user_id)
);
';
$pdo->exec($schemaSql);

// Insert a user, replacing any existing row that conflicts with it.
$saveUserSql = '
REPLACE INTO users (
id,
username,
followers_count
)
VALUES (
:id,
:username,
:followers_count
)
';
$userStmt = $pdo->prepare($saveUserSql);

// Record that one user follows another, replacing an identical existing row.
$saveFollowerSql = '
REPLACE INTO followers (
followee_user_id,
follower_user_id
)
VALUES (
:followee_user_id,
:follower_user_id
)
';
$followerStmt = $pdo->prepare($saveFollowerSql);
// Select every stored user for whom fewer follower rows have been recorded
// than the follower count reported for that user, i.e. users that still need
// to be crawled.
//
// The statement is only prepared here because it is executed further down,
// after the seed user has been stored; query() would additionally run it
// right away and throw the result away.
//
// COUNT(followers.follower_user_id) is used instead of COUNT(*): with the
// LEFT JOIN, a user without any recorded followers still produces one
// NULL-padded row, which COUNT(*) would count as a fetched follower.
$usersStmt = $pdo->prepare('
SELECT
id,
username,
followers_count
FROM
users
WHERE
id IN (
SELECT
id
FROM (
SELECT
users.id,
COUNT(followers.follower_user_id) AS fetched_followers_count
FROM
users
LEFT JOIN
followers ON (users.id = followers.followee_user_id)
GROUP BY
users.id
HAVING
users.followers_count > fetched_followers_count
)
)
');
// Seed the crawl with one account. The username may be given as the first
// command-line argument; without one the original default is used.
$seedUsername = $argv[1] ?? 'truthlafayette';
$user = getUserByUsername($seedUsername);
$userStmt->execute((array) $user);

// Fetch the list of users whose followers still have to be crawled.
$usersStmt->execute();
$users = $usersStmt->fetchAll(PDO::FETCH_OBJ);

foreach ($users as $user) {
    echo 'Processing user ', $user->username, PHP_EOL;

    // The user's own row is deliberately not written back here: $users is a
    // snapshot taken before the crawl started, and the row may since have
    // been refreshed with newer data while crawling a previous user.
    foreach (getUserFollowers($user->id) as $follower) {
        $userStmt->execute([
            'id' => $follower->id,
            'username' => $follower->username,
            'followers_count' => $follower->followers_count,
        ]);
        $followerStmt->execute([
            'followee_user_id' => $user->id,
            'follower_user_id' => $follower->id,
        ]);
    }
}
<?php
// Export the crawled follow relationships as a Graphviz digraph.
$pdo = new PDO('sqlite:' . __DIR__ . '/TwitterCrawler.sq3');

// Only relationships whose follower also appears as a followee in the
// followers table are selected; both ends are resolved to usernames.
$edgesSql = '
SELECT
users_followees.username AS followee,
users_followers.username AS follower
FROM
(SELECT
*
FROM
followers
WHERE
follower_user_id IN (
SELECT DISTINCT
followee_user_id
FROM
followers
)
) AS followers
JOIN
users AS users_followees ON (users_followees.id = followers.followee_user_id)
JOIN
users AS users_followers ON (users_followers.id = followers.follower_user_id)
';
$edges = $pdo->query($edgesSql);

$dotFile = fopen(__DIR__ . '/TwitterDot.dot', 'w');
fwrite($dotFile, "digraph {\n");

// One edge per row, pointing from the follower to the account being followed.
for ($edge = $edges->fetchObject(); $edge; $edge = $edges->fetchObject()) {
    $line = '"' . $edge->follower . '" -> "' . $edge->followee . '"' . "\n";
    fwrite($dotFile, $line);
}

fwrite($dotFile, '}');
fclose($dotFile);
#!/bin/sh
# Render the graph described in TwitterDot.dot as a PNG image with Graphviz's
# dot (verbose mode), redirecting the image data into TwitterDot.png.
dot -v -Goverlap=scale -Tpng TwitterDot.dot > TwitterDot.png
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment