Skip to content

Instantly share code, notes, and snippets.

@kiyoto
Created March 28, 2012 06:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kiyoto/2224166 to your computer and use it in GitHub Desktop.
Save kiyoto/2224166 to your computer and use it in GitHub Desktop.
GitHub scraper: gets user info for the users watching a particular repo.
#!/usr/bin/env php
<?php
/**
 * Fetches the accounts watching a GitHub repository.
 *
 * Requests /repos/{login}/{repo}/watchers 100 entries at a time and stops at the
 * first short page, after 16 pages, or as soon as a request fails. On failure the
 * entries collected so far are returned and a diagnostic is written to STDERR.
 *
 * @param string $login Owner of the repository.
 * @param string $repo  Repository name.
 * @return array List of decoded watcher entries (possibly empty).
 */
function get_watchers($login, $repo) {
    $per_page = 100; // this is the max set by github http://developer.github.com/v3/#pagination
    $max_page = 16;  // hard cap on the number of pages requested
    $watchers = array();

    for ($page_number = 1; $page_number <= $max_page; $page_number++) {
        $url = sprintf('https://api.github.com/repos/%s/%s/watchers?page=%d&per_page=%d',
            rawurlencode($login), rawurlencode($repo), $page_number, $per_page);

        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // The GitHub API rejects requests that carry no User-Agent header.
        curl_setopt($ch, CURLOPT_USERAGENT, 'github-scraper');
        $body = curl_exec($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($body === false) {
            fprintf(STDERR, "Request for watchers page %d failed: %s\n", $page_number, $error);
            break;
        }

        $new_watchers = json_decode($body, true);
        // A non-200 reply decodes to an error object, not a list of watchers;
        // merging it would inject bogus entries into the result.
        if ($status !== 200 || !is_array($new_watchers)) {
            fprintf(STDERR, "Unexpected response for watchers page %d (HTTP %d)\n", $page_number, $status);
            break;
        }

        $watchers = array_merge($watchers, $new_watchers);
        if (count($new_watchers) < $per_page) {
            break; // a short page is the last page
        }
    }

    return $watchers;
}
/**
 * Fetches the public profile of a single GitHub user.
 *
 * Progress and failure messages are written to STDERR.
 *
 * @param string $login GitHub login name.
 * @return array|null Decoded profile, or null when the request fails, the API
 *                    answers with a non-200 status, or the body is not a JSON
 *                    object/array.
 */
function get_user($login) {
    $ch = curl_init('https://api.github.com/users/'.rawurlencode($login));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // The GitHub API rejects requests that carry no User-Agent header.
    curl_setopt($ch, CURLOPT_USERAGENT, 'github-scraper');
    $body = curl_exec($ch);
    $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    curl_close($ch);

    if ($body === false) {
        fprintf(STDERR, "%s failed: %s\n", $login, $error);
        return null;
    }

    $json = json_decode($body, true);
    if ($status !== 200 || !is_array($json)) {
        fprintf(STDERR, "%s failed: unexpected response (HTTP %d)\n", $login, $status);
        return null;
    }

    // Pass the login as an argument so a '%' in it cannot be read as a format directive.
    fprintf(STDERR, "%s processed!\n", $login);
    return $json;
}
/**
 * Fetches the full profile for every watcher and orders the result by company, then login.
 *
 * @param array $users Watcher entries; each is expected to be an array with a 'login' key.
 *                     Entries without one are skipped.
 * @return array Map of "company:login" => whatever get_user() returned for that login,
 *               sorted by key.
 */
function get_users($users) {
    $user_data = array();
    foreach ($users as $user) {
        if (!is_array($user) || !isset($user['login'])) {
            continue; // not a usable watcher entry
        }
        $login = $user['login'];
        $profile = get_user($login);
        // The company has to come from the fetched profile: the watcher list entry
        // does not include it, so reading it from $user always produced an empty prefix.
        $company = (is_array($profile) && isset($profile['company'])) ? $profile['company'] : '';
        $user_data["$company:$login"] = $profile;
    }
    ksort($user_data);
    return $user_data;
}
/**
 * Echoes one line per user record to standard output.
 *
 * Each line holds the company, login, followers, email and blog fields joined
 * by ", ". Commas inside the company value are replaced with spaces. Lines are
 * separated by "\n" and no trailing newline is written.
 *
 * @param array $users User records keyed with 'company', 'login', 'followers',
 *                     'email' and 'blog'.
 * @return void
 */
function output_data($users) {
    $lines = array();
    foreach ($users as $record) {
        $record['company'] = str_replace(',', ' ', $record['company']);
        $fields = array(
            $record['company'],
            $record['login'],
            $record['followers'],
            $record['email'],
            $record['blog'],
        );
        $lines[] = implode(', ', $fields);
    }
    echo implode("\n", $lines);
}
// Command-line entry point: parse "<login name>:<repo name>", fetch the watchers
// of that repository and print one line per watcher.
$options = getopt('h');
$usage = <<<HELP
Usage:
You need PHP 5.3 or above with curl enabled.
$ php github-scraper.php <login name>:<repo name>
e.g.
$ php github-scraper fluent:fluentd
HELP;

if (isset($options['h']) || $argc < 2) {
    echo $usage;
    exit(1);
}

// Split on the first ':' only and require both halves, so a malformed argument
// is reported here instead of reaching the API with a missing repository name.
$target = explode(':', $argv[1], 2);
if (count($target) !== 2 || $target[0] === '' || $target[1] === '') {
    fprintf(STDERR, "Invalid argument '%s': expected <login name>:<repo name>\n", $argv[1]);
    echo $usage;
    exit(1);
}

list($login, $repo) = $target;
output_data(get_users(get_watchers($login, $repo)));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment