Skip to content

Instantly share code, notes, and snippets.

@jdecool
Created June 2, 2016 16:00
Show Gist options
  • Save jdecool/c4a0cccadcc2b686bb547ddc08db49c9 to your computer and use it in GitHub Desktop.
Save jdecool/c4a0cccadcc2b686bb547ddc08db49c9 to your computer and use it in GitHub Desktop.
Download GitHub archives
#!/usr/bin/env php
<?php
// URL => https://www.githubarchive.org/
// wget http://data.githubarchive.org/2015-01-{01..30}-{0..23}.json.gz
// Upper bound on the number of days in each month, keyed by month number
// (1 = January, 12 = December). The table carries no year information, so
// February holds its leap-year length of 29: that is the largest value that
// can ever be valid. In non-leap years this still overshoots by one day.
$dayMonth = [
    1 => 31,
    2 => 29,
    3 => 31,
    4 => 30,
    5 => 31,
    6 => 30,
    7 => 31,
    8 => 31,
    9 => 30,
    10 => 31,
    11 => 30,
    12 => 31,
];
// 2011
// Downloads the hourly archives for December 2011 (days 2 through 31) into
// the flat "2011/" directory. Remote file names use an unpadded hour
// (e.g. 2011-12-02-0), while local file names are zero-padded to two digits.
// NOTE(review): the loop starts on day 2, not day 1 - presumably intentional;
// confirm against the archive's actual coverage.
$folder = '2011';
// wget -O cannot write into a directory that does not exist, so create the
// target directory up front, exactly as the per-month loops do.
if (!file_exists($folder)) {
    mkdir($folder, 0777, true);
}
for ($day = 2; $day <= 31; $day++) {
    for ($hour = 0; $hour <= 23; $hour++) {
        $command = sprintf('wget http://data.githubarchive.org/2011-12-%1$02d-%2$d.json.gz -O 2011/2011-12-%1$02d-%2$02d.json.gz', $day, $hour);
        shell_exec($command);
        // Echo the command after running it to give a simple progress log.
        echo $command, PHP_EOL;
    }
}
// 2012 -> 2015
// Downloads every hourly archive for the complete years 2012 through 2015.
// Files are stored as <year>/<month>/<year>-<month>-<day>-<hour>.json.gz;
// each <year>/<month> directory is created on demand.
for ($year = 2012; $year <= 2015; $year++) {
    for ($month = 1; $month <= 12; $month++) {
        $folder = sprintf('%d/%02d', $year, $month);
        if (!file_exists($folder)) {
            mkdir($folder, 0777, true);
        }
        // Use the real length of this month in this year (leap years
        // included) so no requests are issued for dates that do not exist;
        // such requests would only leave empty output files behind.
        $daysInMonth = (int) date('t', mktime(0, 0, 0, $month, 1, $year));
        for ($day = 1; $day <= $daysInMonth; $day++) {
            for ($hour = 0; $hour <= 23; $hour++) {
                // Remote names use an unpadded hour; local names pad it to two digits.
                $command = sprintf('wget http://data.githubarchive.org/%1$04d-%2$02d-%3$02d-%4$d.json.gz -O %1$04d/%2$02d/%1$04d-%2$02d-%3$02d-%4$02d.json.gz', $year, $month, $day, $hour);
                shell_exec($command);
                // Echo the command after running it to give a simple progress log.
                echo $command, PHP_EOL;
            }
        }
    }
}
// 2016
// Downloads the hourly archives for 2016, using the same
// <year>/<month>/<year>-<month>-<day>-<hour>.json.gz layout as earlier years.
$year = 2016;
// Read the clock once so every bound below is derived from the same instant.
$currentYear = (int) date('Y');
$currentMonth = (int) date('n');
$currentDay = (int) date('j');
// Once 2016 is over the whole year is available; otherwise stop at the
// current month. Comparing against the bare current month would wrongly
// truncate 2016 when the script is run in a later year.
$lastMonth = $currentYear > $year ? 12 : $currentMonth;
for ($month = 1; $month <= $lastMonth; $month++) {
    $folder = sprintf('%d/%02d', $year, $month);
    if (!file_exists($folder)) {
        mkdir($folder, 0777, true);
    }
    // Real length of the month, so dates such as Feb 30 are never requested.
    $lastDay = (int) date('t', mktime(0, 0, 0, $month, 1, $year));
    // In the month currently in progress, do not ask for days in the future.
    if ($currentYear === $year && $month === $currentMonth) {
        $lastDay = min($lastDay, $currentDay);
    }
    for ($day = 1; $day <= $lastDay; $day++) {
        for ($hour = 0; $hour <= 23; $hour++) {
            // Remote names use an unpadded hour; local names pad it to two digits.
            $command = sprintf('wget http://data.githubarchive.org/%1$04d-%2$02d-%3$02d-%4$d.json.gz -O %1$04d/%2$02d/%1$04d-%2$02d-%3$02d-%4$02d.json.gz', $year, $month, $day, $hour);
            shell_exec($command);
            // Echo the command after running it to give a simple progress log.
            echo $command, PHP_EOL;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment