Skip to content

Instantly share code, notes, and snippets.

@brucealdridge
Created May 13, 2011 09:02
Show Gist options
  • Save brucealdridge/970235 to your computer and use it in GitHub Desktop.
Save brucealdridge/970235 to your computer and use it in GitHub Desktop.
<?php
/**
 * Download the DoubleClick Ad Planner "top 1000" page and extract its site URLs.
 *
 * An entry is every href="..." attribute that is immediately followed by a
 * target="_blank" attribute in the page markup.
 *
 * @return string Newline-separated URLs, in the order they appear in the page.
 * @throws Exception When the page cannot be downloaded.
 */
function doubleclick_adplanner() {
    $data = file_get_contents('http://www.google.com/adplanner/static/top1000/');
    if ($data === false) {
        // Fail loudly: returning '' here would let the caller overwrite a
        // previously generated list with an empty file.
        throw new Exception('Unable to download DoubleClick Ad Planner list');
    }

    preg_match_all('/href=["\']([^"\']+)?["\']target=["\']_blank/im', $data, $matches);

    // The capture group is optional, so an empty href produces an empty
    // string; those are not usable list entries and are dropped.
    $urls = array();
    $found = isset($matches[1]) ? $matches[1] : array();
    foreach ($found as $url) {
        if ($url !== '') {
            $urls[] = $url;
        }
    }

    return implode("\n", $urls);
}
/**
 * Return the first $num entries of the Alexa global top-one-million list as URLs.
 *
 * The first successful call downloads top-1m.csv.zip into the current working
 * directory, unpacks it with gunzip, parses the "rank,domain" lines and keeps
 * the parsed list in a static cache. Later calls in the same process only
 * slice that cache. The extracted CSV is deleted after it has been read.
 *
 * @param int $num Maximum number of entries to return.
 * @return string Newline-separated "http://<domain>" entries in file order.
 * @throws Exception When the archive cannot be downloaded, written or extracted.
 */
function alexa_global($num = 500) {
    static $topsites;
    if ($topsites) {
        return implode("\n", array_slice($topsites, 0, $num));
    }

    // Check the download on its own so a network failure is not reported as
    // a local permissions problem (and no empty archive is written).
    $archive = file_get_contents('http://s3.amazonaws.com/alexa-static/top-1m.csv.zip');
    if ($archive === false || $archive === '') {
        throw new Exception('Unable to download top-1m.csv.zip');
    }
    if (!file_put_contents('top-1m.csv.zip', $archive)) {
        throw new Exception('Unable to write file check permissions');
    }
    unset($archive);

    // gunzip is used on the single-member zip; -S .zip tells it which suffix
    // to strip, leaving top-1m.csv behind on success.
    exec('gunzip -S .zip top-1m.csv.zip');
    if (!file_exists('top-1m.csv')) {
        throw new Exception('Unable to extract zip or filename has changed');
    }

    $data = file_get_contents('top-1m.csv');
    unlink('top-1m.csv');
    if ($data === false) {
        throw new Exception('Unable to extract zip or filename has changed');
    }

    // Build the list locally and publish it to the static cache only once it
    // is complete, so a failure never leaves a half-filled cache.
    $sites = array();
    foreach (explode("\n", $data) as $line) {
        // trim() drops a trailing "\r" if the file uses CRLF line endings.
        $parts = explode(',', trim($line));
        if (count($parts) === 2 && $parts[1] !== '') {
            // Keyed by rank, as before; array_slice() re-indexes on return.
            $sites[$parts[0]] = 'http://' . $parts[1];
        }
    }
    $topsites = $sites;

    return implode("\n", array_slice($topsites, 0, $num));
}
/**
 * Return the first $num entries of the Quantcast top-million list as URLs.
 *
 * The first successful call downloads the archive into the current working
 * directory as Quantcast-Top-Million.txt.zip, unpacks it with gunzip, pulls
 * the site name out of every "<digits><tab(s)><name>" line and keeps the
 * parsed list in a static cache. Later calls in the same process only slice
 * that cache. The extracted text file is deleted after it has been read.
 *
 * @param int $num Maximum number of entries to return.
 * @return string Newline-separated "http://<name>" entries in file order.
 * @throws Exception When the archive cannot be downloaded, written or extracted.
 */
function quantcast($num = 500) {
    static $topsites;
    if ($topsites) {
        return implode("\n", array_slice($topsites, 0, $num));
    }

    // Check the download on its own so a network failure is not reported as
    // a local permissions problem (and no empty archive is written).
    $archive = file_get_contents('http://www.quantcast.com/quantcast-top-million.zip');
    if ($archive === false || $archive === '') {
        throw new Exception('Unable to download quantcast-top-million.zip');
    }
    if (!file_put_contents('Quantcast-Top-Million.txt.zip', $archive)) {
        throw new Exception('Unable to write file check permissions');
    }
    unset($archive);

    // -S .zip tells gunzip which suffix to strip, leaving the .txt on success.
    exec('gunzip -S .zip Quantcast-Top-Million.txt.zip');
    if (!file_exists('Quantcast-Top-Million.txt')) {
        throw new Exception('Unable to extract zip or filename has changed');
    }

    $data = file_get_contents('Quantcast-Top-Million.txt');
    unlink('Quantcast-Top-Million.txt');
    if ($data === false) {
        throw new Exception('Unable to extract zip or filename has changed');
    }

    // Build the list locally and publish it to the static cache only once it
    // is complete, so a failure never leaves a half-filled cache.
    $sites = array();
    foreach (explode("\n", $data) as $line) {
        // Lines without a "<digits><tab>" prefix (headers, blanks) do not match.
        if (preg_match('/(?:\d+)(?:\t)+?([^\s]+)/', $line, $matches) === 1) {
            $sites[] = 'http://' . $matches[1];
        }
    }
    $topsites = $sites;

    return implode("\n", array_slice($topsites, 0, $num));
}
/**
 * Return the first $num entries of Alexa's United States top-sites listing as URLs.
 *
 * The first successful call requests 20 listing pages (the first without a
 * page suffix, the rest as ";1" .. ";19"), collects the text that follows
 * each topsites-label marker and keeps the result in a static cache. Later
 * calls in the same process only slice that cache, so $num can never yield
 * more entries than those 20 pages contained.
 *
 * @param int $num Maximum number of entries to return.
 * @return string Newline-separated "http://<site>" entries in page order.
 * @throws Exception When one of the listing pages cannot be downloaded.
 */
function alexa_us($num = 500) {
    static $topsites;
    if ($topsites) {
        return implode("\n", array_slice($topsites, 0, $num));
    }

    // Collect into a local list and publish to the static cache only when all
    // pages were read, so a failed run is retried instead of caching a
    // truncated ranking.
    $sites = array();
    for ($page = 0; $page < 20; $page++) {
        $url = 'http://www.alexa.com/topsites/countries' . ($page ? ';' . $page : '') . '/US';
        $data = file_get_contents($url);
        if ($data === false) {
            throw new Exception('Unable to download ' . $url);
        }

        preg_match_all('/topsites-label["\']\>([^<]+)?/im', $data, $matches);
        $labels = isset($matches[1]) ? $matches[1] : array();
        foreach ($labels as $label) {
            // The capture group is optional and may include surrounding
            // whitespace; skip empty labels rather than emitting "http://".
            $label = trim($label);
            if ($label !== '') {
                $sites[] = 'http://' . $label;
            }
        }
    }
    $topsites = $sites;

    return implode("\n", array_slice($topsites, 0, $num));
}
// Output files to generate, in order. Each job is
// array(target path, generator function name, generator arguments).
// The generators are invoked lazily inside the loop so every list is fetched
// and written before the next one starts.
$jobs = array(
    array('lists/DoubleClick Ad Planner.txt', 'doubleclick_adplanner', array()),
    array('lists/Alexa 500.txt',              'alexa_global',          array(500)),
    array('lists/Alexa Global 1000.txt',      'alexa_global',          array(1000)),
    array('lists/Alexa10k.txt',               'alexa_global',          array(10000)),
    array('lists/Alexa US 500.txt',           'alexa_us',              array(500)),
    array('lists/Alexa US 100.txt',           'alexa_us',              array(100)),
    array('lists/Quantcast10K.txt',           'quantcast',             array(10000)),
);
foreach ($jobs as $job) {
    list($path, $generator, $args) = $job;
    file_put_contents($path, call_user_func_array($generator, $args));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment