Skip to content

Instantly share code, notes, and snippets.

@jamescridland
Last active November 29, 2022 02:29
Show Gist options
  • Save jamescridland/e982520faa5e5bc3cfcf926e768c3126 to your computer and use it in GitHub Desktop.
Save jamescridland/e982520faa5e5bc3cfcf926e768c3126 to your computer and use it in GitHub Desktop.
Get all OP3 podcast hits from a single day
<?php
// Collect every OP3 podcast hit recorded on one specific day.
// Very rough PHP code from james@crid.land

// API token: only filled in here when nothing has set $config['op3-bearer'] already.
// Grab an API key here: https://op3.dev/api/keys and put its bearer on the line below.
// For testing purposes, the preview bearer given here will also work.
if (empty($config['op3-bearer'])) {
    $config['op3-bearer'] = 'preview07ce';
}

// The day to report on. The window opens at midnight on this date...
$datefrom = '2022-11-25';

// ...and closes exactly one day later, again at midnight.
$dateto = (new DateTimeImmutable($datefrom))
    ->add(new DateInterval('P1D'))
    ->format('Y-m-d\TH:i:s');

// Rows requested per API call: 1,000 at a time keeps the number of calls low.
$requests = 1000;

// The URL search to run. Podnews uses the "pg" - podcast guid - URL structure here.
// Podnews, all episodes:
$itemurl = 'https://op3.dev/e,pg=9b024349-ccf0-5f69-a609-6b82873eab3c/podnews.net/audio/podnews*';

// Alternatives for fun
// $itemurl='https://op3.dev/e/mp3s.nashownotes.com/PC20-111-*'; // Podcasting 2.0 podcast, episode 111, released 18 Nov 22
// $itemurl='https://op3.dev/e/chrt.fm/track/4EB79A/https://verifi.podscribe.com/rss/p/traffic.megaphone.fm/GLSS7428822213.mp3*'; // Practical Stoicism, episode S3/E26, released 25 Nov 22
// $itemurl='https://op3.dev/e/https://dts.podtrac.com/redirect.mp3/www.buzzsprout.com/231452/11757910-apple-s-hidden-project-to-help-podcasts-get-discovered.mp3'; // Buzzcast ep 90, released 25 Nov 22
// $itemurl='https://op3.dev/e/mp3s.nashownotes.com/NA-1506-*'; // NoAgenda, episode 1506, released 24 Nov 22. Caution: not recommended. VERY big: more than 384,000 hits in the one day. Will result in 384 API grabs, and probably exhausting all your memory.

// HTTP options for every API call: identify ourselves with a user agent, and
// ignore HTTP errors so that the body of an error response (which carries the
// error message) is still returned to us.
$context = stream_context_create([
    'http' => [
        'method' => 'GET',
        'header' => "User-Agent: PodnewsBot/1.0 https://podnews.net\r\n",
        'ignore_errors' => true,
    ],
]);

// State for the fetch loop.
$data = [];             // every row fetched so far ends up in here
$fetch = true;          // flips to false once the last page has been read
$fetchdate = $datefrom; // timestamp the next request starts after
// Page through the OP3 redirect-logs API, $requests rows per call, appending
// each page to $data, until a page comes back short (meaning the whole window
// from $datefrom to $dateto has been read).

// The end of the window never changes between calls, so format it once.
// Timestamps are cut to 23 characters (millisecond precision) and given a 'Z' suffix.
$op3end = substr(date_format(date_create($dateto), 'Y-m-d\TH:i:s.u'), 0, 23) . 'Z';

while ($fetch) {
    // Build the query with http_build_query() so every value - in particular
    // $itemurl, which may itself contain '&', '?', '#' or spaces - is encoded
    // and cannot break or truncate the request.
    $op3url = 'https://op3.dev/api/1/redirect-logs?' . http_build_query([
        'startAfter' => substr(date_format(date_create($fetchdate), 'Y-m-d\TH:i:s.u'), 0, 23) . 'Z',
        'end' => $op3end,
        'format' => 'json',
        'url' => $itemurl,
        'limit' => $requests,
        'token' => $config['op3-bearer'],
    ], '', '&', PHP_QUERY_RFC3986);

    // uncomment the below to see every call this makes to OP3. It should be one every $requests hits.
    // echo '<hr>Calling URL: '.htmlspecialchars($op3url);

    $op3body = file_get_contents($op3url, false, $context);
    if ($op3body === false) {
        // Network-level failure: there is no response body to decode at all.
        echo 'Could not fetch data from the OP3 API';
        exit;
    }

    $op3data = json_decode($op3body, true);

    if (isset($op3data['message'])) {
        // The API reported an error; show it (escaped, since it goes into HTML) and stop.
        echo htmlspecialchars((string) $op3data['message'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        exit;
    }

    if (!is_array($op3data) || !isset($op3data['rows']) || !is_array($op3data['rows'])) {
        // Not JSON, or JSON without a list of rows: stop rather than carry on with bad data.
        echo 'Unexpected response from the OP3 API';
        exit;
    }

    // Add to $data all the rows we've just got. Appending in place avoids
    // re-copying everything collected so far on every page.
    foreach ($op3data['rows'] as $op3row) {
        $data[] = $op3row;
    }

    if (!empty($op3data['rows'][$requests - 1]['time'])) {
        // We asked for $requests rows and got them all, so there may be more:
        // continue from the time of the last row received.
        // Note: for a *very* heavy podcast, there is a slight possibility that
        // several hits share the same millisecond as that last row. Those hits
        // would be skipped, but only at the exact boundary between two calls.
        // Possibility for skipping one or more downloads in this way is
        // 1 in 86,400,000 per thousand hits.
        $fetchdate = $op3data['rows'][$requests - 1]['time'];
    } else {
        // We didn't get the full number of $requests, so we're done. Don't fetch any more.
        $fetch = false;
    }
}
// Report: print totals and per-continent / country / timezone / referer
// breakdowns of the rows in $data as HTML, followed by a dump of every row.

// Escape text for HTML output. Row values come from the API (referers in
// particular are whatever the requesting client sent), so they must never be
// echoed into the page raw.
$op3escape = function ($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

// Count how often each value of $field occurs across $rows, biggest first.
// Values that are neither strings nor integers (e.g. null) are left out,
// because array_count_values() can only count those two types and warns otherwise.
$op3tally = function (array $rows, $field) {
    $values = array_filter(array_column($rows, $field), function ($value) {
        return is_string($value) || is_int($value);
    });
    $counts = array_count_values($values);
    arsort($counts); // sort so biggest is top
    return $counts;
};

echo '<h1>OP3 stats</h1>';
echo 'For: ' . $op3escape($itemurl) . '<br>';
echo 'On: ' . date_format(date_create($datefrom), "Y-m-d\TH:i:s.u");

echo "<h2>Totals</h2>";
// Line break between the two totals so they don't run together as "N hitsM unique IPs".
echo number_format(count($data), 0) . ' hits<br>';
echo number_format(count(array_unique(array_column($data, 'hashedIpAddress')))) . ' unique IPs';

echo "<h2>Continents</h2>";
$continentcodes = array("AF"=>"Africa","NA"=>"North America","OC"=>"Oceania","AN"=>"Antarctica","AS"=>"Asia","EU"=>"Europe","SA"=>"South America");
$continents = $op3tally($data, 'continent'); // count all the 'continent' values
foreach ($continents as $continentcode => $continentcount) {
    // Fall back to the raw code when it isn't one of the seven known continents.
    $continentname = isset($continentcodes[$continentcode]) ? $continentcodes[$continentcode] : $continentcode;
    echo $op3escape($continentname) . ': ' . $continentcount . '<br>';
}

// The remaining breakdowns are print_r() dumps: escaped, and wrapped in <pre>
// so the one-entry-per-line layout survives in the browser.
echo "<h2>Countries</h2>";
$countries = $op3tally($data, 'country');
echo '<pre>' . $op3escape(print_r($countries, true)) . '</pre>';

echo "<h2>Timezones</h2>";
$timezones = $op3tally($data, 'timezone');
echo '<pre>' . $op3escape(print_r($timezones, true)) . '</pre>';

echo "<h2>Referers</h2>";
$referers = $op3tally($data, 'referer');
echo '<pre>' . $op3escape(print_r($referers, true)) . '</pre>';

echo '<h2>Total data</h2>';
echo '<pre>';
echo $op3escape(print_r($data, true));
echo '</pre>';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment