Skip to content

Instantly share code, notes, and snippets.

@bisko
Created January 16, 2016 12:42
Show Gist options
  • Save bisko/2027f6a94972fd525023 to your computer and use it in GitHub Desktop.
Save bisko/2027f6a94972fd525023 to your computer and use it in GitHub Desktop.
Detect real URL behind an audio stream.
<?php
/**
* I had to check several hundred audio streams and validate whether each
* URL actually points to a working stream - and, if it did not, fix it.
*
* It is best to automate such things, so the code below does that.
* It is basic: it mostly detects shoutcast/icecast streams and resolves
* .asf/.asx and .m3u/.m3u8 playlists.
*/
/**
 * Resolve the URL of the real audio stream behind $streamURL.
 *
 * Playlist URLs (.m3u/.m3u8 anywhere in the URL, .asf/.asx at its end) are
 * downloaded and expanded into their entries; any other URL is treated as a
 * single candidate. Each candidate is probed with detectStream() and the
 * first one that answers is returned.
 *
 * @param string $streamURL Stream or playlist URL to check.
 *
 * @return string|false The usable stream URL (with ";1" appended for
 *                      shoutcast servers). If no candidate answers, the first
 *                      candidate is returned unchanged; if there are no
 *                      candidates at all, false (the result of reset() on an
 *                      empty array).
 */
function get_valid_stream_url($streamURL) {
    $ctx = stream_context_create(array(
        'http' => array(
            // Give up on a playlist download after 10 seconds.
            'timeout' => 10,
        ),
    ));

    if (preg_match('!\.m3u8?!', $streamURL)) {
        // m3u / m3u8 playlist
        $playlist = file_get_contents($streamURL, false, $ctx);
        $url_streams = ($playlist === false) ? array() : parsem3u($playlist);
    } elseif (preg_match('!\.as[fx]$!', $streamURL)) {
        // asx / asf playlist
        $playlist = file_get_contents($streamURL, false, $ctx);
        $url_streams = ($playlist === false) ? array() : parseasx($playlist);
    } else {
        $url_streams = array($streamURL);
    }

    $url_streams = array_unique($url_streams);

    foreach ($url_streams as $stream) {
        $parts = parse_url($stream);

        // Malformed or host-less entries cannot be probed at all.
        if ($parts === false || empty($parts['host'])) {
            continue;
        }

        // parse_url() omits keys for components that are absent from the URL.
        $port = isset($parts['port']) ? $parts['port'] : 80;
        $path = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        if (isset($parts['query'])) {
            // The request target must include the query string, not just the path.
            $path .= '?'.$parts['query'];
        }

        $result = detectStream($parts['host'], $port, $path);

        if ($result === 'shoutcast') {
            // Drop any existing ";..." suffix before appending our own.
            $base = preg_replace('!;.*$!is', '', $stream);

            // "http://host:8000" must become "http://host:8000/;1",
            // not "http://host:8000;1".
            if (!isset($parts['path']) && !isset($parts['query']) && !isset($parts['fragment'])) {
                $base .= '/';
            }

            return $base.';1';
        }

        if ($result === 'icecast' || $result === 'other') {
            return $stream;
        }
        // Empty result: the server did not answer, try the next candidate.
    }

    return reset($url_streams);
}
/**
 * Probe a stream server and classify it by its HTTP response headers.
 *
 * Opens a TCP connection, sends a minimal GET request for $path and reads
 * only the response headers (up to the first blank line).
 *
 * @param string   $host Host name or IP address to connect to.
 * @param int|null $port TCP port; any empty value falls back to 80.
 * @param string   $path Request target (path plus optional query string);
 *                       an empty value falls back to "/".
 *
 * @return string|false 'icecast' when the Server header mentions icecast or
 *                      MediaGateway, 'shoutcast' when any header name starts
 *                      with "icy", 'other' for any other response, and false
 *                      when the connection cannot be opened or the request
 *                      cannot be sent.
 */
function detectStream($host, $port = 80, $path = '/') {
    if (!$port) {
        $port = 80;
    }
    if ($path === null || $path === '') {
        $path = '/';
    }

    // 5 second connect timeout.
    $socket = fsockopen($host, (int) $port, $errno, $errstr, 5);
    if ($socket === false) {
        return false;
    }

    // Bound every read as well, so a silent server cannot block us forever.
    stream_set_timeout($socket, 5);

    // HTTP/1.1 requires a Host header; the port is only added when non-default.
    $host_header = ((int) $port === 80) ? $host : $host.':'.$port;

    $request  = "GET ".$path." HTTP/1.1\r\n";
    $request .= "Host: ".$host_header."\r\n";
    $request .= "Connection: Close\r\n\r\n";

    if (fwrite($socket, $request) === false) {
        fclose($socket);
        return false;
    }

    // Read the header section only; the blank line marks its end.
    $raw_headers = '';
    while (!feof($socket)) {
        $line = fgets($socket, 4096);
        if ($line === false || trim($line) === '') {
            break;
        }
        $raw_headers .= $line;
    }

    // We never want the body (it is an endless audio stream), so hang up now.
    fclose($socket);

    $headers = array();
    foreach (preg_split('![\r\n]+!', $raw_headers) as $header) {
        $pieces = explode(':', $header, 2);
        if (trim($pieces[0]) === '') {
            continue;
        }

        // Lower-case the name and strip every character that is not a letter,
        // so "icy-name" becomes "icyname" and "Server" becomes "server".
        $name = preg_replace('/[^a-z][^-a-z0-9]*/i', '', strtolower(trim($pieces[0])));
        $value = isset($pieces[1]) ? trim($pieces[1]) : '';

        // Lines without a value (e.g. the status line) are not stored.
        if ($value === '') {
            continue;
        }

        $headers[$name] = is_numeric($value) ? (int) $value : $value;
    }

    // detect icecast
    if (isset($headers['server']) && preg_match('!(icecast|MediaGateway)!i', (string) $headers['server'])) {
        return 'icecast';
    }

    // detect shoutcast: it announces itself through icy-* headers
    foreach (array_keys($headers) as $header_name) {
        if (strncmp((string) $header_name, 'icy', 3) === 0) {
            return 'shoutcast';
        }
    }

    return 'other';
}
/**
 * Extract the stream URLs from the content of an m3u / m3u8 playlist.
 *
 * Every line that starts with "http://" or "https://" (after trimming
 * surrounding whitespace, including the "\r" of CRLF line endings) counts as
 * a URL. Comment/directive lines and relative entries are ignored.
 *
 * @param string|false $file_content Raw playlist content; anything that is
 *                                   not a non-empty string yields no URLs.
 *
 * @return array Unique URLs in order of first appearance, indexed from 0.
 */
function parsem3u($file_content) {
    if (!is_string($file_content) || $file_content === '') {
        return array();
    }

    $urls = array();
    foreach (preg_split('!\r\n|\r|\n!', $file_content) as $line) {
        $line = trim($line);
        if (preg_match('!^https?://!i', $line)) {
            $urls[] = $line;
        }
    }

    // array_unique() keeps the original keys; re-index to get a clean list.
    return array_values(array_unique($urls));
}
/**
 * Extract the stream URLs (the href attribute of every <ref> element) from
 * the content of an ASX playlist.
 *
 * ASX files found in the wild mix upper and lower case in tag and attribute
 * names and contain bare "&" characters in URLs, neither of which an XML
 * parser accepts. The markup is therefore normalised before parsing: tag and
 * attribute names are lower-cased and bare ampersands are escaped. Quoted
 * attribute values are left untouched so URLs keep their original case.
 *
 * @param string|false $file_content Raw playlist content; anything that is
 *                                   not a non-empty string yields no URLs.
 *
 * @return array URLs in document order; empty when the content cannot be
 *               parsed or contains no <ref href="..."> entries.
 */
function parseasx($file_content) {
    if (!is_string($file_content)) {
        return array();
    }

    $file_content = trim($file_content);
    if ($file_content === '') {
        return array();
    }

    // Lower-case everything inside a tag except quoted attribute values.
    $lowercase_tag = function ($tag) {
        $normalized = preg_replace_callback(
            '@"[^"]*"|\'[^\']*\'|[^"\']+@s',
            function ($piece) {
                $first = $piece[0][0];
                return ($first === '"' || $first === "'") ? $piece[0] : strtolower($piece[0]);
            },
            $tag[0]
        );

        return ($normalized === null) ? $tag[0] : $normalized;
    };

    // "<!...>" and "<?...?>" constructs are skipped: lower-casing them
    // (e.g. <!DOCTYPE or <![CDATA[) would make the document invalid.
    $normalized = preg_replace_callback(
        '@<(?![!?])(?:[^>"\']|"[^"]*"|\'[^\']*\')*>@s',
        $lowercase_tag,
        $file_content
    );
    if ($normalized !== null) {
        $file_content = $normalized;
    }

    // Escape ampersands that do not already start an XML entity reference.
    $escaped = preg_replace(
        '@&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)@',
        '&amp;',
        $file_content
    );
    if ($escaped !== null) {
        $file_content = $escaped;
    }

    // Collect parse errors internally instead of emitting PHP warnings,
    // then restore whatever setting the caller had.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadXML($file_content);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return array();
    }

    $possible_urls = array();

    $xpath = new DOMXpath($doc);
    $elements = $xpath->query("//ref");
    if ($elements) {
        foreach ($elements as $element) {
            $url = $element->getAttribute('href');
            if (!empty($url)) {
                $possible_urls[] = $url;
            }
        }
    }

    return $possible_urls;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment