Created
January 16, 2016 12:42
-
-
Save bisko/2027f6a94972fd525023 to your computer and use it in GitHub Desktop.
Detect real URL behind an audio stream.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Background: I had to check several hundred audio streams and verify
 * whether each URL actually points to a valid stream. When a URL did not,
 * it had to be replaced with the real stream URL behind it.
 *
 * It is best to automate such things, so the code below does that.
 * It is basic: it mostly detects shoutcast/icecast streams and
 * .asf/.asx and .m3u/.m3u8 playlists.
 */
/**
 * Resolve a stream or playlist URL to the URL of the stream behind it.
 *
 * URLs containing ".m3u"/".m3u8" are downloaded and parsed with parsem3u();
 * URLs ending in ".asf"/".asx" are downloaded and parsed with parseasx();
 * anything else is treated as a single candidate stream URL. Each candidate
 * is probed with detectStream() and the first one that answers is returned.
 *
 * Note: candidates are always probed over plain TCP (default port 80), so
 * https-only servers are not detected by the probe.
 *
 * @param string $streamURL Stream or playlist URL to inspect.
 *
 * @return string|false The first candidate that answered the probe
 *                      (shoutcast URLs get the ";1" suffix appended). If no
 *                      candidate answered, the first candidate unchanged, or
 *                      false when the playlist yielded no candidates at all.
 */
function get_valid_stream_url($streamURL) {
    // Give up on a playlist download after 10 seconds.
    $ctx = stream_context_create(array(
        'http' => array(
            'timeout' => 10,
        ),
    ));

    if (preg_match('!\.m3u8?!', $streamURL)) {
        // m3u / m3u8 playlist
        $playlist = file_get_contents($streamURL, false, $ctx);
        $url_streams = ($playlist === false) ? array() : parsem3u($playlist);
    }
    elseif (preg_match('!\.as[fx]$!', $streamURL)) {
        // asf / asx playlist
        $playlist = file_get_contents($streamURL, false, $ctx);
        $url_streams = ($playlist === false) ? array() : parseasx($playlist);
    }
    else {
        $url_streams = array($streamURL);
    }

    $url_streams = array_unique($url_streams);

    foreach ($url_streams as $stream) {
        $parts = parse_url($stream);

        // parse_url() returns false on seriously malformed URLs, and a
        // relative playlist entry has no host: neither can be probed.
        if (!is_array($parts) || empty($parts['host'])) {
            continue;
        }

        $port = isset($parts['port']) ? $parts['port'] : 80;
        $path = isset($parts['path']) ? $parts['path'] : '/';

        // The query string is part of the request target; without it the
        // server may answer for a different resource.
        if (isset($parts['query'])) {
            $path .= '?'.$parts['query'];
        }

        $result = detectStream($parts['host'], $port, $path);

        if ($result === 'shoutcast') {
            // Drop any existing ";..." suffix before appending ";1".
            // No "u" modifier: the URL is not guaranteed to be valid UTF-8.
            $base = preg_replace('!;.*$!s', '', $stream);

            // "http://host:8000" must become "http://host:8000/;1";
            // appending ";1" directly to the port would corrupt the URL.
            if (!isset($parts['path']) && !isset($parts['query']) && !isset($parts['fragment'])) {
                $base .= '/';
            }

            return $base.';1';
        }

        if ($result === 'icecast' || $result === 'other') {
            return $stream;
        }
    }

    // Nothing answered: fall back to the first candidate (false if none).
    return reset($url_streams);
}
/**
 * Probe a host with a plain HTTP GET and classify the server by its
 * response headers.
 *
 * Only the response headers are read; the connection is closed before any
 * audio data is consumed.
 *
 * @param string   $host Host name or IP address to connect to.
 * @param int|null $port TCP port; any falsy value means 80.
 * @param string   $path Request target (path plus optional query string);
 *                       an empty value means "/".
 *
 * @return string|false 'icecast' when the Server header mentions icecast or
 *                      MediaGateway, 'shoutcast' when any header name starts
 *                      with "icy", 'other' for any other response, or false
 *                      when the connection could not be established or the
 *                      request could not be sent.
 */
function detectStream($host, $port = 80, $path = '/') {
    if (!$port) {
        $port = 80;
    }

    // An empty request target would produce a malformed request line.
    if ($path === null || $path === '') {
        $path = '/';
    }

    if (!is_string($host) || $host === '') {
        return false;
    }

    $socket = fsockopen($host, $port, $errno, $err, 5);
    if ($socket === false) {
        return false;
    }

    // The fsockopen() timeout only covers connecting; this one bounds reads.
    stream_set_timeout($socket, 5);

    // HTTP/1.1 requires a Host header; name-based virtual hosts depend on it.
    $request  = "GET ".$path." HTTP/1.1\r\n";
    $request .= "Host: ".$host.($port != 80 ? ':'.$port : '')."\r\n";
    $request .= "Connection: Close\r\n\r\n";

    if (fwrite($socket, $request) === false) {
        fclose($socket);
        return false;
    }

    // Read header lines up to the blank line that ends the header section.
    $headers = array();
    while (!feof($socket)) {
        $line = fgets($socket, 4096);
        if ($line === false || trim($line) === '') {
            break;
        }

        // Lines without a colon (the status line) carry no header.
        $pieces = explode(':', $line, 2);
        if (count($pieces) < 2) {
            continue;
        }

        $name  = strtolower(trim($pieces[0]));
        $value = trim($pieces[1]);
        if ($name !== '' && $value !== '') {
            $headers[$name] = $value;
        }
    }

    fclose($socket);

    // detect icecast (and MediaGateway) via the Server header.
    // No "u" modifier: header bytes are not guaranteed to be valid UTF-8.
    if (isset($headers['server']) && preg_match('!(icecast|MediaGateway)!i', $headers['server'])) {
        return 'icecast';
    }

    // detect shoutcast via its icy-* response headers
    foreach (array_keys($headers) as $header_name) {
        if (strncmp($header_name, 'icy', 3) === 0) {
            return 'shoutcast';
        }
    }

    return 'other';
}
/**
 * Extract the absolute http(s) URLs from the contents of an m3u playlist.
 *
 * Each URL must start its own line (leading spaces/tabs are tolerated).
 * Comment lines such as "#EXTINF" and relative entries are ignored.
 *
 * @param string $file_content Raw playlist contents.
 *
 * @return array List of unique URLs in order of first appearance; empty
 *               when the input is not a string or contains no URL.
 */
function parsem3u($file_content) {
    if (!is_string($file_content) || $file_content === '') {
        return array();
    }

    // A UTF-8 byte order mark would hide a URL on the very first line.
    if (strncmp($file_content, "\xEF\xBB\xBF", 3) === 0) {
        $file_content = substr($file_content, 3);
    }

    // [^\r\n] keeps the "\r" of CRLF line endings out of the captured URL.
    // No "u" modifier: playlists are often not valid UTF-8, which would
    // make the whole match fail.
    if (!preg_match_all('!^[ \t]*(https?://[^\r\n]+)!im', $file_content, $matches)) {
        return array();
    }

    $urls = array();
    foreach ($matches[1] as $url) {
        $url = rtrim($url);
        if ($url !== '') {
            $urls[] = $url;
        }
    }

    // array_unique() keeps the original keys; reindex to a plain list.
    return array_values(array_unique($urls));
}
/**
 * Extract the stream URLs from the contents of an ASX playlist.
 *
 * Collects the href attribute of every <ref> element. Element and attribute
 * names are matched case-insensitively, while the URLs themselves are
 * returned with their original case.
 *
 * @param string $file_content Raw ASX (XML) contents.
 *
 * @return array List of href values in document order; empty when the input
 *               is empty, cannot be parsed as XML, or contains no <ref>
 *               element with a non-empty href.
 */
function parseasx($file_content) {
    if (!is_string($file_content)) {
        return array();
    }

    $file_content = trim($file_content);
    if ($file_content === '') {
        return array();
    }

    // ASX files mix tag case freely (<ASX>...</asx>), which XML rejects.
    // Lowercase the element names only, so URL case is left intact.
    $normalized = preg_replace_callback(
        '@<(/?)([a-z][a-z0-9_.:-]*)@i',
        function ($match) {
            return '<'.$match[1].strtolower($match[2]);
        },
        $file_content
    );

    // Escape bare ampersands (common in query strings) so the XML parses;
    // predefined and numeric entities are left alone.
    if (is_string($normalized)) {
        $normalized = preg_replace(
            '@&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-f]+);)@i',
            '&amp;',
            $normalized
        );
    }

    if (!is_string($normalized) || $normalized === '') {
        return array();
    }

    // Collect parse errors internally instead of emitting PHP warnings,
    // then restore whatever error mode the caller had.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadXML($normalized);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return array();
    }

    $possible_urls = array();
    foreach ($doc->getElementsByTagName('ref') as $element) {
        // Attribute names keep their original case (href, HREF, Href, ...).
        foreach ($element->attributes as $attribute) {
            if (strtolower($attribute->nodeName) !== 'href') {
                continue;
            }

            $url = trim($attribute->nodeValue);
            if ($url !== '') {
                $possible_urls[] = $url;
            }
            break;
        }
    }

    return $possible_urls;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment