Created
March 22, 2014 04:14
-
-
Save anonymous/9700974 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html><body><pre><?php
// Demo: pull the channel name and the archived-broadcast id out of a Twitch URL
// using one regular expression assembled from small, individually commented parts.
// Output: the assembled regex, an <hr />, then a print_r() dump of the matches.
//
// URL that generated the original version of this code:
// http://txt2re.com/index-php.php3?s=http://www.twitch.tv/backlogathon/b/512356452&-20&6
// modifications by KK with a little help from Dogfish Head 90 Minute IPA

// this is the string we want to parse, i.e. the twitch URL in question
$txt='http://www.twitch.tv/backlogathon/b/512356452';

// lets define a bunch of mini-regexes and then string them all into one big one later
$re1='';                # empty leading filler (txt2re generated '.*?' here). It must stay
                        # defined only because the concatenation below references it.
$re2='https?';          # says "https, with the last character optional", so http or https
$re3=':\/\/';           # string "://" but in regex you have to escape your "/" which turns it into "\/"
$re4='www';             # string "www"
$re5='\.';              # string "." but . has to be escaped also, hence "\."
$re6='twitch';          # string "twitch"
$re7='\.';              # string "."
$re8='tv';              # string "tv"
$re9='\/';              # string "/"
$re10='([a-z0-9_]+)';   # channel name: letters, digits and underscores, store as variable 1*
$re12='(\/b\/)';        # string "/b/", store as variable 2*
$re14='(\\d+)';         # any integer value here, store as variable 3*
// * in regex, any matches to a pattern enclosed in () are stored in the results array
//   if you turned $re8 up there into "(tv)" instead of just "tv" you'd see it return
//   as an extra capture group in the array.

// build the regex once so the string we display and the string we match with can never drift apart
$regex=$re1.$re2.$re3.$re4.$re5.$re6.$re7.$re8.$re9.$re10.$re12.$re14;
// "/" is the delimiter (which is why every literal "/" above is escaped); "i" = case-insensitive
$pattern='/'.$regex.'/i';

// show the final regex string (escaped, because we are printing into an HTML page)
echo htmlspecialchars($regex);

// a nice visual separation from the result set
echo "<hr />";

// try the regex against the URL. store the results in an array called $matches and dump it to screen.
// $matches[0] holds the full text matched by the whole pattern (here that is the entire URL).
// all subsequent items in the array are the results of our capture groups, i.e. those wrapped in () above.
if (preg_match_all($pattern, $txt, $matches))
{
    echo htmlspecialchars(print_r($matches, true));
    // these should also work if you want to do more than just dump the array:
    // $twitch_username = $matches[1][0]; //column 1, row 0 (arrays are 0 based)
    // $twitch_archived_broadcast_id = $matches[3][0];
}

//these are the original parameters from the URL at the top of the file
//as originally generated by txt2re
//we store them simply for backup purposes, in case anything blows up
//but they're also educational. helpful to see what it turned into
//all but $re14 were modified
//$re1 through $re9 and $re12 were modified from "very generic" to "very specific";
//$re10 was widened so that channel names containing digits or underscores match too.
//$re11 and $re13 were removed completely because the "/"s that precede and
//follow the "b" in the URL were moved into $re12. I did this because I want
//it to return in the $matches array as "/b/" rather than just "b".
//$re1='.*?'; # Non-greedy match on filler
//$re2='(?:[a-z][a-z]+)'; # Uninteresting: word http
//$re3='.*?'; # Non-greedy match on filler
//$re4='(?:[a-z][a-z]+)'; # Uninteresting: word www
//$re5='.*?'; # Non-greedy match on filler
//$re6='(?:[a-z][a-z]+)'; # Uninteresting: word twitch
//$re7='.*?'; # Non-greedy match on filler
//$re8='(?:[a-z][a-z]+)'; # Uninteresting: word tv
//$re9='.*?'; # Non-greedy match on filler
//$re10='((?:[a-z][a-z]+))'; # Word 1 (letters only)
//$re11='.*?'; # Non-greedy match on filler
//$re12='(b)'; # Any Single Word Character (Not Whitespace) 1
//$re13='.*?'; # Non-greedy match on filler
?></pre></body></html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment