Skip to content

Instantly share code, notes, and snippets.

@Jekis
Created January 19, 2020 16:08
Show Gist options
  • Save Jekis/14c4cab60e87049c1c3b671381663ce3 to your computer and use it in GitHub Desktop.
Save Jekis/14c4cab60e87049c1c3b671381663ce3 to your computer and use it in GitHub Desktop.
Finds js code in the <script> tag.
<?php
/**
* This is a regex builder.
* The final regular expression finds the JS code inside a <script> tag.
* It is complex because it has to cope with arbitrary JS code, including
* comments and string literals that themselves contain "</script>".
*/
/*
 * Building blocks of the expression. Every entry is one alternative that can
 * consume a piece of JS code:
 *   name        - name of the capturing group (an unnamed group is used when empty),
 *   exp         - the PCRE fragment,
 *   match_chars - an example of text the fragment is meant to match
 *                 (not read by the code below; kept as documentation).
 *
 * The order matters: alternatives are tried top to bottom.
 * Template literals (backticks) and JS regex literals have no group of their
 * own; they are consumed character by character by "not-a-quote".
 */
$jsCodeGroups = [
    // Single-line JS comment, up to and including the line break.
    'comment-1' => [
        'name' => 'c1',
        'exp' => '//[^\n]*\n',
        'match_chars' => '// Foo comment here',
    ],
    // Block JS comment (may span several lines thanks to the "s" modifier).
    'comment-2' => [
        'name' => 'c2',
        'exp' => '/\*.*?\*/',
        'match_chars' => '/* Foo comment here */',
    ],
    // Any character except a single quote and a double quote.
    'not-a-quote' => [
        'name' => 'notq',
        'exp' => '[^\'"]', // In regexp it must look like text: [^'"]
        'match_chars' => 'var a = 1; alert(1);',
    ],
    // JS string wrapped in single quotes. A backslash always consumes the
    // character after it, so both \' and \\ are handled: '\\' is a complete
    // string and does not swallow the code that follows it.
    'single-quotes-wrap' => [
        'name' => 'sqw',
        'exp' => "'(?:\\\\.|[^'\\\\])*'", // In regexp it must look like text: '(?:\\.|[^'\\])*'
        'match_chars' => "'I\'m a foo string'",
    ],
    // JS string wrapped in double quotes; same escape handling as above.
    'double-quotes-wrap' => [
        'name' => 'dqw',
        'exp' => '"(?:\\\\.|[^"\\\\])*"', // In regexp it must look like text: "(?:\\.|[^"\\])*"
        'match_chars' => '"Another \"foo string\""',
    ],
    // Any new line.
    'new-line' => [
        'name' => 'nl',
        'exp' => '\n', // In regexp it must look like text: \n
        'match_chars' => "\n",
    ],
];

// Wrap every fragment into a (named, when a name is given) capturing group.
$jsCodeGroupsRegexps = [];
foreach ($jsCodeGroups as $gData) {
    $jsCodeGroupsRegexps[] = !empty($gData['name'])
        ? sprintf('(?<%s>%s)', $gData['name'], $gData['exp'])
        : sprintf('(%s)', $gData['exp']);
}

// Join the groups into one alternation.
$jsCodeGroupsRegexp = implode('|', $jsCodeGroupsRegexps);

// And the final regular expression.
// The opening tag may carry attributes (e.g. <script type="text/javascript">).
// The alternation is repeated lazily, so the match ends at the first </script>
// that is not consumed as part of a comment or a string.
$finalRegExp = sprintf('~<script(?:\s[^>]*)?>(?<js>(%s)*?)?</script>~si', $jsCodeGroupsRegexp);
// ~<script(?:\s[^>]*)?>(?<js>((?<c1>//[^\n]*\n)|(?<c2>/\*.*?\*/)|(?<notq>[^'"])|(?<sqw>'(?:\\.|[^'\\])*')|(?<dqw>"(?:\\.|[^"\\])*")|(?<nl>\n))*?)?</script>~si
// Sample document used to exercise the expression: a single <script> element
// whose body contains the text "</script>" inside string literals (with escaped
// quotes), a single-line comment and block comments. The first comment carries
// the marker "start" and the last one the marker "end"; the check that follows
// looks for both markers in the captured JS code.
$html = <<<HTML
<script>
/*start<!--*/
var num = 1;
var singleQuote = 'foo <script> \'foo\' </script> foo';
var doubleQuote = "foo <script> \"foo\" </script> foo";
// foo <script> foo </script> foo
/* foo <script> foo </script> foo */
/*
foo <script> foo </script> foo
*/
alert(num);
/*-->end*/
</script>
HTML;
// Print the expression, run it against the sample and report whether the whole
// script body (from the "start" marker to the "end" marker) was captured.
printf("RegExp: %s\n", $finalRegExp);

$matchResult = preg_match($finalRegExp, $html, $matches);

if ($matchResult === false) {
    // false means the PCRE engine itself failed (invalid pattern, backtrack
    // limit, ...), which is not the same as "the pattern did not match".
    printf('Wrong RegExp: matching failed with PCRE error code %d', preg_last_error());
} elseif ($matchResult === 0) {
    echo 'Wrong RegExp: Nothing matched';
} elseif (
    isset($matches['js'])
    // strpos() returns int 0 for a hit at the very beginning of the string,
    // so the result must be compared with false instead of being used as a boolean.
    && strpos($matches['js'], 'start') !== false
    && strpos($matches['js'], 'end') !== false
) {
    echo 'Correct RegExp: js code found!';
} else {
    echo 'Wrong RegExp: found js code is incorrect.';
}
exit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment