soaj1664/XSS Protection in 5 common contexts

## XSS Protection in 5 common contexts
<?php

    /**
     * XSS protection function for HTML context only
     * @usecases
     * <title>use this function if output reflects here or as a content of any HTML tag.</title>
     * e.g.,  <span>use this function if output reflects here</span>
     * e.g., <div>use this function if output reflects here</div>
     * @description
     * Sanitize/Filter < and > so that attacker can not leverage them for JavaScript execution.
     * @author Ashar Javed
     * @Link https://twitter.com/soaj1664ashar
     * @demo http://xssplaygroundforfunandlearn.netai.net/final.html
     */
    function htmlContextCleaner($input) {
        // Angle brackets are swapped for their named HTML entities so the
        // value cannot open or close a tag.
        $entityMap = array(
            '<' => '&lt;',
            '>' => '&gt;',
        );

        $escaped = str_replace(array_keys($entityMap), array_values($entityMap), $input);

        // stripslashes() un-quotes the result: backslashes are removed and a
        // doubled backslash collapses to a single one.
        return stripslashes($escaped);
    }

    /**
     * XSS protection function for script context only. It does not support unquoted string literals.
     * @usecases
     * @double quoted case e.g.,
     * <script> var searchquery = "use this function if output reflects here"; </script>
     * @single quoted case e.g.,
     * <script> var searchquery = 'use this function if output reflects here'; </script>
     * @back-tick quoted case (ES6 Template Strings or Multi-line Strings) e.g.,
     * <script> var searchquery = `use this function if output reflects here`; </script>
     * see https://leanpub.com/understandinges6/read#leanpub-auto-template-strings for reference
     * @description
     * Sanitize/Filter meta or control characters that attacker may use to break the context e.g.,
     * "; confirm(1); " OR '; prompt(1); // OR </script><script>alert(1)</script> OR `;alert(1);`
     * \ and % are filtered because they may break the page e.g., \n or %0a
     * The attacker may use \ in case of double injection points in JavaScript string literal context.
     * % only cause syntax error.
     * & is sanitized because of complex or nested context (if in use)
     * File Descriptor (@filedescriptor) reported a ES6 based bypass making use of ES6 template string substitutions.
     * Thanks to him for heads up. The template string substitutions starts with opening ${ and by keeping this in
     * mind both characters are now converted into their harmless form.
     * @author Ashar Javed
     * @Link https://twitter.com/soaj1664ashar
     * @demo http://xssplaygroundforfunandlearn.netai.net/final.html
     */
    function scriptContextCleaner($input) {
        // NOTE: str_replace() applies the pairs one after another in array
        // order. Because '&' is the last rule, it also rewrites the ampersands
        // emitted by every earlier rule (a double quote ends up as
        // "&amp;quot;"). The order is part of the behaviour; do not reorder.
        $entityMap = array(
            '"'    => '&quot;',
            '<'    => '&lt;',
            "'"    => '&apos;',
            '`'    => '&grave;',
            '$'    => '&dollar;',
            '{'    => '&lbrace;',
            '\\\\' => '&bsol;',   // needle is a pair of backslashes
            '%'    => '&percnt;',
            '&'    => '&amp;',
        );

        $escaped = str_replace(array_keys($entityMap), array_values($entityMap), $input);

        // Backslash pairs are already gone, so stripslashes() removes the
        // isolated backslashes that are left.
        return stripslashes($escaped);
    }

    /**
     * XSS protection function for an attribute context only. It does not support unquoted attribute values.
     * Use quotes (either single or double) around attribute values.
     * @usecases
     * @double quoted case e.g.,
     * <div class="use this function if output reflects here">attribute context</div>
     * In above example class attribute have been used but it can be any like id or alt etc.
     * @single quoted case e.g.,
     * <input type='text' value='use this function if output reflects here'>
     * @description
     * Sanitize/Filter meta or control characters that attacker may use to break the context e.g.,
     * "onmouseover="alert(1) OR 'onfocus='confirm(1) OR ``onmouseover=prompt(1)
     * back-tick ` is filtered because old IE browsers treat it as a valid separator. The attacker may use
     * `` or `\` in order to break the quoted attribute value but for exploitation it needs innerHTML assignment.
     * Even in old IE compat view ` may be used as quoted attribute value. Credit to @garethheyes for finding and bypassing
     * attributeContextCleaner function's old implementation in old IE browsers.
     * @author Ashar Javed
     * @Link https://twitter.com/soaj1664ashar
     * @demo http://xssplaygroundforfunandlearn.netai.net/final.html
     */
    function attributeContextCleaner($input) {
        // The three characters that can terminate a quoted attribute value
        // (double quote, single quote, back-tick) and their entity forms.
        $delimiters = array('"', "'", '`');
        $entities   = array('&quot;', '&apos;', '&grave;');

        // Replace the delimiters, then un-quote the result with stripslashes()
        // (backslashes removed; a doubled backslash collapses to one).
        return stripslashes(str_replace($delimiters, $entities, $input));
    }

    /**
     * XSS protection function for style context only. It does not support unquoted style attribute value.
     * @usecases
     * @double quoted case e.g.,
     * <span style="use this function if output reflects here"></span>
     * @single quoted case e.g.,
     * <div style='use this function if output reflects here'></div>
     * OR <style>use this function if output reflects here</style>
     * @description
     * Sanitize/Filter meta or control characters that attacker may use to execute JavaScript e.g.,
     * ( is filtered because width:expression(alert(1))
     * & is filtered in order to stop decimal + hex + HTML5 entity encoding
     * < is filtered in case developers are using <style></style> tags instead of style attribute.
     * < is filtered because attacker may close the </style> tag and then execute JavaScript.
     * The function allows simple styles e.g., color:red, height:100px etc.
     * @author Ashar Javed
     * @Link https://twitter.com/soaj1664ashar
     * @demo http://xssplaygroundforfunandlearn.netai.net/final.html
     */
    function styleContextCleaner($input) {
        // NOTE: str_replace() walks the pairs in order, so the trailing '&'
        // rule re-encodes the ampersands produced by the rules before it
        // ("(" ends up as "&amp;lpar;"). Keep '&' last.
        $needles = array('"', "'", '(', '\\\\', '<', '&');
        $entities = array('&quot;', '&apos;', '&lpar;', '&bsol;', '&lt;', '&amp;');

        $escaped = str_replace($needles, $entities, $input);

        // Backslash pairs were replaced above; stripslashes() removes the
        // isolated backslashes that remain.
        $result = stripslashes($escaped);

        return $result;
    }

    /**
     * XSS protection function for URL context. Please use quoted (either single or double) attribute.
     * @usecases
     * <a href="use this function if output reflects here">click</a>
     * <img src='use this function if output reflects here'>
     * <iframe src="use this function if output reflects here">
     * @description
     * Only allows URLs that start with http(s) or ftp. e.g.,
     * https://www.google.com
     * Protection against JavaScript, VBScript and Data URI JavaScript code execution etc.
     * @author Ashar Javed
     * @Link https://twitter.com/soaj1664ashar
     * @demo http://xssplaygroundforfunandlearn.netai.net/final.html
     */
    function urlContextCleaner($url) {
        // Allow-list: scheme must be http, https or ftp (case-insensitive),
        // followed by "//" and one or more characters that are NOT a double
        // quote, single quote, whitespace or backslash.
        //
        // The previous pattern was "[^...]*.[^...]*": the bare "." matched any
        // single character, so exactly one quote/space/backslash could slip
        // through (e.g. http://x"onmouseover=alert(1)//) and terminate the
        // quoted attribute. A single "[^...]+" run accepts the same URLs minus
        // those containing a forbidden character.
        //
        // With the "u" modifier preg_match() returns false for invalid UTF-8,
        // which falls through to the safe default below.
        $pattern = '#^(?:https?|ftp)://[^"\'\s\\\\]+$#iu';

        if (preg_match($pattern, (string)$url, $match) === 1) {
            return $match[0];
        }

        // Anything else is replaced by a harmless no-op URL.
        return 'javascript:void(0)';
    }
	<?php

	/**
	* XSS protection function for HTML context only
	* @usecases
	* <title>use this function if output reflects here or as a content of any HTML tag.</title>
	* e.g., <span>use this function if output reflects here</span>
	* e.g., <div>use this function if output reflects here</div>
	* @description
	* Sanitize/Filter < and > so that attacker can not leverage them for JavaScript execution.
	* @author Ashar Javed
	* @Link https://twitter.com/soaj1664ashar
	* @demo http://xssplaygroundforfunandlearn.netai.net/final.html
	*/
	function htmlContextCleaner($input) {
		// The replacement list had lost its entity encoding ("<" => "<"),
		// which made this function a no-op. The named entities are restored
		// so angle brackets can no longer open or close a tag.
		$bad_chars = array("<", ">");

		$safe_chars = array("&lt;", "&gt;");

		$output = str_replace($bad_chars, $safe_chars, $input);

		// stripslashes() un-quotes the result: backslashes are removed and a
		// doubled backslash collapses to a single one.
		return stripslashes($output);
	}

	/**
	* XSS protection function for script context only. It does not support unquoted string literals.
	* @usecases
	* @double quoted case e.g.,
	* <script> var searchquery = "use this function if output reflects here"; </script>
	* @single quoted case e.g.,
	* <script> var searchquery = 'use this function if output reflects here'; </script>
	* @back-tick quoted case (ES6 Template Strings or Multi-line Strings) e.g.,
	* <script> var searchquery = `use this function if output reflects here`; </script>
	* see https://leanpub.com/understandinges6/read#leanpub-auto-template-strings for reference
	* @description
	* Sanitize/Filter meta or control characters that attacker may use to break the context e.g.,
	* "; confirm(1); " OR '; prompt(1); // OR </script><script>alert(1)</script> OR `;alert(1);`
	* \ and % are filtered because they may break the page e.g., \n or %0a
	* The attacker may use \ in case of double injection points in JavaScript string literal context.
	* % only cause syntax error.
	* & is sanitized because of complex or nested context (if in use)
	* File Descriptor (@filedescriptor) reported a ES6 based bypass making use of ES6 template string substitutions.
	* Thanks to him for heads up. The template string substitutions starts with opening ${ and by keeping this in
	* mind both characters are now converted into their harmless form.
	* @author Ashar Javed
	* @Link https://twitter.com/soaj1664ashar
	* @demo http://xssplaygroundforfunandlearn.netai.net/final.html
	*/
	function scriptContextCleaner($input) {
		$bad_chars = array("\"", "<", "'", "`", "$", "{", "\\\\", "%", "&");

		// The entity strings had been decoded back to raw characters, leaving
		// unterminated string literals (a parse error). They are restored here.
		// str_replace() applies the pairs in order, so the final "&" rule also
		// re-encodes the ampersands emitted by the earlier rules; keep it last.
		$safe_chars = array("&quot;", "&lt;", "&apos;", "&grave;", "&dollar;", "&lbrace;", "&bsol;", "&percnt;", "&amp;");

		$output = str_replace($bad_chars, $safe_chars, $input);

		// Backslash pairs were replaced above; stripslashes() removes the
		// isolated backslashes that remain.
		return stripslashes($output);
	}

	/**
	* XSS protection function for an attribute context only. It does not support unquoted attribute values.
	* Use quotes (either single or double) around attribute values.
	* @usecases
	* @double quoted case e.g.,
	* <div class="use this function if output reflects here">attribute context</div>
	* In above example class attribute have been used but it can be any like id or alt etc.
	* @single quoted case e.g.,
	* <input type='text' value='use this function if output reflects here'>
	* @description
	* Sanitize/Filter meta or control characters that attacker may use to break the context e.g.,
	* "onmouseover="alert(1) OR 'onfocus='confirm(1) OR ``onmouseover=prompt(1)
	* back-tick ` is filtered because old IE browsers treat it as a valid separator. The attacker may use
	* `` or `\` in order to break the quoted attribute value but for exploitation it needs innerHTML assignment.
	* Even in old IE compat view ` may be used as quoted attribute value. Credit to @garethheyes for finding and bypassing
	* attributeContextCleaner function's old implementation in old IE browsers.
	* @author Ashar Javed
	* @Link https://twitter.com/soaj1664ashar
	* @demo http://xssplaygroundforfunandlearn.netai.net/final.html
	*/
	function attributeContextCleaner($input) {
		$bad_chars = array("\"", "'", "`");

		// The entity strings had been decoded back to raw quotes, which left
		// an unterminated string literal (a parse error). Restored so each
		// attribute delimiter maps to its entity form.
		$safe_chars = array("&quot;", "&apos;", "&grave;");

		$output = str_replace($bad_chars, $safe_chars, $input);

		// stripslashes() un-quotes the result: backslashes are removed and a
		// doubled backslash collapses to a single one.
		return stripslashes($output);
	}

	/**
	* XSS protection function for style context only. It does not support unquoted style attribute value.
	* @usecases
	* @double quoted case e.g.,
	* <span style="use this function if output reflects here"></span>
	* @single quoted case e.g.,
	* <div style='use this function if output reflects here'></div>
	* OR <style>use this function if output reflects here</style>
	* @description
	* Sanitize/Filter meta or control characters that attacker may use to execute JavaScript e.g.,
	* ( is filtered because width:expression(alert(1))
	* & is filtered in order to stop decimal + hex + HTML5 entity encoding
	* < is filtered in case developers are using <style></style> tags instead of style attribute.
	* < is filtered because attacker may close the </style> tag and then execute JavaScript.
	* The function allows simple styles e.g., color:red, height:100px etc.
	* @author Ashar Javed
	* @Link https://twitter.com/soaj1664ashar
	* @demo http://xssplaygroundforfunandlearn.netai.net/final.html
	*/
	function styleContextCleaner($input) {
		$bad_chars = array("\"", "'", "(", "\\\\", "<", "&");

		// The entity strings had been decoded back to raw characters, leaving
		// unterminated string literals (a parse error). They are restored here.
		// str_replace() applies the pairs in order, so the final "&" rule also
		// re-encodes the ampersands emitted by the earlier rules; keep it last.
		$safe_chars = array("&quot;", "&apos;", "&lpar;", "&bsol;", "&lt;", "&amp;");

		$output = str_replace($bad_chars, $safe_chars, $input);

		// Backslash pairs were replaced above; stripslashes() removes the
		// isolated backslashes that remain.
		return stripslashes($output);
	}

	/**
	* XSS protection function for URL context. Please use quoted (either single or double) attribute.
	* @usecases
	* <a href="use this function if output reflects here">click</a>
	* <img src='use this function if output reflects here'>
	* <iframe src="use this function if output reflects here">
	* @description
	* Only allows URLs that start with http(s) or ftp. e.g.,
	* https://www.google.com
	* Protection against JavaScript, VBScript and Data URI JavaScript code execution etc.
	* @author Ashar Javed
	* @Link https://twitter.com/soaj1664ashar
	* @demo http://xssplaygroundforfunandlearn.netai.net/final.html
	*/
	function urlContextCleaner($url) {
		// The pattern had been mangled: "\|" turned the scheme alternation into
		// a literal pipe and the quantifiers after the character classes were
		// lost, so no real URL could match. It is rebuilt as an allow-list:
		// scheme http, https or ftp (case-insensitive), then "//" and one or
		// more characters that are NOT a double quote, single quote,
		// whitespace or backslash. No bare "." wildcard is used, so a quote
		// cannot slip through and terminate the quoted attribute.
		//
		// With the "u" modifier preg_match() returns false for invalid UTF-8,
		// which falls through to the safe default below.
		$pattern = '#^(?:https?|ftp)://[^"\'\s\\\\]+$#iu';

		if (preg_match($pattern, (string)$url, $match) === 1) {
			return $match[0];
		}

		// Anything else is replaced by a harmless no-op URL.
		return 'javascript:void(0)';
	}