Profile a modified version of wp_kses_bad_protocol
to see if we can speed it up.
This is used to benchmark the patch introduced for Trac ticket #22951.
Results of the benchmark are found in a shared Google Sheet.
Profile a modified version of wp_kses_bad_protocol
to see if we can speed it up.
This is used to benchmark the patch introduced for Trac ticket #22951.
Results of the benchmark are found in a shared Google Sheet.
<?php
// Bootstrapping.
//
// The script loads three WordPress include files directly instead of booting
// WordPress. It expects them in a `wp-includes` directory located next to
// this file.

// Provide needed constants.
// ABSPATH is this script's directory (no trailing slash); WPINC carries the
// leading slash, so ABSPATH . WPINC resolves to `<this dir>/wp-includes`.
define( 'ABSPATH', __DIR__ );
define( 'WPINC', '/wp-includes' );

// Require needed files.
require_once ABSPATH . WPINC . '/functions.php';
require_once ABSPATH . WPINC . '/formatting.php';
require_once ABSPATH . WPINC . '/kses.php';
// Mock needed functions.

/**
 * Stand-in for the hooks API: no callbacks are ever run.
 *
 * Any arguments passed after `$value` are accepted and ignored.
 *
 * @param string $filter Name of the filter hook (unused).
 * @param mixed  $value  The value that would be filtered.
 * @return mixed The value, unchanged.
 */
function apply_filters( $filter, $value ) {
	return $value;
}
/**
 * Stand-in for did_action(): looks the hook up in the global `$wp_actions`.
 *
 * @param string $hook_name Name of the action hook.
 * @return mixed The entry stored in `$wp_actions` for the hook, or 0 when the
 *               hook has no entry (including when `$wp_actions` is not set).
 */
function did_action( $hook_name ) {
	global $wp_actions;

	return isset( $wp_actions[ $hook_name ] ) ? $wp_actions[ $hook_name ] : 0;
}
// Setup.

// URLs fed to both implementations. The "Bad" samples are commented out;
// uncomment them to include rejected URLs in the timing.
$urls = [
	// Good.
	'http://example.org/',
	'HTTP://example.org/',
	'http://subsite.example.org/',
	'http://example.org/',
	'http://example.org/',
	'https://example.org',
	'http://example.org/wp-admin/post.php?post=2&action=edit',
	// The quotes are written as entities so the literal is a valid
	// single-quoted string.
	'http://example.org/index.php?test=&#039;blah&#039;',
	'https://example.org/wp-admin/post.php?post=2&action=edit',
	'http://subsite.example.org/wp-admin/post.php?post=2&action=edit',
	// Bad.
	// 'dummy:alert(1)',
	// 'JaVaScRiPt:alert(1)',
	// 'javascript:alert(1);',
	// 'javascript:alert(&#039;XSS&#039;)',
	// 'jav ascript:alert(1);',
	// 'jav	ascript:alert(1);',
	// '  javascript:alert(1);',
	// 'javascript:javascript:alert(1);',
	// 'javascript:alert(1)//?:',
	// 'feed:javascript:feed:javascript:feed:javascript:alert(1)',
];

// Number of timed runs, and number of URL escapes performed per run.
$repetitions = 10;
$iterations  = 1000;
$stringcount = count( $urls );
/**
 * Returns the current Unix timestamp with microsecond resolution.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function microtime_float() {
	// microtime( true ) yields the float directly, with no need to split and
	// re-add the "usec sec" string form.
	return microtime( true );
}
// Refactoring functions.

/**
 * Benchmark variant of esc_url() whose protocol check goes through
 * new_wp_kses_bad_protocol().
 *
 * Cleans a URL: strips disallowed characters, prepends `http://` to
 * scheme-less non-relative URLs, entity-encodes ampersands and single quotes
 * for display, percent-encodes square brackets outside the authority part,
 * and rejects URLs whose protocol is not allowed.
 *
 * @param string        $url       The URL to be cleaned.
 * @param string[]|null $protocols Optional. Allowed protocols. Anything that
 *                                 is not an array falls back to
 *                                 wp_allowed_protocols().
 * @param string        $_context  Optional. 'display' (default) additionally
 *                                 encodes ampersands and single quotes.
 * @return string The cleaned URL after the 'clean_url' filter is applied. An
 *                empty string if `$url` is empty, becomes empty after
 *                stripping, or uses a protocol that is not allowed.
 */
function new_esc_url( $url, $protocols = null, $_context = 'display' ) {
	$original_url = $url;

	if ( '' === $url ) {
		return $url;
	}

	$url = str_replace( ' ', '%20', ltrim( $url ) );
	$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url );

	if ( '' === $url ) {
		return $url;
	}

	// Encoded CR/LF sequences are removed from everything except mailto: links.
	if ( 0 !== stripos( $url, 'mailto:' ) ) {
		$strip = array( '%0d', '%0a', '%0D', '%0A' );
		$url   = _deep_replace( $strip, $url );
	}

	$url = str_replace( ';//', '://', $url );

	/*
	 * If the URL doesn't appear to contain a scheme, we presume
	 * it needs http:// prepended (unless it's a relative link
	 * starting with /, # or ?, or a PHP file).
	 */
	if ( strpos( $url, ':' ) === false && ! in_array( $url[0], array( '/', '#', '?' ), true ) &&
		! preg_match( '/^[a-z0-9-]+?\.php/i', $url ) ) {
		$url = 'http://' . $url;
	}

	// Replace ampersands and single quotes only when displaying.
	if ( 'display' === $_context ) {
		$url = wp_kses_normalize_entities( $url );
		// Both replacements must target the entity forms: a bare '&' => '&'
		// swap does nothing, and an unescaped ''' is a parse error.
		$url = str_replace( '&amp;', '&#038;', $url );
		$url = str_replace( "'", '&#039;', $url );
	}

	if ( ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url, ']' ) ) ) {
		$parsed = wp_parse_url( $url );
		$front  = '';

		// Rebuild scheme + credentials + host + port so that brackets there
		// (e.g. IPv6 hosts) are left alone and only the rest is encoded.
		if ( isset( $parsed['scheme'] ) ) {
			$front .= $parsed['scheme'] . '://';
		} elseif ( '/' === $url[0] ) {
			$front .= '//';
		}

		if ( isset( $parsed['user'] ) ) {
			$front .= $parsed['user'];
		}

		if ( isset( $parsed['pass'] ) ) {
			$front .= ':' . $parsed['pass'];
		}

		if ( isset( $parsed['user'] ) || isset( $parsed['pass'] ) ) {
			$front .= '@';
		}

		if ( isset( $parsed['host'] ) ) {
			$front .= $parsed['host'];
		}

		if ( isset( $parsed['port'] ) ) {
			$front .= ':' . $parsed['port'];
		}

		$end_dirty = str_replace( $front, '', $url );
		$end_clean = str_replace( array( '[', ']' ), array( '%5B', '%5D' ), $end_dirty );
		$url       = str_replace( $end_dirty, $end_clean, $url );
	}

	if ( '/' === $url[0] ) {
		// Relative URLs carry no protocol to validate.
		$good_protocol_url = $url;
	} else {
		if ( ! is_array( $protocols ) ) {
			$protocols = wp_allowed_protocols();
		}

		$good_protocol_url = new_wp_kses_bad_protocol( $url, $protocols );

		// Any change beyond letter case means a protocol was stripped: reject.
		if ( strtolower( $good_protocol_url ) !== strtolower( $url ) ) {
			return '';
		}
	}

	/**
	 * Filters a string cleaned and escaped for output as a URL.
	 *
	 * @since 2.3.0
	 *
	 * @param string $good_protocol_url The cleaned URL to be returned.
	 * @param string $original_url      The URL prior to cleaning.
	 * @param string $_context          If 'display', replace ampersands and single quotes only.
	 */
	return apply_filters( 'clean_url', $good_protocol_url, $original_url, $_context );
}
/**
 * Variant of wp_kses_bad_protocol() with a fast path for `https://` and
 * `http://` URLs.
 *
 * When the string starts with an allowed `https://` or `http://` prefix (in
 * any letter case) it is returned immediately with the prefix lower-cased,
 * skipping the repeated wp_kses_bad_protocol_once() passes.
 *
 * @param string   $string            Content to check for bad protocols.
 * @param string[] $allowed_protocols Allowed protocol names, e.g. 'http'.
 * @return string The checked string, or an empty string if it still changes
 *                after six wp_kses_bad_protocol_once() passes.
 */
function new_wp_kses_bad_protocol( $string, $allowed_protocols ) {
	$string = wp_kses_no_null( $string );

	// Short-circuit the most common cases. Each scheme is tested on its own:
	// chaining them with elseif would skip `http` whenever `https` is allowed.
	foreach ( array( 'https', 'http' ) as $scheme ) {
		if ( ! in_array( $scheme, $allowed_protocols, true ) ) {
			continue;
		}

		$prefix = $scheme . '://';

		// Already lower case: nothing to rewrite.
		if ( 0 === strpos( $string, $prefix ) ) {
			return $string;
		}

		// The protocol matches but it is not lower case.
		if ( 0 === stripos( $string, $prefix ) ) {
			return $prefix . substr( $string, strlen( $prefix ) );
		}
	}

	// Slow path: strip bad protocols repeatedly until the string is stable.
	$iterations = 0;

	do {
		$original_string = $string;
		$string          = wp_kses_bad_protocol_once( $string, $allowed_protocols );
	} while ( $original_string !== $string && ++$iterations < 6 );

	// Still changing after the pass limit: treat the input as hostile.
	if ( $original_string !== $string ) {
		return '';
	}

	return $string;
}
// Profiling.

// Per-repetition wall-clock durations, keyed by repetition index.
$esc_url_time     = array();
$new_esc_url_time = array();

// Runs $repetitions + 1 times: repetition 0 is discarded below, which leaves
// exactly $repetitions timed runs to average.
for ( $repetition = 0; $repetition <= $repetitions; $repetition++ ) {
	// Time the stock implementation.
	$time_start = microtime_float();
	for ( $index = 0; $index < $iterations; $index++ ) {
		$url    = $urls[ $index % $stringcount ];
		$result = esc_url( $url );
	}
	$time_end = microtime_float();

	$esc_url_time[ $repetition ] = $time_end - $time_start;

	// Time the patched implementation over the same URL sequence.
	$time_start = microtime_float();
	for ( $index = 0; $index < $iterations; $index++ ) {
		$url    = $urls[ $index % $stringcount ];
		$result = new_esc_url( $url );
	}
	$time_end = microtime_float();

	$new_esc_url_time[ $repetition ] = $time_end - $time_start;
}

// Drop first iteration.
unset( $esc_url_time[0], $new_esc_url_time[0] );

$esc_url_time_avg     = array_sum( $esc_url_time ) / $repetitions;
$new_esc_url_time_avg = array_sum( $new_esc_url_time ) / $repetitions;

// Round last: subtracting 100 from an already-rounded value can reintroduce
// floating-point noise (e.g. -6.599999999999994 instead of -6.6).
$percentage = round( ( $new_esc_url_time_avg / $esc_url_time_avg - 1 ) * 100, 1 );

// Values are passed as printf() arguments rather than interpolated with the
// "${var}" form, which is deprecated as of PHP 8.2.
printf(
	"Average time spent for %d x %d iterations:\n esc_url() - %s\nnew_esc_url() - %s\n\nChange: %s%%\n",
	$repetitions,
	$iterations,
	$esc_url_time_avg,
	$new_esc_url_time_avg,
	$percentage
);