Profile a modified version of wp_kses_bad_protocol
to see if we can speed it up.
This is used to benchmark the patch introduced for Trac ticket #22951.
Results of the benchmark are found in a shared Google Sheet.
<?php
/**
 * Benchmark bootstrap.
 *
 * Defines the two constants the WordPress include files rely on and loads
 * the three core files this script needs. WordPress itself is not booted.
 */

// Bootstrapping.

// Provide needed constants.
// realpath() returns false when the directory does not exist; stop here with
// a clear message instead of defining ABSPATH as false and failing later
// inside require_once with a misleading path.
$build_dir = realpath( __DIR__ . '/build/' );
if ( false === $build_dir ) {
	throw new RuntimeException( 'Build directory not found: ' . __DIR__ . '/build/' );
}

// ABSPATH has no trailing slash here, so WPINC carries the leading one.
define( 'ABSPATH', $build_dir );
define( 'WPINC', '/wp-includes' );

// Require needed files.
require_once ABSPATH . WPINC . '/functions.php';
require_once ABSPATH . WPINC . '/formatting.php';
require_once ABSPATH . WPINC . '/kses.php';
// Mock needed functions.

/**
 * Minimal stand-in for the hook API: returns the value untouched.
 *
 * Declared with only two parameters; any additional arguments passed by
 * callers are accepted by PHP and ignored.
 *
 * @param string $filter Name of the filter hook (unused).
 * @param mixed  $value  Value that would normally be filtered.
 * @return mixed The unmodified $value.
 */
function apply_filters( $filter, $value ) {
	return $value;
}
// Setup.

// Sample inputs fed to both implementations. Only the "good" URLs are
// active; the "bad" ones are kept commented out so they can be toggled on.
$urls = [
	// Good.
	'http://example.org/',
	'HTTP://example.org/',
	'http://subsite.example.org/',
	'http://example.org/',
	'http://example.org/',
	'https://example.org',
	'http://example.org/wp-admin/post.php?post=2&action=edit',
	// NOTE(review): the quote entities below were reconstructed; the extracted
	// text had bare single quotes here, which is not valid PHP.
	'http://example.org/index.php?test=&#039;blah&#039;',
	'https://example.org/wp-admin/post.php?post=2&action=edit',
	'http://subsite.example.org/wp-admin/post.php?post=2&action=edit',
	// Bad.
	// 'dummy:alert(1)',
	// 'JaVaScRiPt:alert(1)',
	// 'javascript:alert(1);',
	// 'javascript:alert('XSS')',
	// 'jav ascript:alert(1);',
	// 'jav	ascript:alert(1);',
	// '  javascript:alert(1);',
	// 'javascript:javascript:alert(1);',
	// 'javascript:alert(1)//?:',
	// 'feed:javascript:feed:javascript:feed:javascript:alert(1)',
];

// Number of timed repetitions that are averaged (one extra warm-up run is
// executed and discarded by the profiling loop).
$repetitions = 10;
// Number of calls made to each implementation per repetition.
$iterations = 1000;
// Cached size of the sample set, used to cycle through $urls by modulo.
$stringcount = count( $urls );
/**
 * Returns the current Unix timestamp with microseconds as a float.
 *
 * Uses the built-in float mode of microtime() instead of parsing its
 * "usec sec" string form by hand.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function microtime_float() {
	return microtime( true );
}
// Refactoring functions.

/**
 * Benchmark copy of esc_url() that routes protocol checking through
 * new_wp_kses_bad_protocol().
 *
 * Steps, in order: encode spaces, strip characters outside the allowed set,
 * remove encoded CR/LF (except for mailto: links), repair ';//' typos,
 * prepend 'http://' to scheme-less non-relative URLs, entity-encode for
 * display, percent-encode square brackets outside the authority part, and
 * finally reject the URL if protocol filtering changes it.
 *
 * @param string     $url       The URL to be cleaned.
 * @param array|null $protocols Optional. Allowed protocols; when not an array,
 *                              wp_allowed_protocols() is used.
 * @param string     $_context  Optional. 'display' additionally entity-encodes
 *                              ampersands and single quotes. Default 'display'.
 * @return string The cleaned URL passed through the 'clean_url' filter, or an
 *                empty string when the URL is empty after stripping or is
 *                changed by protocol filtering.
 */
function new_esc_url( $url, $protocols = null, $_context = 'display' ) {
	$original_url = $url;

	// Loose comparison is intentional: it mirrors the function being benchmarked.
	if ( '' == $url ) {
		return $url;
	}

	$url = str_replace( ' ', '%20', $url );
	$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url );

	// Nothing left after stripping disallowed characters.
	if ( '' === $url ) {
		return $url;
	}

	// Encoded line breaks are only tolerated in mailto: links.
	if ( 0 !== stripos( $url, 'mailto:' ) ) {
		$strip = array( '%0d', '%0a', '%0D', '%0A' );
		$url   = _deep_replace( $strip, $url );
	}

	$url = str_replace( ';//', '://', $url );

	/* If the URL doesn't appear to contain a scheme, we
	 * presume it needs http:// prepended (unless a relative
	 * link starting with /, # or ? or a php file).
	 */
	if ( strpos( $url, ':' ) === false
		&& ! in_array( $url[0], array( '/', '#', '?' ) )
		&& ! preg_match( '/^[a-z0-9-]+?\.php/i', $url )
	) {
		$url = 'http://' . $url;
	}

	// Replace ampersands and single quotes only when displaying.
	if ( 'display' == $_context ) {
		$url = wp_kses_normalize_entities( $url );
		$url = str_replace( '&amp;', '&#038;', $url );
		$url = str_replace( "'", '&#039;', $url );
	}

	// Square brackets are only left as-is in the scheme/userinfo/host/port
	// prefix; everywhere after it they are percent-encoded.
	if ( ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url, ']' ) ) ) {
		$parsed = wp_parse_url( $url );
		$front  = '';

		if ( isset( $parsed['scheme'] ) ) {
			$front .= $parsed['scheme'] . '://';
		} elseif ( '/' === $url[0] ) {
			$front .= '//';
		}

		if ( isset( $parsed['user'] ) ) {
			$front .= $parsed['user'];
		}

		if ( isset( $parsed['pass'] ) ) {
			$front .= ':' . $parsed['pass'];
		}

		if ( isset( $parsed['user'] ) || isset( $parsed['pass'] ) ) {
			$front .= '@';
		}

		if ( isset( $parsed['host'] ) ) {
			$front .= $parsed['host'];
		}

		if ( isset( $parsed['port'] ) ) {
			$front .= ':' . $parsed['port'];
		}

		$end_dirty = str_replace( $front, '', $url );
		$end_clean = str_replace( array( '[', ']' ), array( '%5B', '%5D' ), $end_dirty );
		$url       = str_replace( $end_dirty, $end_clean, $url );
	}

	if ( '/' === $url[0] ) {
		// Relative URLs carry no protocol, so there is nothing to check.
		$good_protocol_url = $url;
	} else {
		if ( ! is_array( $protocols ) ) {
			$protocols = wp_allowed_protocols();
		}

		$good_protocol_url = new_wp_kses_bad_protocol( $url, $protocols );

		// Any change made by protocol filtering means the URL is rejected.
		if ( strtolower( $good_protocol_url ) != strtolower( $url ) ) {
			return '';
		}
	}

	return apply_filters( 'clean_url', $good_protocol_url, $original_url, $_context );
}
/**
 * Variant of wp_kses_bad_protocol() with a regex fast path for ordinary
 * http/https URLs.
 *
 * When the string matches the "normal URL" pattern and its scheme is in
 * $allowed_protocols, it is returned immediately. Everything else goes
 * through the iterative wp_kses_bad_protocol_once() filtering.
 *
 * @param string   $string            Content to filter bad protocols from.
 * @param string[] $allowed_protocols Allowed protocols to keep.
 * @return string The filtered content, or an empty string if it still changes
 *                after the maximum number of filtering passes.
 */
function new_wp_kses_bad_protocol( $string, $allowed_protocols ) {
	// Try to detect normal URL early.
	$regex = '_^(?:(?:https?)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS';

	if ( 1 === preg_match( $regex, $string ) ) {
		// The pattern only proves the URL is http(s); the caller may not allow
		// those schemes, so the shortcut is taken only when the scheme is allowed.
		$scheme  = strtolower( substr( $string, 0, strpos( $string, ':' ) ) );
		$allowed = array_map( 'strtolower', (array) $allowed_protocols );

		if ( in_array( $scheme, $allowed, true ) ) {
			return $string;
		}
	}

	$string     = wp_kses_no_null( $string );
	$iterations = 0;

	// Re-run the single-pass filter until the string stops changing, capped
	// at six passes.
	do {
		$original_string = $string;
		$string          = wp_kses_bad_protocol_once( $string, $allowed_protocols );
	} while ( $original_string != $string && ++ $iterations < 6 );

	// Still changing after the cap: treat the input as hostile.
	if ( $original_string != $string ) {
		return '';
	}

	return $string;
}
// Profiling.

// Wall-clock duration of each repetition, keyed by repetition index.
$esc_url_time     = [];
$new_esc_url_time = [];

// Runs $repetitions + 1 times: repetition 0 is a warm-up that is discarded below.
for ( $repetition = 0; $repetition <= $repetitions; $repetition ++ ) {
	// Time the stock implementation.
	$time_start = microtime_float();
	for ( $index = 0; $index < $iterations; $index ++ ) {
		$url    = $urls[ $index % $stringcount ];
		$result = esc_url( $url );
	}
	$time_end                    = microtime_float();
	$esc_url_time[ $repetition ] = $time_end - $time_start;

	// Time the patched implementation on the same inputs.
	$time_start = microtime_float();
	for ( $index = 0; $index < $iterations; $index ++ ) {
		$url    = $urls[ $index % $stringcount ];
		$result = new_esc_url( $url );
	}
	$time_end                        = microtime_float();
	$new_esc_url_time[ $repetition ] = $time_end - $time_start;
}

// Drop first iteration.
unset( $esc_url_time[0], $new_esc_url_time[0] );

// Average over the repetitions that were actually kept.
$esc_url_time_avg     = array_sum( $esc_url_time ) / count( $esc_url_time );
$new_esc_url_time_avg = array_sum( $new_esc_url_time ) / count( $new_esc_url_time );

// Relative change of the new implementation versus the old one, in percent
// (negative means the new implementation is faster).
$percentage = round( $new_esc_url_time_avg / $esc_url_time_avg * 100, 1 ) - 100;

// Placeholders are used instead of "${var}" interpolation, which is
// deprecated as of PHP 8.2. %s keeps PHP's default float formatting.
printf(
	"Average time spent for %s x %s iterations:\n esc_url() - %s\nnew_esc_url() - %s\n\nChange: %s%%\n",
	$repetitions,
	$iterations,
	$esc_url_time_avg,
	$new_esc_url_time_avg,
	$percentage
);