Skip to content

Instantly share code, notes, and snippets.

@schlessera
Created October 18, 2016 15:21
Show Gist options
  • Save schlessera/d4dd4db71d59dfee6ee6c5b9a0ca8a33 to your computer and use it in GitHub Desktop.
Save schlessera/d4dd4db71d59dfee6ee6c5b9a0ca8a33 to your computer and use it in GitHub Desktop.
Profile a modified version of wp_kses_bad_protocol to see if we can speed it up.

Benchmark Bad Protocol Detection Change

Profile a modified version of wp_kses_bad_protocol to see if we can speed it up.

This script benchmarks the patch proposed for Trac ticket #22951.

Results of the benchmark are found in a shared Google Sheet.

<?php
// Bootstrapping.
// Resolve the WordPress build directory up front: realpath() returns false
// when the directory does not exist, which would otherwise surface later as
// a confusing "failed to open /wp-includes/functions.php" error.
$build_dir = realpath( __DIR__ . '/build/' );
if ( false === $build_dir ) {
    throw new RuntimeException( 'WordPress build directory not found: ' . __DIR__ . '/build/' );
}
// Provide needed constants.
// realpath() strips the trailing slash, so WPINC carries the leading slash.
define( 'ABSPATH', $build_dir );
define( 'WPINC', '/wp-includes' );
// Require needed files.
require_once ABSPATH . WPINC . '/functions.php';
require_once ABSPATH . WPINC . '/formatting.php';
require_once ABSPATH . WPINC . '/kses.php';
// Mock needed functions.
/**
 * Benchmark stand-in for the filter hook: runs no callbacks.
 *
 * @param string $filter Name of the filter (unused).
 * @param mixed  $value  Value that would be filtered.
 * @return mixed The value, exactly as it was passed in.
 */
function apply_filters( $filter, $value ) {
    // Pass-through; any additional arguments supplied by callers are ignored.
    return $value;
}
// Setup: benchmark fixtures and loop sizes.
$urls = [
    // Good URLs.
    'http://example.org/',
    'HTTP://example.org/',
    'http://subsite.example.org/',
    'http&#58;//example.org/',
    'http&#x3A;//example.org/',
    'https://example.org',
    'http://example.org/wp-admin/post.php?post=2&amp;action=edit',
    'http://example.org/index.php?test=&#039;blah&#039;',
    'https://example.org/wp-admin/post.php?post=2&amp;action=edit',
    'http://subsite.example.org/wp-admin/post.php?post=2&amp;action=edit',
    // Bad URLs (currently disabled; uncomment to include them in the run).
    // 'dummy:alert(1)',
    // 'JaVaScRiPt:alert(1)',
    // 'javascript:alert(1);',
    // '&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29',
    // 'jav ascript:alert(1);',
    // 'jav&#x09;ascript:alert(1);',
    // ' &#14; javascript:alert(1);',
    // 'javascript&#58;javascript:alert(1);',
    // 'javascript&#0000058alert(1)//?:',
    // 'feed:javascript:feed:javascript:feed:javascript:alert(1)',
];
// Number of fixture URLs; the timing loops cycle through them by modulo.
$stringcount = count( $urls );
// Calls per timed run.
$iterations = 1000;
// Number of timed runs that are kept.
$repetitions = 10;
/**
 * Current Unix timestamp with microsecond resolution.
 *
 * Uses microtime( true ), which returns the float directly, instead of
 * splitting and re-adding the "usec sec" string form.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function microtime_float() {
    return microtime( true );
}
// Refactoring functions.
/**
 * Benchmark variant of URL escaping that routes the protocol check through
 * new_wp_kses_bad_protocol().
 *
 * The statement order below is significant: each step operates on the
 * output of the previous one.
 *
 * @param string     $url       URL to clean.
 * @param array|null $protocols Allowed protocols. When this is not an array,
 *                              wp_allowed_protocols() is used instead.
 * @param string     $_context  'display' additionally normalizes entities and
 *                              encodes ampersands and single quotes.
 * @return string The cleaned URL passed through the 'clean_url' filter, or an
 *                empty string when the URL is empty, becomes empty after
 *                stripping, or fails the protocol check.
 */
function new_esc_url( $url, $protocols = null, $_context = 'display' ) {
// Keep the untouched input so it can be handed to the 'clean_url' filter.
$original_url = $url;
if ( '' == $url ) {
return $url;
}
$url = str_replace( ' ', '%20', $url );
// Drop every character that is not in the allowed set.
$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i',
'', $url );
if ( '' === $url ) {
return $url;
}
// Only URLs that do not start with "mailto:" get encoded CR/LF stripped.
// NOTE(review): _deep_replace() comes from the required WordPress files;
// presumably it removes the sequences repeatedly — confirm.
if ( 0 !== stripos( $url, 'mailto:' ) ) {
$strip = array( '%0d', '%0a', '%0D', '%0A' );
$url = _deep_replace( $strip, $url );
}
// Turn ";//" into "://".
$url = str_replace( ';//', '://', $url );
/* If the URL doesn't appear to contain a scheme, we
* presume it needs http:// prepended (unless a relative
* link starting with /, # or ? or a php file).
*/
if ( strpos( $url, ':' ) === false && ! in_array( $url[0],
array( '/', '#', '?' ) ) &&
! preg_match( '/^[a-z0-9-]+?\.php/i', $url )
) {
$url = 'http://' . $url;
}
// Replace ampersands and single quotes only when displaying.
if ( 'display' == $_context ) {
$url = wp_kses_normalize_entities( $url );
$url = str_replace( '&amp;', '&#038;', $url );
$url = str_replace( "'", '&#039;', $url );
}
// URLs containing square brackets: rebuild the scheme/user/pass/host/port
// prefix from the parsed parts, then percent-encode the brackets in
// whatever remains after that prefix is removed.
if ( ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url,
']' ) )
) {
$parsed = wp_parse_url( $url );
$front = '';
if ( isset( $parsed['scheme'] ) ) {
$front .= $parsed['scheme'] . '://';
} elseif ( '/' === $url[0] ) {
$front .= '//';
}
if ( isset( $parsed['user'] ) ) {
$front .= $parsed['user'];
}
if ( isset( $parsed['pass'] ) ) {
$front .= ':' . $parsed['pass'];
}
if ( isset( $parsed['user'] ) || isset( $parsed['pass'] ) ) {
$front .= '@';
}
if ( isset( $parsed['host'] ) ) {
$front .= $parsed['host'];
}
if ( isset( $parsed['port'] ) ) {
$front .= ':' . $parsed['port'];
}
$end_dirty = str_replace( $front, '', $url );
$end_clean = str_replace( array( '[', ']' ), array( '%5B', '%5D' ),
$end_dirty );
$url = str_replace( $end_dirty, $end_clean, $url );
}
// URLs starting with "/" skip the protocol check entirely.
if ( '/' === $url[0] ) {
$good_protocol_url = $url;
} else {
if ( ! is_array( $protocols ) ) {
$protocols = wp_allowed_protocols();
}
$good_protocol_url = new_wp_kses_bad_protocol( $url, $protocols );
// Reject the URL if the protocol check changed it (compared case-insensitively).
if ( strtolower( $good_protocol_url ) != strtolower( $url ) ) {
return '';
}
}
return apply_filters( 'clean_url', $good_protocol_url, $original_url,
$_context );
}
/**
 * Protocol sanitizer with a fast path for ordinary http/https URLs.
 *
 * A string that matches the URL regex and whose scheme is present in
 * $allowed_protocols is returned immediately. Everything else goes through
 * wp_kses_no_null() and up to six passes of wp_kses_bad_protocol_once().
 *
 * @param string   $string            Content to check.
 * @param string[] $allowed_protocols Protocols the caller accepts.
 * @return string The input when it takes the fast path; otherwise the result
 *                of the repeated protocol passes, or an empty string if the
 *                last pass still changed the string.
 */
function new_wp_kses_bad_protocol( $string, $allowed_protocols ) {
    // Try to detect normal URL early.
    $regex = '_^(?:(?:https?)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS';
    if ( 1 === preg_match( $regex, $string ) ) {
        // The regex only proves the scheme is http or https; the caller's
        // list decides whether that scheme is actually acceptable. A match
        // guarantees the first ':' terminates the scheme.
        $scheme = strtolower( substr( $string, 0, strpos( $string, ':' ) ) );
        foreach ( (array) $allowed_protocols as $protocol ) {
            if ( strtolower( $protocol ) === $scheme ) {
                return $string;
            }
        }
        // Scheme not allowed: fall through to the regular slow path.
    }

    $string     = wp_kses_no_null( $string );
    $iterations = 0;

    // Re-run the single-pass check until the string is stable, capped at
    // six passes.
    do {
        $original_string = $string;
        $string          = wp_kses_bad_protocol_once( $string, $allowed_protocols );
    } while ( $original_string != $string && ++ $iterations < 6 );

    // Still changing after the cap: treat the input as bad.
    if ( $original_string != $string ) {
        return '';
    }

    return $string;
}
// Profiling.
// Per-repetition wall-clock timings, keyed by repetition number.
$esc_url_time     = [];
$new_esc_url_time = [];
// Repetition 0 is an extra warm-up run that is discarded below, hence "<=".
for ( $repetition = 0; $repetition <= $repetitions; $repetition ++ ) {
    // Time the existing esc_url().
    $time_start = microtime_float();
    for ( $index = 0; $index < $iterations; $index ++ ) {
        $url    = $urls[ $index % $stringcount ];
        $result = esc_url( $url );
    }
    $time_end                    = microtime_float();
    $esc_url_time[ $repetition ] = $time_end - $time_start;

    // Time the modified new_esc_url() on the same inputs.
    $time_start = microtime_float();
    for ( $index = 0; $index < $iterations; $index ++ ) {
        $url    = $urls[ $index % $stringcount ];
        $result = new_esc_url( $url );
    }
    $time_end                        = microtime_float();
    $new_esc_url_time[ $repetition ] = $time_end - $time_start;
}
// Drop first iteration.
unset( $esc_url_time[0], $new_esc_url_time[0] );
// Average over the samples that remain after the warm-up is removed.
$esc_url_time_avg     = array_sum( $esc_url_time ) / count( $esc_url_time );
$new_esc_url_time_avg = array_sum( $new_esc_url_time ) / count( $new_esc_url_time );
// Relative change of the new implementation versus the old one, in percent.
$percentage = round( $new_esc_url_time_avg / $esc_url_time_avg * 100, 1 ) - 100;
// Values are passed as printf arguments rather than interpolated with the
// "${var}" form, which is deprecated as of PHP 8.2.
printf(
    "Average time spent for %s x %s iterations:\n esc_url() - %s\nnew_esc_url() - %s\n\nChange: %s%%\n",
    $repetitions,
    $iterations,
    $esc_url_time_avg,
    $new_esc_url_time_avg,
    $percentage
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment