Skip to content

Instantly share code, notes, and snippets.

@mukeshpanchal27
Created November 15, 2022 06:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mukeshpanchal27/0d815c737a5c637d06beaf9d6ce93a0b to your computer and use it in GitHub Desktop.
Save mukeshpanchal27/0d815c737a5c637d06beaf9d6ce93a0b to your computer and use it in GitHub Desktop.
Profile a modified version of wp_kses_bad_protocol to see if we can speed it up.

Benchmark Bad Protocol Detection Change - Approach 2

Profile a modified version of wp_kses_bad_protocol to see if we can speed it up.

This is used to benchmark the patch introduced for Trac ticket #22951.

Results of the benchmark are found in a shared Google Sheet.

<?php
// Bootstrapping.
// Define the path constants used to build the include paths below.
define( 'ABSPATH', __DIR__ );
define( 'WPINC', '/wp-includes' );
// Require needed files, in the same order as before: functions, formatting, kses.
foreach ( array( 'functions.php', 'formatting.php', 'kses.php' ) as $core_file ) {
	require_once ABSPATH . WPINC . '/' . $core_file;
}
// Do not leave the loop variable behind in the global scope.
unset( $core_file );
// Mock needed functions.
/**
 * Mock of apply_filters(): no callbacks are run, the value passes through untouched.
 *
 * Any additional arguments supplied by callers are ignored.
 *
 * @param string $filter Name of the filter hook (unused by this mock).
 * @param mixed  $value  The value that would be filtered.
 * @return mixed The unmodified `$value`.
 */
function apply_filters( $filter, $value ) {
	return $value;
}
/**
 * Mock of did_action(): looks the hook up in the global `$wp_actions` array.
 *
 * @param string $hook_name Name of the action hook.
 * @return mixed The value stored under `$hook_name` in `$wp_actions`, or 0 when
 *               no entry is set for that hook.
 */
function did_action( $hook_name ) {
	global $wp_actions;

	return isset( $wp_actions[ $hook_name ] ) ? $wp_actions[ $hook_name ] : 0;
}
// Setup.
// Benchmark size: number of averaged repetitions and URL escapes per repetition.
$repetitions = 10;
$iterations  = 1000;

// Sample URLs fed to both implementations; the profiling loops cycle through this list.
$urls = array(
	// Good.
	'http://example.org/',
	'HTTP://example.org/',
	'http://subsite.example.org/',
	'http&#58;//example.org/',
	'http&#x3A;//example.org/',
	'https://example.org',
	'http://example.org/wp-admin/post.php?post=2&amp;action=edit',
	'http://example.org/index.php?test=&#039;blah&#039;',
	'https://example.org/wp-admin/post.php?post=2&amp;action=edit',
	'http://subsite.example.org/wp-admin/post.php?post=2&amp;action=edit',
	// Bad.
	// 'dummy:alert(1)',
	// 'JaVaScRiPt:alert(1)',
	// 'javascript:alert(1);',
	// '&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29',
	// 'jav ascript:alert(1);',
	// 'jav&#x09;ascript:alert(1);',
	// ' &#14; javascript:alert(1);',
	// 'javascript&#58;javascript:alert(1);',
	// 'javascript&#0000058alert(1)//?:',
	// 'feed:javascript:feed:javascript:feed:javascript:alert(1)',
);

// Cached list length, used as the modulus when picking a URL per iteration.
$stringcount = count( $urls );
/**
 * Returns the current Unix timestamp as a float, including the fractional second.
 *
 * @return float Seconds since the Unix epoch with microsecond resolution.
 */
function microtime_float() {
	// microtime( true ) returns the float directly; no need to split and re-add
	// the "usec sec" string form by hand.
	return microtime( true );
}
// Refactoring functions.
/**
 * Benchmark copy of esc_url() that validates the protocol through
 * new_wp_kses_bad_protocol() instead of the stock implementation.
 *
 * @param string        $url       The URL to be cleaned.
 * @param string[]|null $protocols Optional. Allowed protocols. When this is not an
 *                                 array, wp_allowed_protocols() supplies the list.
 * @param string        $_context  Optional. With 'display', entities are normalized and
 *                                 ampersands and single quotes are encoded. Default 'display'.
 * @return string The cleaned URL after the 'clean_url' filter. An empty string when the
 *                input is empty, when nothing is left after removing disallowed characters,
 *                or when the protocol check changes the URL.
 */
function new_esc_url( $url, $protocols = null, $_context = 'display' ) {
	// Nothing to clean.
	if ( '' === $url ) {
		return $url;
	}

	// Untouched input, handed to the 'clean_url' filter at the end.
	$raw_url = $url;

	// Trim leading whitespace, encode spaces, then drop every character outside the allow-list.
	$url = ltrim( $url );
	$url = str_replace( ' ', '%20', $url );
	$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url );

	if ( '' === $url ) {
		return $url;
	}

	// Encoded CR/LF sequences are removed from everything except mailto: links.
	$is_mailto = ( 0 === stripos( $url, 'mailto:' ) );
	if ( ! $is_mailto ) {
		$url = _deep_replace( array( '%0d', '%0a', '%0D', '%0A' ), $url );
	}

	$url = str_replace( ';//', '://', $url );

	/*
	 * A URL without any colon is presumed to lack a scheme and gets http://
	 * prepended, unless it is a relative link (starting with /, # or ?)
	 * or looks like a PHP file.
	 */
	if ( false === strpos( $url, ':' )
		&& ! in_array( $url[0], array( '/', '#', '?' ), true )
		&& ! preg_match( '/^[a-z0-9-]+?\.php/i', $url )
	) {
		$url = 'http://' . $url;
	}

	// Ampersands and single quotes are only encoded for display.
	if ( 'display' === $_context ) {
		$url = str_replace(
			array( '&amp;', "'" ),
			array( '&#038;', '&#039;' ),
			wp_kses_normalize_entities( $url )
		);
	}

	// Square brackets are percent-encoded everywhere except in the scheme/credentials/host/port prefix.
	$has_bracket = ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url, ']' ) );
	if ( $has_bracket ) {
		$parts  = wp_parse_url( $url );
		$prefix = '';

		if ( isset( $parts['scheme'] ) ) {
			$prefix .= $parts['scheme'] . '://';
		} elseif ( '/' === $url[0] ) {
			$prefix .= '//';
		}

		$has_user = isset( $parts['user'] );
		$has_pass = isset( $parts['pass'] );

		if ( $has_user ) {
			$prefix .= $parts['user'];
		}

		if ( $has_pass ) {
			$prefix .= ':' . $parts['pass'];
		}

		if ( $has_user || $has_pass ) {
			$prefix .= '@';
		}

		if ( isset( $parts['host'] ) ) {
			$prefix .= $parts['host'];
		}

		if ( isset( $parts['port'] ) ) {
			$prefix .= ':' . $parts['port'];
		}

		$tail_raw     = str_replace( $prefix, '', $url );
		$tail_encoded = str_replace( array( '[', ']' ), array( '%5B', '%5D' ), $tail_raw );
		$url          = str_replace( $tail_raw, $tail_encoded, $url );
	}

	// URLs starting with a slash skip the protocol check entirely.
	$good_protocol_url = $url;
	if ( '/' !== $url[0] ) {
		if ( ! is_array( $protocols ) ) {
			$protocols = wp_allowed_protocols();
		}

		$good_protocol_url = new_wp_kses_bad_protocol( $url, $protocols );

		// Any change made by the protocol check (ignoring case) rejects the URL.
		if ( strtolower( $good_protocol_url ) != strtolower( $url ) ) {
			return '';
		}
	}

	/**
	 * Filters a string cleaned and escaped for output as a URL.
	 *
	 * @since 2.3.0
	 *
	 * @param string $good_protocol_url The cleaned URL to be returned.
	 * @param string $original_url      The URL prior to cleaning.
	 * @param string $_context          If 'display', replace ampersands and single quotes only.
	 */
	return apply_filters( 'clean_url', $good_protocol_url, $raw_url, $_context );
}
/**
 * Modified version of wp_kses_bad_protocol() with a fast path for the most
 * common schemes, `https://` and `http://`.
 *
 * When the string starts with one of those schemes and the scheme is in the
 * allow-list, the string is returned straight away (with the scheme lower-cased
 * if needed). Everything else goes through the repeated
 * wp_kses_bad_protocol_once() passes.
 *
 * @param string   $string            Content to check for bad protocols.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @return string The string with a permitted protocol, or an empty string when it
 *                still changes after the maximum number of passes.
 */
function new_wp_kses_bad_protocol( $string, $allowed_protocols ) {
	$string = wp_kses_no_null( $string );

	// Short-circuit if the string starts with `https://` or `http://`. Most common cases.
	if ( in_array( 'https', $allowed_protocols, true ) ) {
		// Check if the string starts with `https` and it is lower case.
		if ( 0 === strpos( $string, 'https://' ) ) {
			return $string;
		}

		if ( 0 === stripos( $string, 'https://' ) ) {
			// The protocol matches but it is not lower case.
			return 'https://' . substr( $string, 8 );
		}
	}

	/*
	 * Checked independently of the `https` test above: as an `elseif`, this branch
	 * was skipped whenever `https` was allowed, so `http://` URLs never took the
	 * fast path.
	 */
	if ( in_array( 'http', $allowed_protocols, true ) ) {
		// Same as above but for `http`.
		if ( 0 === strpos( $string, 'http://' ) ) {
			return $string;
		}

		if ( 0 === stripos( $string, 'http://' ) ) {
			// The protocol matches but it is not lower case.
			return 'http://' . substr( $string, 7 );
		}
	}

	// Slow path: re-run the single-pass check until the string stops changing (at most 6 passes).
	$iterations = 0;
	do {
		$original_string = $string;
		$string          = wp_kses_bad_protocol_once( $string, $allowed_protocols );
	} while ( $original_string != $string && ++ $iterations < 6 );

	// Still changing after the last pass: reject the string.
	if ( $original_string != $string ) {
		return '';
	}

	return $string;
}
// Profiling.
// Per-repetition wall-clock totals for the stock and the modified implementation.
$esc_url_time     = array();
$new_esc_url_time = array();

// Runs $repetitions + 1 rounds: round 0 is a warm-up and is discarded below.
for ( $repetition = 0; $repetition <= $repetitions; $repetition ++ ) {
	// Stock esc_url().
	$time_start = microtime_float();
	for ( $index = 0; $index < $iterations; $index ++ ) {
		$url    = $urls[ $index % $stringcount ];
		$result = esc_url( $url );
	}
	$time_end                    = microtime_float();
	$esc_url_time[ $repetition ] = $time_end - $time_start;

	// Modified new_esc_url(), timed over the same URL sequence.
	$time_start = microtime_float();
	for ( $index = 0; $index < $iterations; $index ++ ) {
		$url    = $urls[ $index % $stringcount ];
		$result = new_esc_url( $url );
	}
	$time_end                        = microtime_float();
	$new_esc_url_time[ $repetition ] = $time_end - $time_start;
}

// Drop first iteration.
unset( $esc_url_time[0], $new_esc_url_time[0] );

$esc_url_time_avg     = array_sum( $esc_url_time ) / $repetitions;
$new_esc_url_time_avg = array_sum( $new_esc_url_time ) / $repetitions;

// Relative change of the new implementation versus the stock one, in percent.
// Rounded after subtracting 100 so the result keeps one clean decimal, and
// guarded against a zero baseline.
$percentage = $esc_url_time_avg > 0
	? round( $new_esc_url_time_avg / $esc_url_time_avg * 100 - 100, 1 )
	: 0.0;

// Values are passed as printf() arguments rather than interpolated with the
// `${var}` syntax, which is deprecated as of PHP 8.2.
printf(
	"Average time spent for %d x %d iterations:\n esc_url() - %s\nnew_esc_url() - %s\n\nChange: %s%%\n",
	$repetitions,
	$iterations,
	$esc_url_time_avg,
	$new_esc_url_time_avg,
	$percentage
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment