Last active
November 25, 2024 10:17
-
-
Save PatelUtkarsh/d4a3b69acf0d665e0361588749413286 to your computer and use it in GitHub Desktop.
Benchmarking HTML Parsing: PHP 8.4 vs WP_HTML_Tag_Processor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * DOM operations benchmark.
 *
 * Compares the PHP 8.4 DOM API (\Dom\HTMLDocument) with WordPress's
 * WP_HTML_Tag_Processor on the same HTML lookup.
 */
/**
 * Times the same lookup ("does the last direct <article> child of <main>
 * carry the class `special`?") with two HTML parsers and prints a comparison.
 *
 * Results are collected per implementation under the labels `PHP84_DOM` and
 * `WP_DOM`; a failed implementation stores an `error` entry instead of a timing.
 */
class DOMBenchmark {
    /**
     * Markup parsed on every iteration. The heredoc's closing marker is
     * indented, so the leading indentation below is not part of the string.
     */
    private const SAMPLE_HTML = <<<HTML
    <main>
    <article>First Article</article>
    <article class="featured">Second Article</article>
    <article class="featured special">Third Article</article>
    <div class="container">
    <article class="nested featured">Nested Article</article>
    </div>
    </main>
    HTML;

    /** Number of parse-and-query rounds each implementation performs. */
    private const ITERATIONS = 100000;

    /**
     * Outcome per implementation, keyed by label.
     *
     * @var array<string, array{time?: float, iterations?: int, success?: array<int, bool|null>, error?: string}>
     */
    private array $results = [];

    /**
     * PHP 8.4 DOM implementation.
     *
     * Parses SAMPLE_HTML with \Dom\HTMLDocument and uses a CSS selector to
     * find the last direct <article> child of <main>. Stores either a timing
     * or an error message in $this->results['PHP84_DOM'].
     *
     * @return void
     */
    public function benchmarkPHP84DOM(): void {
        // A missing class raises \Error, which `catch ( Exception )` never
        // sees, so detect an unsupported runtime up front.
        if ( ! class_exists( \Dom\HTMLDocument::class ) ) {
            $this->results['PHP84_DOM'] = [
                'error' => 'Dom\HTMLDocument is not available on PHP ' . PHP_VERSION,
            ];

            return;
        }

        $successEl = [];
        $startTime = microtime( true );

        for ( $i = 0; $i < self::ITERATIONS; $i++ ) {
            try {
                $dom = \Dom\HTMLDocument::createFromString(
                    self::SAMPLE_HTML,
                    LIBXML_NOERROR
                );

                $lastArticle = $dom->querySelector( 'main > article:last-of-type' );
                // null when the selector matched nothing; displayResults() treats
                // anything other than true as a failed check.
                $successEl[] = $lastArticle?->classList->contains( 'special' );
            } catch ( \Throwable $e ) {
                // Record the failure (exceptions and engine errors alike) rather
                // than aborting the whole run.
                $this->results['PHP84_DOM'] = [ 'error' => $e->getMessage() ];

                return;
            }
        }

        $endTime = microtime( true );

        $this->results['PHP84_DOM'] = [
            'time'       => ( $endTime - $startTime ),
            'iterations' => self::ITERATIONS,
            'success'    => $successEl,
        ];
    }

    /**
     * WP_HTML_Tag_Processor implementation.
     *
     * Walks every <article> tag, bookmarks the most recent one that lacks the
     * `nested` class (the stand-in for "direct child of <main>" in this sample),
     * then seeks back to it and checks for the `special` class. Stores either a
     * timing or an error message in $this->results['WP_DOM'].
     *
     * @return void
     */
    public function benchmarkWordPressDOM(): void {
        // Load dependencies before the clock starts so file I/O and compilation
        // are not charged to the parser being measured.
        if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) {
            $dependencies = [
                'class-wp-html-tag-processor.php',
                'class-wp-html-attribute-token.php',
                'class-wp-html-decoder.php',
                'class-wp-html-span.php',
            ];

            foreach ( $dependencies as $file ) {
                $path = __DIR__ . '/' . $file;

                // A failed require_once is an uncatchable fatal error; report
                // the missing file the same way the DOM benchmark reports errors.
                if ( ! is_readable( $path ) ) {
                    $this->results['WP_DOM'] = [ 'error' => "Required file not found: $path" ];

                    return;
                }

                require_once $path;
            }
        }

        $success   = [];
        $bookmark  = 'maybe-last-article';
        $startTime = microtime( true );

        for ( $i = 0; $i < self::ITERATIONS; $i++ ) {
            $processor = new WP_HTML_Tag_Processor( self::SAMPLE_HTML );

            while ( $processor->next_tag( 'article' ) ) {
                // Without using special class.
                if ( ! $processor->has_class( 'nested' ) ) {
                    if ( $processor->has_bookmark( $bookmark ) ) {
                        $processor->release_bookmark( $bookmark );
                    }
                    $processor->set_bookmark( $bookmark );
                }
            }

            // Only trust has_class() when the seek actually landed on the bookmark.
            $success[] = $processor->seek( $bookmark ) && $processor->has_class( 'special' );
        }

        $endTime = microtime( true );

        $this->results['WP_DOM'] = [
            'time'       => ( $endTime - $startTime ),
            'iterations' => self::ITERATIONS,
            'success'    => $success,
        ];
    }

    /**
     * Run both benchmarks in sequence and print the report.
     *
     * @return void
     */
    public function run(): void {
        echo "Starting DOM Operations Benchmark...\n";

        echo "Running PHP 8.4 DOM benchmark...\n";
        $this->benchmarkPHP84DOM();

        echo "Running WordPress DOM benchmark...\n";
        $this->benchmarkWordPressDOM();

        $this->displayResults();
    }

    /**
     * Print per-implementation timings followed by a relative comparison.
     *
     * The comparison line is only printed when both implementations produced
     * a timing; the percentage is the time saved relative to the slower one.
     *
     * @return void
     */
    private function displayResults(): void {
        echo "\nBenchmark Results:\n";
        echo str_repeat( '-', 50 ) . "\n";

        foreach ( $this->results as $type => $data ) {
            if ( isset( $data['error'] ) ) {
                echo "$type: Error - {$data['error']}\n";
                continue;
            }

            // Every iteration must have answered true for the run to count.
            if ( array_unique( $data['success'] ) !== [ true ] ) {
                echo "$type: All featured articles are false\n";
                continue;
            }

            $timePerOperation = ( $data['time'] / $data['iterations'] ) * 1000; // Convert to milliseconds

            echo "$type:\n";
            echo 'Total Time: ' . number_format( $data['time'], 4 ) . " seconds\n";
            echo "Iterations: {$data['iterations']}\n";
            echo 'Average Time per Operation: ' . number_format( $timePerOperation, 4 ) . " ms\n";
            echo str_repeat( '-', 50 ) . "\n";
        }

        $php84Time = $this->results['PHP84_DOM']['time'] ?? null;
        $wpTime    = $this->results['WP_DOM']['time'] ?? null;

        // A benchmark that errored out has no timing; comparing against it would
        // crown the failed implementation the winner.
        if ( null === $php84Time || null === $wpTime ) {
            echo "Comparison skipped: at least one benchmark did not produce a timing.\n";

            return;
        }

        $slowerTime = max( $php84Time, $wpTime );
        if ( $slowerTime <= 0 ) {
            echo "Comparison skipped: timings are too small to compare.\n";

            return;
        }

        // Measure the gap against the slower run so the figure is never negative.
        $percentFaster = number_format( ( abs( $wpTime - $php84Time ) / $slowerTime ) * 100, 2 );
        $faster        = $php84Time < $wpTime ? 'PHP 8.4 DOM' : 'WordPress DOM';

        echo "$faster is faster by ~$percentFaster%\n";
    }
}
// Script entry point: build the harness and execute both measurements.
( new DOMBenchmark() )->run();
Author
PatelUtkarsh
commented
Nov 25, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment