Skip to content

Instantly share code, notes, and snippets.

@PatelUtkarsh
Last active November 25, 2024 10:17
Show Gist options
  • Save PatelUtkarsh/d4a3b69acf0d665e0361588749413286 to your computer and use it in GitHub Desktop.
Save PatelUtkarsh/d4a3b69acf0d665e0361588749413286 to your computer and use it in GitHub Desktop.
Benchmarking HTML Parsing: PHP 8.4 vs WP_HTML_Tag_Processor
<?php
/**
* DOM Operations Benchmark Test
*
* Comparing PHP 8.4 DOM vs WP_HTML_Tag_Processor
*/
/**
 * Micro-benchmark comparing two ways of answering the same question about a
 * small HTML fragment: "does the last top-level <article> carry the class
 * `special`?"
 *
 *  - PHP 8.4's `\Dom\HTMLDocument` with a CSS selector.
 *  - WordPress' streaming `WP_HTML_Tag_Processor` with bookmarks.
 *
 * Each benchmark stores its outcome in `$results` under its own key, either as
 * `[ 'time' => float, 'iterations' => int, 'success' => list<bool|null> ]`
 * or as `[ 'error' => string ]` when it could not run.
 */
class DOMBenchmark {
    /** Markup parsed from scratch on every iteration of both benchmarks. */
    private const SAMPLE_HTML = <<<HTML
<main>
<article>First Article</article>
<article class="featured">Second Article</article>
<article class="featured special">Third Article</article>
<div class="container">
<article class="nested featured">Nested Article</article>
</div>
</main>
HTML;

    /** Number of parse-and-query rounds each benchmark performs. */
    private const ITERATIONS = 100000;

    /** Source files that must sit next to this script for the WordPress benchmark. */
    private const WP_SOURCE_FILES = [
        'class-wp-html-tag-processor.php',
        'class-wp-html-attribute-token.php',
        'class-wp-html-decoder.php',
        'class-wp-html-span.php',
    ];

    /** @var array<string, array<string, mixed>> Benchmark outcomes keyed by benchmark name. */
    private array $results = [];

    /**
     * PHP 8.4 DOM implementation.
     *
     * Records an error entry instead of crashing when the `\Dom\HTMLDocument`
     * class is unavailable (PHP < 8.4 or the DOM extension is not loaded).
     */
    public function benchmarkPHP84DOM(): void {
        if ( ! class_exists( \Dom\HTMLDocument::class ) ) {
            $this->results['PHP84_DOM'] = [
                'error' => 'Dom\HTMLDocument is not available (requires PHP 8.4+ with the DOM extension)',
            ];
            return;
        }

        $successEl = [];
        $startTime = microtime( true );

        try {
            for ( $i = 0; $i < self::ITERATIONS; $i++ ) {
                $dom = \Dom\HTMLDocument::createFromString(
                    self::SAMPLE_HTML,
                    LIBXML_NOERROR
                );
                $lastArticle = $dom->querySelector( 'main > article:last-of-type' );
                // Null (no match) is stored as-is and later fails the all-true check.
                $successEl[] = $lastArticle?->classList->contains( 'special' );
            }
        } catch ( \Throwable $e ) {
            // Throwable, not Exception: missing classes/constants raise Error.
            $this->results['PHP84_DOM'] = [ 'error' => $e->getMessage() ];
            return;
        }

        $endTime = microtime( true );

        $this->results['PHP84_DOM'] = [
            'time'       => ( $endTime - $startTime ),
            'iterations' => self::ITERATIONS,
            'success'    => $successEl,
        ];
    }

    /**
     * WP_HTML_Tag_Processor implementation.
     *
     * The processor's source files are loaded before the timer starts so that
     * only parsing and querying are measured, mirroring the PHP 8.4 benchmark.
     */
    public function benchmarkWordPressDOM(): void {
        $loadError = $this->loadTagProcessor();
        if ( null !== $loadError ) {
            $this->results['WP_DOM'] = [ 'error' => $loadError ];
            return;
        }

        $success   = [];
        $bookmark  = 'maybe-last-article';
        $startTime = microtime( true );

        try {
            for ( $i = 0; $i < self::ITERATIONS; $i++ ) {
                $processor = new WP_HTML_Tag_Processor( self::SAMPLE_HTML );

                // Walk every <article>, moving the bookmark forward to the most
                // recent one that is not marked `nested`.
                while ( $processor->next_tag( 'article' ) ) {
                    if ( ! $processor->has_class( 'nested' ) ) {
                        if ( $processor->has_bookmark( $bookmark ) ) {
                            $processor->release_bookmark( $bookmark );
                        }
                        $processor->set_bookmark( $bookmark );
                    }
                }

                $processor->seek( $bookmark );
                $success[] = $processor->has_class( 'special' );
            }
        } catch ( \Throwable $e ) {
            $this->results['WP_DOM'] = [ 'error' => $e->getMessage() ];
            return;
        }

        $endTime = microtime( true );

        $this->results['WP_DOM'] = [
            'time'       => ( $endTime - $startTime ),
            'iterations' => self::ITERATIONS,
            'success'    => $success,
        ];
    }

    /**
     * Run both benchmarks and print the report.
     */
    public function run(): void {
        echo "Starting DOM Operations Benchmark...\n";

        echo "Running PHP 8.4 DOM benchmark...\n";
        $this->benchmarkPHP84DOM();

        echo "Running WordPress DOM benchmark...\n";
        $this->benchmarkWordPressDOM();

        $this->displayResults();
    }

    /**
     * Load the WP_HTML_Tag_Processor sources that live next to this script.
     *
     * All files are checked before any is included, because a failed
     * `require_once` is a fatal error that cannot be caught.
     *
     * @return string|null Error message when a file is missing, null on success.
     */
    private function loadTagProcessor(): ?string {
        if ( class_exists( 'WP_HTML_Tag_Processor', false ) ) {
            // Already defined by the host environment; including again would redeclare it.
            return null;
        }

        foreach ( self::WP_SOURCE_FILES as $file ) {
            if ( ! is_file( __DIR__ . '/' . $file ) ) {
                return "Missing required file: $file";
            }
        }

        foreach ( self::WP_SOURCE_FILES as $file ) {
            require_once __DIR__ . '/' . $file;
        }

        return null;
    }

    /**
     * Print per-benchmark timings followed by a relative comparison.
     *
     * The comparison is printed only when both benchmarks completed and
     * returned `true` for every iteration. The percentage is the time saved
     * by the faster implementation relative to the slower one.
     */
    private function displayResults(): void {
        $separator = str_repeat( "-", 50 ) . "\n";

        echo "\nBenchmark Results:\n";
        echo $separator;

        $validTimes = [];

        foreach ( $this->results as $type => $data ) {
            if ( isset( $data['error'] ) ) {
                echo "$type: Error - {$data['error']}\n";
                continue;
            }

            if ( array_unique( $data['success'] ) !== [ true ] ) {
                echo "$type: Unexpected result - the last article was not reported as 'special' in every iteration\n";
                continue;
            }

            $validTimes[ $type ] = $data['time'];

            // Seconds per iteration, converted to milliseconds.
            $timePerOperation = $data['iterations'] > 0
                ? ( $data['time'] / $data['iterations'] ) * 1000
                : 0.0;

            echo "$type:\n";
            echo "Total Time: " . number_format( $data['time'], 4 ) . " seconds\n";
            echo "Iterations: {$data['iterations']}\n";
            echo "Average Time per Operation: " . number_format( $timePerOperation, 4 ) . " ms\n";
            echo $separator;
        }

        if ( ! isset( $validTimes['PHP84_DOM'], $validTimes['WP_DOM'] ) ) {
            echo "Comparison skipped: both benchmarks must complete with valid results.\n";
            return;
        }

        $php84Time = $validTimes['PHP84_DOM'];
        $wpTime    = $validTimes['WP_DOM'];

        if ( $php84Time === $wpTime ) {
            echo "Both implementations took the same time\n";
            return;
        }

        $faster = $php84Time < $wpTime ? 'PHP 8.4 DOM' : 'WordPress DOM';
        $slow   = max( $php84Time, $wpTime );
        $fast   = min( $php84Time, $wpTime );

        // $slow > $fast >= 0 here, so the divisor is strictly positive.
        $percentFaster = number_format( ( ( $slow - $fast ) / $slow ) * 100, 2 );

        echo "$faster is faster by ~$percentFaster%\n";
    }
}
// Script entry point: build the benchmark harness and execute both benchmarks,
// printing progress and the final report to standard output.
( new DOMBenchmark() )->run();
@PatelUtkarsh
Copy link
Author

Benchmark Results:
--------------------------------------------------
PHP84_DOM:
Total Time: 0.6766 seconds
Iterations: 100000
Average Time per Operation: 0.0068 ms
--------------------------------------------------
WP_DOM:
Total Time: 2.8463 seconds
Iterations: 100000
Average Time per Operation: 0.0285 ms
--------------------------------------------------
PHP 8.4 DOM is faster by ~76.23%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment