Skip to content

Instantly share code, notes, and snippets.

@edorian
Last active February 6, 2025 17:51
Show Gist options
  • Save edorian/945ee661944114257bb5aadd587df404 to your computer and use it in GitHub Desktop.
Save edorian/945ee661944114257bb5aadd587df404 to your computer and use it in GitHub Desktop.
Extract program listings from php-doc-en
<?php
// Entry point: the first command-line argument is the XML file, or the
// directory to scan recursively, whose PHP program listings are printed.
$targetPath = $_SERVER['argv'][1] ?? '';
if ($targetPath === '') {
    // Without this guard a missing argument surfaces as an undefined-index
    // warning followed by an unrelated fatal error from the directory iterator.
    $scriptName = $_SERVER['argv'][0] ?? 'extract.php';
    file_put_contents('php://stderr', "Usage: php {$scriptName} <xml-file-or-directory>\n");
    exit(1);
}
if (!file_exists($targetPath)) {
    file_put_contents('php://stderr', "Path not found: {$targetPath}\n");
    exit(1);
}
extractProgramListings($targetPath);
/**
 * Prints the text of every `<programlisting role="php">` element found in
 * DocBook XML sources.
 *
 * For each file that contains at least one listing, a line
 * "Extracting from: <file>" is echoed, followed by each listing's trimmed
 * text and a blank line. libxml diagnostics are echoed as "XML Error: ...".
 * Files that are empty or cannot be read are reported and skipped so that a
 * single bad file does not abort the whole run.
 *
 * @param string $path A single file, or a directory that is walked
 *                     recursively; only files with the extension "xml"
 *                     are processed.
 */
function extractProgramListings(string $path): void
{
    $files = is_file($path)
        ? [$path]
        : new RecursiveIteratorIterator(new RecursiveDirectoryIterator($path));

    // Collect libxml diagnostics in libxml's internal buffer instead of having
    // them raised as PHP warnings. The caller's previous setting is restored
    // in the finally block below.
    $previousErrorMode = libxml_use_internal_errors(true);

    try {
        foreach ($files as $file) {
            // Directory iteration yields SplFileInfo objects; normalise to the
            // path string used for reading and for the progress message.
            $file = (string) $file;
            if (pathinfo($file, PATHINFO_EXTENSION) !== 'xml') {
                continue;
            }

            $xmlContent = file_get_contents($file);
            if ($xmlContent === false || $xmlContent === '') {
                // DOMDocument::loadXML() rejects an empty string outright
                // (ValueError on PHP 8), which would stop the whole scan.
                echo "Skipping unreadable or empty file: $file\n";
                continue;
            }

            $dom = new DOMDocument();
            // NOTE(review): resolveExternals together with LIBXML_NOENT permits
            // external entity resolution; this is only appropriate for trusted
            // documentation sources.
            $dom->resolveExternals = true;
            $dom->strictErrorChecking = false;
            // NOTE(review): LIBXML_NOERROR / LIBXML_NOWARNING may suppress the
            // very diagnostics printed below - confirm which is intended.
            $loaded = $dom->loadXML(
                injectDocEntityDtd($xmlContent),
                LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NOENT | LIBXML_NOERROR | LIBXML_NOWARNING
            );

            foreach (libxml_get_errors() as $error) {
                echo "XML Error: ", $error->message, "\n";
            }
            libxml_clear_errors();

            if ($loaded === false) {
                // Nothing was parsed, so there is nothing to query.
                continue;
            }

            $xpath = new DOMXPath($dom);
            $xpath->registerNamespace('db', 'http://docbook.org/ns/docbook');
            $entries = $xpath->query('//db:programlisting[@role="php"]');
            if ($entries === false || $entries->length === 0) {
                continue;
            }

            echo "Extracting from: $file\n";
            foreach ($entries as $entry) {
                echo trim($entry->textContent) . "\n\n";
            }
        }
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
}

/**
 * Inserts an internal DTD subset that declares the named entities referenced
 * by the documentation sources, directly after the document's XML declaration.
 *
 * Only a declaration at the very start of the document (optionally preceded by
 * a UTF-8 byte-order mark) is touched, so identical text appearing later - for
 * example inside a program listing - is left alone. Any encoding name or quote
 * style in the declaration is accepted. Documents that do not start with an
 * XML declaration are returned unchanged.
 *
 * @param string $xmlContent Raw XML document text.
 *
 * @return string The document text, with the entity DTD inserted when an XML
 *                declaration was found at its start.
 */
function injectDocEntityDtd(string $xmlContent): string
{
    // Each entity is declared once; the replacement texts are placeholders
    // that let references such as &reftitle.examples; resolve during parsing.
    $dtd = <<<'DTD'
<!DOCTYPE root [
<!ENTITY reftitle.description "Description">
<!ENTITY style.oop "OOP">
<!ENTITY style.procedural "Procedural">
<!ENTITY reftitle.parameters "Parameters">
<!ENTITY date.datetime.description "Date/Time Description">
<!ENTITY url.rfc "RFC URL">
<!ENTITY reftitle.returnvalues "Return Values">
<!ENTITY reftitle.changelog "Changelog">
<!ENTITY Description "Description">
<!ENTITY Version "Version">
<!ENTITY reftitle.examples "reftitle.examples">
<!ENTITY example.outputs "example.outputs">
<!ENTITY reftitle.notes "reftitle.notes">
<!ENTITY reftitle.required "reftitle.required">
<!ENTITY url.xattr "url.xattr">
<!ENTITY true "true">
<!ENTITY false "false">
<!ENTITY reference.xattr.configure "reference.xattr.configure">
<!ENTITY return.success "return.success">
<!ENTITY reftitle.seealso "reftitle.seealso">
<!ENTITY xattr.namespace "xattr.namespace">
<!ENTITY reftitle.setup "reftitle.setup">
<!ENTITY no.function.parameters "no.function.parameters">
]>
DTD;

    if (preg_match('/\A(?:\xEF\xBB\xBF)?<\?xml\b[^>]*\?>/', $xmlContent, $declaration) !== 1) {
        return $xmlContent;
    }

    // Splice by offset rather than with a replacement pattern so the DTD text
    // is inserted verbatim and exactly once.
    return $declaration[0] . $dtd . substr($xmlContent, strlen($declaration[0]));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment