Created
December 2, 2010 16:46
-
-
Save moustaki/725644 to your computer and use it in GitHub Desktop.
ARC2 RDFa bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
 * Reproduction script for an ARC2 RDFa extraction issue.
 *
 * Feeds a fixed XHTML+RDFa document (two po:Series list items, each with a
 * relative @about, dc:title / po:short_synopsis properties and rdfs:seeAlso
 * links) to ARC2's SemHTML parser and dumps the triples it extracts.
 *
 * NOTE(review): the ARC2 library must already be loaded (for example via an
 * include of ARC2.php or an autoloader) before this script runs; no include
 * is visible here, so confirm how the class is brought into scope.
 */

// XHTML+RDFa fixture. Kept as a single-quoted literal so nothing inside is
// interpolated; the markup itself contains no single quotes.
$data = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:og="http://ogp.me/ns#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:rss="http://purl.org/rss/1.0/" xmlns:mo="http://purl.org/ontology/mo/" xmlns:po="http://purl.org/ontology/po/"><head><title>Component test</title></head><body id="component"><div class="box-component box-component-orphan"><div class="box-content"><ol class="programmes">
<li id="b00s1s1s" about="/programmes/b00s1s1s#programme" typeof="po:Series" class="series">
<div class="head">
<div class="series_summary blq-clearfix">
<h2 class="title" property="dc:title">Series 1</h2>
<p property="po:short_synopsis">short one 1</p>
<p><a rel="rdfs:seeAlso" href="/programmes/b00s1s1s">Go to Series 1 home</a></p>
</div>
</div>
<div class="summary">
<div class="episodes_summary"><h3><a rel="rdfs:seeAlso" href="/programmes/b00s1s1s/episodes/guide#b00s1s1s">Episodes</a></h3></div>
<div class="available_summary"><p>AVAILABLE ON BBC iPLAYER <span class="count">0</span></p></div>
<div class="next_on_summary"><p>NEXT ON <span class="count">0</span></p></div>
<div class="repeats_summary"><p>Repeats coming up</p></div>
</div>
<div class="body">
<div class="foot"></div>
</div>
</li>
<li id="b00s2s2s" about="/programmes/b00s2s2s#programme" typeof="po:Series" class="series alt">
<div class="head">
<div class="series_summary blq-clearfix">
<h2 class="title" property="dc:title">Series 2</h2>
<p property="po:short_synopsis">short too 2</p>
<p><a rel="rdfs:seeAlso" href="/programmes/b00s2s2s">Go to Series 2 home</a></p>
</div>
</div>
<div class="summary">
<div class="episodes_summary"><h3><a rel="rdfs:seeAlso" href="/programmes/b00s2s2s/episodes/guide#b00s2s2s">Episodes</a></h3></div>
<div class="available_summary"><p>AVAILABLE ON BBC iPLAYER <span class="count">0</span></p></div>
<div class="next_on_summary"><p>NEXT ON <span class="count">0</span></p></div>
<div class="repeats_summary"><p>Repeats coming up</p></div>
</div>
<div class="body">
<div class="foot"></div>
</div>
</li>
</ol>
</div></div></body></html>';

// 'auto_extract' => 0 presumably stops the parser from running its extractors
// during parse(), so that only the RDFa extractor requested below is applied
// -- TODO confirm against the ARC2 documentation.
$config = array('auto_extract' => 0);
$parser = ARC2::getSemHTMLParser($config);

// The first argument is passed alongside the in-memory markup; it presumably
// acts as the document/base URI against which the relative @about and @href
// values in the fixture are resolved -- verify against ARC2.
$parser->parse('http://www.bbc.co.uk/programmes/b00bbbbb/episodes/guide', $data);

// Run only the RDFa extractor, then dump whatever triples it produced.
$parser->extractRDF('rdfa');
$triples = $parser->getTriples();
var_dump($triples);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment