Skip to content

Instantly share code, notes, and snippets.

@niklasl
Created February 11, 2010 10:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niklasl/301385 to your computer and use it in GitHub Desktop.
Save niklasl/301385 to your computer and use it in GitHub Desktop.
# -*- coding: UTF-8 -*-
import urllib2
from lxml import etree
from lxml.cssselect import CSSSelector as css
from rdflib import Graph
def check_examples(url, verbose=False):
    """Try to parse every example found at *url* and report the outcome.

    Each item yielded by ``find_examples(url)`` is parsed with
    ``Graph().parse`` using the prologue prepended to the example text and
    the guessed format.  Results are printed to standard output:

    * a failing example always prints its header, the error message and a
      closing rule; in verbose mode the example text is printed as well;
    * a successful example is only reported in verbose mode.

    Rules are 72 characters wide in verbose mode and 40 otherwise.
    """
    fill = 72 if verbose else 40
    for title, prologue, text, fmt in find_examples(url):
        header = 'Example "%s" as %s: ' % (title, fmt)
        try:
            Graph().parse(data=prologue + text, format=fmt)
        # Deliberately broad: whatever the parser raises is reported for
        # this example and the remaining examples are still checked.
        except Exception as e:
            # Single-argument print(...) calls behave the same as the print
            # statement; header and status are joined by one space, which is
            # what ``print header,`` followed by ``print "Error:"`` emitted.
            print("%s %s" % (header, "Error:"))
            print("-" * fill)
            print(unicode(e).encode('utf-8'))
            if verbose:
                print("-" * fill)
                print(text)
            print("=" * fill)
            print("")
        else:
            if verbose:
                print("%s %s" % (header, "Ok!"))
                print("=" * fill)
                print("")
def find_examples(url):
    """Yield the examples found in the ``<pre>`` elements of the page at *url*.

    The page is fetched and parsed as HTML.  For every ``<pre>`` element, in
    document order, a tuple ``(title, prologue, text, guessed_fmt)`` is
    yielded where:

    * ``title`` is the 1-based position of the element;
    * ``text`` is the element's direct text nodes joined by newlines, with
      no-break spaces replaced by plain spaces, encoded as UTF-8;
    * ``guessed_fmt`` is ``'xml'`` when the text contains ``xmlns:`` or
      ``rdf:about`` and ``'n3'`` otherwise;
    * ``prologue`` is, for ``'n3'`` examples, the ``@prefix`` rows seen so
      far that do not occur in ``text``, newline-joined with a trailing
      newline and encoded as UTF-8; for ``'xml'`` examples it is empty.
    """
    response = urllib2.urlopen(url)
    try:
        doc = etree.parse(response, etree.HTMLParser())
    finally:
        # The document is fully parsed at this point, so the connection can
        # be released before any example is yielded.
        response.close()

    prefixes = []
    for title, elem in enumerate(css("pre")(doc), 1):
        # Normalise no-break spaces with an explicit escape (a literal nbsp
        # in the source is invisible and easily lost), and do it per row so
        # the remembered prefix rows are normalised as well.
        rows = [row.replace(u'\xa0', u' ') for row in elem.xpath('text()')]
        for row in rows:
            # Checked one row at a time so a row repeated within the same
            # element is only remembered once.
            if "@prefix" in row and row not in prefixes:
                prefixes.append(row)

        # All matching is done on text; encoding happens once, at the yield.
        text = u"\n".join(rows)
        if "xmlns:" in text or "rdf:about" in text:
            guessed_fmt = 'xml'
            prologue = u''
        else:
            guessed_fmt = 'n3'
            missing = [pfx for pfx in prefixes if pfx not in text]
            prologue = u"\n".join(missing) + u"\n"
        yield title, prologue.encode('utf-8'), text.encode('utf-8'), guessed_fmt
# Command-line entry point: ``script [-v] URL``.
if __name__ == '__main__':
    import sys

    args = sys.argv[1:]
    verbose = '-v' in args
    if verbose:
        args.remove('-v')
    if not args:
        # Without a URL there is nothing to check; say so instead of
        # failing on an unbound ``url``.
        sys.exit("usage: %s [-v] URL" % sys.argv[0])
    # The last remaining argument is taken as the URL.
    url = args.pop()
    check_examples(url, verbose)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment