Created
November 10, 2016 17:46
-
-
Save danbri/ad684a50872fffb30e0bbd2c22ea3e18 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Quick demo: use Selenium (a real browser driver) as a preprocessor to
# extract JSON-LD from web content. Because the page is rendered by a
# browser, JSON-LD injected by scripts is also captured — something a
# plain HTTP fetch would miss. The same approach could cover
# Microdata/RDFa.
#
# Dan Brickley <danbri@google.com>
from selenium import webdriver
from bs4 import BeautifulSoup
from rdflib import Graph, plugin
import json, rdflib_jsonld
from rdflib.plugin import register, Serializer
from SPARQLWrapper import SPARQLWrapper
import warnings

# Page to fetch; serves as the base IRI when parsing relative JSON-LD terms.
u = "http://danbri.org/2016/browserdemo/helloworld.html"

# Make rdflib aware of the JSON-LD serializer provided by rdflib_jsonld.
register('json-ld', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer')

browser = webdriver.Firefox()
#browser = webdriver.Chrome() # TODO
browser.get(u)
# page_source is the browser-rendered DOM, including script-injected markup.
pagetext = browser.page_source

# Best-effort teardown: old Selenium/Firefox combos are noisy and flaky on
# shutdown, so warnings are suppressed and any failure is swallowed —
# we already have the page text we need.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    try:
        browser.close()
        browser.quit()
    except Exception as e:
        print("...")

soup = BeautifulSoup(pagetext, 'lxml')
print("Extracting script tags.")
for tag in soup.find_all('script'):
    # endswith (not ==) tolerates MIME-type prefixes/charset noise in @type.
    tt = str(tag.get('type', None))
    if tt.endswith("application/ld+json"):
        myJsonLd = tag.get_text()
        g = Graph()
        g.parse(data=myJsonLd, format='json-ld', base=u)
        # Dump all triples BEFORE closing the graph; the original closed
        # the graph first and then tried to iterate it.
        for s, p, o in g.triples((None, None, None)):
            print("%s %s %s" % (s, p, o))
        g.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment