# datadavev/d1_dataextract.py

## d1_dataextract.py
import time
import urllib
from selenium import webdriver
import extruct
import pprint
import clipboard

# Render a DataONE "view" page in Chrome, copy the rendered HTML to the
# clipboard, and print the structured data extruct finds in that HTML.

pid = "https://pasta.lternet.edu/package/metadata/eml/knb-lter-fce/1224/1"
# The pid would normally be escaped with urllib.parse.quote_plus(pid), but the
# view service does not accept an escaped path element, so the pid is
# appended verbatim.
url = "https://search.dataone.org/view/" + pid

# Seconds to pause after navigation so the document can finish loading
# before the DOM is read.
PAGE_LOAD_WAIT_SECONDS = 10

driver = webdriver.Chrome()
try:
    driver.get(url)

    # Wait for the document to load.
    time.sleep(PAGE_LOAD_WAIT_SECONDS)
    html = driver.execute_script(
        "return document.getElementsByTagName('html')[0].innerHTML"
    )
finally:
    # quit() ends the whole WebDriver session (close() only closes the
    # current window), and the finally clause makes sure the browser is
    # released even if navigation or the script call raises.
    driver.quit()

# Place the rendered HTML onto the clipboard. The value returned by the
# script is passed as-is; pre-encoding it to UTF-8 would hand the clipboard
# library a bytes object instead of the text.
# NOTE(review): assumes Python 3, where html is a str -- confirm.
clipboard.copy(html)

# Extract the structured data (e.g. JSON-LD) embedded in the page and print it.
data = extruct.extract(html, base_url=url)
pprint.pprint(data)
	import time
	import urllib
	from selenium import webdriver
	import extruct
	import pprint
	import clipboard

	# Render a DataONE "view" page in Chrome, copy the rendered HTML to the
	# clipboard, and print the structured data extruct finds in that HTML.

	pid = "https://pasta.lternet.edu/package/metadata/eml/knb-lter-fce/1224/1"
	# The pid would normally be escaped with urllib.parse.quote_plus(pid), but
	# the view service does not accept an escaped path element, so the pid is
	# appended verbatim.
	url = "https://search.dataone.org/view/" + pid

	# Seconds to pause after navigation so the document can finish loading
	# before the DOM is read.
	PAGE_LOAD_WAIT_SECONDS = 10

	driver = webdriver.Chrome()
	try:
		driver.get(url)

		# Wait for the document to load.
		time.sleep(PAGE_LOAD_WAIT_SECONDS)
		html = driver.execute_script(
			"return document.getElementsByTagName('html')[0].innerHTML"
		)
	finally:
		# quit() ends the whole WebDriver session (close() only closes the
		# current window), and the finally clause makes sure the browser is
		# released even if navigation or the script call raises.
		driver.quit()

	# Place the rendered HTML onto the clipboard. The value returned by the
	# script is passed as-is; pre-encoding it to UTF-8 would hand the
	# clipboard library a bytes object instead of the text.
	# NOTE(review): assumes Python 3, where html is a str -- confirm.
	clipboard.copy(html)

	# Extract the structured data (e.g. JSON-LD) embedded in the page and
	# print it.
	data = extruct.extract(html, base_url=url)
	pprint.pprint(data)