Skip to content

Instantly share code, notes, and snippets.

Last active August 22, 2022 18:54
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save shawngraham/8dddf5b924588184b0568ca98f827b8b to your computer and use it in GitHub Desktop.
Save shawngraham/8dddf5b924588184b0568ca98f827b8b to your computer and use it in GitHub Desktop.
extracting structured notes from PUBLIC annotations based on
import requests, re, json, types, traceback
class HypothesisAnnotation:
    """Hold the parts of one Hypothesis API search row that the report needs.

    Attributes:
        tags: tag strings with surrounding whitespace stripped ([] if none).
        text: the annotation body ('' when the row has no 'text').
        target: the row's 'target' value ([] when absent).
        prefix, exact, suffix: taken from a TextQuoteSelector on the first
            target, otherwise None.
        start, end: taken from a TextPositionSelector, otherwise None.
        fragment_selector: value of a FragmentSelector, otherwise None.
    """

    def __init__(self, row):
        """Encapsulate relevant parts of one row of a Hypothesis API search.

        Args:
            row: dict for a single entry of the search response's 'rows'.
        """
        self.tags = []
        if row.get('tags') is not None:
            self.tags = row['tags']
            if isinstance(self.tags, list):
                self.tags = [t.strip() for t in self.tags]

        self.text = ''
        if 'text' in row:
            self.text = row['text']

        self.target = []
        if 'target' in row:
            self.target = row['target']

        self.start = self.end = self.prefix = self.exact = self.suffix = None
        # Always defined so callers can read it without hasattr().
        self.fragment_selector = None

        # Selector data is best-effort: a malformed target is reported but
        # must not prevent the annotation from being built.
        try:
            if (isinstance(self.target, list) and self.target
                    and 'selector' in self.target[0]):
                for selector in self.target[0]['selector']:
                    selector_type = selector.get('type')
                    if selector_type == 'TextQuoteSelector':
                        # .get(): a selector lacking prefix/suffix should
                        # still yield its exact quote.
                        self.prefix = selector.get('prefix')
                        self.exact = selector.get('exact')
                        self.suffix = selector.get('suffix')
                    elif (selector_type == 'TextPositionSelector'
                            and 'start' in selector):
                        self.start = selector['start']
                        self.end = selector.get('end')
                    elif (selector_type == 'FragmentSelector'
                            and 'value' in selector):
                        self.fragment_selector = selector['value']
        except Exception:
            print(traceback.format_exc())
# Ask which page the report is about and which URL carries the annotations.
canonical_url = raw_input("Canonical URL?")
alternate_url = raw_input("Annotated URL?")

# NOTE(review): endpoint set to the public Hypothesis search API; confirm it
# matches the endpoint this script was written against.
query_url = 'https://api.hypothes.is/api/search'
# Passing the URI via params lets requests URL-encode it.
response = requests.get(query_url, params={'uri': alternate_url})
response.raise_for_status()
# response.text is already decoded text; decoding it again fails on non-ASCII.
text = response.text
rows = json.loads(text)['rows']
h_annotations = [HypothesisAnnotation(row) for row in rows]
def filter_tags_by_prefix(tags, tag_prefix):
    """Return the tags that start with tag_prefix, ignoring letter case.

    The matching tags are returned unchanged and in their original order.
    """
    matching = []
    for candidate in tags:
        if candidate.lower().startswith(tag_prefix.lower()):
            matching.append(candidate)
    return matching
def has_tag_starting_with(h_annotation, tag_prefix):
    """Tell whether any tag on h_annotation starts with tag_prefix."""
    return bool(filter_tags_by_prefix(h_annotation.tags, tag_prefix))
def get_tag_starting_with(h_annotation, tag_prefix):
    """Return the first tag on h_annotation starting with tag_prefix.

    Returns None when no tag matches.
    """
    matches = filter_tags_by_prefix(h_annotation.tags, tag_prefix)
    return matches[0] if matches else None
def select_annotations_with_tag_prefix(rows, tag_prefix):
    """Return the annotations in rows that have a tag starting with tag_prefix.

    Args:
        rows: iterable of HypothesisAnnotation objects; raw API row dicts are
            also accepted and wrapped in HypothesisAnnotation first.
        tag_prefix: case-insensitive tag prefix to look for.

    Returns:
        A list of HypothesisAnnotation objects, in input order.
    """
    selected = []
    for row in rows:
        # Callers in this file pass either wrapped annotations or the raw
        # search rows, so normalise before looking at tags.
        h_annotation = HypothesisAnnotation(row) if isinstance(row, dict) else row
        if has_tag_starting_with(h_annotation, tag_prefix):
            selected.append(h_annotation)
    return selected
def make_interpretation_element(h_annotations, tag_prefix):
    """Build 'label: quoted text' strings for annotations tagged tag_prefix.

    The label is the part of the matching tag after its first ':' (for a tag
    such as 'Subject:archaeology' the label is 'archaeology').

    Args:
        h_annotations: annotations to search.
        tag_prefix: case-insensitive tag prefix selecting the annotations.

    Returns:
        A list with one string per selected annotation.
    """
    elements = []
    for h_annotation in select_annotations_with_tag_prefix(h_annotations, tag_prefix):
        tag = get_tag_starting_with(h_annotation, tag_prefix)
        parts = tag.split(':')
        # A tag without ':' has no sub-label; fall back to the tag itself
        # instead of failing with IndexError.
        label = parts[1] if len(parts) > 1 else tag
        # exact is None when the annotation has no quote selector.
        elements.append(label + ': ' + (h_annotation.exact or ''))
    return elements
def make_abstract(h_annotations):
    """Render the summary section of the report.

    The title is the quoted text of the first annotation tagged 'Title';
    subjects, key themes and key literature are comma-joined, and 'Item'
    annotations become an HTML bullet list.

    Args:
        h_annotations: annotations to summarise.

    Returns:
        The summary as a string of Markdown with an embedded HTML list.
    """
    titled = select_annotations_with_tag_prefix(h_annotations, 'Title')
    # Leave the title blank when no annotation carries a Title tag (or it has
    # no quoted text) rather than failing or printing "None".
    title = (titled[0].exact or '') if titled else ''
    items = make_interpretation_element(h_annotations, 'Item')
    html = """
**Title:** %s
**Subject:** %s
**Key Themes:** %s
**Key Literature:** %s
**The Interesting Bits:**
%s""" % (
        title,
        ', '.join(make_interpretation_element(h_annotations, 'Subject')),
        ', '.join(make_interpretation_element(h_annotations, 'Keytheme')),
        ', '.join(make_interpretation_element(h_annotations, 'Keylit')),
        '<ul><li>' + '</li><li>'.join(items) + '</li></ul>')
    return html
def subfindings_from_h_annotation(h_annotation):
    """Turn an annotation body into HTML table rows.

    Lines containing HTML markup are dropped from the body first. The body
    is then split on blank lines into chunks, one table row per chunk, and
    each line of a chunk becomes a cell with its leading 'Label: ' removed.

    Args:
        h_annotation: object whose .text holds the annotation body.

    Returns:
        A list of '<tr>...</tr>' strings; empty when the body is blank.
    """
    body = re.sub(r'\n*<.+>\n*', '', h_annotation.text)
    table_rows = []
    for chunk in body.split('\n\n'):
        if not chunk.strip():
            # Blank chunks would only produce empty rows.
            continue
        # Anchored so that only the leading label is removed; a later
        # "word: " inside the cell text is kept.
        fields = [re.sub(r'^\s*\w+:\s+', '', field) for field in chunk.split('\n')]
        table_rows.append('<tr><td>' + '</td><td>'.join(fields) + '</td></tr>')
    return table_rows
def make_table(h_annotations):
    """Render the 'Item' annotations as one HTML table.

    Each 'Item'-tagged annotation contributes the rows produced by
    subfindings_from_h_annotation, placed under a fixed header row.
    """
    headers = ['Observation','Resonances','Crossref','Problems'] #these are headings you'd use in your actual annotation. change as appropriate
    body_rows = []
    for finding in select_annotations_with_tag_prefix(h_annotations, 'Item'):
        body_rows.extend(subfindings_from_h_annotation(finding))
    header_row = '<tr><th>' + '</th><th>'.join(headers) + '</th></tr>'
    return ''.join(['<table>', header_row, '\n'.join(body_rows), '</table>'])
import io

# Assemble the report: table styling, a heading linking to the canonical URL,
# then the summary and the table of findings.
# NOTE(review): the <style> wrapper and the two %s slots under the section
# headings are assumed; confirm against the intended template.
html = """
<style>
table, td, th { border-collapse: collapse; border: 1px solid black }
td { padding: 6px }
th { padding: 6px; background-color: lightgrey}
</style>

## Reading report
for [%s](%s)

### Summary
%s

### Tabular Representation
%s
""" % ( canonical_url, canonical_url, make_abstract(h_annotations), make_table(h_annotations) )

savefileas = raw_input("save the report as?")
# The context manager guarantees the file is flushed and closed; UTF-8 keeps
# non-ASCII annotation text intact.
with io.open(savefileas + '.md', 'w', encoding='utf-8') as f:
    # io.open needs text, not bytes, so decode if html came out as a byte string.
    f.write(html if isinstance(html, type(u'')) else html.decode('utf-8'))
Copy link

switched things up so that it writes as .md

Copy link

Also, it now asks you for the URL, so you don't have to edit that line yourself. I think this works.

Copy link

judell commented May 5, 2016

Well done!

Copy link
Author: it also asks you for an output file name, and the report is saved under that name as a .md file.

Copy link

@judell thanks!

Copy link

...added the styling back into the table.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment