judell/mmr.py

## mmr.py
import requests, re, json, types, traceback

class HypothesisAnnotation:
    """Hold the parts of one Hypothesis API search row that the report uses.

    Attributes:
        tags: the row's ``tags`` value; when it is a list every tag is
            whitespace-stripped. ``[]`` when missing or ``None``.
        text: the row's ``text`` value, ``''`` when missing.
        target: the row's ``target`` value, ``[]`` when missing.
        prefix, exact, suffix: fields of a ``TextQuoteSelector`` found on
            the first target, otherwise ``None``.
        start, end: fields of a ``TextPositionSelector`` found on the first
            target, otherwise ``None``.
        fragment_selector: ``value`` of a ``FragmentSelector`` found on the
            first target, otherwise ``None``.
    """

    def __init__(self, row):
        """Encapsulate relevant parts of one row of a Hypothesis API search.

        Args:
            row: dict for one element of the search response's ``rows``.
        """
        tags = row.get('tags')
        if tags is None:
            tags = []
        elif isinstance(tags, list):
            tags = [t.strip() for t in tags]
        self.tags = tags

        self.text = row.get('text', '')
        self.target = row.get('target', [])

        self.start = self.end = None
        self.prefix = self.exact = self.suffix = None
        # Always defined so callers can read it without hasattr().
        self.fragment_selector = None

        try:
            self._read_selectors()
        except Exception:
            # Best effort: a malformed target must not abort loading the
            # remaining rows, but the problem is still reported.
            traceback.print_exc()

    def _read_selectors(self):
        """Copy quote/position/fragment selector fields from the first target."""
        if not (isinstance(self.target, list) and self.target):
            return
        for selector in self.target[0].get('selector', []):
            kind = selector.get('type')
            if kind == 'TextQuoteSelector':
                # .get() keeps ``exact`` even when prefix/suffix are absent.
                self.prefix = selector.get('prefix')
                self.exact = selector.get('exact')
                self.suffix = selector.get('suffix')
            elif kind == 'TextPositionSelector' and 'start' in selector:
                self.start = selector['start']
                self.end = selector.get('end')
            elif kind == 'FragmentSelector' and 'value' in selector:
                self.fragment_selector = selector['value']

# URLs of the article being reported on and of the page whose annotations
# are fetched from the Hypothesis search API.
canonical_url = 'http://www.ncbi.nlm.nih.gov/pubmed/26142024'      # original annotated version
alternate_url = 'http://jonudell.net/h/david_kennedy_example.html' # annotated with suggested tags and conventions
query_url = 'https://hypothes.is/api/search?uri=%s' % alternate_url
response = requests.get(query_url)
# Fail here with the HTTP status instead of later with a JSON/KeyError.
response.raise_for_status()
# Response.text is already decoded text; calling .decode('utf-8') on it
# round-trips through ASCII on Python 2 (breaking on non-ASCII annotations)
# and is not available on Python 3.
text = response.text
rows = json.loads(text)['rows']
h_annotations = [HypothesisAnnotation(row) for row in rows]

def filter_tags_by_prefix(tags, tag_prefix):
    """Return the tags that begin with *tag_prefix*, ignoring case.

    Args:
        tags: iterable of tag strings.
        tag_prefix: prefix to look for; compared case-insensitively.

    Returns:
        A new list with the matching tags, unchanged and in input order.
    """
    matches = []
    for candidate in tags:
        if candidate.lower().startswith(tag_prefix.lower()):
            matches.append(candidate)
    return matches

def has_tag_starting_with(h_annotation, tag_prefix):
    """Tell whether any tag on *h_annotation* begins with *tag_prefix*.

    Matching is delegated to filter_tags_by_prefix, applied to
    ``h_annotation.tags``.

    Returns:
        True if at least one tag matches, otherwise False.
    """
    return bool(filter_tags_by_prefix(h_annotation.tags, tag_prefix))

def get_tag_starting_with(h_annotation, tag_prefix):
    """Return the first tag on *h_annotation* that begins with *tag_prefix*.

    Matching is delegated to filter_tags_by_prefix, applied to
    ``h_annotation.tags``.

    Returns:
        The first matching tag, or None when no tag matches.
    """
    matching_tags = filter_tags_by_prefix(h_annotation.tags, tag_prefix)
    return matching_tags[0] if matching_tags else None

def select_annotations_with_tag_prefix(rows, tag_prefix):
    """Return the annotations in *rows* having a tag that starts with *tag_prefix*.

    Args:
        rows: iterable of annotation objects exposing ``.tags`` (the
            parameter keeps its historical name; it is not the raw API rows).
        tag_prefix: tag prefix passed on to has_tag_starting_with.

    Returns:
        A list of the matching annotations, in input order.
    """
    # Filter the argument itself; previously the argument was ignored and
    # the module-level h_annotations list was always used.
    return [h_annotation for h_annotation in rows
            if has_tag_starting_with(h_annotation, tag_prefix)]

def make_interpretation_element(h_annotations, tag_prefix):
    """Build ``'<subtag>: <quoted text>'`` strings for one tag family.

    Args:
        h_annotations: iterable of annotation objects to search.
        tag_prefix: tag prefix selecting the family, e.g. ``'Finding'``.

    Returns:
        One string per selected annotation: the part of its matching tag
        after the first ``:`` followed by ``': '`` and its ``exact`` quote.
    """
    elements = []
    for h_annotation in select_annotations_with_tag_prefix(h_annotations, tag_prefix):
        tag = get_tag_starting_with(h_annotation, tag_prefix)
        parts = tag.split(':')
        # A tag without ':' has no subtag; fall back to the tag itself
        # instead of raising IndexError.
        subtag = parts[1] if len(parts) > 1 else tag
        # ``exact`` is None when the annotation has no quote selector.
        elements.append(subtag + ': ' + (h_annotation.exact or ''))
    return elements

def make_abstract(h_annotations):
    """Render the "abstract interpretation" section of the report as HTML.

    Args:
        h_annotations: iterable of annotation objects to summarise.

    Returns:
        An HTML fragment with four paragraphs: the quote of the first
        'Subject' annotation, comma-joined 'AcquisitionMethod' and
        'AnalysisMethod' elements, and a bullet list of 'Finding' elements.
    """
    subjects = select_annotations_with_tag_prefix(h_annotations, 'Subject')
    # Without a Subject annotation (or without a quote on it) leave the field
    # empty rather than raising IndexError or printing the word "None".
    subject_groups = (subjects[0].exact or '') if subjects else ''
    html = """
<p><b>Subject Groups:</b> %s</p>
<p><b>Acquisition Methods:</b> %s</p>
<p><b>Analysis Methods:</b> %s</p>
<p><b>Findings:</b> %s</p>""" % (
        subject_groups,
        ', '.join(make_interpretation_element(h_annotations, 'AcquisitionMethod')),
        ', '.join(make_interpretation_element(h_annotations, 'AnalysisMethod')),
        '<ul><li>' + '</li><li>'.join(make_interpretation_element(h_annotations, 'Finding')) + '</li></ul>' )
    return html

def subfindings_from_h_annotation(h_annotation):
    """Turn the body text of one annotation into HTML table rows.

    Every ``<...>`` span (greedy within a line) is removed together with the
    newlines around it. The remaining text is split on blank lines into
    chunks; each chunk becomes one ``<tr>`` whose cells are the chunk's lines
    with every ``word: `` label stripped.

    Args:
        h_annotation: object whose ``text`` attribute holds the body.

    Returns:
        A list of ``'<tr>...</tr>'`` strings; empty when there is no text.
    """
    # Raw strings: '\w' and '\s' are not valid escapes in a plain literal.
    body = re.sub(r'\n*<.+>\n*', '', h_annotation.text or '')
    table_rows = []
    for chunk in body.split('\n\n'):
        # Leading/trailing blank lines would otherwise yield empty rows.
        if not chunk.strip():
            continue
        cells = [re.sub(r'\w+:\s+', '', line) for line in chunk.split('\n')]
        table_rows.append('<tr><td>' + '</td><td>'.join(cells) + '</td></tr>')
    return table_rows

def make_table(h_annotations):
    """Render the 'Finding' annotations as one HTML table.

    Annotations are selected with select_annotations_with_tag_prefix and
    each is expanded into rows by subfindings_from_h_annotation.

    Returns:
        A ``<table>`` string: a fixed four-column header row followed by the
        collected rows joined with newlines.
    """
    finding_rows = []
    for finding in select_annotations_with_tag_prefix(h_annotations, 'Finding'):
        finding_rows.extend(subfindings_from_h_annotation(finding))
    column_names = ['Observation','AnalysisMethod','Location','SubjectGroups']
    header_row = '<tr><th>' + '</th><th>'.join(column_names) + '</th></tr>'
    return ''.join(['<table>', header_row, '\n'.join(finding_rows), '</table>'])

# Assemble the complete report page: the canonical URL fills the title and
# heading, followed by the abstract and the findings table.
html = """<html>
<head>
<title>Monthly Morphology Report for %s</title>
<meta charset="utf-8" />
<style>
body { font-family: verdana; margin: .5in; }
table, td, th { border-collapse: collapse; border: 1px solid black }
td { padding: 6px }
th { padding: 6px; background-color: lightgrey}
</style>
</head>
<body>
<h1>Monthly Morphology Report for %s</h1>
<h2>Abstract Interpretation</h2>
%s
<h2>Tabular Representation</h2>
%s
</body>
</html> """ % ( canonical_url, canonical_url, make_abstract(h_annotations), make_table(h_annotations) )

# The page is encoded to UTF-8 bytes, so the file is opened in binary mode;
# `with` closes it even when the write fails.
with open('mmr.html', 'wb') as f:
    f.write(html.encode('utf-8'))
	import requests, re, json, types, traceback

	class HypothesisAnnotation:
	    """Hold the parts of one Hypothesis API search row that the report uses.

	    Attributes:
	        tags: the row's ``tags`` value; when it is a list every tag is
	            whitespace-stripped. ``[]`` when missing or ``None``.
	        text: the row's ``text`` value, ``''`` when missing.
	        target: the row's ``target`` value, ``[]`` when missing.
	        prefix, exact, suffix: fields of a ``TextQuoteSelector`` found on
	            the first target, otherwise ``None``.
	        start, end: fields of a ``TextPositionSelector`` found on the first
	            target, otherwise ``None``.
	        fragment_selector: ``value`` of a ``FragmentSelector`` found on the
	            first target, otherwise ``None``.
	    """

	    def __init__(self, row):
	        """Encapsulate relevant parts of one row of a Hypothesis API search.

	        Args:
	            row: dict for one element of the search response's ``rows``.
	        """
	        tags = row.get('tags')
	        if tags is None:
	            tags = []
	        elif isinstance(tags, list):
	            tags = [t.strip() for t in tags]
	        self.tags = tags

	        self.text = row.get('text', '')
	        self.target = row.get('target', [])

	        self.start = self.end = None
	        self.prefix = self.exact = self.suffix = None
	        # Always defined so callers can read it without hasattr().
	        self.fragment_selector = None

	        try:
	            self._read_selectors()
	        except Exception:
	            # Best effort: a malformed target must not abort loading the
	            # remaining rows, but the problem is still reported.
	            traceback.print_exc()

	    def _read_selectors(self):
	        """Copy quote/position/fragment selector fields from the first target."""
	        if not (isinstance(self.target, list) and self.target):
	            return
	        for selector in self.target[0].get('selector', []):
	            kind = selector.get('type')
	            if kind == 'TextQuoteSelector':
	                # .get() keeps ``exact`` even when prefix/suffix are absent.
	                self.prefix = selector.get('prefix')
	                self.exact = selector.get('exact')
	                self.suffix = selector.get('suffix')
	            elif kind == 'TextPositionSelector' and 'start' in selector:
	                self.start = selector['start']
	                self.end = selector.get('end')
	            elif kind == 'FragmentSelector' and 'value' in selector:
	                self.fragment_selector = selector['value']

	# URLs of the article being reported on and of the page whose annotations
	# are fetched from the Hypothesis search API.
	canonical_url = 'http://www.ncbi.nlm.nih.gov/pubmed/26142024' # original annotated version
	alternate_url = 'http://jonudell.net/h/david_kennedy_example.html' # annotated with suggested tags and conventions
	query_url = 'https://hypothes.is/api/search?uri=%s' % alternate_url
	response = requests.get(query_url)
	# Fail here with the HTTP status instead of later with a JSON/KeyError.
	response.raise_for_status()
	# Response.text is already decoded text; calling .decode('utf-8') on it
	# round-trips through ASCII on Python 2 (breaking on non-ASCII annotations)
	# and is not available on Python 3.
	text = response.text
	rows = json.loads(text)['rows']
	h_annotations = [HypothesisAnnotation(row) for row in rows]

	def filter_tags_by_prefix(tags, tag_prefix):
	    """Return the tags that begin with *tag_prefix*, ignoring case.

	    Args:
	        tags: iterable of tag strings.
	        tag_prefix: prefix to look for; compared case-insensitively.

	    Returns:
	        A new list with the matching tags, unchanged and in input order.
	    """
	    return [tag for tag in tags if tag.lower().startswith(tag_prefix.lower())]

	def has_tag_starting_with(h_annotation, tag_prefix):
	    """Tell whether any tag on *h_annotation* begins with *tag_prefix*.

	    Matching is delegated to filter_tags_by_prefix, applied to
	    ``h_annotation.tags``.

	    Returns:
	        True if at least one tag matches, otherwise False.
	    """
	    filtered = filter_tags_by_prefix(h_annotation.tags, tag_prefix)
	    return len(filtered) > 0

	def get_tag_starting_with(h_annotation, tag_prefix):
	    """Return the first tag on *h_annotation* that begins with *tag_prefix*.

	    Matching is delegated to filter_tags_by_prefix, applied to
	    ``h_annotation.tags``.

	    Returns:
	        The first matching tag, or None when no tag matches.
	    """
	    filtered = filter_tags_by_prefix(h_annotation.tags, tag_prefix)
	    if filtered:
	        return filtered[0]
	    return None

	def select_annotations_with_tag_prefix(rows, tag_prefix):
	    """Return the annotations in *rows* having a tag that starts with *tag_prefix*.

	    Args:
	        rows: iterable of annotation objects exposing ``.tags`` (the
	            parameter keeps its historical name; it is not the raw API rows).
	        tag_prefix: tag prefix passed on to has_tag_starting_with.

	    Returns:
	        A list of the matching annotations, in input order.
	    """
	    # Filter the argument itself; previously the argument was ignored and
	    # the module-level h_annotations list was always used.
	    return [h_annotation for h_annotation in rows
	            if has_tag_starting_with(h_annotation, tag_prefix)]

	def make_interpretation_element(h_annotations, tag_prefix):
	    """Build ``'<subtag>: <quoted text>'`` strings for one tag family.

	    Args:
	        h_annotations: iterable of annotation objects to search.
	        tag_prefix: tag prefix selecting the family, e.g. ``'Finding'``.

	    Returns:
	        One string per selected annotation: the part of its matching tag
	        after the first ``:`` followed by ``': '`` and its ``exact`` quote.
	    """
	    elements = []
	    for h_annotation in select_annotations_with_tag_prefix(h_annotations, tag_prefix):
	        tag = get_tag_starting_with(h_annotation, tag_prefix)
	        parts = tag.split(':')
	        # A tag without ':' has no subtag; fall back to the tag itself
	        # instead of raising IndexError.
	        subtag = parts[1] if len(parts) > 1 else tag
	        # ``exact`` is None when the annotation has no quote selector.
	        elements.append(subtag + ': ' + (h_annotation.exact or ''))
	    return elements

	def make_abstract(h_annotations):
	    """Render the "abstract interpretation" section of the report as HTML.

	    Args:
	        h_annotations: iterable of annotation objects to summarise.

	    Returns:
	        An HTML fragment with four paragraphs: the quote of the first
	        'Subject' annotation, comma-joined 'AcquisitionMethod' and
	        'AnalysisMethod' elements, and a bullet list of 'Finding' elements.
	    """
	    subjects = select_annotations_with_tag_prefix(h_annotations, 'Subject')
	    # Without a Subject annotation (or without a quote on it) leave the field
	    # empty rather than raising IndexError or printing the word "None".
	    subject_groups = (subjects[0].exact or '') if subjects else ''
	    html = """
	<p><b>Subject Groups:</b> %s</p>
	<p><b>Acquisition Methods:</b> %s</p>
	<p><b>Analysis Methods:</b> %s</p>
	<p><b>Findings:</b> %s</p>""" % (
	        subject_groups,
	        ', '.join(make_interpretation_element(h_annotations, 'AcquisitionMethod')),
	        ', '.join(make_interpretation_element(h_annotations, 'AnalysisMethod')),
	        '<ul><li>' + '</li><li>'.join(make_interpretation_element(h_annotations, 'Finding')) + '</li></ul>' )
	    return html

	def subfindings_from_h_annotation(h_annotation):
	    """Turn the body text of one annotation into HTML table rows.

	    Every ``<...>`` span (greedy within a line) is removed together with the
	    newlines around it. The remaining text is split on blank lines into
	    chunks; each chunk becomes one ``<tr>`` whose cells are the chunk's lines
	    with every ``word: `` label stripped.

	    Args:
	        h_annotation: object whose ``text`` attribute holds the body.

	    Returns:
	        A list of ``'<tr>...</tr>'`` strings; empty when there is no text.
	    """
	    # Raw strings: '\w' and '\s' are not valid escapes in a plain literal.
	    # The '*' quantifiers let markup be stripped even when it is not
	    # surrounded by newlines (e.g. at the very start of the text).
	    body = re.sub(r'\n*<.+>\n*', '', h_annotation.text or '')
	    table_rows = []
	    for chunk in body.split('\n\n'):
	        # Leading/trailing blank lines would otherwise yield empty rows.
	        if not chunk.strip():
	            continue
	        cells = [re.sub(r'\w+:\s+', '', line) for line in chunk.split('\n')]
	        table_rows.append('<tr><td>' + '</td><td>'.join(cells) + '</td></tr>')
	    return table_rows

	def make_table(h_annotations):
	    """Render the 'Finding' annotations as one HTML table.

	    Annotations are selected with select_annotations_with_tag_prefix and
	    each is expanded into rows by subfindings_from_h_annotation.

	    Returns:
	        A ``<table>`` string: a fixed four-column header row followed by the
	        collected rows joined with newlines.
	    """
	    rows = []
	    raw_findings = select_annotations_with_tag_prefix(h_annotations, 'Finding')
	    for raw_finding in raw_findings:
	        rows += subfindings_from_h_annotation(raw_finding)
	    html = '<table>'
	    headers = ['Observation','AnalysisMethod','Location','SubjectGroups']
	    html += '<tr><th>' + '</th><th>'.join(headers) + '</th></tr>'
	    html += '\n'.join(rows)
	    html += '</table>'
	    return html

	# Assemble the complete report page: the canonical URL fills the title and
	# heading, followed by the abstract and the findings table.
	html = """<html>
	<head>
	<title>Monthly Morphology Report for %s</title>
	<meta charset="utf-8" />
	<style>
	body { font-family: verdana; margin: .5in; }
	table, td, th { border-collapse: collapse; border: 1px solid black }
	td { padding: 6px }
	th { padding: 6px; background-color: lightgrey}
	</style>
	</head>
	<body>
	<h1>Monthly Morphology Report for %s</h1>
	<h2>Abstract Interpretation</h2>
	%s
	<h2>Tabular Representation</h2>
	%s
	</body>
	</html> """ % ( canonical_url, canonical_url, make_abstract(h_annotations), make_table(h_annotations) )

	# The page is encoded to UTF-8 bytes, so the file is opened in binary mode;
	# `with` closes it even when the write fails.
	with open('mmr.html', 'wb') as f:
	    f.write(html.encode('utf-8'))