Last active
August 29, 2015 13:59
-
-
Save lpsinger/10489886 to your computer and use it in GitHub Desktop.
Scrape AAS Machine Readable Table creator form
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Convert an Astropy table (an instance of astropy.table.Table) to a | |
Machine Readable Table (MRT) by scraping ApJ's machine readable table creator | |
web site (http://authortools.aas.org/MRT/upload.html). | |
See also: | |
https://aas.org/authors/online-only-materials-guidelines | |
http://vizier.u-strasbg.fr/doc/catstd.htx | |
""" | |
__author__ = "Leo Singer <leo.singer@ligo.org>" | |
# Standard library imports | |
import os | |
import re | |
import StringIO | |
import textwrap | |
# Module imports | |
from bs4 import BeautifulSoup | |
import requests | |
def convert_to_mrt(data, units, label, explain, timeout=None):
    """Convert tab-separated table data to a Machine Readable Table (MRT).

    Submits the table to the AAS MRT creator form, verifies that the service
    reports a successful conversion, and downloads the generated file.

    Parameters:
        data: table body as text; submitted with 'formattype' set to 'tabs',
            so fields are expected to be tab-separated.
        units: per-column units as a single string (callers in this file
            join one entry per column with CRLF).
        label: per-column labels, in the same layout as `units`.
        explain: per-column explanations, in the same layout as `units`.
        timeout: optional timeout forwarded to both HTTP requests; the
            default of None waits indefinitely, as before.

    Returns:
        The text of the generated MRT file.

    Raises:
        RuntimeError: if the service does not report success (the message is
            the text of the response page), if the response has no link or a
            link of an unexpected form, or if the generated file cannot be
            downloaded.
    """
    upload_url = 'http://authortools.aas.org/cgi-bin/convert.cgi'
    payload = {
        'data': data,
        'units': units,
        'label': label,
        'explain': explain,
        'formattype': 'tabs',
        'button': 'Create a MR table'}

    # Submit post request to AAS url
    response = requests.post(upload_url, data=payload, timeout=timeout)

    # Parse response. Name the parser explicitly so that the result does not
    # depend on which optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')

    # html.parser does not invent a <body> for fragments, so fall back to the
    # whole document instead of failing on a missing tag.
    root = soup.body if soup.body is not None else soup
    text = root.get_text()

    # Raise exception and show response if response does not report that
    # conversion succeeded
    if 'Check and conversion complete!' not in text:
        raise RuntimeError(text)

    # Parse href of link to our document
    link = root.find('a')
    href = link.get('href') if link is not None else None
    if href is None:
        raise RuntimeError(
            'Response does not contain a link to the converted table')

    # Only follow links that point at a generated data file on the AAS host;
    # the dots are escaped so that they match literally.
    if not re.match(
            r'^http://authortools\.aas\.org/MRT/datafile\d+\.txt$', href):
        raise RuntimeError(
            'Returned link ({0}) does not match expected format'.format(href))

    # Retrieve table
    response = requests.get(href, timeout=timeout)
    if not response.ok:
        # Do not hand back an HTTP error page as if it were the table.
        raise RuntimeError(
            'Could not retrieve table from {0} (HTTP status {1})'.format(
                href, response.status_code))

    # Done!
    return response.text
# Placeholder notes section that is searched for in the converted table text
# when the caller supplies notes of their own.
notes_present_placeholder_text = (
    "Note (1): ***ADD LENGTHY NOTES HERE***\n"
    "Note (2): ***OR REMOVE THIS SECTION***")

# The same placeholder followed by a separator line; this longer span is
# removed from the table text when the caller supplies no notes.
notes_absent_placeholder_text = "".join([
    notes_present_placeholder_text,
    "\n",
    "--------------------------------------------------------------------------------",
    "\n"])
def _format_mrt_notes(notes):
    """Render notes as numbered 'Note (N): ...' paragraphs wrapped at 80 columns."""
    paragraphs = []
    # Number from 1, like the 'Note (1)' / 'Note (2)' placeholders that this
    # text replaces.
    for number, note in enumerate(notes, 1):
        prefix = 'Note ({0:d}): '.format(number)
        wrapper = textwrap.TextWrapper(
            width=80,
            initial_indent=prefix,
            # Align continuation lines under the note text whatever the
            # number of digits in the note number.
            subsequent_indent=' ' * len(prefix))
        paragraphs.append(wrapper.fill(note))
    return "\n".join(paragraphs)


def astropy_table_to_mrt(filename, table, notes=None):
    """Write an Astropy table to `filename` as a Machine Readable Table.

    The table is serialized as tab-separated values and passed to
    convert_to_mrt together with each column's unit, name and description.
    In the converted text, 'datafile.txt' is replaced by the base name of
    `filename`, and the placeholder notes section is either filled in with
    `notes` or removed.

    Parameters:
        filename: path of the file to write.
        table: the table to convert; each column's `unit` and `description`
            are read, and a column without a unit is reported as '---'.
        notes: optional sequence of note strings; when empty or None the
            placeholder notes section is removed.

    Returns:
        The MRT text that was written to `filename`.
    """
    # Write table into memory as tab-separated values
    strio = StringIO.StringIO()
    table.write(strio, format='ascii.no_header', delimiter="\t")
    data = strio.getvalue()

    # Post to AAS table preparation form and scrape output
    columns = list(table.columns.values())
    units = "\r\n".join('---' if col.unit is None
                        else col.unit.to_string('cds')
                        for col in columns)
    label = "\r\n".join(table.colnames)
    explain = "\r\n".join(col.description for col in columns)
    text = convert_to_mrt(data, units, label, explain)

    # Make the converted text refer to the file that is actually written.
    text = text.replace('datafile.txt', os.path.basename(filename))

    if notes:
        text = text.replace(
            notes_present_placeholder_text, _format_mrt_notes(notes))
    else:
        text = text.replace(notes_absent_placeholder_text, '')

    with open(filename, 'w') as out:
        out.write(text)
    return text
if __name__ == '__main__':
    import astropy.table
    import astropy.units as u

    # First exercise the low-level converter on a one-row, two-column table.
    raw_mrt = convert_to_mrt(
        "\t".join(['108.3', '-33.2']),
        "\r\n".join(['deg', 'deg']),
        "\r\n".join(['RAdeg', 'DEdeg']),
        "\r\n".join(['Right ascension (J2000)', 'Declination (J2000)']))
    print(raw_mrt)

    # Then build a small Astropy table and run the full conversion, which
    # also writes the result to 'test.dat'.
    demo_table = astropy.table.Table()
    column_specs = (
        ('RAdeg', 'Right ascension (J2000)'),
        ('DEdeg', 'Declination (J2000)'),
    )
    for column_name, column_description in column_specs:
        demo_table.add_column(astropy.table.Column(
            name=column_name, unit=u.deg, description=column_description))

    for coordinates in ([123.4, -33.5], [355.9, +89.6], [10.3, +1.3]):
        demo_table.add_row(coordinates)

    demo_notes = ['This is a fake table.', 'It is machine readable.', 'Yay!']
    print(astropy_table_to_mrt('test.dat', demo_table, notes=demo_notes))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment