Created
June 6, 2012 23:17
-
-
Save jonathansick/2885445 to your computer and use it in GitHub Desktop.
Download MILES spectra and associate metadata.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
Download MILES spectra and associate catalog metadata with each FITS file.

2012-05-25 - Created by Jonathan Sick
""" | |
import os | |
from StringIO import StringIO | |
from bs4 import BeautifulSoup | |
import requests | |
import pyfits | |
def main():
    """Parse the saved MILES catalog page, list its stars, then fetch them.

    Reads ``catalog.html`` from the working directory and stores the
    downloaded files under ``spectra``.
    """
    downloader = MILESMaster("catalog.html", "spectra")
    downloader.parse_html()
    downloader.printout()
    downloader.download()
class MILESMaster(object):
    """Download MILES spectra and write catalog metadata into their headers.

    The catalog is read from a locally saved HTML page holding a table with
    the CSS class ``catalog``; every ``<tr>`` of its ``<tbody>`` becomes one
    dict in ``self.catalog``.

    Parameters
    ----------
    htmlPath : str
        Path of the saved catalog HTML page.
    downloadDir : str
        Directory that receives the FITS files; created when missing.
    """

    # Keys assigned to the table cells that follow the link cell, in the
    # order the cells appear within a row.
    _COLUMN_NAMES = ("RA", "Dec", "EBV", "SpT", "Teff", "logg", "FeH", "V",
                     "libs", "CaTID")

    def __init__(self, htmlPath, downloadDir):
        super(MILESMaster, self).__init__()
        self.htmlPath = htmlPath
        self.downloadDir = downloadDir
        if not os.path.exists(self.downloadDir):
            os.makedirs(self.downloadDir)
        # One dict per star; filled by parse_html().
        self.catalog = []

    def parse_html(self):
        """Parse the MILES html table, appending one dict per row.

        Raises
        ------
        IndexError
            If the page has no ``catalog`` table or the table has no
            ``<tbody>``.
        """
        # Parse inside a ``with`` block so the file handle is released as
        # soon as the document tree has been built.
        with open(self.htmlPath) as f:
            soup = BeautifulSoup(f, "lxml")
        tbl = soup.find_all("table", "catalog")[0]
        tblBody = tbl('tbody')[0]
        for row in tblBody("tr"):
            self.catalog.append(self._parse_row(row))

    def _parse_row(self, row):
        """Parse one row of the MILES html table.

        Parameters
        ----------
        row : callable
            Row element; ``row('td')`` must return its cells in order.

        Returns
        -------
        dict
            ``url``, ``name`` and ``catnum`` taken from the first cell, plus
            the ``.string`` of each following cell keyed by
            ``_COLUMN_NAMES``.
        """
        cols = row('td')
        link, starName, catNum = self._parse_link(cols[0])
        star = {'url': link, 'name': starName, 'catnum': catNum}
        # Cell 0 is the link cell, so the named columns start at index 1.
        for i, name in enumerate(self._COLUMN_NAMES, 1):
            star[name] = cols[i].string
        return star

    def _parse_link(self, c):
        """Parse the link column.

        Parameters
        ----------
        c : callable
            Cell element; ``c('a')`` must return its anchors. The first
            anchor supplies the star name, the second the spectrum URL.

        Returns
        -------
        tuple
            ``(url, name, catID)`` where ``catID`` is the URL's file name
            without extension and with every leading and trailing ``s``
            removed.
        """
        # Search once and reuse the result for both anchors.
        links = c('a')
        name = links[0].string
        url = links[1].get('href')
        catID = os.path.splitext(os.path.basename(url))[0].strip('s')
        return url, name, catID

    def printout(self):
        """Print the name and URL of every star in the catalog."""
        for star in self.catalog:
            print("%s %s" % (star['name'], star['url']))

    def download(self):
        """Download each FITS spectrum and save it with the star's metadata.

        Every catalog entry except ``url`` is written into the primary
        header, and the file is stored as ``<name>.fits`` in
        ``self.downloadDir``, replacing any existing file.

        Raises
        ------
        requests.HTTPError
            If the server answers with a 4xx/5xx status.
        """
        import sys

        for star in self.catalog:
            url = star['url']
            # Announce the URL before the request starts so a hanging or
            # failing transfer can be identified; the status follows on
            # the same line.
            sys.stdout.write("downloading %s " % url)
            sys.stdout.flush()
            r = requests.get(url)
            print(r.status_code)
            # Stop on an HTTP error instead of passing an error page to the
            # FITS reader, which would fail with an unrelated message.
            r.raise_for_status()
            fits = pyfits.open(StringIO(r.content))
            try:
                for key, val in star.items():
                    if key == 'url':
                        continue
                    fits[0].header.update(str(key), str(val))
                outPath = os.path.join(self.downloadDir,
                                       star['name'] + ".fits")
                fits.writeto(outPath, clobber=True)
            finally:
                # Release the HDU list even when the header update or the
                # write fails.
                fits.close()
# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment