Skip to content

Instantly share code, notes, and snippets.

@jonathansick
Created June 6, 2012 23:17
Show Gist options
  • Save jonathansick/2885445 to your computer and use it in GitHub Desktop.
Save jonathansick/2885445 to your computer and use it in GitHub Desktop.
Download MILES spectra and associate metadata.
#!/usr/bin/env python
# encoding: utf-8
"""
Download MILES spectra and associate metadata.
2012-05-25 - Created by Jonathan Sick
"""
import os
from StringIO import StringIO
from bs4 import BeautifulSoup
import requests
import pyfits
def main():
    """Parse the local ``catalog.html`` page, list its stars and fetch spectra.

    The spectra are written into the ``spectra`` directory.
    """
    master = MILESMaster("catalog.html", "spectra")
    master.parse_html()
    master.printout()
    master.download()
class MILESMaster(object):
    """Download MILES spectra and copy their catalog metadata into the headers.

    Typical use is ``parse_html()`` followed by ``download()``.

    Attributes
    ----------
    htmlPath : str
        Path of the locally saved catalog HTML page.
    downloadDir : str
        Directory that receives the FITS files; created if it is missing.
    catalog : list of dict
        One dict per table row, filled in by ``parse_html``.
    """

    # Names given to the cells that follow the link cell, in column order.
    _COLUMN_NAMES = ("RA", "Dec", "EBV", "SpT", "Teff", "logg", "FeH", "V",
                     "libs", "CaTID")

    def __init__(self, htmlPath, downloadDir):
        super(MILESMaster, self).__init__()
        self.htmlPath = htmlPath
        self.downloadDir = downloadDir
        if not os.path.exists(self.downloadDir):
            os.makedirs(self.downloadDir)
        self.catalog = []

    def parse_html(self):
        """Parse the MILES html table and append one entry per row to
        ``self.catalog``.

        Reads the first ``<table>`` with CSS class ``catalog`` and walks the
        ``<tr>`` rows of its first ``<tbody>``. Raises ``IndexError`` when the
        table or its body is missing.
        """
        # Close the file as soon as the document has been parsed.
        with open(self.htmlPath) as htmlFile:
            soup = BeautifulSoup(htmlFile, "lxml")
        tbl = soup.find_all("table", "catalog")[0]
        tblBody = tbl('tbody')[0]
        for row in tblBody("tr"):
            self.catalog.append(self._parse_row(row))

    def _parse_row(self, row):
        """Parse one row of the MILES html table into a dict.

        The first cell supplies ``url``, ``name`` and ``catnum``; the next ten
        cells are stored under the names in ``_COLUMN_NAMES``. Raises
        ``IndexError`` when the row has fewer cells than expected.
        """
        cols = row('td')
        link, starName, catNum = self._parse_link(cols[0])
        star = {'url': link, 'name': starName, 'catnum': catNum}
        for i, name in enumerate(self._COLUMN_NAMES):
            # Offset by one because the link cell occupies column 0.
            star[name] = cols[i + 1].string
        return star

    def _parse_link(self, c):
        """Parse the link column.

        Returns a ``(url, name, catID)`` tuple: the text of the first anchor
        is the star name, the ``href`` of the second anchor is the spectrum
        URL, and the catalog ID is the URL's file name without its extension
        and with any leading/trailing ``s`` characters stripped.
        """
        links = c('a')
        name = links[0].string
        url = links[1].get('href')
        catID = os.path.splitext(os.path.basename(url))[0].strip('s')
        return url, name, catID

    def printout(self):
        """Print the name and URL of every catalog entry, one per line."""
        for star in self.catalog:
            print("%s %s" % (star['name'], star['url']))

    def download(self):
        """Download each FITS spectrum and save it with its metadata.

        Every catalog value except ``url`` is written, as a string, into the
        primary header. The file is saved as ``<name>.fits`` in
        ``downloadDir``, overwriting any existing file. Raises
        ``requests.HTTPError`` when the server answers with an error status.
        """
        for star in self.catalog:
            url = star['url']
            r = requests.get(url)
            print("downloading %s %s" % (url, r.status_code))
            # Fail here with a clear HTTP error rather than letting the FITS
            # reader choke on an error page.
            r.raise_for_status()
            fits = pyfits.open(StringIO(r.content))
            try:
                for key, val in star.items():
                    if key == 'url':
                        continue
                    fits[0].header.update(str(key), str(val))
                outPath = os.path.join(self.downloadDir,
                                       star['name'] + ".fits")
                fits.writeto(outPath, clobber=True)
            finally:
                fits.close()
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment