Skip to content

Instantly share code, notes, and snippets.

@andybee
Created January 1, 2016 15:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andybee/e38308c7cd4dc64ac8e0 to your computer and use it in GitHub Desktop.
Save andybee/e38308c7cd4dc64ac8e0 to your computer and use it in GitHub Desktop.
Script to parse VM Superhub status page HTML into CSV
#!/usr/bin/env python
from os import listdir
from os.path import isfile, join, splitext
import csv
from BeautifulSoup import BeautifulSoup
# Directory scanned for saved status pages.
PATH = '.'
# CSV file the collected rows are written to (relative to the working directory).
OUTPUT_FILENAME = 'result.csv'


def parse_cell(text):
    """Convert the text of one table cell to an integer.

    Surrounding whitespace is ignored and the placeholder 'N/A' counts as 0.
    Raises ValueError for any other text that is not an integer.
    """
    text = text.strip()
    if text == 'N/A':
        return 0
    return int(text)


def with_average(values):
    """Return a new list holding *values* followed by their integer mean.

    Floor division is used so the result is identical under Python 2 (where
    ``/`` on two ints already floors) and Python 3. An empty list gets 0
    appended instead of raising ZeroDivisionError.
    """
    average = sum(values) // len(values) if values else 0
    return list(values) + [average]


def timestamp_from_filename(filename):
    """Build the timestamp column from fixed character positions in *filename*.

    Characters 6-7, 4-5, 0-3 and 9-10 are emitted, in that order, as
    'AA/BB/CCCC DD:00:00'; character 8 is skipped.
    NOTE(review): presumably the name looks like 'YYYYMMDD?HH...' so the
    output reads as day/month/year hour - confirm against the real file names.
    """
    return '%s/%s/%s %s:00:00' % (
        filename[6:8], filename[4:6], filename[0:4], filename[9:11])


def read_last_row(soup):
    """Return the integer cell values of the last <tr> in the first <tbody>.

    The first <td> of that row is skipped (presumably a row label - confirm
    against the status page markup).
    """
    last_tr = soup.tbody.findAll('tr')[-1]
    return [parse_cell(td.text) for td in last_tr.findAll('td')[1:]]


def main(path=PATH, output_filename=OUTPUT_FILENAME):
    """Write one CSV row per '.html' file found directly inside *path*.

    Each row is: timestamp derived from the file name, the integer values of
    the page's last table row, then the integer mean of those values.
    """
    # listdir() returns names in arbitrary order; sort for a stable output.
    filenames = sorted(
        name for name in listdir(path)
        if isfile(join(path, name)) and splitext(name)[1] == '.html')

    with open(output_filename, 'w') as csvfile:
        csv_writer = csv.writer(csvfile)
        for filename in filenames:
            # Open relative to `path` (not the working directory) and let the
            # context manager close the handle even if parsing raises.
            with open(join(path, filename), 'r') as html_file:
                soup = BeautifulSoup(html_file)
            row = with_average(read_last_row(soup))
            csv_writer.writerow([timestamp_from_filename(filename)] + row)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment