Skip to content

Instantly share code, notes, and snippets.

@dangunter
Last active December 15, 2015 03:28
Show Gist options
  • Save dangunter/5194068 to your computer and use it in GitHub Desktop.
Save dangunter/5194068 to your computer and use it in GitHub Desktop.
Generate run history from materials project database
## Generate graph of run history
Generating the graph requires two scripts:
1) A Python script to extract the completion date of each compound from the
database. Replace DB, USER and PASSWORD below with appropriate values:
./mg_run_history.py DB USER PASSWORD --port 27018 > runs.csv
2) An R program to draw the graph:
./mg_run_history.r runs.csv runs.pdf
The resulting output is in runs.pdf.
It is an area chart of the cumulative number of runs over time, with a loess smooth line added.
#!/usr/bin/env python
"""
Fetch and write run history
"""
__author__ = 'Dan Gunter <dkgunter@lbl.gov>'
__date__ = '3/18/13'
import argparse
from datetime import datetime
import sys
import pymongo
import time
def write_dates(coll, ofile):
    """Write the completion time of every record in `coll` as one-column CSV.

    The output is a ``completed_at`` header line followed by one integer per
    record: the completion time in whole seconds since the Unix epoch.
    Records without a ``completed_at`` field are skipped.

    :param coll: Collection-like object whose ``find(query, fields)`` method
                 yields dict-like records (in this script, a pymongo
                 collection).
    :param ofile: Writable text stream that receives the CSV output.
    :raises ValueError: If a ``completed_at`` value does not look like
                        ``YYYY-MM-DD HH:MM:SS``, optionally followed by
                        ``.`` and fractional seconds.
    """
    epoch = datetime(1970, 1, 1)
    ofile.write('completed_at\n')
    # Only the completed_at field is requested from the database.
    for rec in coll.find({}, {'completed_at': 1}):
        if 'completed_at' not in rec:
            continue
        # Drop fractional seconds, if present; strptime() below does not
        # accept them with this format.
        stamp = rec['completed_at'].split('.', 1)[0]
        date = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
        # The parsed value is naive and is interpreted as UTC (the original
        # code asked for utctimetuple()). Subtracting the epoch avoids
        # time.mktime(), which would treat the fields as *local* time and
        # make the output depend on the timezone of the machine running
        # this script.
        sec = int((date - epoch).total_seconds())
        ofile.write('{:d}\n'.format(sec))
def main(cmdline=sys.argv[1:]):
    """Parse the command line, connect to MongoDB and dump the run dates.

    The completion dates of the ``tasks_dbv2`` collection are written to
    standard output by :func:`write_dates`.

    :param cmdline: List of command-line arguments (without the program
                    name).
    :return: 0 on success.
    :raises SystemExit: With status 2 (raised by argparse) if the arguments
                        are invalid or if connecting to / authenticating
                        with the database fails.
    """
    parser = argparse.ArgumentParser(
        description='Fetch and write run history')
    parser.add_argument('--host', dest='host', default='localhost',
                        help='MongoDB server host (default: %(default)s)')
    parser.add_argument('--port', dest='port', type=int, default=27017,
                        help='MongoDB server port (default: %(default)s)')
    parser.add_argument('database', help='database name')
    parser.add_argument('user', help='database user')
    parser.add_argument('password', help='database password')
    args = parser.parse_args(cmdline)
    try:
        client = pymongo.MongoClient(host=args.host, port=args.port)
        db = client[args.database]
        # NOTE(review): Database.authenticate() is not available in
        # PyMongo 4+; confirm the PyMongo version this script is run with.
        db.authenticate(args.user, args.password)
    except Exception as err:
        # parser.error() prints the usage and this message to stderr, then
        # exits with status 2. It never returns, so nothing after it in
        # this handler would run.
        parser.error('Error connecting: {}'.format(err))
    write_dates(db.tasks_dbv2, sys.stdout)
    return 0


if __name__ == '__main__':
    sys.exit(main())
#! /usr/bin/env Rscript
# Draw the run history as a cumulative area chart with a smoothed line.
#
# usage: mg_run_history.r DATAFILE PLOTFILE
#   DATAFILE  CSV file with a 'completed_at' column holding times in seconds
#             since 1970-01-01
#   PLOTFILE  PDF file to write the chart to

args <- commandArgs(TRUE)
if (length(args) != 2) {
    cat("usage: mg_run_history.r DATAFILE PLOTFILE\n")
    q(status=1)
}

# library() stops with a clear error when ggplot2 is not installed;
# require() would only return FALSE and let the script fail later on.
library(ggplot2)

infile <- args[1]
outfile <- args[2]

cat(sprintf("reading data from file %s\n", infile))
d <- read.csv(infile)
if (nrow(d) == 0) {
    # Nothing to plot; min()/max() below would be undefined.
    cat(sprintf("no data rows in file %s\n", infile))
    q(status=1)
}

# Count runs per day: truncate each timestamp to the start of its day
# (in seconds since the epoch) and sum one unit per run.
day.sec <- 60*60*24
d.o <- d[order(d$completed_at),]
d.o$n <- 1
d.o$daysec <- floor(d.o$completed_at / day.sec) * day.sec
d.o.day <- aggregate(d.o$n, by=list(d.o$daysec), sum)

# Running total of runs, and the day as a date-time for the x axis.
d.o.day$tot <- cumsum(d.o.day$x)
d.o.day$date <- as.POSIXct(d.o.day$Group.1, origin='1970-01-01')

xlims <- with(d.o.day, c(min(date), max(date)))
# Keep the 0..60000 axis for small data sets, but grow it when the total
# exceeds 60000 so that points outside the limits are not dropped.
ymax <- max(60000, max(d.o.day$tot))

p <- ggplot(d.o.day, aes(x=date, y=tot)) +
    geom_area(fill='grey') +
    geom_smooth(stat="smooth", span=0.5) +
    ylab("Number of compounds calculated") +
    xlab("Date") +
    scale_x_datetime(limits=xlims, expand=c(0,0)) +
    scale_y_continuous(limits=c(0,ymax), expand=c(0,0))

pdf(file=outfile)
print(p)
dev.off()
q(status=0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment