Skip to content

Instantly share code, notes, and snippets.

View seandavi's full-sized avatar

Sean Davis seandavi

View GitHub Profile
@seandavi
seandavi / neo4jtest.py
Created September 26, 2011 15:02
Testing the Neo4j database from Python by loading the Gene Ontology database
#!/usr/bin/env python
# install neo4j-embedded first
from neo4j import GraphDatabase
import neo4j
import csv
import shutil
import logging
import argparse
parser = argparse.ArgumentParser()
# set up matrix
x = matrix(rnorm(165*165),ncol=165)
# calculate distance between all pairs and
# convert to matrix
df1 = as.matrix(dist(x))
# show dimensions
dim(df1)
# set the names of the rows and columns (convenience only)
colnames(df1) <- paste("sec",1:165,sep="_")
rownames(df1) <- paste("sec",1:165,sep="_")
import csv
import os
os.environ['NEO4J_PYTHON_JVMARGS'] = '-Xmx2g'
from neo4j import GraphDatabase,INCOMING,OUTGOING
db = GraphDatabase('testing2')
idx = None
try:
#!/bin/bash
htlatex $1 "xhtml,ooffice" "ooffice/! -cmozhtf" "-coo -cvalidate"
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library("optparse"))
# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
# make_option(c("-h", "--help"), action="store_true", default=FALSE,
# help="Show this help message and exit")
option_list <- list(
make_option(c("-m", "--metrics"), default=".dupmetrics",
help="The file pattern to match (in dir) for finding hsmetrics files [default %default]",
metavar="metrics"),
@seandavi
seandavi / hsMetrics2pdf.R
Created March 5, 2012 18:01
From the command line, make a PDF file of hsMetrics from Picard CalculateHsMetrics output
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library("optparse"))
# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
# make_option(c("-h", "--help"), action="store_true", default=FALSE,
# help="Show this help message and exit")
option_list <- list(
make_option(c("-m", "--hsmetrics"), default=".hsmetrics",
help="The file pattern to match (in dir) for finding hsmetrics files [default %default]",
metavar="hsmetric"),
@seandavi
seandavi / bamsplit.py
Created March 11, 2012 01:58
Split a BAM file by Read Group ID
#!/usr/bin/env python
# split a bam file by read group ID
# Sean Davis <seandavi@gmail.com>
# March 10, 2012
#
import pysam
import argparse
import logging
logging.basicConfig(level=logging.INFO)
@seandavi
seandavi / qstat.xsl
Created May 15, 2012 19:17
Convert Sun Grid Engine qstat output to HTML
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:param name="now"/>
<xsl:template match="/">
<html>
<head>
<title>SGE Qstat Results</title>
<link href="/css/main.css" rel="stylesheet" type="text/css" />
<meta http-equiv="refresh" content="60" />
@seandavi
seandavi / basic-exome-outline.sh
Created June 22, 2012 00:43 — forked from arq5x/basic-exome-outline.sh
Exome pipeline for Charles
##########################################
# Step 0. setup a list of sample names.
# Assume that each of your gzipped
# FASTQ files is named as follows:
# sample1.1.fq.gz
# sample1.2.fq.gz
# sample2.1.fq.gz
# sample2.2.fq.gz
# ...
# sampleN.1.fq.gz
@seandavi
seandavi / demultiplexer.py
Created June 29, 2012 03:59
Demultiplex FASTQ files from Illumina based on a supplied index
#!/usr/bin/env python
# quick script for demultiplexing reads from illumina fastq files
# Sean Davis <seandavi@gmail.com>
# 2012-06-29
#
import argparse
import Bio.SeqIO as SeqIO
from itertools import izip
import gzip
from string import maketrans