Eric Minikel ericminikel

## findbadbams.bash
# Record the size in bytes of every BAM and FASTQ so they can be compared in R.
# Stop if the working directory is unavailable; otherwise the size lists would
# be written into whatever directory the shell happens to be in.
cd /my/working/dir/ || exit 1
# Two-column list (size in bytes, path) of the BAMs in this directory.
# "--" keeps a file name that starts with "-" from being read as an option.
du -sb -- *.bam > bam.filesize
# Two-column list (size in bytes, path) of the FASTQs named in the first
# whitespace-separated column of fastq.12col. Each path is handed to du as a
# quoted argument instead of being pasted into generated command text, so
# shell metacharacters in a path are not interpreted. Blank lines are skipped;
# a bare "du -sb" would otherwise report the current directory.
while read -r fastq_path _ || [[ -n "$fastq_path" ]]; do
  [[ -n "$fastq_path" ]] || continue
  du -sb -- "$fastq_path"
done < fastq.12col > fastq.filesize
# switch to R
R
# Load the two size lists. Neither file has a header row, so read.table names
# the columns V1 (size in bytes) and V2 (path).
fastq <- read.table("fastq.filesize", header = FALSE)
bam <- read.table("bam.filesize", header = FALSE)

# Build an ID for each file from a fixed character window of its path:
# characters 1-21 for the BAMs, characters 71-91 for the FASTQs.
bam$name <- substr(bam$V2, 1, 21)
fastq$name <- substr(fastq$V2, 71, 91)

# Keep the BAM size under a descriptive column name.
bam$bamsize <- bam$V1

## aggregate-fastqc.sql

-- Recreate ea1.fastqc_summary empty: remove any existing table, then define
-- it again with three varchar columns.
DROP TABLE IF EXISTS ea1.fastqc_summary;
CREATE TABLE ea1.fastqc_summary (
    fileid VARCHAR,
    module VARCHAR,
    status VARCHAR
);
drop table if exists ea1.fastqc_details;
create table ea1.fastqc_details (
id serial primary key,

## aggregate-fastqc.py
import os
import sys

# Directory that is walked below and in which both aggregate files are created.
working_dir = '/data/HD/analysis/038ea/analysis/1/fastqc/'

# Open the two aggregate outputs for binary writing; an existing file of the
# same name is truncated. working_dir already ends in '/', so joining gives
# the same paths as plain concatenation.
fastqc_summary = open(os.path.join(working_dir, 'fastqc_summary.txt'), 'wb')
fastqc_details = open(os.path.join(working_dir, 'fastqc_details.txt'), 'wb')

for root, dirs, files in os.walk(working_dir): # walk a directory containing FastQC output for multiple samples
    for name in files:
	# Record the size in bytes of every BAM and FASTQ so they can be compared in R.
	# Stop if the working directory is unavailable; otherwise the size lists would
	# be written into whatever directory the shell happens to be in.
	cd /my/working/dir/ || exit 1
	# Two-column list (size in bytes, path) of the BAMs in this directory.
	# "--" keeps a file name that starts with "-" from being read as an option.
	du -sb -- *.bam > bam.filesize
	# Two-column list (size in bytes, path) of the FASTQs named in the first
	# whitespace-separated column of fastq.12col. Each path is handed to du as a
	# quoted argument instead of being pasted into generated command text, so
	# shell metacharacters in a path are not interpreted. Blank lines are skipped;
	# a bare "du -sb" would otherwise report the current directory.
	while read -r fastq_path _ || [[ -n "$fastq_path" ]]; do
		[[ -n "$fastq_path" ]] || continue
		du -sb -- "$fastq_path"
	done < fastq.12col > fastq.filesize
	# switch to R
	R
	# Load the two size lists. Neither file has a header row, so read.table names
	# the columns V1 (size in bytes) and V2 (path).
	fastq <- read.table("fastq.filesize", header = FALSE)
	bam <- read.table("bam.filesize", header = FALSE)

	# Build an ID for each file from a fixed character window of its path:
	# characters 1-21 for the BAMs, characters 71-91 for the FASTQs.
	bam$name <- substr(bam$V2, 1, 21)
	fastq$name <- substr(fastq$V2, 71, 91)

	# Keep the BAM size under a descriptive column name.
	bam$bamsize <- bam$V1

	-- Recreate ea1.fastqc_summary empty: remove any existing table, then define
	-- it again with three varchar columns.
	DROP TABLE IF EXISTS ea1.fastqc_summary;
	CREATE TABLE ea1.fastqc_summary (
		fileid VARCHAR,
		module VARCHAR,
		status VARCHAR
	);
	drop table if exists ea1.fastqc_details;
	create table ea1.fastqc_details (
	id serial primary key,
	import os
	import sys

	# Directory that is walked below and in which both aggregate files are created.
	working_dir = '/data/HD/analysis/038ea/analysis/1/fastqc/'

	# Open the two aggregate outputs for binary writing; an existing file of the
	# same name is truncated. working_dir already ends in '/', so joining gives
	# the same paths as plain concatenation.
	fastqc_summary = open(os.path.join(working_dir, 'fastqc_summary.txt'), 'wb')
	fastqc_details = open(os.path.join(working_dir, 'fastqc_details.txt'), 'wb')

	for root, dirs, files in os.walk(working_dir): # walk a directory containing FastQC output for multiple samples
	for name in files: