Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
This script extracts the total map mass, read length, and the estimated insert size mean and standard deviation from Cufflinks log output.
#!/bin/env python3
import os
import re
import sys
# Patterns for the values reported on the '> '-prefixed lines of a log, in
# output column order: map mass, read length, insert-size mean, insert-size
# standard deviation.  Raw strings keep the backslash escapes intact without
# relying on Python passing unknown string escapes through.
_FIELD_PATTERNS = (
    re.compile(r'Total Map Mass: ([\d\.]+)'),
    re.compile(r'Read Type: ([\d]+)bp'),
    re.compile(r'Estimated Mean: ([\d\.]+)'),
    re.compile(r'Estimated Std Dev: ([\d\.]+)'),
)


def extract_map_properties(lines):
    """Pull the map properties out of one Cufflinks log.

    Only the first contiguous run of lines starting with ``'> '`` is
    inspected; scanning stops at the first line after that run which does
    not start with ``'> '``.

    Args:
        lines: An iterable of text lines (for example an open file).

    Returns:
        A 4-tuple of strings ``(map_mass, read_length, mean, std)``.  A
        field that is not found is returned as the empty string.
    """
    values = [''] * len(_FIELD_PATTERNS)
    in_block = False
    for line in lines:
        if line.startswith('> '):
            in_block = True
            for index, pattern in enumerate(_FIELD_PATTERNS):
                match = pattern.search(line)
                if match:
                    values[index] = match.group(1)
        elif in_block:
            # The '> ' block has ended; nothing further is of interest.
            break
    return tuple(values)


def main(argv=None):
    """Print a tab-separated table of map properties for each log file.

    Args:
        argv: Argument list in ``sys.argv`` form (program name first,
            followed by log file paths).  Defaults to ``sys.argv``.

    Raises:
        OSError: If one of the given log files cannot be opened.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print('This script extracts insert size information from Cufflinks logs.', file=sys.stderr)
        print('Usage: getinsertsize [cufflinks log file]', file=sys.stderr)
        print('Note: you may specify different log files using filename wildcards.', file=sys.stderr)
        return

    print('File\tMapMass\tReadLength\tMean\tStd')
    for path in sorted(argv[1:]):
        # No job id is parsed from the file name: it was never used, and
        # extracting it crashed on names without an 'e<digits>' component.
        with open(path) as handle:
            fields = extract_map_properties(handle)
        print('\t'.join((path,) + fields))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment