Created
October 28, 2013 20:30
-
-
Save cfriedline/7204046 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "#Diginorm renamer\n##Because it expects files to have names like *1 and *2. The input is an interleaved fastq file produced by velvet shuffle_seqs" | |
}, | |
{ | |
"metadata": {}, | |
"input": "from IPython.parallel import Client", | |
"cell_type": "code", | |
"prompt_number": 1, | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "rc = Client()", | |
"cell_type": "code", | |
"prompt_number": 18, | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "dview = rc[:]\nlview = rc.load_balanced_view()\nprint len(dview)", | |
"cell_type": "code", | |
"prompt_number": 22, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "40\n" | |
} | |
], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "infile = \"/home/cfriedline/data7/assemblies/gypsy/shuffled_processed.fastq\"\noutfile = infile + \"_12.fastq\"", | |
"cell_type": "code", | |
"prompt_number": 2, | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "def run_diginorm_renamer(args):\n from Bio.SeqIO.QualityIO import FastqGeneralIterator\n infile, outfile = args\n with open(outfile, \"w\") as o:\n for i, (name, seq, qual) in enumerate(FastqGeneralIterator(open(infile))):\n add = 1\n if i % 2 != 0:\n add = 2\n o.write(\"@%s_%d\\n%s\\n+\\n%s\\n\" % (name.split()[0], add, seq, qual))\n if i > 0 and i % 100000 == 0:\n print \"at %d\" % i\ndview['run_diginorm_renamer'] = run_diginorm_renamer", | |
"cell_type": "code", | |
"prompt_number": 113, | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "res = lview.apply_async(run_diginorm_renamer, (infile, outfile))", | |
"cell_type": "code", | |
"prompt_number": 114, | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "res.r", | |
"cell_type": "code", | |
"prompt_number": 112, | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"input": "", | |
"cell_type": "code", | |
"outputs": [], | |
"language": "python", | |
"collapsed": false | |
} | |
], | |
"metadata": {} | |
} | |
], | |
"metadata": { | |
"name": "", | |
"gist_id": "7204046" | |
}, | |
"nbformat": 3 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment