Created
November 16, 2011 14:32
-
-
Save vals/1370183 to your computer and use it in GitHub Desktop.
Randomly pick out some of the line triples in a fastq file and write to a new fastq file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Randomly picks out some title / sequence / quality triples | |
from a given fastq file and writes them into a new fastq file.
This is to generate rudimentary test data which doesn't take too long to run.
Usage: make_random_test_fastq.py <fastq_file> <target file size in MB>
""" | |
import os | |
import sys | |
from random import random | |
from optparse import OptionParser | |
from Bio.SeqIO.QualityIO import FastqGeneralIterator | |
def main(fastq, target_size):
    """Write a random subset of the records in *fastq* to a new fastq file.

    Each title / sequence / quality record is kept independently with
    probability ``target_size / <input size in MB>``, so the output size is
    only approximately ``target_size``. If ``target_size`` is at least the
    input size, every record is kept.

    The output path is the part of *fastq* before the first ``"_fastq."``
    (or all of *fastq* if that marker is absent) followed by
    ``"_random_sample_fastq.txt"``. An existing file at that path is
    overwritten.

    Args:
        fastq: Path to the input fastq file.
        target_size: Desired output size in MB.
    """
    # File sizes are in terms of MB.
    big_file_size = os.path.getsize(fastq) / 1048576.0
    # A zero-byte input has no records to sample; avoid dividing by zero.
    probability = target_size / big_file_size if big_file_size else 0.0

    out_path = fastq.split("_fastq.")[0] + "_random_sample_fastq.txt"

    # Context managers close both handles even if parsing or writing fails.
    with open(fastq, "r") as in_handle, open(out_path, "w") as out_handle:
        for title, sequence, quality in FastqGeneralIterator(in_handle):
            if random() < probability:
                out_handle.write(
                    "@%s\n%s\n+\n%s\n" % (title, sequence, quality))
if __name__ == '__main__':
    # Command-line entry point. Expects exactly two positional arguments:
    # the input fastq path and the target output size in MB.
    parser = OptionParser()
    _, args = parser.parse_args()
    if len(args) != 2:
        # Wrong argument count: show the module usage text and exit with a
        # non-zero status so calling scripts can detect the failure.
        print(__doc__)
        sys.exit(1)
    fastq, target_size = args
    try:
        target_mb = float(target_size)
    except ValueError:
        # parser.error() prints the message to stderr and exits with status 2.
        parser.error(
            "target file size must be a number, got %r" % target_size)
    main(fastq, target_mb)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment