# walterst/count_zipped_fastq_reads.py

## count_zipped_fastq_reads.py
#!/usr/bin/env python

# Used to count fastq seqs in gzipped files, write counts and file name to log file
# Usage:  python count_zipped_fastq_reads.py fastq_folder log_file
# where fastq_folder has all of the fastq files in it (doesn't search subdirectories)

from sys import argv
from glob import glob

from cogent.parse.fastq import MinimalFastqParser
from qiime.util import gzip_open

# Presumably the field positions within a parsed FASTQ record.
# NOTE(review): nothing in the visible script reads these three names; they
# are kept in case other code imports them -- confirm before removing.
header_index = 0
sequence_index = 1
quality_index = 2


def count_fastq_reads(fastq_fp):
    """Return the number of FASTQ records found in the file at fastq_fp.

    Paths ending in '.gz' are opened with gzip_open (imported from
    qiime.util); any other path is opened with the built-in open() in "U"
    mode.  The records are produced by MinimalFastqParser with strict=False
    and are only counted, never inspected.

    The file handle is closed before returning, including when the parser
    raises part-way through the file.
    """
    if fastq_fp.endswith('.gz'):
        query_reads = gzip_open(fastq_fp)
    else:
        query_reads = open(fastq_fp, "U")

    try:
        return sum(1 for _ in MinimalFastqParser(query_reads, strict=False))
    finally:
        query_reads.close()


def main(args):
    """Count reads in every '*.gz' file of a folder and log the counts.

    args follows the sys.argv layout: args[1] is the folder holding the
    gzipped fastq files (subdirectories are not searched) and args[2] is the
    path of the log file to write.  One line is written per input file, in
    the form "<file path>\\t<read count>\\n".

    Raises SystemExit with a usage message when the number of arguments is
    wrong, instead of failing later with an IndexError.
    """
    if len(args) != 3:
        raise SystemExit(
            "Usage: python count_zipped_fastq_reads.py fastq_folder log_file")

    # glob() returns matches in arbitrary order; sort so that the log is
    # reproducible from run to run.
    fastq_files = sorted(glob(args[1] + "/*.gz"))

    # The with-block guarantees the log is flushed and closed even if one of
    # the input files cannot be read.
    with open(args[2], "w") as output_log:
        for curr_file in fastq_files:
            curr_counts = count_fastq_reads(curr_file)
            output_log.write("%s\t%d\n" % (curr_file, curr_counts))


if __name__ == "__main__":
    main(argv)
	#!/usr/bin/env python

	# Used to count fastq seqs in gzipped files, write counts and file name to log file
	# Usage: python count_zipped_fastq_reads.py fastq_folder log_file
	# where fastq_folder has all of the fastq files in it (doesn't search subdirectories)

	from sys import argv
	from glob import glob

	from cogent.parse.fastq import MinimalFastqParser
	from qiime.util import gzip_open

	# Presumably the field positions within a parsed FASTQ record.
	# NOTE(review): nothing in the visible script reads these three names --
	# confirm before removing.
	header_index = 0
	sequence_index = 1
	quality_index = 2


	# Only gzipped files directly inside the folder given as argv[1] are
	# matched; subdirectories are not searched.
	fastq_files = glob(argv[1] + "/*.gz")

	output_log = open(argv[2], "w")


	# The nesting below was flattened in this copy of the script; it is
	# restored here so that each statement sits inside the loop/branch it
	# belongs to.  The try/finally pairs make sure both the log and the
	# current input file are closed even if parsing fails.
	try:
		for curr_file in fastq_files:

			if curr_file.endswith('.gz'):
				query_reads = gzip_open(curr_file)
			else:
				query_reads = open(curr_file, "U")

			try:
				# The parsed records are only counted, never inspected.
				curr_counts = 0
				for read_data in MinimalFastqParser(query_reads, strict=False):
					curr_counts += 1
			finally:
				query_reads.close()

			# One log line per input file: "<file path>\t<read count>\n".
			output_log.write("%s\t%d\n" % (curr_file, curr_counts))
	finally:
		output_log.close()