Created
March 26, 2015 18:06
-
-
Save jvolkening/c34e1258dc8ac905dba5 to your computer and use it in GitHub Desktop.
cutadapt "succinct" patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -rupN '--exclude=.git' cutadapt/cutadapt/report.py cutadapt_patched/cutadapt/report.py | |
--- cutadapt/cutadapt/report.py 2015-03-26 13:00:26.163894349 -0500 | |
+++ cutadapt_patched/cutadapt/report.py 2015-03-26 12:58:56.944023106 -0500 | |
@@ -252,3 +252,55 @@ def print_statistics(adapters_pair, pair | |
print(' Please see the detailed output above.') | |
sys.stdout = old_stdout | |
+ | |
+def print_succinct(adapters_pair, paired, stats,
+        modifiers, modifiers2, writers, file=None):
+    """Write a one-line, tab-separated run summary.
+
+    Fields: status, reads/pairs processed, total bp processed, too short,
+    too long, too many N, written; then, per read file, reads with
+    adapters, quality-trimmed bp and bp written (the second triple is
+    emitted only when ``paired`` is true).
+
+    Status is "WARN" if, for any adapter with at least 20 adjacent-base
+    counts, one of A/C/G/T makes up more than 80% of them; else "OK".
+
+    The line goes to ``file``, or to ``sys.stdout`` when ``file`` is None.
+    """
+    too_short = too_long = too_many_n = written = 0
+    # Indexed per read file below, so the fallback must be a pair as well.
+    written_bp = [0, 0]
+    for w in writers:
+        if isinstance(w, TooShortReadFilter):
+            too_short = w.too_short
+        elif isinstance(w, TooLongReadFilter):
+            too_long = w.too_long
+        elif isinstance(w, NContentTrimmer):
+            too_many_n = w.too_many_n
+        elif isinstance(w, (ProcessedReadWriter, Demultiplexer)):
+            written = w.written
+            written_bp = w.written_bp
+
+    with_adapters = [0, 0]
+    status = "OK"
+    for i in (0, 1):
+        for adapter in adapters_pair[i]:
+            with_adapters[i] += sum(adapter.lengths_front.values())
+            with_adapters[i] += sum(adapter.lengths_back.values())
+            # Judge each adapter on its own counts; requiring 20 or more
+            # observations also keeps the division away from zero.
+            total = sum(adapter.adjacent_bases.values())
+            if total >= 20 and any(
+                    1.0 * adapter.adjacent_bases[base] / total > 0.8
+                    for base in 'ACGT'):
+                status = "WARN"
+
+    quality_trimmed_bp = [int(qtrimmed(modifiers) or 0),
+                          int(qtrimmed(modifiers2) or 0)]
+    fields = [status, stats.n, sum(stats.total_bp), too_short, too_long,
+              too_many_n, written,
+              with_adapters[0], quality_trimmed_bp[0], written_bp[0]]
+    if paired:
+        fields.extend([with_adapters[1], quality_trimmed_bp[1], written_bp[1]])
+    out = sys.stdout if file is None else file
+    out.write("\t".join(map(str, fields)) + "\n")
diff -rupN '--exclude=.git' cutadapt/cutadapt/scripts/cutadapt.py cutadapt_patched/cutadapt/scripts/cutadapt.py | |
--- cutadapt/cutadapt/scripts/cutadapt.py 2015-03-26 13:00:26.163894349 -0500 | |
+++ cutadapt_patched/cutadapt/scripts/cutadapt.py 2015-03-26 12:16:52.509715125 -0500 | |
@@ -79,7 +79,7 @@ from cutadapt.modifiers import (LengthTa | |
NEndTrimmer) | |
from cutadapt.writers import (TooShortReadFilter, TooLongReadFilter, | |
ProcessedReadWriter, Demultiplexer, NContentTrimmer) | |
-from cutadapt.report import Statistics, print_statistics | |
+from cutadapt.report import Statistics, print_statistics, print_succinct | |
from cutadapt.compat import next | |
logger = logging.getLogger(__name__) | |
@@ -425,6 +425,19 @@ def get_option_parser(): | |
group = OptionGroup(parser, "Options that influence what gets output to where") | |
group.add_option("--quiet", default=False, action='store_true', | |
help="Do not print a report at the end.") | |
+ group.add_option("--succinct", default=False, action='store_true', | |
+ help="Print a single-line, tab-delimited summary. The fields are: " | |
+ "status, " | |
+ "reads/pairs processed, " | |
+ "total bp processed, " | |
+ "reads/pairs too short, " | |
+ "reads/pairs too long, " | |
+ "reads/pairs with too many N, " | |
+ "reads/pairs written, " | |
+ "AND for each individual read file: " | |
+ "reads w/ adapters, " | |
+ "bases quality-trimmed, " | |
+ "bases written.") | |
group.add_option("-o", "--output", metavar="FILE", | |
help="Write modified reads to FILE. FASTQ or FASTA format is chosen " | |
"depending on input. The summary report is sent to standard output. " | |
@@ -745,11 +758,12 @@ def main(cmdlineargs=None, default_outfi | |
if options.output: | |
logger.root.handlers = [] | |
logging.basicConfig(level=logging.INFO, format='%(message)s', stream=sys.stdout) | |
- logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version()) | |
- logger.info("Command line parameters: %s", " ".join(cmdlineargs)) | |
- logger.info("Trimming %s adapter(s) with at most %.1f%% errors in %s mode ...", | |
- len(adapters) + len(adapters2), options.error_rate * 100, | |
- { False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired]) | |
+ if not options.succinct: | |
+ logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version()) | |
+ logger.info("Command line parameters: %s", " ".join(cmdlineargs)) | |
+ logger.info("Trimming %s adapter(s) with at most %.1f%% errors in %s mode ...", | |
+ len(adapters) + len(adapters2), options.error_rate * 100, | |
+ { False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired]) | |
try: | |
reader = seqio.open(input_filename, file2=input_paired_filename, | |
@@ -851,7 +865,11 @@ def main(cmdlineargs=None, default_outfi | |
f.close() | |
elapsed_time = time.clock() - start_time | |
- if not options.quiet: | |
+ if options.succinct: | |
+ stat_file = sys.stderr if options.output is None else None | |
+ print_succinct((adapters, adapters2), paired, stats, | |
+ modifiers, modifiers2, writers, file=stat_file) | |
+ elif not options.quiet: | |
# send statistics to stderr if result was sent to stdout | |
stat_file = sys.stderr if options.output is None else None | |
print_statistics((adapters, adapters2), paired, elapsed_time, stats, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment