Skip to content

Instantly share code, notes, and snippets.

@jvolkening
Created March 26, 2015 18:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jvolkening/c34e1258dc8ac905dba5 to your computer and use it in GitHub Desktop.
Save jvolkening/c34e1258dc8ac905dba5 to your computer and use it in GitHub Desktop.
cutadapt "succinct" patch
diff -rupN '--exclude=.git' cutadapt/cutadapt/report.py cutadapt_patched/cutadapt/report.py
--- cutadapt/cutadapt/report.py 2015-03-26 13:00:26.163894349 -0500
+++ cutadapt_patched/cutadapt/report.py 2015-03-26 12:58:56.944023106 -0500
@@ -252,3 +252,55 @@ def print_statistics(adapters_pair, pair
print(' Please see the detailed output above.')
sys.stdout = old_stdout
+
+def print_succinct(adapters_pair, paired, stats,
+        modifiers, modifiers2, writers, file=None):
+    """Print a single-line, tab-delimited summary of the run.
+
+    The fields are: status ("OK" or "WARN"), reads/pairs processed, total bp
+    processed, too short, too long, too many N, written; then per read file:
+    reads with adapters, bases quality-trimmed, bases written (the second
+    read file's three fields are appended only when `paired` is true).
+
+    Status is "WARN" when, for some adapter with at least 20 adjacent-base
+    observations, one of A/C/G/T accounts for more than 80% of them.
+    The line goes to `file`, or to sys.stdout when `file` is None.
+    """
+    too_short = too_long = too_many_n = written = 0
+    # Per-read-file pair; stays indexable even if no writer reports it.
+    written_bp = [0, 0]
+    for w in writers:
+        if isinstance(w, TooShortReadFilter):
+            too_short = w.too_short
+        elif isinstance(w, TooLongReadFilter):
+            too_long = w.too_long
+        elif isinstance(w, NContentTrimmer):
+            too_many_n = w.too_many_n
+        elif isinstance(w, (ProcessedReadWriter, Demultiplexer)):
+            written = w.written
+            written_bp = w.written_bp
+
+    with_adapters = [0, 0]
+    status = "OK"
+    for i in (0, 1):
+        for adapter in adapters_pair[i]:
+            with_adapters[i] += sum(adapter.lengths_front.values())
+            with_adapters[i] += sum(adapter.lengths_back.values())
+            # Judge each adapter on its own counts; below 20 observations
+            # (including none at all) there is nothing to divide or flag.
+            total = sum(adapter.adjacent_bases.values())
+            if total >= 20 and any(
+                    1.0 * adapter.adjacent_bases[base] / total > 0.8
+                    for base in ('A', 'C', 'G', 'T')):
+                status = "WARN"
+
+    qtrimmed_bp = [int(qtrimmed(m) or 0) for m in (modifiers, modifiers2)]
+    fields = [status, stats.n, sum(stats.total_bp), too_short, too_long,
+        too_many_n, written, with_adapters[0], qtrimmed_bp[0], written_bp[0]]
+    if paired:
+        fields.extend([with_adapters[1], qtrimmed_bp[1], written_bp[1]])
+
+    # Write directly instead of swapping sys.stdout, so an exception here
+    # cannot leave the process-wide stdout redirected.
+    out = sys.stdout if file is None else file
+    out.write("\t".join(map(str, fields)) + "\n")
diff -rupN '--exclude=.git' cutadapt/cutadapt/scripts/cutadapt.py cutadapt_patched/cutadapt/scripts/cutadapt.py
--- cutadapt/cutadapt/scripts/cutadapt.py 2015-03-26 13:00:26.163894349 -0500
+++ cutadapt_patched/cutadapt/scripts/cutadapt.py 2015-03-26 12:16:52.509715125 -0500
@@ -79,7 +79,7 @@ from cutadapt.modifiers import (LengthTa
NEndTrimmer)
from cutadapt.writers import (TooShortReadFilter, TooLongReadFilter,
ProcessedReadWriter, Demultiplexer, NContentTrimmer)
-from cutadapt.report import Statistics, print_statistics
+from cutadapt.report import Statistics, print_statistics, print_succinct
from cutadapt.compat import next
logger = logging.getLogger(__name__)
@@ -425,6 +425,19 @@ def get_option_parser():
group = OptionGroup(parser, "Options that influence what gets output to where")
group.add_option("--quiet", default=False, action='store_true',
help="Do not print a report at the end.")
+ group.add_option("--succinct", default=False, action='store_true',
+ help="Print a single-line, tab-delimited summary. The fields are: "
+ "status, "
+ "reads/pairs processed, "
+ "total bp processed, "
+ "reads/pairs too short, "
+ "reads/pairs too long, "
+ "reads/pairs with too many N, "
+ "reads/pairs written, "
+ "AND for each individual read file: "
+ "reads w/ adapters, "
+ "bases quality-trimmed, "
+ "bases written.")
group.add_option("-o", "--output", metavar="FILE",
help="Write modified reads to FILE. FASTQ or FASTA format is chosen "
"depending on input. The summary report is sent to standard output. "
@@ -745,11 +758,12 @@ def main(cmdlineargs=None, default_outfi
if options.output:
logger.root.handlers = []
logging.basicConfig(level=logging.INFO, format='%(message)s', stream=sys.stdout)
- logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version())
- logger.info("Command line parameters: %s", " ".join(cmdlineargs))
- logger.info("Trimming %s adapter(s) with at most %.1f%% errors in %s mode ...",
- len(adapters) + len(adapters2), options.error_rate * 100,
- { False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired])
+ if not options.succinct:
+ logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version())
+ logger.info("Command line parameters: %s", " ".join(cmdlineargs))
+ logger.info("Trimming %s adapter(s) with at most %.1f%% errors in %s mode ...",
+ len(adapters) + len(adapters2), options.error_rate * 100,
+ { False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired])
try:
reader = seqio.open(input_filename, file2=input_paired_filename,
@@ -851,7 +865,11 @@ def main(cmdlineargs=None, default_outfi
f.close()
elapsed_time = time.clock() - start_time
- if not options.quiet:
+ if options.succinct:
+ stat_file = sys.stderr if options.output is None else None
+ print_succinct((adapters, adapters2), paired, stats,
+ modifiers, modifiers2, writers, file=stat_file)
+ elif not options.quiet:
# send statistics to stderr if result was sent to stdout
stat_file = sys.stderr if options.output is None else None
print_statistics((adapters, adapters2), paired, elapsed_time, stats,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment