Skip to content

Instantly share code, notes, and snippets.

@ctb
Last active April 6, 2017 08:06
Show Gist options
  • Save ctb/beb9a7eebf1a1282562ff0272efb4dd6 to your computer and use it in GitHub Desktop.
Save ctb/beb9a7eebf1a1282562ff0272efb4dd6 to your computer and use it in GitHub Desktop.
benchmarking RAM allocation against file load time
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#! /usr/bin/env python
from __future__ import print_function, unicode_literals
import json
import os
import sys
import threading
import textwrap
import khmer
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, report_on_config, info,
add_threading_args, calculate_graphsize,
sanitize_help)
from khmer.kfile import check_file_writable
from khmer.kfile import check_input_files
from khmer.kfile import check_space_for_graph
from khmer.khmer_logger import (configure_logging, log_info, log_error,
log_warn)
import time
def get_parser():
    """Assemble the command-line parser for this benchmark script.

    Starts from the parser returned by ``build_counting_args``, adds the
    threading options, and then registers the script's own arguments: the
    positional input files plus the bigcount, summary-info, force and quiet
    flags.

    Returns the fully configured parser.
    """
    description = ("Build a k-mer countgraph from the given"
                   " sequences.")
    input_help = ("The names of one or more FAST[AQ] input "
                  "sequence files.")
    bigcount_help = ("The default behaviour is "
                     "to count past 255 using bigcount. This flag turns "
                     "bigcount off, limiting counts to 255.")
    summary_help = ("What format should the machine readable run "
                    "summary be in? (`json` or `tsv`, disabled by"
                    " default)")
    # str(...) is kept on the choices exactly as in the original; the file
    # enables unicode_literals, so bare literals would be unicode on Python 2.
    summary_formats = [str('json'), str('tsv')]

    arg_parser = build_counting_args(description,
                                     citations=['counting', 'SeqAn'])
    add_threading_args(arg_parser)

    arg_parser.add_argument(
        'input_sequence_filename',
        nargs='+',
        help=input_help,
    )
    arg_parser.add_argument(
        '-b', '--no-bigcount',
        dest='bigcount',
        default=True,
        action='store_false',
        help=bigcount_help,
    )
    arg_parser.add_argument(
        '--summary-info', '-s',
        type=str,
        default=None,
        metavar="FORMAT",
        choices=summary_formats,
        help=summary_help,
    )
    arg_parser.add_argument(
        '-f', '--force',
        default=False,
        action='store_true',
        help='Overwrite output file if it exists',
    )
    arg_parser.add_argument(
        '-q', '--quiet',
        dest='quiet',
        default=False,
        action='store_true',
    )
    return arg_parser
def main():
    """Time countgraph creation plus input loading and print the result.

    Parses the command line, creates a countgraph from the parsed
    arguments, feeds every input sequence file into it, and finally prints
    ``sum(countgraph.hashsizes())`` followed by the elapsed time in seconds
    as measured with ``time.time()``.
    """
    args = sanitize_help(get_parser()).parse_args()

    configure_logging(args.quiet)
    report_on_config(args)

    log_info('making countgraph')
    # The clock starts before the countgraph is created, so the reported
    # duration covers both creating it and consuming the input files.
    start = time.time()
    countgraph = khmer_args.create_countgraph(args)
    # bigcount is switched off unconditionally; args.bigcount is never
    # consulted here.
    # NOTE(review): presumably deliberate for this benchmark -- confirm.
    countgraph.set_use_bigcount(False)

    for filename in args.input_sequence_filename:
        log_info('consuming input {input}', input=filename)
        rparser = khmer.ReadParser(filename)
        countgraph.consume_seqfile_with_reads_parser(rparser)

    log_info('DONE.')
    end = time.time()

    print(sum(countgraph.hashsizes()), end - start)
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
# vim: set filetype=python tabstop=4 softtabstop=4 shiftwidth=4 expandtab:
# vim: set textwidth=79:
4999802 0.6113121509552002
9999936 0.6644558906555176
49999814 0.7513549327850342
99999894 0.9065320491790771
499999864 1.3282811641693115
999999738 1.7348880767822266
1999999730 2.948660135269165
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment