Skip to content

Instantly share code, notes, and snippets.

@bede
Created February 11, 2021 10:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bede/76dc0acb209ce8108bb7f16861de7582 to your computer and use it in GitHub Desktop.
Save bede/76dc0acb209ce8108bb7f16861de7582 to your computer and use it in GitHub Desktop.
Split Guppy sequencing summaries by barcode
def split_summary_by_barcode(summary_path, out_dir, run_name) -> None:
    """Split a sequencing summary into one tab-separated file per barcode.

    The summary at ``summary_path`` is read as a tab-separated table and
    grouped on its ``barcode_arrangement`` column.  Each group is written to
    ``<out_dir>/<run_name>_<barcode>.txt`` with the header row and without
    the DataFrame index.  ``out_dir`` is created if it does not exist.

    Args:
        summary_path: Path of the tab-separated sequencing summary to read.
        out_dir: Directory that receives the per-barcode files.
        run_name: Prefix used for every output file name.

    Raises:
        KeyError: If the summary has no ``barcode_arrangement`` column.

    Note:
        Rows whose ``barcode_arrangement`` is missing are not written to any
        output file, because pandas' ``groupby`` drops NA keys by default.
    """
    # Explicit per-column dtypes handed to ``read_csv``; repetitive text
    # columns are read as 'category' (presumably to keep memory use down on
    # large summaries -- confirm with the author).
    dtypes = {
        'filename_fastq': 'object',
        'filename_fast5': 'object',
        'read_id': 'object',
        'run_id': 'category',
        'channel': 'int64',
        'mux': 'int64',
        'start_time': 'float64',
        'duration': 'float64',
        'num_events': 'int64',
        'passes_filtering': 'bool',
        'template_start': 'float64',
        'num_events_template': 'int64',
        'template_duration': 'float64',
        'sequence_length_template': 'int64',
        'mean_qscore_template': 'float64',
        'strand_score_template': 'float64',
        'median_template': 'float64',
        'mad_template': 'float64',
        'pore_type': 'category',
        'experiment_id': 'category',
        'sample_id': 'category',
        'end_reason': 'category',
        'alias': 'category',
        'type': 'category',
        'barcode_arrangement': 'category',
        'barcode_full_arrangement': 'category',
        'barcode_kit': 'category',
        'barcode_variant': 'category',
        'barcode_score': 'float64',
        'barcode_front_id': 'category',
        'barcode_front_score': 'float64',
        'barcode_front_refseq': 'category',
        'barcode_front_foundseq': 'category',
        'barcode_front_foundseq_length': 'int64',
        'barcode_front_begin_index': 'int64',
        'barcode_rear_id': 'category',
        'barcode_rear_score': 'float64',
        'barcode_rear_refseq': 'object',
        'barcode_rear_foundseq': 'object',
        'barcode_rear_foundseq_length': 'int64',
        'barcode_rear_end_index': 'int64',
        'bc_front': 'category',
        'bc_rear': 'category',
    }

    os.makedirs(out_dir, exist_ok=True)
    summary = pd.read_csv(summary_path, sep='\t', dtype=dtypes)

    # barcode_arrangement is categorical; observed=True groups only on the
    # values actually present and avoids the FutureWarning newer pandas
    # versions emit when the default is left implicit.
    per_barcode = summary.groupby('barcode_arrangement', observed=True)

    # Iterating the GroupBy yields (key, frame) pairs directly, so there is
    # no need for a separate get_group() lookup per barcode.
    for barcode, reads in per_barcode:
        out_path = os.path.join(out_dir, f'{run_name}_{barcode}.txt')
        reads.to_csv(out_path, sep='\t', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment