Skip to content

Instantly share code, notes, and snippets.

@tomkinsc
Created July 13, 2022 21:54
Show Gist options
  • Save tomkinsc/5a62008c04f086b233ee7a473c25aa31 to your computer and use it in GitHub Desktop.
Save tomkinsc/5a62008c04f086b233ee7a473c25aa31 to your computer and use it in GitHub Desktop.
Quick regex-based script that returns the model of Illumina sequencer for a given flowcell ID.
#!/usr/bin/env python
import sys, re
# Lookup table: flowcell-ID regex -> description of the sequencer that uses it.
#
# Each key is a regular expression and each value is a dict with the keys
# "machine", "chemistry", "lane_count" and "note".  The patterns are not
# anchored; get_machines_for_flowcell_id() applies them with re.search() in
# insertion order, so the order of the entries determines the order of the
# results.
#
# Most patterns are five upper-case letters/digits followed by a
# model-specific suffix.  Where a key is written as (?:XXXX|XX\w\w) the first
# alternative is the canonical suffix and the second is the looser form that
# is actually accepted.
flowcell_to_machine_model_and_chemistry = {
    r'[A-Z0-9]{5}AAXX': {
        "machine": "Illumina Genome Analyzer IIx",
        "chemistry": "All",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}ABXX': {
        "machine": "Illumina HiSeq 2000",
        "chemistry": "V2 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}ACXX': {
        "machine": "Illumina HiSeq 2000",
        "chemistry": "V3 Chemistry",
        "lane_count": 8,
        "note": "Also used on transient 2000E",
    },
    r'[A-Z0-9]{5}(?:ANXX|AN\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V4 Chemistry",
        "lane_count": 8,
        "note": "High output",
    },
    r'[A-Z0-9]{5}(?:ADXX|AD\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "Rapid run",
    },
    r'[A-Z0-9]{5}AMXX': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V2 Chemistry (beta)",
        "lane_count": 2,
        "note": "Rapid run",
    },
    r'[A-Z0-9]{5}(?:BCXX|BC\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V2 Chemistry",
        "lane_count": 2,
        "note": "Rapid run",
    },
    # NextSeq 550 is a NextSeq 500 that can also read arrays.
    # Since we cannot tell them apart based on tile count, we call it the 550.
    # (This applies to all three NextSeq 550 entries below.)
    r'[A-Z0-9]{5}AFX\w': {
        "machine": "NextSeq 550",
        "chemistry": "Mid-Output NextSeq",
        "lane_count": 4,
        "note": "",
    },
    r'[A-Z0-9]{5}AGXX': {
        "machine": "NextSeq 550",
        "chemistry": "V1 Chemistry",
        "lane_count": 4,
        "note": "High-output",
    },
    r'[A-Z0-9]{5}(?:BGXX|BG\w\w)': {
        "machine": "NextSeq 550",
        "chemistry": "V2/V2.5 Chemistry",
        "lane_count": 4,
        "note": "High-output",
    },
    # Disabled: the flowcell suffix for these models is not confirmed, and
    # both candidate entries would share the same pattern (a dict can only
    # hold one of them).
    # r'[A-Z0-9]{5}(?:AAAC|AAA\w)': {  # suffix not confirmed
    #     "machine": "NextSeq 1000/2000",
    #     "chemistry": "P2 Chemistry",
    #     "lane_count": 1,
    #     "note": "Mid-output",
    # },
    # r'[A-Z0-9]{5}(?:AAAC|AAA\w)': {  # suffix not confirmed
    #     "machine": "NextSeq 2000",
    #     "chemistry": "P3 Chemistry",
    #     "lane_count": 2,
    #     "note": "High-output",
    # },
    r'[A-Z0-9]{5}(?:BBXX|BB\w\w)': {
        "machine": "Illumina HiSeq 4000",
        # NOTE(review): chemistry repeats the machine name -- confirm the
        # intended value.
        "chemistry": "Illumina HiSeq 4000",
        "lane_count": 8,
        "note": "",
    },
    # The two HiSeq X Ten patterns previously used ':' instead of '|' inside
    # the group, which made them match only a literal "ALXX:AL.." string and
    # therefore never match a real flowcell ID.
    r'[A-Z0-9]{5}(?:ALXX|AL\w\w)': {
        "machine": "HiSeq X Ten",
        "chemistry": "V1/V2.5 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}(?:CCXX|CC\w\w)': {
        "machine": "HiSeq X Ten",
        "chemistry": "V2/V2.5 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}DR\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "S1/SP",
    },
    r'[A-Z0-9]{5}DM\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "S2",
    },
    r'[A-Z0-9]{5}DS\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 4,
        "note": "S4",
    },
    r'BNS417.*': {
        "machine": "Illumina iSeq 100",
        "chemistry": "V1",
        "lane_count": 1,
        "note": "AKA Firefly",
    },
    r'[0-9]{9}-\w{5}': {
        "machine": "Illumina MiSeq",
        "chemistry": "V1/V2/V3 Chemistry",
        "lane_count": 1,
        "note": "",
    },
}
def get_machines_for_flowcell_id(fcid):
    """Return the sequencer descriptions whose pattern is found in *fcid*.

    Every regex key of ``flowcell_to_machine_model_and_chemistry`` is tried
    against ``fcid`` with ``re.search`` (so a pattern may match anywhere in
    the string).  The value dicts of all matching entries are returned in
    table order; the list is empty when nothing matches.
    """
    return [
        description
        for pattern, description in flowcell_to_machine_model_and_chemistry.items()
        if re.search(pattern, fcid)
    ]
# Command-line entry point: print the machine model for one flowcell ID.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} FLOWCELLID")
        print(f" ex. {sys.argv[0]} AHMNY5AFX3")
    else:
        matches = get_machines_for_flowcell_id(sys.argv[1])
        if not matches:
            # Previously an unknown ID crashed with an IndexError traceback;
            # report it on stderr and exit with a non-zero status instead.
            sys.exit(f"No known sequencer model matches flowcell ID {sys.argv[1]!r}")
        # Several patterns may match; the first one in table order wins.
        print(matches[0]["machine"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment