Created
July 13, 2022 21:54
-
-
Save tomkinsc/5a62008c04f086b233ee7a473c25aa31 to your computer and use it in GitHub Desktop.
Quick regex-based script that returns the Illumina sequencer model for a given flowcell ID.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys, re | |
# Lookup table keyed by a regular expression describing a flowcell ID.
# Each value describes the sequencer associated with that pattern:
#   "machine"    - instrument model name
#   "chemistry"  - reagent/chemistry label
#   "lane_count" - number of lanes on the flowcell
#   "note"       - free-form remark (may be empty)
#
# The patterns are intentionally unanchored: get_machines_for_flowcell_id()
# applies them with re.search(), so a 10-character ID such as "AHMNY5AFX3"
# still matches a 9-character pattern.
#
# The leading character class is [A-Z0-9]; a comma inside a character class
# is a literal comma, not a range separator, so it must not appear there.
flowcell_to_machine_model_and_chemistry = {
    r'[A-Z0-9]{5}AAXX': {
        "machine": "Illumina Genome Analyzer IIx",
        "chemistry": "All",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}ABXX': {
        "machine": "Illumina HiSeq 2000",
        "chemistry": "V2 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}ACXX': {
        "machine": "Illumina HiSeq 2000",
        "chemistry": "V3 Chemistry",
        "lane_count": 8,
        "note": "Also used on transient 2000E",
    },
    r'[A-Z0-9]{5}(?:ANXX|AN\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V4 Chemistry",
        "lane_count": 8,
        "note": "High output",
    },
    r'[A-Z0-9]{5}(?:ADXX|AD\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "Rapid run",
    },
    r'[A-Z0-9]{5}AMXX': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V2 Chemistry (beta)",
        "lane_count": 2,
        "note": "Rapid run",
    },
    r'[A-Z0-9]{5}(?:BCXX|BC\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V2 Chemistry",
        "lane_count": 2,
        "note": "Rapid run",
    },
    # NextSeq 550 is a NextSeq 500 that can also read arrays.
    # Since we cannot tell them apart based on tile count, we call it the 550
    r'[A-Z0-9]{5}AFX\w': {
        "machine": "NextSeq 550",
        "chemistry": "Mid-Output NextSeq",
        "lane_count": 4,
        "note": "",
    },
    # NextSeq 550 is a NextSeq 500 that can also read arrays.
    # Since we cannot tell them apart based on tile count, we call it the 550
    r'[A-Z0-9]{5}AGXX': {
        "machine": "NextSeq 550",
        "chemistry": "V1 Chemistry",
        "lane_count": 4,
        "note": "High-output",
    },
    # NextSeq 550 is a NextSeq 500 that can also read arrays.
    # Since we cannot tell them apart based on tile count, we call it the 550
    r'[A-Z0-9]{5}(?:BGXX|BG\w\w)': {
        "machine": "NextSeq 550",
        "chemistry": "V2/V2.5 Chemistry",
        "lane_count": 4,
        "note": "High-output",
    },
    # r'[A-Z0-9]{5}(?:AAAC|AAA\w)':{ # suffix not confirmed
    #     "machine": "NextSeq 1000/2000",
    #     "chemistry": "P2 Chemistry",
    #     "lane_count": 1,
    #     "note": "Mid-output"
    # },
    # r'[A-Z0-9]{5}(?:AAAC|AAA\w)':{ # suffix not confirmed
    #     "machine": "NextSeq 2000",
    #     "chemistry": "P3 Chemistry",
    #     "lane_count": 2,
    #     "note": "High-output"
    # },
    r'[A-Z0-9]{5}(?:BBXX|BB\w\w)': {
        "machine": "Illumina HiSeq 4000",
        "chemistry": "Illumina HiSeq 4000",
        "lane_count": 8,
        "note": "",
    },
    # The two alternatives must be separated by "|"; a ":" here would be
    # matched literally and the pattern would never hit a real flowcell ID.
    r'[A-Z0-9]{5}(?:ALXX|AL\w\w)': {
        "machine": "HiSeq X Ten",
        "chemistry": "V1/V2.5 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}(?:CCXX|CC\w\w)': {
        "machine": "HiSeq X Ten",
        "chemistry": "V2/V2.5 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z0-9]{5}DR\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "S1/SP",
    },
    r'[A-Z0-9]{5}DM\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "S2",
    },
    r'[A-Z0-9]{5}DS\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 4,
        "note": "S4",
    },
    r'BNS417.*': {
        "machine": "Illumina iSeq 100",
        "chemistry": "V1",
        "lane_count": 1,
        "note": "AKA Firefly",
    },
    r'[0-9]{9}-\w{5}': {
        "machine": "Illumina MiSeq",
        "chemistry": "V1/V2/V3 Chemistry",
        "lane_count": 1,
        "note": "",
    },
}
def get_machines_for_flowcell_id(fcid):
    """Return the sequencer descriptions whose pattern occurs in *fcid*.

    Every key of ``flowcell_to_machine_model_and_chemistry`` is treated as a
    regular expression and searched for anywhere in ``fcid`` (``re.search``,
    so the match is not anchored to the start or end of the string).

    Args:
        fcid: Flowcell ID string to classify.

    Returns:
        A list of the matching table values (dicts with "machine",
        "chemistry", "lane_count" and "note" keys), in table order. The list
        is empty when no pattern matches.
    """
    return [
        description
        for pattern, description in flowcell_to_machine_model_and_chemistry.items()
        if re.search(pattern, fcid)
    ]
# Command-line entry point: print the machine model for one flowcell ID.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} FLOWCELLID")
        print(f" ex. {sys.argv[0]} AHMNY5AFX3")
    else:
        matches = get_machines_for_flowcell_id(sys.argv[1])
        if not matches:
            # An unrecognised ID yields an empty list; indexing it would raise
            # IndexError. sys.exit() with a string writes it to stderr and
            # exits with status 1.
            sys.exit(f"No known sequencer model matches flowcell ID: {sys.argv[1]}")
        # When several patterns match, report the first one in table order.
        print(matches[0]["machine"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment