tomkinsc/flowcell-id_to_seqmodel.py

## flowcell-id_to_seqmodel.py
#!/usr/bin/env python

import sys, re

# Lookup table keyed by a flowcell-ID regular expression. Each value records
# the sequencer model, the chemistry version, the lane count, and a free-form
# note for flowcells whose ID matches the key.
#
# NOTE: the comma inside ``[A-Z,0-9]`` is a literal comma, not a separator, so
# the class also accepts ",". It is kept so that the existing keys do not change.
# In groups such as ``(?:ANXX|AN\w\w)`` the second alternative is the broader
# form and already covers the first; both are kept for readability.
flowcell_to_machine_model_and_chemistry = {
    r'[A-Z,0-9]{5}AAXX': {
        "machine": "Illumina Genome Analyzer IIx",
        "chemistry": "All",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z,0-9]{5}ABXX': {
        "machine": "Illumina HiSeq 2000",
        "chemistry": "V2 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z,0-9]{5}ACXX': {
        "machine": "Illumina HiSeq 2000",
        "chemistry": "V3 Chemistry",
        "lane_count": 8,
        "note": "Also used on transient 2000E",
    },
    r'[A-Z,0-9]{5}(?:ANXX|AN\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V4 Chemistry",
        "lane_count": 8,
        "note": "High output",
    },
    r'[A-Z,0-9]{5}(?:ADXX|AD\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "Rapid run",
    },
    r'[A-Z,0-9]{5}AMXX': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V2 Chemistry (beta)",
        "lane_count": 2,
        "note": "Rapid run",
    },
    r'[A-Z,0-9]{5}(?:BCXX|BC\w\w)': {
        "machine": "Illumina HiSeq 2500",
        "chemistry": "V2 Chemistry",
        "lane_count": 2,
        "note": "Rapid run",
    },
    # NextSeq 550 is a NextSeq 500 that can also read arrays.
    # Since we cannot tell them apart based on tile count, we call it the 550.
    # This applies to the three NextSeq entries that follow.
    r'[A-Z,0-9]{5}AFX\w': {
        "machine": "NextSeq 550",
        "chemistry": "Mid-Output NextSeq",
        "lane_count": 4,
        "note": "",
    },
    r'[A-Z,0-9]{5}AGXX': {
        "machine": "NextSeq 550",
        "chemistry": "V1 Chemistry",
        "lane_count": 4,
        "note": "High-output",
    },
    r'[A-Z,0-9]{5}(?:BGXX|BG\w\w)': {
        "machine": "NextSeq 550",
        "chemistry": "V2/V2.5 Chemistry",
        "lane_count": 4,
        "note": "High-output",
    },
    # Disabled until the flowcell suffix is confirmed:
    # r'[A-Z,0-9]{5}(?:AAAC|AAA\w)':{ # suffix not confirmed
    #     "machine":    "NextSeq 1000/2000",
    #     "chemistry":  "P2 Chemistry",
    #     "lane_count":  1,
    #     "note":       "Mid-output"
    # },
    # r'[A-Z,0-9]{5}(?:AAAC|AAA\w)':{ # suffix not confirmed
    #     "machine":    "NextSeq 2000",
    #     "chemistry":  "P3 Chemistry",
    #     "lane_count":  2,
    #     "note":       "High-output"
    # },
    r'[A-Z,0-9]{5}(?:BBXX|BB\w\w)': {
        "machine": "Illumina HiSeq 4000",
        "chemistry": "Illumina HiSeq 4000",
        "lane_count": 8,
        "note": "",
    },
    # The two HiSeq X Ten groups must use "|" (alternation). A ":" inside the
    # group is a literal colon, which would require IDs like "ALXX:ALxx".
    r'[A-Z,0-9]{5}(?:ALXX|AL\w\w)': {
        "machine": "HiSeq X Ten",
        "chemistry": "V1/V2.5 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z,0-9]{5}(?:CCXX|CC\w\w)': {
        "machine": "HiSeq X Ten",
        "chemistry": "V2/V2.5 Chemistry",
        "lane_count": 8,
        "note": "",
    },
    r'[A-Z,0-9]{5}DR\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "S1/SP",
    },
    r'[A-Z,0-9]{5}DM\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 2,
        "note": "S2",
    },
    r'[A-Z,0-9]{5}DS\w\w': {
        "machine": "Illumina NovaSeq 6000",
        "chemistry": "V1 Chemistry",
        "lane_count": 4,
        "note": "S4",
    },
    r'BNS417.*': {
        "machine": "Illumina iSeq 100",
        "chemistry": "V1",
        "lane_count": 1,
        "note": "AKA Firefly",
    },
    r'[0-9]{9}-\w{5}': {
        "machine": "Illumina MiSeq",
        "chemistry": "V1/V2/V3 Chemistry",
        "lane_count": 1,
        "note": "",
    },
}

def get_machines_for_flowcell_id(fcid):
    """Return the table entries whose pattern is found in *fcid*.

    Every key of ``flowcell_to_machine_model_and_chemistry`` is tried with
    ``re.search``, so a pattern may match anywhere inside the ID. The result
    is a list of the matching value dicts in table order; it is empty when
    no pattern matches.
    """
    return [
        details
        for pattern, details in flowcell_to_machine_model_and_chemistry.items()
        if re.search(pattern, fcid)
    ]

# Command-line entry point: print the machine model for one flowcell ID.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} FLOWCELLID")
        print(f"   ex. {sys.argv[0]} AHMNY5AFX3")
    else:
        matches = get_machines_for_flowcell_id(sys.argv[1])
        if not matches:
            # Indexing an empty match list would raise IndexError; report the
            # unknown ID on stderr and exit with a non-zero status instead.
            sys.exit(f"No known sequencer model matches flowcell ID: {sys.argv[1]}")
        # Several patterns can match; the first one in table order is reported.
        print(matches[0]["machine"])
	#!/usr/bin/env python

	import sys, re

	# Lookup table keyed by a flowcell-ID regular expression. Each value records
	# the sequencer model, the chemistry version, the lane count, and a note.
	#
	# Alternation groups use a plain "|". An escaped pipe or a ":" inside the
	# group is a literal character and would never match a real flowcell ID.
	# NOTE: the comma inside ``[A-Z,0-9]`` is a literal comma; it is kept so the
	# character class is unchanged.
	flowcell_to_machine_model_and_chemistry = {
	    r'[A-Z,0-9]{5}AAXX': {
	        "machine": "Illumina Genome Analyzer IIx",
	        "chemistry": "All",
	        "lane_count": 8,
	        "note": "",
	    },
	    r'[A-Z,0-9]{5}ABXX': {
	        "machine": "Illumina HiSeq 2000",
	        "chemistry": "V2 Chemistry",
	        "lane_count": 8,
	        "note": "",
	    },
	    r'[A-Z,0-9]{5}ACXX': {
	        "machine": "Illumina HiSeq 2000",
	        "chemistry": "V3 Chemistry",
	        "lane_count": 8,
	        "note": "Also used on transient 2000E",
	    },
	    r'[A-Z,0-9]{5}(?:ANXX|AN\w\w)': {
	        "machine": "Illumina HiSeq 2500",
	        "chemistry": "V4 Chemistry",
	        "lane_count": 8,
	        "note": "High output",
	    },
	    r'[A-Z,0-9]{5}(?:ADXX|AD\w\w)': {
	        "machine": "Illumina HiSeq 2500",
	        "chemistry": "V1 Chemistry",
	        "lane_count": 2,
	        "note": "Rapid run",
	    },
	    r'[A-Z,0-9]{5}AMXX': {
	        "machine": "Illumina HiSeq 2500",
	        "chemistry": "V2 Chemistry (beta)",
	        "lane_count": 2,
	        "note": "Rapid run",
	    },
	    r'[A-Z,0-9]{5}(?:BCXX|BC\w\w)': {
	        "machine": "Illumina HiSeq 2500",
	        "chemistry": "V2 Chemistry",
	        "lane_count": 2,
	        "note": "Rapid run",
	    },
	    # NextSeq 550 is a NextSeq 500 that can also read arrays.
	    # Since we cannot tell them apart based on tile count, we call it the 550.
	    # This applies to the three NextSeq entries that follow.
	    r'[A-Z,0-9]{5}AFX\w': {
	        "machine": "NextSeq 550",
	        "chemistry": "Mid-Output NextSeq",
	        "lane_count": 4,
	        "note": "",
	    },
	    r'[A-Z,0-9]{5}AGXX': {
	        "machine": "NextSeq 550",
	        "chemistry": "V1 Chemistry",
	        "lane_count": 4,
	        "note": "High-output",
	    },
	    r'[A-Z,0-9]{5}(?:BGXX|BG\w\w)': {
	        "machine": "NextSeq 550",
	        "chemistry": "V2/V2.5 Chemistry",
	        "lane_count": 4,
	        "note": "High-output",
	    },
	    # Disabled until the flowcell suffix is confirmed:
	    # r'[A-Z,0-9]{5}(?:AAAC|AAA\w)':{ # suffix not confirmed
	    # "machine": "NextSeq 1000/2000",
	    # "chemistry": "P2 Chemistry",
	    # "lane_count": 1,
	    # "note": "Mid-output"
	    # },
	    # r'[A-Z,0-9]{5}(?:AAAC|AAA\w)':{ # suffix not confirmed
	    # "machine": "NextSeq 2000",
	    # "chemistry": "P3 Chemistry",
	    # "lane_count": 2,
	    # "note": "High-output"
	    # },
	    r'[A-Z,0-9]{5}(?:BBXX|BB\w\w)': {
	        "machine": "Illumina HiSeq 4000",
	        "chemistry": "Illumina HiSeq 4000",
	        "lane_count": 8,
	        "note": "",
	    },
	    r'[A-Z,0-9]{5}(?:ALXX|AL\w\w)': {
	        "machine": "HiSeq X Ten",
	        "chemistry": "V1/V2.5 Chemistry",
	        "lane_count": 8,
	        "note": "",
	    },
	    r'[A-Z,0-9]{5}(?:CCXX|CC\w\w)': {
	        "machine": "HiSeq X Ten",
	        "chemistry": "V2/V2.5 Chemistry",
	        "lane_count": 8,
	        "note": "",
	    },
	    r'[A-Z,0-9]{5}DR\w\w': {
	        "machine": "Illumina NovaSeq 6000",
	        "chemistry": "V1 Chemistry",
	        "lane_count": 2,
	        "note": "S1/SP",
	    },
	    r'[A-Z,0-9]{5}DM\w\w': {
	        "machine": "Illumina NovaSeq 6000",
	        "chemistry": "V1 Chemistry",
	        "lane_count": 2,
	        "note": "S2",
	    },
	    r'[A-Z,0-9]{5}DS\w\w': {
	        "machine": "Illumina NovaSeq 6000",
	        "chemistry": "V1 Chemistry",
	        "lane_count": 4,
	        "note": "S4",
	    },
	    r'BNS417.*': {
	        "machine": "Illumina iSeq 100",
	        "chemistry": "V1",
	        "lane_count": 1,
	        "note": "AKA Firefly",
	    },
	    r'[0-9]{9}-\w{5}': {
	        "machine": "Illumina MiSeq",
	        "chemistry": "V1/V2/V3 Chemistry",
	        "lane_count": 1,
	        "note": "",
	    },
	}

	def get_machines_for_flowcell_id(fcid):
	    """Return the table entries whose pattern is found in *fcid*.

	    Every key of ``flowcell_to_machine_model_and_chemistry`` is tried with
	    ``re.search``, so a pattern may match anywhere inside the ID. The
	    result is a list of the matching value dicts in table order; it is
	    empty when no pattern matches.
	    """
	    sequencer_by_fcid = []
	    for key in flowcell_to_machine_model_and_chemistry:
	        if re.search(key, fcid):
	            sequencer_by_fcid.append(flowcell_to_machine_model_and_chemistry[key])
	    return sequencer_by_fcid

	# Command-line entry point: print the machine model for one flowcell ID.
	if __name__ == "__main__":
	    if len(sys.argv) != 2:
	        print(f"Usage: {sys.argv[0]} FLOWCELLID")
	        print(f" ex. {sys.argv[0]} AHMNY5AFX3")
	    else:
	        matches = get_machines_for_flowcell_id(sys.argv[1])
	        if not matches:
	            # Indexing an empty match list would raise IndexError; report
	            # the unknown ID on stderr and exit with a non-zero status.
	            sys.exit(f"No known sequencer model matches flowcell ID: {sys.argv[1]}")
	        # Several patterns can match; the first in table order is reported.
	        print(matches[0]["machine"])