Last active
August 8, 2018 19:33
-
-
Save stupidpupil/dd687f168db39eccc5f6040a6da25714 to your computer and use it in GitHub Desktop.
BNF Name PEG
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Entry rule. Two layouts are accepted, tried in this order:
//   1. "Gppe " + form + "_" + one product-name word, followed by any mix of
//      ratios, quantities, modifiers and further product-name words;
//   2. product-name words + "_" + optional words that do not start a form +
//      the form, followed by any mix of ratios, quantities and modifiers.
// Spaces are matched explicitly with " " (the parser is built with
// skipws=False).
bnf_name <-
    (
        "Gppe " form "_" product_name_word
        (" "* (ratio / quantity / modifier / product_name_word))*
    )
    /
    (
        (product_name_word " "?)+ "_"
        (" "* (!form product_name_word))*
        " "* form
        (" "* (ratio / quantity / modifier))*
    )
;
// One word of the product name: a run of characters that are neither
// whitespace nor "_" (the "_" separates the name from the rest).
product_name_word <- r'[^\s_]+';
// Two amounts separated by "/" or ";". The left side is a quantity or a bare
// number; the right side may itself be a ratio (right-recursive), a quantity,
// a bare unit or a bare number.
ratio <-
    (quantity / number)
    ("/" / ";")
    (ratio / quantity / unit / number)
;
// Qualifier tokens that may follow the form.
// PEG ordered choice commits to the first alternative that matches, so a
// token that is a prefix of another ("Pf" vs "Pfs"/"Pfa") must be listed
// AFTER the longer token, otherwise the longer one can never match.
modifier <-
    "e/c"/"E/c"/"E/C"/              //Enteric-coating
    "Spec"/                         //?
    "Sach"/                         //Sachet
    "Lyoph"/
    "Soln"/
    "Paed"/
    "Conc"/
    "Disper"/
    "s/f"/"S/f"/"S/F"/              //Sugar-free?
    "a/f"/"A/f"/"A/F"/              //Additive-free
    "m/r"/"M/r"/"M/R"/              //Modified-release
    "c/r"/"C/r"/"C/R"/              //Controlled-release
    "g/r"/"G/r"/"G/R"/              //Gastro-resistant
    "g/f"/"G/f"/"G/F"/              //Gluten-free
    "Ud"/                           //Ut dictum?
    "Vl"/                           //Vial
    "Dil"/                          //Dilute
    "Pfs"/                          //Pre-filled syringe (must precede "Pf")
    "Pfa"/                          //Perforated film absorbent (must precede "Pf")
    "Pf"/                           //Pre-filled
    "Amp"/                          //Ampoule
    "Cart"/                         //Cartridge
    "Pen"/
    "Syrg"/
    "Eff"/                          //Effervescent
    ("(" r'[^\s_\)]+' ")")/         //Flavours and the like
    "SD"/"Sd"/                      //Single dose
    "@gn"/
    // NOTE(review): bnf_name consumes spaces with " "* before trying a
    // modifier, so the leading space here looks unlikely to ever match;
    // left unchanged because enabling it would alter which quantities are
    // picked up -- confirm the intent.
    " In ";                         //NOT (just) a concentration AFAICT
// A dosage form: any number of leading qualifiers (each optionally followed
// by one space), exactly one mandatory base form, then any number of further
// base forms or qualifiers (each optionally preceded by one space).
// In the trailing group form_base is tried before form_additional.
form <-
    (form_additional " "?)*
    form_base
    (" "? (form_base / form_additional))*
;
// Qualifier words that may surround the base form (route, audience,
// strength, ...).
// PEG ordered choice commits to the first alternative that matches, so where
// one token is a prefix of another the longer token is listed first
// ("Children" before "Child", "Sublingual" before "Subling" before "Sub",
// "Ear/Eye/Nose" before "Ear/Eye" before "Ear", "Steri-" before "Ster",
// "Forte" before "For").
form_additional <-
    "Aq" / "Viscous" / "Adh" / "Prote" / "Isot" /
    "Ethylsuc" /
    "Postcoital" /
    "Paed" / "Adult" / "Children" / "Child" /
    "Foaming" / "Eff" / "Disper" /
    "Heat" /
    "Ear/Eye/Nose" / "Ear/Eye/Nsl" / "Ear/Eye" /   //Or allow "/" to separate form words
    "Oral" / "Orodisper" / "Oramucosal" / "Chble" /
    "Ophth" / "Eye" / "Subcon" /
    "Ear" /
    "Nsl" / "N/" / "Nasule" / "I/Nsl" / "Nose" /
    "Skin" / "Cutaneous" / "Top" /
    "Transdermal" / "T/Derm" /
    "Rectal" / "Blad" / "Vag" /
    "I/V" / "I/M" / "Epidural" / "Intrathecal" /
    "Sublingual" / "Subling" / "Buccal" / "Sub" / "Lyophilisate" /
    "Resp" / "B/A" / "Mist" /
    "Scalp" / "Face" /
    "A/" / "P/" /                                  //Aerosol Pump, Pneumatic Pump
    "Auto-" /
    "Dissolve" / "Absorb" /
    "Bath" / "Shower" / "Cleansing" /
    "Heavy" / "Light" / "Micro-" /
    "Forte" / "Fte" / "Strong" /
    "Soft" / "Dilute" /
    "Medic" / "Steri-" / "Ster" /
    "Retention" /
    "Co " /
    "Ud" /
    "Depot" /
    "V/" /                                         //Vegicap
    ("Size " r'[A-Z]+') /
    "For"
;
// Base dosage-form tokens; a form must contain at least one of these.
// PEG ordered choice commits to the first alternative that matches, so where
// one token is a prefix of another the longer token is listed first
// ("Capl" before "Cap", "Inhalator" before "Inha" before "Inh",
// "Emulsif" before "Emuls", "Gelcap" before "Gel", "Linct" before "Lin", ...).
form_base <-
    "Capl" / "Cap" / "Tab" / "Loz" / "Pastil" / "Pulvule" / "Divitab" / "Pills" / "Pill" / "Pellets" / "Wafer" / "Spansule" / "Span" /
    "Liq" / "Syr" / "Linct" / "Rinse" / "Mthwsh" / "Susp" / "Inf" / "Fluid" / "Expect" / "Tinct" / "Elixir" / "Elix" / "Gelcap" /
    "Conc" / "Soln" /
    "Spy" / "spy" / "Aero" /
    "Suppos" / "Supp" / "Enem" /
    "Pess" /
    "Gran" / "Pdrs" / "Pdr" / "Mix" / "Sach" /
    "Inhalator" / "Inha" / "Inh" / "Respule" / "Cylinder" / "Neb" / "Reefer" / "Disk" / "Gas" / "Cyclocap" / "Ventodisk" / "Steripoule" / "R/Cap" / "Becodisk" / "Spincap" /
    "Autohaler" / "Turbohaler" / "Accuhaler" / "Evohaler" /
    "Inj" / "Org" / "Amp" / "Ins" /
    "Patch" /
    "System" / "Delivery System" / "D/System" / "D/system" / "Implant" /
    "Wsh" / "Wash" / "Cleanser" / "Soap" / "Bar" / "Shampoo" /
    "Chewing Gum" /
    "Dps" /
    "Crm" / "Gel" / "Oint" / "Lot" / "Emollient" / "Applic" / "Emulsif" / "Emuls" / "Paste" / "Oil" / "Lin" / "Rub" / "Balm" /
    "(S)" / "(s)" /     //Solid
    "Vac" /             //Vaccine
    "Foam" /
    "Film" /
    "Irrig" /
    "Strips" / "Strip" /
    "Cryst" /
    "C/Pk" / "S/Pack" / "T/Pk" / "D/P" / "Pack"
;
// Lexical rules for numbers.
digit   <- r'[0-9]';
point   <- ".";
// Optional integer part, a decimal point, then at least one digit (".5", "0.5").
float   <- digit* point digit+;
// One or more digit groups, each optionally followed by "," ("1,000").
integer <- (digit+ ","?)+;
// float is tried first so that "1.5" is not cut short as the integer "1".
number  <- float / integer;
// A number followed by a unit, with an optional single space between them
// and optional surrounding parentheses ("5mg", "5 mg", "(5mg)").
// The two parentheses are independently optional.
quantity <-
    "("?
    number " "? unit
    ")"?
;
// A unit of measure: a (possibly prefixed) metric unit, a percent sign,
// "hr"/"hrs", or one of the single-letter units "u", "d", "D".
unit <-
    metric_unit /
    "%" /
    "hr" "s"? /
    "u" / "d" / "D"
;
// A metric prefix (possibly empty) followed by a base unit symbol.
// 'mol' must be tried before 'm': ordered choice commits to the first match,
// so with 'm' first "mmol" was read as prefix 'm' + unit 'm', leaving "ol".
metric_unit <- metric_prefix ('g'/'l'/'L'/'mol'/'m'/'M');
// Metric prefix: 'mc', 'm', or nothing (the empty string always matches).
// 'mc' is tried before 'm' so that it is not cut short.
metric_prefix <- 'mc' / 'm' / '';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from arpeggio import PTNodeVisitor, visit_parse_tree, NoMatch | |
from arpeggio.peg import ParserPEG | |
from decimal import * | |
class BNFNameVisitor(PTNodeVisitor):
    """Convert a parse tree of the ``bnf_name`` grammar into plain Python data.

    Each ``visit_<rule>`` method receives the already-visited children of the
    grammar rule with the same name and returns the value for that rule.
    The top-level result (``visit_bnf_name``) is a dict.

    Abbreviation reference:
    http://www.nhsbsa.nhs.uk/PrescriptionServices/Documents/Drug_Appliance_Abbreviations_2009.pdf
    """

    # Substrings of the joined form text that mark it as a liquid / a solid.
    _LIQUID_FORMS = ['Soln', 'Syr', 'Susp', 'Linct', 'Liq', 'Mthwsh', 'Fluid',
                     'Elix', 'Dps', 'Mix', 'Gel', 'Oil']
    _SOLID_FORMS = ['Crm', 'Oint', 'Lot', 'Emollient', 'Applic', '(S)', 'Lin',
                    'Pdr']

    # Route of administration keyed by explicit route words in the form.
    # 'Oramucosal' is the spelling the grammar's form_additional rule emits;
    # 'Oromucosal' is kept for backward compatibility.
    # NOTE(review): 'Sub ' (trailing space) is kept as in the original; the
    # grammar token is "Sub", so this entry may never match -- confirm.
    _ROA_BY_ROUTE_WORD = {
        'Oral': ['Oral', 'Orodisper', 'Oromucosal', 'Oramucosal', 'Chble'],
        'Ocular': ['Eye'],
        'Rectal': ['Rectal'],
        'Parenteral': ['I/V', 'Implant', 'I/M', 'Intrathecal'],
        'Transdermal': ['Transdermal', 'T/Derm'],
        'Topical': ['Top'],
        'Nasal': ['Nsl', 'Nasule', 'N/'],
        'Inhaled': ['Inh', 'Resp', 'Gas', 'Neb'],
        'Vaginal': ['Vag'],
        'Sublingual': ['Sub ', 'Buccal', 'Subling'],
    }

    # Fallback: guess the route from the base form when no route word is found.
    _ROA_BY_FORM_WORD = {
        'Oral': ['Linct', 'Cap', 'Capl', 'Tab', 'Susp', 'Soln', 'Chewing Gum',
                 'Divitab', 'Spansule', 'Span', 'Gelcap', 'Pastil', 'Pulvule',
                 'Pill', 'Liq', 'Syr', 'Tinct', 'Pellets', 'Wafer', 'Elixir',
                 'Elix'],
        'Parenteral': ['Inj', 'Org'],
        'Inhaled': ['Autohaler', 'Turbohaler', 'Accuhaler', 'Evohaler',
                    'Respule', 'Cylinder', 'Reefer', 'Disk', 'B/A', 'Cyclocap',
                    'Ventodisk', 'Steripoule', 'Respule', 'R/Cap', 'Spincap'],
        'Transdermal': ['Patch'],
        'Rectal': ['Suppos', 'Supp', 'Enem'],
        'Vaginal': ['Pess'],
        'Topical': ['Crm', 'Gel', 'Oint', 'Lot', 'Lin'],
        'Sublingual': ['Lyophilisate'],
    }

    def visit_bnf_name(self, node, children):
        """Build the result dict for a whole name.

        Keys: ``'product_name'``, ``'roa'`` and ``'d'`` are always present;
        ``'mass/g'`` and ``'vol/l'`` are present only when they can be
        derived, and are multiplied by ``'d'`` before returning.
        """
        form = children.form[0]
        bnfname = {
            'product_name': ' '.join(children.product_name_word),
            'roa': form['roa'],
        }

        #
        # Mass, Volume and D's
        #
        quantities = children.quantity

        mass_quantities = [q for q in quantities if q['unit'] == 'g']
        if len(mass_quantities) == 1:
            bnfname['mass/g'] = mass_quantities[0]['number']
        elif form['is_solid']:
            bnfname['mass/g'] = Decimal(1)  # 1 g if solid

        vol_quantities = [q for q in quantities if q['unit'] == 'l']
        if len(vol_quantities) == 1:
            bnfname['vol/l'] = vol_quantities[0]['number']
        elif form['is_liquid']:
            bnfname['vol/l'] = Decimal('0.001')  # 1 ml if liquid

        d_quantities = [q for q in quantities if q['unit'] == 'd']
        if len(d_quantities) == 1:
            bnfname['d'] = d_quantities[0]['number']
        else:
            bnfname['d'] = Decimal(1)  # 1 tablet/ampoule...

        # No explicit mass: derive it from a single g/l concentration and the
        # volume. Use the filtered concentration itself, not the first ratio
        # of any unit (the two differ when another ratio precedes it).
        if 'mass/g' not in bnfname and 'vol/l' in bnfname:
            liq_concentrations = [r for r in children.ratio
                                  if r['unit'] == 'g/l']
            if len(liq_concentrations) == 1:
                bnfname['mass/g'] = (liq_concentrations[0]['number']
                                     * bnfname['vol/l'])

        if 'mass/g' in bnfname:
            bnfname['mass/g'] *= bnfname['d']
        if 'vol/l' in bnfname:
            bnfname['vol/l'] *= bnfname['d']

        return bnfname

    @staticmethod
    def _last_matching_roa(tokens, table):
        """Return the last route in *table* having a keyword in *tokens*, else None."""
        found = None
        for roa, keywords in table.items():
            if any(keyword in tokens for keyword in keywords):
                found = roa  # no break: a later route overrides an earlier one
        return found

    def visit_form(self, node, children):
        """Return ``{'form', 'roa', 'is_liquid', 'is_solid'}`` for a form.

        ``'roa'`` is ``'U'`` when no route of administration can be
        determined. Liquid/solid detection is a substring test on the joined
        form text; route detection is a membership test on the child values.
        """
        text = ''.join(children)
        form = {'form': text, 'roa': 'U'}
        form['is_liquid'] = any(lf in text for lf in self._LIQUID_FORMS)
        form['is_solid'] = any(sf in text for sf in self._SOLID_FORMS)

        roa = self._last_matching_roa(children, self._ROA_BY_ROUTE_WORD)
        if roa is None:
            # Then we try to guess RoA based on the form of the chemical
            roa = self._last_matching_roa(children, self._ROA_BY_FORM_WORD)
        if roa is not None:
            form['roa'] = roa

        return form

    def visit_modifier(self, node, children):
        """Return the modifier text, lower-cased."""
        return ''.join(children).lower()

    def visit_ratio(self, node, children):
        """Return ``{'number', 'unit', 'quantities'}`` for ``left/right``.

        The ratio is treated as a concentration or a rate: ``number`` is
        left / right and ``unit`` is ``"<left unit>/<right unit>"``. A bare
        number on either side is given the empty unit. (An alternative
        reading -- a combination product whose parts should be summed -- is
        deliberately not applied.)
        """
        def quantify(x):
            return {'number': x, 'unit': ''} if isinstance(x, Decimal) else x

        # First and last child are the two sides; anything between is skipped.
        qs = [quantify(children[0]), quantify(children[-1])]

        quotient = qs[0]['number'] / qs[1]['number']
        unit = "%s/%s" % (qs[0]['unit'], qs[1]['unit'])
        return {'number': quotient, 'unit': unit, 'quantities': qs}

    def visit_quantity(self, node, children):
        """Return ``{'number', 'unit'}`` with the unit's multiplier applied."""
        unit = children.unit[0]
        number = 1
        if len(children.number) > 0:
            number *= children.number[0]
        number *= unit['number']
        return {'number': number, 'unit': unit['unit']}

    def visit_unit(self, node, children):
        """Return ``{'number': multiplier, 'unit': name}`` for a unit.

        Metric units are passed through from ``visit_metric_unit``; any other
        unit gets multiplier 1 and its lower-cased text as the name.
        """
        if len(children.metric_unit) > 0:
            return children.metric_unit[0]
        return {'number': Decimal(1), 'unit': ''.join(children).lower()}

    def visit_metric_unit(self, node, children):
        """Return the prefix multiplier (first child) and lower-cased base unit (last child)."""
        return {'number': children[0], 'unit': children[-1].lower()}

    def visit_metric_prefix(self, node, children):
        """Map the prefix text to its multiplier ('mc' -> 1e-6, 'm' -> 0.001, '' -> 1)."""
        lookup = {
            'mc': Decimal('1e-6'),
            'm': Decimal('0.001'),
            '': Decimal(1),
        }
        return lookup.get(''.join(children))

    def visit_number(self, node, children):
        """Convert the joined child text to a ``Decimal``."""
        return Decimal(''.join(children))

    def visit_float(self, node, children):
        """Return the float's text (digits and point) as one string."""
        return ''.join(children)

    def visit_integer(self, node, children):
        """Return the digits as one string; only ``digit`` children are kept, so "," separators are dropped."""
        return ''.join(children.digit)

    def visit_digit(self, node, children):
        """Return the matched digit character."""
        return node.value

    def visit_point(self, node, children):
        """Return the decimal point."""
        return "."
# Build the parser and visitor once at import time.
# The grammar path is relative to the current working directory.
# skipws=False: the parser must not skip whitespace on its own; the grammar
# matches spaces explicitly.
# The file is read inside a `with` block so the handle is closed promptly
# instead of being left to the garbage collector.
with open('./data/bnf_codes/bnf_name.peg', 'r') as _grammar_file:
    bnf_name_parser = ParserPEG(_grammar_file.read(), 'bnf_name', skipws=False)
bnf_name_visitor = BNFNameVisitor()
def dict_for_bnf_name(bnf_name):
    """Parse *bnf_name* and return the visitor's result dict.

    Returns ``None`` when ``NoMatch`` is raised, i.e. the name does not fit
    the grammar. Any other exception propagates to the caller.
    """
    try:
        parse_tree = bnf_name_parser.parse(bnf_name)
        result = visit_parse_tree(parse_tree, bnf_name_visitor)
    except NoMatch:
        return None
    return result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment