Skip to content

Instantly share code, notes, and snippets.

@bhaskar-nair2
Created June 23, 2018 09:11
Show Gist options
  • Save bhaskar-nair2/c582f2a5baa2507e0a2476872ef78bfc to your computer and use it in GitHub Desktop.
Save bhaskar-nair2/c582f2a5baa2507e0a2476872ef78bfc to your computer and use it in GitHub Desktop.
Search Algorithm
# Hello
# Please read the comments for hints
import re
# Filler words that remove() strips from a tokenised name, so they are
# ignored even when they appear in both names being compared.
# NOTE: 'chloride' is listed twice, and 'eye drops' contains a space, so it
# can never equal one of the single-word tokens produced by the r"[\w]+"
# tokeniser in isSimilar().
trash = [
    'tabs', 'inj', 'bottle', 'syp', 'bot', 'bott', 'cap', 'doses',
    'with', 'ml', 'mg', 'in', 'methyl', 'containing', 'antibiotic',
    'sodium', 'chloride', 'fluoride', 'phosphate', 'without',
    'chloride', 'ammonium', 'citrate', 'adrenaline', 'gluconate',
    'propionate', 'absorbent', 'unmedicated', 'sulphate', 'eye drops',
    'lactate', 'disposable', 'lignocaine',
]
# Groups of mutually exclusive words. When the two names being compared use
# different words from the same group they contradict each other and are
# therefore not the same item.
# Example: "Tab Paracetamol" and "bottle Paracetamol" are two different things.
contraVals = [
    [
        'tab', 'tabs', 'inj', 'bottle', 'syp', 'bot', 'bott', 'cap',
        'drops', 'needles', 'ointment',
    ],
    [
        'sodium', 'chloride', 'fluoride', 'phosphate',
    ],
]
def contradict(a, b, groups=None):
    """Return True when two tokenised names use mutually exclusive words.

    For every group of exclusive words, the words of that group found in
    ``a`` are compared with those found in ``b``. The names contradict when
    at least one of them mentions a word of the group and the two names have
    no word of that group in common. This covers both "tab" versus "bottle"
    and "tab" versus no form at all.

    The check is independent of word order and looks at every group, so
    ``['sodium', 'chloride']`` and ``['chloride', 'sodium']`` do not
    contradict each other.

    Args:
        a: list of lower-cased word tokens of the first name.
        b: list of lower-cased word tokens of the second name.
        groups: optional iterable of word groups to check against; defaults
            to the module-level ``contraVals``.

    Returns:
        True if the names contradict, False otherwise.
    """
    if groups is None:
        groups = contraVals

    words_a = set(a)
    words_b = set(b)
    for group in groups:
        members = set(group)
        in_a = words_a & members
        in_b = words_b & members
        # A group is only relevant when somebody mentions it; it is a
        # contradiction when the two sides then share none of its words.
        if (in_a or in_b) and not (in_a & in_b):
            return True
    return False
def remove(val, ignored=None):
    """Return the tokens of ``val`` that are not filler words.

    Each remaining token appears once, in the order of its first occurrence
    in ``val``. Keeping the order matters because callers compare the result
    with ``==`` and slice it by position; a plain set difference would hand
    back the tokens in an arbitrary order.

    Args:
        val: iterable of word tokens.
        ignored: optional iterable of words to drop; defaults to the
            module-level ``trash`` list.

    Returns:
        A new list of the kept tokens; ``val`` is not modified.
    """
    # Seeding the "already seen" set with the ignored words lets one
    # membership test handle both filtering and de-duplication.
    seen = set(trash if ignored is None else ignored)
    kept = []
    for word in val:
        if word not in seen:
            seen.add(word)
            kept.append(word)
    return kept
def _in_first_seen_order(tokens, kept):
    """Return the words of ``kept`` ordered by first appearance in ``tokens``."""
    wanted = set(kept)
    ordered = []
    for word in tokens:
        if word in wanted:
            wanted.discard(word)
            ordered.append(word)
    return ordered


def isSimilar(v1, v2):
    """Grade how alike two item names are.

    Args:
        v1: first name (string).
        v2: second name (string).

    Returns:
        0 if the names are equal ignoring case and spaces, or if they consist
          of the same words once the filler words are removed;
        1 if they share an alphabetic word longer than five characters;
        2 if they contradict each other or nothing above applies.
    """
    if v1.lower() == v2.lower():
        return 0
    # Same text once the spaces in between are removed.
    if v1.upper().replace(' ', '') == v2.upper().replace(' ', ''):
        return 0

    # Case does not matter: everything is lower-cased before tokenising.
    tokens_a = re.findall(r"[\w]+", v1.lower())
    tokens_b = re.findall(r"[\w]+", v2.lower())
    if contradict(tokens_a, tokens_b):
        return 2  # Stop, the names contradict each other

    # remove() is not relied upon for ordering: the words of the first name
    # are put back in their original order so the positional slice below is
    # deterministic, and the equality test is done on sets.
    words_a = _in_first_seen_order(tokens_a, remove(tokens_a))
    words_b = set(remove(tokens_b))
    if set(words_a) == words_b:
        return 0

    # Only the middle words are considered: the first word and the last two
    # are skipped (same window as the original a[1:len(a) - 2]).
    for word in words_a[1:-2]:
        # isalpha() so that numbers do not match, e.g. 100 == 100.
        if word.isalpha() and len(word) > 5 and word in words_b:
            return 1
    return 2
# These are some self-generated samples. If you can clear all of them,
# I'll share the complete database with you.
Samplev1 = [
    'Antibiotic disc augmentin 30 mcg (1x5 cart)',
    'Aso titer test kit 50-test ',
    'Blood agar base (infusion agar)',
    'Hcv rapid test tab (1x30)',
    'Insulin',
    'siringe',
    'Syringes disposable 50cc',
    'CALCIUM HYDROXIDE',
    'TIE PADS FOR HANDS (RESTRAINS) (UNIVERSAL)',
]
samplev2 = [
    'Antibiotic disc bacitracin 30 mcg (1x5 cart)',
    'Aso titertest kit 50 tests ',
    'Blood agar base',
    'Hcv rapid test bottle (1x30)',
    'Insolin',
    'syringe',
    'Syringes disposable 50cc',
    'Syringes disposable 5cc',
    'PEDIATRIC CLAVICLE BRACE FOR HANDS( UNIVERSAL)',
    'CALCIUM ACETATE 500 MG TAB',
]
# The lists aren't the same size, so don't zip them; use a double loop.
# Feel free to ask any questions or make requests.
# Sharing your knowledge in the comments would be really appreciated.
@bhaskar-nair2
Copy link
Author

Please clone and make pull requests so that we can see the changes made.

@bhaskar-nair2
Copy link
Author

'Antibiotic disc augmentin 30 mcg (1x5 cart)', Returns 0 for 'Antibiotic disc bacitracin 30 mcg (1x5 cart)'
'Aso titer test kit 50-test ', returns 0 for 'Aso titertest kit 50 tests '
'Blood agar base (infusion agar)',returns 1 for 'Blood agar base',
'Hcv rapid test tab (1x30)',returns 1 for 'Hcv rapid test bottle (1x30)'
'Insulin',returns 1 for 'Insolin',
'siringe', returns 1 for 'syringe',
'Syringes disposable 50cc', returns 0 for 'Syringes disposable 50cc' but returns 2 for 'Syringes disposable 5cc'
'CALCIUM HYDROXIDE', returns 2
'TIE PADS FOR HANDS (RESTRAINS) (UNIVERSAL)' returns 2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment