Created
June 23, 2018 09:11
-
-
Save bhaskar-nair2/c582f2a5baa2507e0a2476872ef78bfc to your computer and use it in GitHub Desktop.
Search Algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hello | |
# Please read the comments for hints | |
import re

# These words have to be ignored even if they match.
# NOTE(review): isSimilar splits names with r"[\w]+", so no token ever contains
# a space and the 'eye drops' entry can never match - confirm whether 'eye'
# and/or 'drops' should be listed separately instead.
trash = [
    'tabs', 'inj', 'bottle', 'syp', 'bot', 'bott', 'cap', 'doses', 'with',
    'ml', 'mg', 'in', 'methyl', 'containing', 'antibiotic', 'sodium',
    'chloride', 'fluoride', 'phosphate', 'without', 'ammonium', 'citrate',
    'adrenaline', 'gluconate', 'propionate', 'absorbent', 'unmedicated',
    'sulphate', 'eye drops', 'lactate', 'disposable', 'lignocaine',
]
# If two different words from the same list appear in the two names, they contradict each other, and the names are therefore not the same.
# Example: "Tab Paracetamol" and "Bottle Paracetamol" are two different things.
# Each inner list is one group of mutually exclusive words.
contraVals = [
    ['tab', 'tabs', 'inj', 'bottle', 'syp', 'bot', 'bott', 'cap', 'drops',
     'needles', 'ointment'],
    ['sodium', 'chloride', 'fluoride', 'phosphate'],
]
# This function checks if the two nomenclature words have contradicting values | |
# I'm not sure if this is correctly done, feel free to change/remove it | |
def contradict(a, b, groups=None):
    """Return True when the word lists *a* and *b* contradict each other.

    For every group of mutually exclusive words, the members that occur in
    *a* are compared with the members that occur in *b*. The lists
    contradict as soon as one group is represented differently on the two
    sides: a different member on each side, or a member on one side only.

    The result does not depend on the order of the words, and every group is
    checked (an agreement in one group no longer hides a disagreement in
    another).

    Args:
        a: Words of the first name.
        b: Words of the second name.
        groups: Iterable of word groups; defaults to the module-level
            ``contraVals``.

    Returns:
        True if the names contradict, False otherwise.
    """
    if groups is None:
        groups = contraVals
    words_a = set(a)
    words_b = set(b)
    for group in groups:
        members = set(group)
        # Same group, different members present on each side -> contradiction.
        if words_a & members != words_b & members:
            return True
    return False
# This function removes the trash words from the strings | |
def remove(val, ignored=None):
    """Return the distinct words of *val* that are not ignored words.

    The result is sorted so that it is the same on every run: iterating a
    set of strings directly gives an order that can change between
    interpreter runs, which would make list comparisons and slices of the
    result unreliable.

    Args:
        val: Iterable of words.
        ignored: Words to drop; defaults to the module-level ``trash``.

    Returns:
        A sorted list of the remaining words, without duplicates.
    """
    if ignored is None:
        ignored = trash
    return sorted(set(val) - set(ignored))
# This is the base function responsible for the checking | |
def isSimilar(v1, v2):
    """Score how closely two item names match.

    Args:
        v1: First name.
        v2: Second name.

    Returns:
        0 if the names are equal ignoring case, equal ignoring case and
          spaces, or made of the same words once ignored words are removed;
        1 if they share an alphabetic word longer than five characters;
        2 if they contradict each other or nothing matched.
    """
    if v1.lower() == v2.lower():
        return 0
    # Also treat the names as equal when only the spacing differs.
    if v1.upper().replace(' ', '') == v2.upper().replace(' ', ''):
        return 0

    # Case does not matter: everything is lower-cased before splitting.
    words_a = re.findall(r"[\w]+", v1.lower())
    words_b = re.findall(r"[\w]+", v2.lower())
    if contradict(words_a, words_b):
        return 2  # Stop, the names contradict each other

    # Compare as sets: the order of the list returned by remove() is not
    # something this comparison should depend on.
    kept_a = set(remove(words_a))
    kept_b = set(remove(words_b))
    if kept_a == kept_b:
        return 0

    # Walk the remaining words of v1 in their original order so that the
    # window below selects the same words on every run.
    ordered_a = [word for word in dict.fromkeys(words_a) if word in kept_a]
    # NOTE(review): the window skips the first word and the last two, as the
    # original slice did; names with three or fewer words never reach the
    # loop body - confirm this is intended.
    for word in ordered_a[1:len(ordered_a) - 2]:
        # isalpha() so that numbers do not count as a match (100 == 100).
        if word.isalpha() and len(word) > 5 and word in kept_b:
            return 1
    return 2
# These are some self-generated samples. If you can clear all of them, I'll share the complete database with you.
# Sample names for exercising isSimilar; the two lists differ in length.
Samplev1 = [
    'Antibiotic disc augmentin 30 mcg (1x5 cart)',
    'Aso titer test kit 50-test ',
    'Blood agar base (infusion agar)',
    'Hcv rapid test tab (1x30)',
    'Insulin',
    'siringe',
    'Syringes disposable 50cc',
    'CALCIUM HYDROXIDE',
    'TIE PADS FOR HANDS (RESTRAINS) (UNIVERSAL)',
]
samplev2 = [
    'Antibiotic disc bacitracin 30 mcg (1x5 cart)',
    'Aso titertest kit 50 tests ',
    'Blood agar base',
    'Hcv rapid test bottle (1x30)',
    'Insolin',
    'syringe',
    'Syringes disposable 50cc',
    'Syringes disposable 5cc',
    'PEDIATRIC CLAVICLE BRACE FOR HANDS( UNIVERSAL)',
    'CALCIUM ACETATE 500 MG TAB',
]
# The lists aren't the same size, so don't zip them; use a double loop.
# Feel free to ask any questions or make requests.
# Sharing your knowledge in the comments would be really appreciated.
'Antibiotic disc augmentin 30 mcg (1x5 cart)', Returns 0 for 'Antibiotic disc bacitracin 30 mcg (1x5 cart)'
'Aso titer test kit 50-test ', returns 0 for 'Aso titertest kit 50 tests '
'Blood agar base (infusion agar)',returns 1 for 'Blood agar base',
'Hcv rapid test tab (1x30)',returns 1 for 'Hcv rapid test bottle (1x30)'
'Insulin',returns 1 for 'Insolin',
'siringe', returns 1 for 'syringe',
'Syringes disposable 50cc', returns 0 for 'Syringes disposable 50cc' but returns 2 for 'Syringes disposable 5cc'
'CALCIUM HYDROXIDE' returns 2
'TIE PADS FOR HANDS (RESTRAINS) (UNIVERSAL)' returns 2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Please clone and make pull requests so that we can see the changes made.