Skip to content

Instantly share code, notes, and snippets.

Last active February 6, 2024 11:11
Show Gist options
  • Save parajain/4c9d44b9fd7ace7b80bdaf7383de7477 to your computer and use it in GitHub Desktop.
Save parajain/4c9d44b9fd7ace7b80bdaf7383de7477 to your computer and use it in GitHub Desktop.
# Acronym finder: list WordNet nouns whose letters can be picked, in order,
# out of a phrase -- by default taking at least one letter from every word.
#
# NOTE(review): this script was rebuilt from a paste that had lost its
# indentation and several statements; the behaviour below follows the intent
# visible in what remained and should be confirmed against the original gist.
from bisect import bisect_right
from itertools import accumulate
from typing import List, Optional, Sequence, Tuple

# Phrase to search for acronyms in; edit this before running the script.
inputphrase = ''

# Set to False to also report nouns that skip one or more words of the phrase.
REQUIRE_ALL_WORDS = True


def prepare_phrase(phrase: str) -> Tuple[str, List[int]]:
    """Return the phrase's letters and the end offset of each word.

    The phrase is lower-cased and split on whitespace.  The first item is the
    words joined without separators; the second holds, for every word, the
    exclusive end index of that word inside the joined string.
    """
    words = phrase.lower().split()
    # Lengths are measured on the same lower-cased words that are joined, so
    # the offsets always line up with the joined string.
    return ''.join(words), list(accumulate(len(w) for w in words))


def find_letters(letters: str, candidate: str) -> Optional[List[int]]:
    """Greedily locate ``candidate`` as a subsequence of ``letters``.

    Each character of ``candidate`` is matched at its leftmost occurrence
    after the previous match.  Returns the matched indices, or ``None`` when
    some character cannot be found.  Matching is case-sensitive.
    """
    positions: List[int] = []
    start = 0
    for ch in candidate:
        hit = letters.find(ch, start)
        if hit == -1:
            return None
        positions.append(hit)
        start = hit + 1  # never reuse the same letter of the phrase
    return positions


def uncovered_words(boundaries: Sequence[int],
                    positions: Sequence[int]) -> List[int]:
    """Return the indices of words that contain none of ``positions``.

    ``boundaries`` are the exclusive word end offsets from ``prepare_phrase``.
    """
    # bisect_right maps an index equal to a boundary to the *next* word,
    # because boundaries are exclusive ends.
    covered = {bisect_right(boundaries, pos) for pos in positions}
    return [word for word in range(len(boundaries)) if word not in covered]


def match_candidate(letters: str, boundaries: Sequence[int], candidate: str,
                    require_all_words: bool = True) -> Optional[List[int]]:
    """Return the matched indices for ``candidate``, or ``None`` if rejected.

    A candidate is rejected when it is not a subsequence of ``letters`` or,
    with ``require_all_words``, when some word contributes no letter.
    """
    positions = find_letters(letters, candidate)
    if positions is None:
        return None
    if require_all_words and uncovered_words(boundaries, positions):
        return None
    return positions


def highlight(letters: str, positions: Sequence[int]) -> str:
    """Return ``letters`` with the characters at ``positions`` upper-cased."""
    chosen = set(positions)
    return ''.join(ch.upper() if i in chosen else ch
                   for i, ch in enumerate(letters))


def load_nouns() -> List[str]:
    """Return every lemma name of every WordNet noun synset (with repeats)."""
    # Imported here so the matching helpers above stay importable on a
    # machine without NLTK installed.
    import nltk
    from nltk.corpus import wordnet as wn

    # NOTE(review): only the 'omw-1.4' download survived in the paste; the
    # 'wordnet' corpus is fetched as well because the reader needs it.
    nltk.download('wordnet')
    nltk.download('omw-1.4')
    return [word for synset in wn.all_synsets('n')
            for word in synset.lemma_names()]


def main() -> None:
    """Print every noun that can be spelled from ``inputphrase``.

    Each output line holds the noun, the phrase with the used letters in
    upper case, the used indices, and the list of unused word indices (empty
    while ``REQUIRE_ALL_WORDS`` is True).
    """
    import tqdm

    letters, boundaries = prepare_phrase(inputphrase)
    all_nouns = load_nouns()
    for n in tqdm.tqdm(all_nouns, total=len(all_nouns)):
        # Multi-word lemmas are joined with '_' and are skipped
        # (presumed intent of the bodiless check in the paste).
        if '_' in n:
            continue
        matched = match_candidate(letters, boundaries, n, REQUIRE_ALL_WORDS)
        if matched is None:
            continue
        print(n, highlight(letters, matched), matched,
              uncovered_words(boundaries, matched))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment