Skip to content

Instantly share code, notes, and snippets.

@jarehec
Last active February 20, 2020 00:21
Show Gist options
  • Save jarehec/57acac25f8b1103eac4c22e64912d127 to your computer and use it in GitHub Desktop.
Save jarehec/57acac25f8b1103eac4c22e64912d127 to your computer and use it in GitHub Desktop.
def word_count_engine(doc: str):
    """Count word occurrences in *doc* and rank them by frequency.

    The text is lower-cased and scanned character by character. A word is a
    maximal run of the letters ``a``-``z``; apostrophes are ignored (so
    ``"you'll"`` is counted as ``"youll"``) and every other character acts
    as a word separator.

    Args:
        doc: The document text to analyse.

    Returns:
        A list of ``[word, count]`` pairs, with the count rendered as a
        string. Pairs are ordered by descending count; words with equal
        counts keep the order in which they first appeared in *doc*.

    >>> word_count_engine("Practice makes perfect. Just practice!")
    [['practice', '2'], ['makes', '1'], ['perfect', '1'], ['just', '1']]
    """
    # word -> [rank of first appearance, number of occurrences]
    stats = {}
    letters = []

    # The appended space is a sentinel separator: it guarantees that a word
    # running up to the very end of the document is flushed like any other,
    # whether or not it has been seen before.
    for ch in doc.lower() + " ":
        if "a" <= ch <= "z":
            letters.append(ch)
            continue
        if ch == "'" or not letters:
            # Apostrophes never split a word; consecutive separators
            # produce no empty words.
            continue

        word = "".join(letters)
        letters = []
        entry = stats.get(word)
        if entry is None:
            stats[word] = [len(stats), 1]
        else:
            entry[1] += 1

    # Highest count first; ties resolved by first appearance.
    ranked = sorted(stats.items(), key=lambda item: (-item[1][1], item[1][0]))
    return [[word, str(info[1])] for word, info in ranked]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment