Skip to content

Instantly share code, notes, and snippets.

@miles-d
Created November 8, 2020 20:40
Show Gist options
  • Save miles-d/488556c42af2d8f0e2feac99d9e8775b to your computer and use it in GitHub Desktop.
Save miles-d/488556c42af2d8f0e2feac99d9e8775b to your computer and use it in GitHub Desktop.
Count occurrences of all 3-character sequences of lowercase letters in a file.
import sys
import string
def count_trigrams(lines):
    """Count every run of three consecutive non-whitespace characters.

    The result is pre-seeded with all 26**3 trigrams of ASCII lowercase
    letters (count 0), in alphabetical order, so that trigrams which never
    occur are still reported.  Any other trigram made of non-whitespace
    characters (uppercase, digits, punctuation, ...) is appended to the
    table the first time it is seen.

    Args:
        lines: An iterable of strings, e.g. an open text file.

    Returns:
        A dict mapping each trigram to its number of occurrences.  Insertion
        order is: all lowercase trigrams first, then the others in order of
        first appearance.
    """
    letters = string.ascii_lowercase
    counts = {a + b + c: 0 for a in letters for b in letters for c in letters}

    # Sliding window holding at most the last three non-whitespace characters.
    window = ''
    for line in lines:
        for ch in line:
            if ch.isspace():
                # A trigram may not contain whitespace, so every window that
                # would include this character is discarded.  Line endings
                # are whitespace too, hence trigrams never span two lines.
                window = ''
                continue
            window = (window + ch)[-3:]
            if len(window) == 3:
                counts[window] = counts.get(window, 0) + 1
    return counts


def main():
    """Print '<count> <trigram>' for every trigram of the file in argv[1]."""
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' FILE')
    with open(sys.argv[1]) as f:
        counts = count_trigrams(f)
    for trigram, count in counts.items():
        print(count, trigram)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment