Created
November 8, 2020 20:40
-
-
Save miles-d/488556c42af2d8f0e2feac99d9e8775b to your computer and use it in GitHub Desktop.
Count occurrences of all 3-character sequences of lowercase letters in a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count occurrences of 3-character sequences (trigrams) in a text file.

Usage: python <script> FILE

Prints one "<count> <trigram>" line per trigram.  Every combination of three
lowercase ASCII letters is always listed (with a count of 0 when absent);
any other trigram made of non-whitespace characters is listed after those,
in order of first appearance.
"""
import itertools
import string
import sys


def count_trigrams(lines):
    """Return a dict mapping each trigram to its number of occurrences.

    ``lines`` is any iterable of strings (for example an open text file).
    A trigram is three consecutive characters, none of which is whitespace;
    overlapping trigrams are all counted ("aaaa" contains "aaa" twice).

    The returned dict is seeded with all 26**3 lowercase ASCII trigrams at
    count 0, in alphabetical order.  Trigrams containing other characters
    are added when first seen.
    """
    # Seed every lowercase combination so absent trigrams are reported as 0.
    # itertools.product yields in the same order as three nested loops.
    counts = dict.fromkeys(
        (''.join(letters)
         for letters in itertools.product(string.ascii_lowercase, repeat=3)),
        0,
    )

    # Sliding window over the most recent run of non-whitespace characters
    # (at most three).  It is deliberately NOT reset between lines: the
    # newline character itself is whitespace and clears it.
    window = ''
    for line in lines:
        for ch in line:
            if not ch.strip():
                # Whitespace: no trigram may span it, so start over.
                window = ''
                continue
            window = (window + ch)[-3:]
            if len(window) == 3:
                counts[window] = counts.get(window, 0) + 1
    return counts


def main(argv=None):
    """Command-line entry point: count trigrams in the file named in argv[1].

    ``argv`` defaults to ``sys.argv``.  Exits with a usage message on stderr
    when no file name is supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'trigrams'
        sys.exit('usage: ' + prog + ' FILE')

    fname = argv[1]
    with open(fname) as f:
        counts = count_trigrams(f)

    for key, val in counts.items():
        print(str(val) + ' ' + key)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment