Count occurrences of all 3-character sequences of lowercase letters in a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count three-character sequences in a text file.

Usage: python SCRIPT FILE

Prints one ``<count> <triple>`` line per triple. All 26**3 triples of
lowercase ASCII letters are always listed first (with a count of 0 when they
never occur); any other triple of non-whitespace characters found in the file
is listed after them, in order of first appearance.
"""
import string
import sys
from typing import Dict, Iterable

TRIPLE_LENGTH = 3


def count_triples(chunks: Iterable[str]) -> Dict[str, int]:
    """Return how often each whitespace-free 3-character sequence occurs.

    Args:
        chunks: Pieces of text in reading order, e.g. an open text file
            (which yields lines). The sliding window is carried over from
            one chunk to the next, so a triple may straddle two chunks that
            are not separated by whitespace.

    Returns:
        A dict mapping each triple to its count. Every triple of lowercase
        ASCII letters is present (possibly with count 0) and comes first;
        other triples are added in the order they are first seen.
    """
    letters = string.ascii_lowercase
    # Seed every lowercase triple so that absent ones are reported as 0.
    counts = {a + b + c: 0 for a in letters for b in letters for c in letters}

    window = ""  # the most recent (up to) TRIPLE_LENGTH characters
    for chunk in chunks:
        for ch in chunk:
            window = (window + ch)[-TRIPLE_LENGTH:]
            if len(window) < TRIPLE_LENGTH:
                continue  # still filling the window at the very start
            if any(c.isspace() for c in window):
                continue  # triples never include spaces, tabs or newlines
            counts[window] = counts.get(window, 0) + 1
    return counts


def main() -> None:
    """Count the triples of the file named by the first CLI argument and print them."""
    if len(sys.argv) < 2:
        # A usage message is friendlier than the IndexError raised otherwise.
        sys.exit("usage: {} FILE".format(sys.argv[0]))

    with open(sys.argv[1]) as f:
        counts = count_triples(f)

    for triple, count in counts.items():
        print(count, triple)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment