Skip to content

Instantly share code, notes, and snippets.

@ixe013
Last active October 2, 2019 17:33
Show Gist options
  • Save ixe013/1f81cadeb0ecc8225d491ca568454863 to your computer and use it in GitHub Desktop.
Save ixe013/1f81cadeb0ecc8225d491ca568454863 to your computer and use it in GitHub Desktop.
Looks for a sequence of text (a prompt) inside a set of lines that are arbitrarily cut, with or without newlines
import itertools
# Scenarios filed under the "passed" label.  Every key is a scenario name and
# every value is the ordered list of byte chunks that the fake reader yields,
# imitating a stream that may be cut at arbitrary positions.
tests_passed = {
    # The prompt finishes the first chunk, right after a newline.
    'simple end of line': [b'One\nSSH>', b'no!'],
    # The prompt is the very first thing received.
    'simple begining of line': [b'SSH>'],
    # Chunks holding only a newline come before the prompt.
    'simple empty lines': [b'\n', b'\n', b'SSH>', b'no!'],
    # The prompt is split across two chunks, one character from its end.
    'last character cut': [b'One\nSSH', b'>'],
    # The prompt is split across two chunks, two characters from its end.
    'last two characters cut': [b'One\nSS', b'H>'],
    # The prompt is split right after its first character.
    'cut after first': [b'One\nS', b'SH>'],
}
# Scenarios filed under the "to do" label.  Same layout as the other scenario
# table: scenario name -> ordered list of byte chunks handed out by the fake
# reader.  The keys are printed at runtime, so their spelling is left as is.
tests_to_do = {
    # A newline interrupts a partially received prompt.
    'cut by a new line': [
        b'One\nS\n',
        b'SH>',
        b'this should show\n',
        b'SSH>',
    ],
    # Every character of the prompt arrives in its own chunk.
    'all cut': [
        b'One\nS',
        b'S',
        b'H',
        b'>',
        b'no!',
    ],
    # A look-alike containing a space precedes the real prompt.
    'false alsert': [
        b'One\nSS H>',
        b'\nSSH>',
    ],
    # Same look-alike, but split over two chunks.  Each chunk needs its own
    # trailing comma: without one, adjacent bytes literals are concatenated
    # into a single chunk and the split this scenario is about disappears.
    'false alsert on two lines': [
        b'One\nSS',
        b' H> ',
        b'Yes\nSSH>',
    ],
    # Look-alike delivered one character per chunk, then the real prompt.
    'false alert all cut': [
        b'One\nS',
        b'S',
        b'H',
        b' >',
        b'SSH>',
    ],
}
# Single lookup table holding every scenario.  tests_to_do is unpacked last,
# so its entry wins if both tables ever share a scenario name.
tests = {**tests_passed, **tests_to_do}
# Placeholder list; nothing is recorded in it.
prompts_that_work = []

# Prompt specifications exercised against every scenario.
# '*' is a (non-greedy) wildcard and '.' stands for any single character.
prompts = [
    'SSH>',  # literal prompt
    'SS*>',  # wildcard in the middle
    'S*H>',
    'S*>',
    '*>',    # leading wildcard
    '*SH>',
    '*S*>',  # two wildcards
    'SSH*',  # trailing wildcard
    'S.H*',  # single-character match combined with a trailing wildcard
    '....',  # four arbitrary characters
    # '...*' is left out on purpose: this one does not work.
]
def fake_read(name):
    """Yield, in order, the byte chunks registered in ``tests`` under ``name``.

    Stands in for a real stream reader: each item produced is one chunk of
    data as it would have been received.  A ``KeyError`` for an unknown name
    surfaces when the generator is first advanced.
    """
    yield from tests[name]
def position_of_next_character_to_match(prompt, marker):
    """Return the index of the first non-``'*'`` item located after ``marker``.

    When nothing but wildcards (or nothing at all) follows ``marker``, the
    position stays at ``marker``.  The result is capped at ``len(prompt)``.
    """
    non_wildcard_positions = (
        index
        for index in range(marker + 1, len(prompt))
        if prompt[index] != '*'
    )
    # Fall back to the current position when no candidate exists.
    return min(len(prompt), next(non_wildcard_positions, marker))
def forward_to_character_following(iterator, character):
    """Advance ``iterator`` just past the first item equal to ``ord(character)``.

    Items are compared against the code point of ``character``, which fits an
    iterator over ``bytes`` (it yields integers).  If no item matches, the
    iterator ends up exhausted.  The same iterator object is returned.
    """
    # any() stops pulling items as soon as one comparison is true, so the
    # iterator is left positioned right after the matching item.
    any(item == ord(character) for item in iterator)
    return iterator
def forward_to_begining_of_new_line(iterator):
    """Advance ``iterator`` past the next newline and return it.

    Delegates to ``forward_to_character_following`` with ``'\\n'`` as the
    character to skip past.
    """
    line_feed = '\n'
    return forward_to_character_following(iterator, line_feed)
def find_wildcard_stop_character(marker):
    """Return the first prompt character after a wildcard that is not ``'*'``.

    ``marker`` is an iterator over the prompt, positioned right after a
    ``'*'``.  Consecutive stars are skipped, which makes ``SS******>``
    equivalent to ``SS*>`` (both match ``SSH>``).  ``None`` is returned when
    the prompt ends with a wildcard.
    """
    stop_character = next(marker, None)
    while stop_character == '*':
        # Collapse a run of stars into a single wildcard.
        stop_character = next(marker, None)
    return stop_character
def eat_next_character(iterator):
    """Pull one item from ``iterator``.

    Returns a ``(item, more)`` pair: ``(next_item, True)`` when an item was
    available, ``(None, False)`` once the iterator is exhausted.
    """
    try:
        return next(iterator), True
    except StopIteration:
        return None, False
def new_test(name, prompt):
    """Read the chunks of scenario ``name`` until ``prompt`` is recognised.

    Chunks come from ``fake_read(name)`` and are scanned byte by byte.  In
    ``prompt``, ``'.'`` matches any single character and ``'*'`` is a
    non-greedy wildcard; every other character must match literally.

    Reading stops as soon as the whole prompt has been matched.  It also
    stops when the reader yields an empty chunk or runs out of chunks; in
    that last case the function now returns what was received instead of
    letting ``StopIteration`` escape to the caller.

    Args:
        name: key of the scenario to read.
        prompt: non-empty prompt specification (an empty one makes the
            initial ``next(marker)`` raise ``StopIteration``).

    Returns:
        Every chunk received so far (including the one in which scanning
        stopped), joined, decoded as UTF-8 and split on ``'\\n'``.
    """
    received_chunks = []
    reader = fake_read(name)
    # A default avoids StopIteration when the scenario has no chunk at all.
    data = next(reader, None)

    marker = iter(prompt)
    looking_for = next(marker)
    match_anything = looking_for == '.'

    if looking_for == '*':
        # Make a copy of the iterator. It also serves as a flag telling
        # that we are in wildcard mode...
        marker, saved_marker = itertools.tee(marker)
        # ...and start looking for the remainder of the prompt as if it was
        # what we were looking for all along.
        looking_for = find_wildcard_stop_character(marker)
    else:
        saved_marker = None

    partial_match = False

    while data:
        received_chunks.append(data)

        # For every character in the chunk.
        iterator = iter(data)

        # Possible states:
        # 0: Looking for the next character after a new line
        # 1: Looking for the next character in the prompt string
        # 2: Looking for the next character that would stop the non-greedy
        #    wildcard search
        loop = True
        # Cannot raise: ``while data`` guarantees a non-empty chunk.
        byte = next(iterator)

        # A plain for loop cannot be used because a character may have to
        # be tested twice.
        while loop:
            # Is the current character the one we are looking for? It can be
            #  - the exact character
            #  - any character, because the prompt specification had a '.'
            if chr(byte) == looking_for or match_anything:
                partial_match = True
                try:
                    # Advance in the prompt optimistically.
                    looking_for = next(marker)
                    # Will the next character match whatever it is?
                    match_anything = looking_for == '.'

                    # If we were in wildcard mode, it is over because we
                    # found the character that made us break out of it, but
                    # maybe there is another wildcard right after it.
                    if looking_for == '*':
                        # Start a new search for the remainder of the prompt.
                        partial_match = False
                        marker, saved_marker = itertools.tee(marker)
                        looking_for = find_wildcard_stop_character(marker)
                except StopIteration:
                    # No more characters to find: found it!
                    data = None  # Signal to get out of the outer loop
                    break  # Get out of the inner loop (skips its else)

                # If we reach here, we haven't found it all yet: continue.
                byte, loop = eat_next_character(iterator)

            elif saved_marker:
                # looking_for is the character that gets us out of the
                # wildcard search, so this is just any other character:
                # rewind the prompt to right after the wildcard.
                marker, saved_marker = itertools.tee(saved_marker)

                # The position in saved_marker was already visited, but we
                # could be at the end of the prompt.
                try:
                    looking_for = next(marker)
                except StopIteration:
                    # Found it!
                    data = None  # Signal to get out of the outer loop
                    break  # Get out of the inner loop (skips its else)

                # We failed to match this character, but maybe it is the
                # start of a new prompt. Do *not* advance the iterator and
                # try again, now that the prompt iterator has been reset.
                if not partial_match:
                    byte, loop = eat_next_character(iterator)
                else:
                    # But that's a one time thing.
                    partial_match = False

            else:
                # Start to search from the beginning of the prompt again.
                marker = iter(prompt)
                looking_for = next(marker)
                match_anything = looking_for == '.'
                partial_match = False

                # If the character that did not match happens to be a new
                # line, then we must simply move on to the next character.
                # Otherwise forward_to_begining_of_new_line would skip valid
                # characters after the \n while looking for another one,
                # e.g. it would skip over the SSH> in "\nSSH>\n".
                if chr(byte) != '\n':
                    byte, loop = eat_next_character(
                        forward_to_begining_of_new_line(iterator))
                else:
                    byte, loop = eat_next_character(iterator)
        else:
            # We read the whole chunk without completing the prompt: fetch
            # the next one. None (no chunk left) ends the outer loop rather
            # than raising StopIteration.
            data = next(reader, None)

    lines = b''.join(received_chunks).decode('utf-8').split('\n')

    return lines
# Try every prompt specification against every scenario and print the lines
# that were captured each time.
for name in tests:
    for prompt in prompts:
        # The label is printed first, without a newline, so that the result
        # of the run lands on the same output line.
        print(f"Result for [{name},{prompt}] ", end='')
        print(new_test(name, prompt))
    # Visual separator between scenarios.
    print('---------------------------------')
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment