Created
July 12, 2012 22:26
-
-
Save snim2/3101489 to your computer and use it in GitHub Desktop.
Parse simple tokens from a string such that it can be recreated.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Parse simple tokens from a string such that it can be recreated. | |
# | |
__author__ = 'Sarah Mount <s.mount@wlv.ac.uk>' | |
__date__ = '12 July 2012' | |
# Characters treated as token separators.
ws = [' ', '\n', '\t']


def parse(original):
    """Split *original* into whitespace-delimited tokens, losslessly.

    The string is scanned left to right as alternating runs of separator
    characters (those listed in ``ws``) and non-separator characters.

    Returns a ``(tokens, tojoin)`` tuple:

    * ``tokens`` -- the non-whitespace runs, in order of appearance.
    * ``tojoin`` -- every run (separators and tokens interleaved, starting
      with a possibly empty separator run), so that ``''.join(tojoin)``
      reproduces ``original`` exactly.

    An empty input yields ``([], [])``. Input that ends in whitespace (or
    consists only of whitespace) is handled: the trailing separator run is
    recorded in ``tojoin`` and no empty token is emitted.
    """
    tokens = []
    tojoin = []
    length = len(original)
    char = 0
    while char < length:
        # Separator run preceding the next token (empty if the token starts
        # immediately).  The bounds check must come first so that trailing
        # whitespace does not index past the end of the string.
        start = char
        while char < length and original[char] in ws:
            char += 1
        tojoin.append(original[start:char])

        # Token: the following run of non-separator characters.
        start = char
        while char < length and original[char] not in ws:
            char += 1
        token = original[start:char]
        # The token is only empty when the string ended on whitespace; do
        # not report a phantom empty token in that case.
        if token:
            tokens.append(token)
            tojoin.append(token)
    return tokens, tojoin
if __name__ == '__main__':
    # Round-trip self-check: parsing the sample and joining the recorded
    # pieces must give back the sample unchanged.
    test = "a b c\td\n\n\ne"
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare ``print x`` statement is a SyntaxError on Python 3.
    print(test)
    _tokens, s = parse(test)
    rebuilt = ''.join(s)
    print(rebuilt)
    if test == rebuilt:
        print('PASSED')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment