Skip to content

Instantly share code, notes, and snippets.

@an-OK-squirrel
Created November 27, 2015 15:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save an-OK-squirrel/8e0088539d1ee876d74d to your computer and use it in GitHub Desktop.
Save an-OK-squirrel/8e0088539d1ee876d74d to your computer and use it in GitHub Desktop.
# coding: utf-8
import re
# Character classes used when classifying input characters.
L_LETTERS = 'abcdefghijklmnopqrstuvwxyz'
U_LETTERS = L_LETTERS.upper()
DIGITS = '0123456789'
ALL_LETTERS = L_LETTERS + U_LETTERS
# Lowercase letters plus the arithmetic/assignment symbols.
OPERATORS = L_LETTERS + '+*/-='
# Only space and tab are listed; newlines are not included.
WHITESPACE = ' \t'
def does_regex_match(regex, string):
    """Return True when *regex* matches the whole of *string*.

    Args:
        regex: A regular-expression pattern accepted by the ``re`` module.
        string: The text that must be matched from start to end.

    Returns:
        ``True`` if the entire string matches the pattern, else ``False``.
    """
    # re.fullmatch lets the engine backtrack across alternatives until the
    # match covers the whole string. Comparing re.match(...).group() with
    # the input instead rejects cases such as 'a|ab' against 'ab', because
    # re.match settles for the first alternative ('a').
    return re.fullmatch(regex, string) is not None
def split_into_st(program):
    """Split *program* into a flat list of ``[text, type_code]`` tokens.

    Type codes: 1 = operator, 2 = number, 3 = string (0 means "no token in
    progress" and is never emitted).

    Tokenizing rules:
      * A character in OPERATORS becomes a one-character operator token.
      * ``.`` starts a two-character operator: the dot plus whatever
        character follows it, whatever that character is.
      * A run of DIGITS becomes one number token.  A ``.`` inside the run
        is skipped, so ``1.5`` yields the text ``15``.
        NOTE(review): this looks like a placeholder for float support --
        confirm the intended behaviour.
      * ``"`` opens a string.  The token text keeps the opening quote and
        omits the closing one (so ``"abc"`` yields ``"abc``).
      * Characters in WHITESPACE, and any other unrecognised character,
        are skipped when no token is in progress.

    Example: ``split_into_st('a12')`` returns ``[['a', 1], ['12', 2]]``.

    Args:
        program: Source text to tokenize.

    Returns:
        A list of two-item lists ``[token_text, type_code]``.  A token that
        is still open when the input ends (a number, an unterminated
        string, or a lone ``.``) is flushed as-is.
    """
    result = []
    token = ''
    token_type = 0  # 0 is none, 1 is op, 2 is num, 3 is string
    char_index = 0
    while char_index < len(program):
        char = program[char_index]
        if token_type == 0:  # No token in progress
            if char in WHITESPACE:
                pass
            elif char in OPERATORS:
                # Single-character operator: emit immediately.
                result.append([char, 1])
            elif char == '.':
                token, token_type = '.', 1
            elif char in DIGITS:
                token, token_type = char, 2
            elif char == '"':
                token, token_type = '"', 3
            # Any other character is silently skipped.
        elif token_type == 1:
            # Second half of a '.'-prefixed operator.
            result.append([token + char, 1])
            token, token_type = '', 0
        elif token_type == 2:
            if char in DIGITS:
                token += char
            elif char != '.':
                # The number is finished.  Re-examine this same character
                # with no token in progress, so do not advance the index.
                result.append([token, 2])
                token, token_type = '', 0
                continue
        elif token_type == 3:
            if char == '"':
                # Closing quote is dropped; the text keeps only the opener.
                result.append([token, 3])
                token, token_type = '', 0
            else:
                token += char
        char_index += 1
    # Flush a token left open at end of input.  Guarding on the type avoids
    # emitting a meaningless ['', 0] entry when nothing is pending.
    if token_type != 0:
        result.append([token, token_type])
    return result
# Human-readable token-type labels.  Nothing in the visible code reads this
# list.  NOTE(review): indices 0 and 1 appear to line up with the numeric
# token_type codes used by split_into_st (0 = none, 1 = operator) -- confirm
# before relying on it.
types = ['none', 'operator', '.operator']
def parse_token_st(tokens):
    """Convert raw ``[text, type_code]`` tokens into descriptive dicts.

    Args:
        tokens: Iterable of indexable pairs where item 0 is the token text
            and item 1 is the numeric type code.

    Returns:
        A list of ``{'token_type': ..., 'token_value': ...}`` dicts:
        code 1 gives an 'operator' with the text unchanged, code 2 an
        'integer' with the text converted by ``int``, and code 3 a 'string'
        with the first character of the text removed.  Entries with any
        other code (including 0) produce no output.
    """
    parsed = []
    for entry in tokens:
        kind = entry[1]
        if kind == 1:
            label, value = 'operator', entry[0]
        elif kind == 2:
            label, value = 'integer', int(entry[0])
        elif kind == 3:
            # Drop the leading character of the stored text.
            label, value = 'string', str(entry[0][1:])
        else:
            continue
        parsed.append({'token_type': label, 'token_value': value})
    return parsed
# Single characters that are expanded into longer source text before
# tokenizing.
replace_chars = {
'à': ' 0'
}


def char_replace(code):
    """Return *code* with every character listed in replace_chars expanded.

    Characters that are not keys of ``replace_chars`` are copied through
    unchanged.  The function has no side effects; in particular it does not
    write anything to stdout.

    Args:
        code: Source text to expand.

    Returns:
        The expanded text as a new string.
    """
    # dict.get with the character itself as the fallback copies unmapped
    # characters through; join builds the result in one pass.
    return ''.join(replace_chars.get(char, char) for char in code)
def fully_parse(code):
    """Run the full pipeline on *code* and return the parsed token dicts.

    The steps are applied in order: ``char_replace`` on the input text,
    ``split_into_st`` on its result, then ``parse_token_st`` on the raw
    tokens.
    """
    expanded = char_replace(code)
    raw_tokens = split_into_st(expanded)
    return parse_token_st(raw_tokens)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment