Skip to content

Instantly share code, notes, and snippets.

@nkmathew
Created March 7, 2014 18:24
Show Gist options
  • Save nkmathew/9416938 to your computer and use it in GitHub Desktop.
Save nkmathew/9416938 to your computer and use it in GitHub Desktop.
Primitive code formatter for the HLA (High Level Assembly) programming language
'''
@Started: 1st March 2014 1630
@Completed: 5th March 2014 0800
A hacked-up script to help you survive the rather awful code formatting
found in the Art of Assembly Programming.
Alas, it has many flaws. It will only be helpful with the most trivial of scripts.
'''
## FIXME: Add a list of keywords that should not have newlines after them removed.
## FIXME: Options to control removal of empty lines in multiline comments

## Keywords meant to get a newline inserted before the line they appear on.
## NOTE: nothing in the visible source reads this list yet.
NEWLINE_BEFORE = [
    'program',
    'static',
    'while',
    'repeat',
    'endwhile',
    'end',
    'until',
]

## Keywords meant to get a newline inserted after the line they appear on.
## NOTE: nothing in the visible source reads this list yet.
NEWLINE_AFTER = [
    'program',
    'static',
    'while',
    'repeat',
    'endwhile',
    'end',
    'until',
]

## Section keywords that introduce declarations (also not referenced yet).
declarationKeywords = [
    'constant',
    'storage',
    'static',
]
class Options:
    '''
    Settings for one formatter run, built from the command line arguments.

    If the first argument names an existing path it is opened (in binary
    mode) as the code source; otherwise the source stays ``sys.stdin``.
    The remaining arguments are scanned for these flags:

        --pad-inside,        -pi   sets ``padBracketsInside``
        --pad-before,        -pb   sets ``padBracketsBefore``
        --preserve-comments, -pc   sets ``preserveComments``

    Unknown arguments are ignored.
    '''

    def __init__(self, lst):
        self.optionList = lst
        self.codeSource = sys.stdin
        ## Stays ``sys.stdin`` (not a string) when no file was given; other
        ## code compares against ``sys.stdin`` to detect that case.
        self.fileName = sys.stdin
        ## Add spaces inside like most HLA code
        self.padBracketsInside = False
        ## Add a space between the directive and the opening bracket
        self.padBracketsBefore = False
        self.indentChar = ' ' * 4
        ## Don't indent comments
        self.preserveComments = False
        if self.optionList:
            self.process()

    def process(self):
        '''
        Consumes ``self.optionList``: an optional leading file name followed
        by any number of flags.
        '''
        if os.path.exists(self.optionList[0]):
            self.fileName = self.optionList[0]
            self.codeSource = open(self.optionList[0], "rb")
            self.optionList = self.optionList[1:]
        flags = set(self.optionList)
        ## Each flag is checked on its own so that several can be combined,
        ## and each one sets the attribute that __init__/__str__ actually use.
        if flags & set(['--pad-inside', '-pi']):
            self.padBracketsInside = True
        if flags & set(['--pad-before', '-pb']):
            self.padBracketsBefore = True
        if flags & set(['--preserve-comments', '-pc']):
            self.preserveComments = True

    def __str__(self):
        return '''
+----------------------------------------
| Code source: %s
| Pad Inside: %s
| Pad Before: %s
| Indent Size: %s
| Preserve Comments: %s
+----------------------------------------
''' % (self.fileName, self.padBracketsInside, self.padBracketsBefore,
       repr(self.indentChar), self.preserveComments)
def splitPreserve(string, sep = '\n'):
    '''
    Splits ``string`` on ``sep`` but keeps the separator at the end of every
    piece it terminated, so that ``''.join(result) == string``.

    "this\\nthat\\n" gives ["this\\n", "that\\n"] and "this\\nthat" gives
    ["this\\n", "that"]. An empty string gives an empty list.

    Always returns a real list (callers index it and take its length).
    '''
    pieces = string.split(sep)
    ## The last piece is the only one that was not followed by a separator.
    ## It is empty when the text ended with ``sep``; in that case it is
    ## dropped instead of being turned into an extra separator-only entry.
    tail = pieces.pop()
    lines = [piece + sep for piece in pieces]
    if tail != "":
        lines.append(tail)
    return lines
def tabularize(strList, delimeter = ":=", extraSpace = 3):
    '''
    Lines up the first occurrence of ``delimeter`` in every line.

    Each line that contains the delimiter is cut into the text before it
    (whitespace directly in front of the delimiter is discarded) and the
    text after it. The left parts are padded to the length of the longest
    left part plus ``extraSpace`` and the pieces are glued back together
    around the delimiter. Lines without the delimiter are returned as they
    are.

    Returns a new list with one entry per input line.
    '''
    rx_delimiter = re.compile(r"\s*" + re.escape(delimeter))
    parts = []
    maxLength = 0
    for line in strList:
        match = rx_delimiter.search(line)
        if match:
            maxLength = max(maxLength, match.start())
            parts.append((line[:match.start()], line[match.end():]))
        else:
            ## None (rather than '') marks "no delimiter here", so a line
            ## that *starts* with the delimiter still gets it written back.
            parts.append((None, line))
    maxLength += extraSpace
    tabularized = []
    for left, right in parts:
        if left is None:
            tabularized.append(right)
        else:
            tabularized.append(left.ljust(maxLength) + delimeter + right)
    return tabularized
def chooseChar(prevChar, currChar, nextChar, padBefore = False, padInside = False):
## FIXME: Prevent separation of the directive and the arguments
## Rules for copying a space
temp1 = currChar.isspace() and prevChar not in "\t (\n"
temp2 = currChar.isspace() and nextChar not in " )\t\n"
## Conditions for not copying a space
temp = temp1 and temp2
chosen = ''
if currChar not in ['\t', ' ', ''] or temp:
chosen = currChar
if chosen == '(' and padBefore and prevChar not in " \t":
## Add a space before opening bracket
return ' ' + chosen
elif prevChar == "(" and chosen != ' ' and padInside and (currChar not in '\n) '):
## Add a space after opening bracket
return ' ' + chosen
elif currChar == ")" and padInside and (prevChar not in '()\n'):
## Add a space before closing bracket
return ' ' + chosen
elif currChar in ":" and nextChar in "=" and prevChar not in "\t ":
## Add a space before the assignment operator
return ' ' + chosen
elif currChar in ":" and nextChar != ":" and prevChar not in "\t ":
## Add a space on both sides of the colon
return ' ' + chosen + ' '
elif currChar == "=" and nextChar not in "\t ":
return chosen + ' '
else:
return chosen
def removeExtraSpace(hlaCode):
    '''
    Compacts the code by removing as much space as possible without changing
    the code's working or introducing syntax errors.

    Line endings are normalised to "\\n" first. The text is then walked one
    character at a time: switchState() tracks whether the position is inside
    a string, a character literal or a comment (such text is copied
    verbatim) and chooseChar() decides what to emit for everything else.
    A newline that is followed only by whitespace and then '(' is dropped,
    which moves that opening bracket up onto the previous line.

    Returns the compacted text.
    '''
    hlaCode = re.sub("\r\n?", "\n", hlaCode)
    ## The patterns are applied to hlaCode at an offset instead of to a
    ## freshly sliced copy of the remaining text for every character.
    ## '(' followed by nothing but whitespace up to the end of its line
    rx_openerEndsLine = re.compile(r'\(\s*$', re.M)
    ## a line that starts with '(' and holds nothing else
    rx_loneOpenerLine = re.compile(r'^\(\s*$', re.M)
    ## only whitespace between this position and the next '('
    rx_openerAhead = re.compile(r'\n*\s*[(]')
    pieces = []
    ## Last character written so far; '' until something has been emitted
    lastChar = ''
    inString = False
    inCharacter = False
    inLineComment = False
    inMultiComment = False
    padBeforeAtOpener = False
    for index, currChar in enumerate(hlaCode):
        prevChar = lastChar
        nextChar = hlaCode[index + 1:index + 2]
        ## NOTE: only a 10 character window is handed over; passing the
        ## whole remainder slows the program down considerably.
        [prevChar,
         currChar, nextChar, inString, inCharacter, inMultiComment, inLineComment] = \
            switchState(prevChar, currChar, nextChar, inString,
                        inCharacter, inMultiComment, inLineComment,
                        hlaCode[index:index + 10])
        untouchable = inString or inCharacter or inLineComment or inMultiComment
        ## Add a space before the bracket if it's the last non whitespace
        ## character on the line.
        ## NOTE(review): as before, this also fires when any *later* line
        ## consists of a lone '(' -- confirm whether that is intended.
        if currChar == '(' and (rx_openerEndsLine.match(hlaCode, index)
                                or rx_loneOpenerLine.search(hlaCode, index + 1)):
            padBeforeAtOpener = True
        if currChar == '\n' and not untouchable and rx_openerAhead.match(hlaCode, index):
            ## The newline is dropped so the bracket below moves up. This is
            ## skipped inside comments and literals: joining a '(' onto a
            ## '//' comment line would turn code into comment text.
            if not padBeforeAtOpener and prevChar != ' ':
                ## This variable is going to help with adding a space before
                ## an opening bracket that has been relocated from the
                ## brackets below.
                padBeforeAtOpener = True
            continue
        if untouchable:
            piece = currChar
        else:
            piece = chooseChar(prevChar, currChar, nextChar, padBeforeAtOpener)
            if currChar == '(':
                padBeforeAtOpener = False
        if piece:
            pieces.append(piece)
            lastChar = piece[-1]
    return ''.join(pieces)
def switchState(prevChar, currChar, nextChar, inString, inCharacter,
                inMultiComment, inLineComment, linePortionLeft):
    '''
    Updates the "where am I" flags for the character ``currChar``.

    prevChar        -- character before the current one ('' at a start)
    currChar        -- character being examined
    nextChar        -- character after the current one ('' at the end)
    inString        -- currently between double quotes
    inCharacter     -- currently between single quotes
    inMultiComment  -- currently between /* and */
    inLineComment   -- currently after // on this line
    linePortionLeft -- text starting at currChar; callers pass a short window

    Returns the tuple (prevChar, currChar, nextChar, inString, inCharacter,
    inMultiComment, inLineComment); the three characters are handed back
    unchanged.
    '''
    outsideLiteralOrBlock = not (inString or inCharacter or inMultiComment)
    ## A line comment ends with its line.
    if (prevChar == "\n" or not prevChar) and outsideLiteralOrBlock:
        inLineComment = False
    ## The comment state is switched on as soon as only whitespace separates
    ## the current position from the '//'.
    if outsideLiteralOrBlock and re.search(r"^\s*//", linePortionLeft):
        inLineComment = True
    if not (inLineComment or inString or inCharacter):
        pair = currChar + nextChar
        if pair == "/*":
            inMultiComment = True
        elif pair == "*/":
            inMultiComment = False
    ## Quotes only count when they are not inside the other kind of literal
    ## or inside a comment.
    if currChar == '"' and not (inCharacter or inLineComment or inMultiComment):
        inString = not inString
    if currChar == "'" and not (inString or inLineComment or inMultiComment):
        inCharacter = not inCharacter
    return (prevChar, currChar, nextChar, inString, inCharacter, inMultiComment, inLineComment)
def isDeclarationLine(string):
    '''
    Placeholder that has not been implemented yet: the argument is ignored
    and None is returned for every input.
    '''
    return None
class HLAFormatter(object):
    '''
    Re-indents HLA source that has been split into lines.

    lineList -- the lines of code, each one still carrying its newline
    opts     -- an object providing ``indentChar``, ``preserveComments`` and
                ``fileName`` (the Options class in this file)

    format() builds the result in ``self.formattedCode``; displayResult()
    and writeToFile() output it.
    '''

    def __init__(self, lineList, opts):
        self.options = opts
        self.codeLines = lineList
        self.totalLines = len(self.codeLines)
        self.formattedCode = ""
        ## Lexical state that is carried from one character to the next
        self.inCharacter = False
        self.inString = False
        self.inMultiComment = False
        self.inLineComment = False
        self.fileWritten = False
        self.outFile = ""

    def untouchable(self):
        '''
        True while the current position is inside a comment, a string or a
        character literal.
        '''
        return self.inMultiComment or self.inLineComment or self.inCharacter or self.inString

    def removeEmptyLines(self, lst):
        '''
        Returns the lines of ``lst`` that are not blank, followed by a single
        newline entry.
        '''
        res = [line for line in lst if not re.search(r"^\s*$", line)]
        return res + ['\n']

    def _emitDeclarations(self, declarations):
        ## Aligns the collected declaration lines on ':' and appends them,
        ## each prefixed with one space, to the formatted output.
        rows = tabularize(self.removeEmptyLines(declarations), ":")
        self.formattedCode += ''.join(' ' + row for row in rows)

    def _renderLine(self, line, indentLevel):
        ## Returns the text to output for one source line.
        indent = self.options.indentChar * indentLevel
        isCommentLine = re.search(r"^\s*//", line)
        if isCommentLine and self.options.preserveComments:
            ## Comment lines are passed through untouched when asked to.
            ## (This test used to be reachable only for lines that end inside
            ## a block comment or literal, so the option had no effect on
            ## ordinary '//' lines.)
            return line
        if self.untouchable():
            ## The line ends inside a block comment or an open literal
            if isCommentLine:
                return indent + re.sub(r"^\s*", "", line)
            return line
        if re.search(r"^\s*$", line):
            ## Dont' indent empty lines
            return '\n'
        return indent + re.sub(r"^\s*", "", line)

    def format(self):
        '''
        Indents every line according to the block keywords and brackets seen
        so far and stores the result in ``self.formattedCode``.

        Blank lines are removed, except for one blank line directly after a
        line matching the "#include"/"end" pattern. Lines following a
        "var"/"static" token are collected and emitted as an aligned table
        when a line starting with "begin " is reached (or at the end of the
        input when no such line follows).

        Raises AssertionError when the indentation level of the last
        processed line is not zero.
        '''
        ## Characters that delimit a keyword.
        keywordStarters = "fwebgprtiusv"
        beforeKeywordChars = " \t\n"
        ## NOTE(review): inside the class "\n-;" is a character *range*
        ## (newline up to ';'), presumably a literal '-' was meant. It is
        ## kept as is because every character in that range ends a keyword.
        rx_keywordEnd = re.compile("[ \t\n-;(/:]+?")
        ## Match keywords.
        rx_increaser = re.compile("^(while|for|begin|forever|try|repeat|if)$", re.VERBOSE | re.I)
        rx_decreaser = re.compile("^(end|endwhile|endfor|until|endtry|endif)$", re.VERBOSE | re.I)
        rx_halfIncreaser = re.compile("^(exception|else|elseif)$", re.I)
        rx_blank = re.compile(r'^\s*$')
        nextIndentLevel = 0
        ## Defined up front so the final check also works for empty input
        currentIndentLevel = 0
        consecutiveEmptyLines = 0
        fetchingDeclarations = False
        declarations = []
        preserveEmptyLine = False
        for linePos in range(self.totalLines):
            line = self.codeLines[linePos]
            if rx_blank.search(line):
                consecutiveEmptyLines += 1
            else:
                consecutiveEmptyLines = 0
            currentIndentLevel = nextIndentLevel
            if re.match("begin ", line):
                fetchingDeclarations = False
                self._emitDeclarations(declarations)
                declarations = []
            if fetchingDeclarations:
                declarations.append(line)
                continue
            for charPos, currChar in enumerate(line):
                nextChar = line[charPos + 1:charPos + 2]
                prevChar = line[charPos - 1:charPos] if charPos else ""
                [prevChar,
                 currChar, nextChar, self.inString, self.inCharacter,
                 self.inMultiComment, self.inLineComment] = \
                    switchState(prevChar, currChar, nextChar, self.inString,
                                self.inCharacter, self.inMultiComment,
                                self.inLineComment, line[charPos:charPos + 10])
                if self.untouchable():
                    continue
                ## ~~~~~~~~~~~~~~~~~~~[ Get next word ]~~~~~~~~~~~~~~~~~~~~~
                startsWord = prevChar == "" or prevChar in beforeKeywordChars
                if startsWord and currChar.lower() in keywordStarters:
                    ## The word runs up to the first delimiter, or to the end
                    ## of the line when there is none (e.g. a last line that
                    ## has no trailing newline).
                    found = rx_keywordEnd.search(line, charPos)
                    token = line[charPos:found.start()] if found else line[charPos:]
                    if re.search("(var|static)", token):
                        fetchingDeclarations = True
                    if rx_increaser.search(token):
                        nextIndentLevel += 1
                    elif rx_decreaser.search(token):
                        ## A closing keyword is itself moved back one level
                        nextIndentLevel -= 1
                        currentIndentLevel = nextIndentLevel
                    elif rx_halfIncreaser.search(token):
                        ## else/elseif/exception: only this line moves back
                        currentIndentLevel -= 1
                if currChar == "(":
                    nextIndentLevel += 1
                elif currChar == ")":
                    if charPos == 0:
                        ## A line that starts with ')' is dedented itself
                        currentIndentLevel -= 1
                    nextIndentLevel -= 1
            ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            lim = 0
            if preserveEmptyLine:
                lim = 1
                preserveEmptyLine = False
            ## A line comment never continues onto the next line
            self.inLineComment = False
            if consecutiveEmptyLines <= lim:
                self.formattedCode += self._renderLine(line, currentIndentLevel)
            if re.search(r"\s*(#include|end)\W*", line):
                ## An empty line after an end of block will not be deleted.
                ## Placed here so that it doesn't affect this line
                preserveEmptyLine = True
        if declarations:
            ## No "begin " line followed the declarations; emit them anyway
            ## rather than silently dropping source lines.
            self._emitDeclarations(declarations)
        ## Sanity check: the last line is expected to be back at level zero
        assert not currentIndentLevel, currentIndentLevel

    def displayResult(self):
        '''Prints the formatted code to standard output.'''
        print(self.formattedCode)

    def writeToFile(self, fname = "formatted-output.hla", inPlace = True):
        '''
        Writes the formatted code to a file and sets ``self.fileWritten``.

        fname   -- file to write when not formatting in place
        inPlace -- when true and the code came from a file (the options'
                   ``fileName`` is not ``sys.stdin``), that file is
                   overwritten instead of ``fname``
        '''
        if inPlace and self.options.fileName != sys.stdin:
            fname = self.options.fileName
        data = self.formattedCode
        if not isinstance(data, bytes):
            ## The file is opened in binary mode so newlines are written
            ## exactly as formatted; text has to be encoded first.
            data = data.encode("utf-8")
        with open(fname, "wb") as fd:
            fd.write(data)
        self.fileWritten = True
## When set to a true value main() stops after building the formatter:
## nothing is formatted, printed or written.
DEBUG = 0
def main():
    '''
    Command line entry point.

    Builds the Options from ``sys.argv[1:]``, reads the whole code source (a
    file or standard input), compacts it with removeExtraSpace(), splits it
    into lines and lets HLAFormatter format, print and write the result.
    '''
    opts = Options(sys.argv[1:])
    try:
        content = opts.codeSource.read()
    finally:
        ## Closed even when reading fails
        opts.codeSource.close()
    if not isinstance(content, str):
        ## Files are opened in binary mode; on Python 3 that yields bytes,
        ## while the rest of the program works on text.
        content = content.decode("utf-8")
    content = removeExtraSpace(content)
    codeLines = splitPreserve(content)
    formatter = HLAFormatter(codeLines, opts)
    if not DEBUG:
        formatter.format()
        formatter.displayResult()
        formatter.writeToFile()
import sys
import os
import re
import pprint
if __name__ == "__main__":
    ## Run only when executed as a script; whatever main() returns is used
    ## as the exit status.
    exitStatus = main()
    sys.exit(exitStatus)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment