Created
March 7, 2014 18:24
-
-
Save nkmathew/9416938 to your computer and use it in GitHub Desktop.
Primitive code formatter for the HLA(High Level Assembly) programming language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
@Started: 1st March 2014 1630
@Completed: 5th March 2014 0800
A hacked-up script to help you survive the rather awful code formatting
in the Art of Assembly Programming.
Alas, it has many flaws. It will be helpful with only the most trivial of scripts.
'''
## FIXME: Add a list of keywords that should not have newlines after them removed.
## FIXME: Add options to control removal of empty lines in multiline comments.
## Keyword tables. None of them is referenced by the code visible in this
## file yet; they appear to be groundwork for the FIXMEs above.

## A newline will be added before any line containing one of these keywords
NEWLINE_BEFORE = [
    'program', 'static', 'while', 'repeat', 'endwhile', 'end', 'until',
]

## A newline will be added after any line containing one of these keywords
NEWLINE_AFTER = [
    'program', 'static', 'while', 'repeat', 'endwhile', 'end', 'until',
]

## Keywords that open a declaration section
declarationKeywords = [
    'constant', 'storage', 'static',
]
class Options:
    """Command-line settings for the formatter.

    ``lst`` is the argument list (normally ``sys.argv[1:]``).  If its first
    element names an existing path, that file becomes the code source;
    otherwise the code is read from standard input.  The remaining elements
    are flags:

        --pad-inside / -pi          sets ``padBracketsInside``
        --pad-before / -pb          sets ``padBracketsBefore``
        --preserve-comments / -pc   sets ``preserveComments``
    """

    def __init__(self, lst):
        self.optionList = lst
        self.codeSource = sys.stdin
        self.fileName = sys.stdin
        ## Add spaces inside like most HLA code
        self.padBracketsInside = False
        ## Add a space between the directive and the opening bracket
        self.padBracketsBefore = False
        self.indentChar = ' ' * 4
        ## Don't indent comments
        self.preserveComments = False
        if self.optionList:
            self.process()

    def process(self):
        """Consume ``self.optionList``: optional leading file name, then flags."""
        if os.path.exists(self.optionList[0]):
            self.fileName = self.optionList[0]
            self.codeSource = open(self.optionList[0], "rb")
            self.optionList = self.optionList[1:]
        flags = set(self.optionList)
        ## Each flag is tested independently so that several of them can be
        ## combined on one command line, and each one sets the attribute
        ## that __init__ declares and __str__ reports.
        if flags & set(['--pad-inside', '-pi']):
            self.padBracketsInside = True
        if flags & set(['--pad-before', '-pb']):
            self.padBracketsBefore = True
        if flags & set(['--preserve-comments', '-pc']):
            self.preserveComments = True

    def __str__(self):
        return '''
+----------------------------------------
| Code source: %s
| Pad Inside: %s
| Pad Before: %s
| Indent Size: %s
| Preserve Comments: %s
+----------------------------------------
''' % (self.fileName, self.padBracketsInside, self.padBracketsBefore,
       repr(self.indentChar), self.preserveComments)
def splitPreserve(string, sep = '\n'):
    """Split ``string`` on ``sep`` while keeping the separator on each piece.

    Joining the returned list reproduces ``string`` exactly:

        "this\\nthat\\n" -> ["this\\n", "that\\n"]
        "this\\nthat"   -> ["this\\n", "that"]
        ""             -> []

    Always returns a real list (the callers index it and take its length),
    which also makes the function work on Python 3 where ``map`` is lazy.
    """
    pieces = string.split(sep)
    if pieces[-1] == "":
        ## The input ended with a separator: split() produced a trailing
        ## empty string which must not turn into an extra separator.
        del pieces[-1]
        return [piece + sep for piece in pieces]
    ## The input did not end with a separator, so the final piece is
    ## returned untouched instead of appending and then stripping ``sep``
    ## (stripping removes a character set, not a suffix).
    return [piece + sep for piece in pieces[:-1]] + [pieces[-1]]
def tabularize(strList, delimeter = ":=", extraSpace = 3):
    """Align the first ``delimeter`` of every line in one column.

    For each line the text before the delimiter (with the whitespace that
    directly precedes the delimiter removed) is left-justified to the width
    of the longest such text plus ``extraSpace``, then the delimiter and the
    rest of the line are appended.

    Lines that contain no delimiter, or that have nothing in front of it,
    are returned unchanged.  Returns a new list; ``strList`` is not modified.
    """
    rx_delim = re.compile(r"\s*" + re.escape(delimeter))
    rows = []
    width = 0
    for line in strList:
        match = rx_delim.search(line)
        if match and match.start() > 0:
            rows.append((line[:match.start()], line[match.end():]))
            width = max(width, match.start())
        else:
            ## ``None`` marks a line to pass through verbatim, so that a
            ## line starting with the delimiter does not lose it.
            rows.append((None, line))
    width += extraSpace
    tabularized = []
    for left, right in rows:
        if left is None:
            tabularized.append(right)
        else:
            tabularized.append(left.ljust(width) + delimeter + right)
    return tabularized
def chooseChar(prevChar, currChar, nextChar, padBefore = False, padInside = False):
    """Decide what to emit for ``currChar`` given its neighbours.

    ``prevChar`` is the last character already emitted and ``nextChar`` the
    character that follows in the input; either may be the empty string at
    the edges of the text.  Returns the text to append to the output: the
    character itself, the character with a space added on one or both
    sides, or '' when a redundant space/tab is dropped.
    """
    ## FIXME: Prevent separation of the directive and the arguments
    blank = currChar.isspace()
    ## A blank is kept only if it neither follows whitespace/an opening
    ## bracket nor precedes whitespace/a closing bracket.
    keepBlank = (blank and prevChar not in "\t (\n") and (blank and nextChar not in " )\t\n")
    emitted = ''
    if keepBlank or currChar not in ['\t', ' ', '']:
        emitted = currChar

    if emitted == '(' and padBefore and prevChar not in " \t":
        ## Add a space before opening bracket
        return ' ' + emitted
    if prevChar == "(" and emitted != ' ' and padInside and (currChar not in '\n) '):
        ## Add a space after opening bracket
        return ' ' + emitted
    if currChar == ")" and padInside and (prevChar not in '()\n'):
        ## Add a space before closing bracket
        return ' ' + emitted
    if currChar in ":" and prevChar not in "\t ":
        if nextChar in "=":
            ## Add a space before the assignment operator
            return ' ' + emitted
        if nextChar != ":":
            ## Add a space on both sides of the colon
            return ' ' + emitted + ' '
    if currChar == "=" and nextChar not in "\t ":
        return emitted + ' '
    return emitted
def removeExtraSpace(hlaCode):
    '''
    Compacts the code by removing as much space as possible without changing
    the code's working or introducing syntax errors.

    Line endings are normalised to "\\n" first.  Characters inside strings,
    character literals and comments (as tracked by switchState) are copied
    verbatim; every other character goes through chooseChar.  An opening
    bracket that sits at the start of a line of its own is pulled up onto
    the previous line by dropping the newline(s) in front of it.

    Returns the compacted text.
    '''
    hlaCode = re.sub("\r\n?", "\n", hlaCode)
    ## "(" followed only by whitespace up to the end of the line.  Matched
    ## at the current position only, so a lone "(" further down the file
    ## cannot influence brackets that come before it.
    rx_trailingOpener = re.compile(r"\(\s*$", re.M)
    ## Newline(s) and whitespace leading up to an opening bracket.
    rx_newlineThenOpener = re.compile(r"\n*\s*[(]")
    pieces = []
    prevChar = ""       # last character emitted so far
    inString = False
    inCharacter = False
    inLineComment = False
    inMultiComment = False
    padBeforeAtOpener = False
    for index, currChar in enumerate(hlaCode):
        nextChar = hlaCode[index+1:index+2]
        ## NOTE: only a short look-ahead is handed to switchState; passing
        ## the whole remainder of the text slows the program down a lot.
        [prevChar,
         currChar, nextChar, inString, inCharacter, inMultiComment, inLineComment] = \
            switchState(prevChar, currChar, nextChar, inString,
                        inCharacter,
                        inMultiComment, inLineComment, hlaCode[index:index+10])
        untouchable = inString or inCharacter or inLineComment or inMultiComment
        if currChar == '(' and rx_trailingOpener.match(hlaCode, index):
            ## Add a space before the bracket if it's the last non whitespace
            ## character on the line
            padBeforeAtOpener = True
        if (currChar == '\n' and not untouchable
                and rx_newlineThenOpener.match(hlaCode, index)):
            ## The newline is dropped so that the bracket joins the previous
            ## line.  Newlines inside comments and strings are never dropped:
            ## doing so would splice the bracket into the comment.
            if not padBeforeAtOpener and prevChar != ' ':
                ## Remember to add a space before the relocated bracket.
                padBeforeAtOpener = True
            continue
        if untouchable:
            piece = currChar
        else:
            piece = chooseChar(prevChar, currChar, nextChar, padBeforeAtOpener)
            if currChar == '(':
                padBeforeAtOpener = False
        if piece:
            pieces.append(piece)
            prevChar = piece[-1]
    return "".join(pieces)
def switchState(prevChar, currChar, nextChar, inString, inCharacter,
                inMultiComment, inLineComment, linePortionLeft):
    """Update the string/character/comment flags for the current character.

    ``linePortionLeft`` is the text starting at the current character (the
    callers pass a short slice of it).  The three characters are handed back
    unchanged together with the updated flags, in the order
    (prevChar, currChar, nextChar, inString, inCharacter, inMultiComment,
    inLineComment).
    """
    pair = currChar + nextChar
    quoted = inString or inCharacter

    if not (quoted or inMultiComment):
        ## A line comment ends at the start of a new line (or of the text)
        ## and begins where only whitespace separates us from "//".
        if prevChar == "\n" or not prevChar:
            inLineComment = False
        if re.search(r"^\s*//", linePortionLeft):
            inLineComment = True

    if not (inLineComment or quoted):
        if pair == "/*":
            inMultiComment = True
        if pair == "*/":
            inMultiComment = False

    ## Quotes toggle their state unless they sit inside the other kind of
    ## literal or inside a comment.
    if currChar == '"' and not (inCharacter or inLineComment or inMultiComment):
        inString = not inString
    if currChar == "'" and not (inString or inLineComment or inMultiComment):
        inCharacter = not inCharacter

    return (prevChar, currChar, nextChar, inString, inCharacter, inMultiComment, inLineComment)
def isDeclarationLine(string):
    """Placeholder that is not implemented yet; always returns None."""
    return None
class HLAFormatter(object):
    """Re-indents HLA source that has already been compacted line by line.

    ``lineList`` is the list of source lines, each normally ending in a
    newline.  ``opts`` supplies ``indentChar``, ``preserveComments`` and
    ``fileName``.  The result accumulates in ``self.formattedCode``.
    """

    def __init__(self, lineList, opts):
        self.options = opts
        self.codeLines = lineList
        self.totalLines = len(self.codeLines)
        self.formattedCode = ""
        self.inCharacter = False
        self.inString = False
        self.inMultiComment = False
        self.inLineComment = False
        self.fileWritten = False
        self.outFile = ""

    def untouchable(self):
        """True while inside a comment, a string or a character literal."""
        return self.inMultiComment or self.inLineComment or self.inCharacter or self.inString

    def removeEmptyLines(self, lst):
        """Return the non-blank lines of ``lst`` followed by a single '\\n'."""
        res = [line for line in lst if not re.search(r"^\s*$", line)]
        return res + ['\n']

    def _formatDeclarations(self, declarations):
        """Drop blank lines, align the declarations on ':' and join them."""
        kept = self.removeEmptyLines(declarations)
        return ''.join(' ' + row for row in tabularize(kept, ":"))

    def format(self):
        """Indent every line and append the result to ``self.formattedCode``.

        Block keywords and brackets found outside comments and literals
        drive the indent level.  Lines following a ``var``/``static`` token
        are collected and emitted as an aligned table when a line starting
        with "begin " is reached, or at the end of the input.  Runs of blank
        lines are removed, except for one blank line directly after a line
        matching the ``#include``/``end`` pattern below.

        Raises AssertionError if the indent level of the last line is not
        zero, i.e. the blocks look unbalanced.
        """
        linePos = 0
        ## Characters that delimit a keyword.
        keywordStarters = "fwebgprtiusv"
        beforeKeywordChars = " \t\n"
        nextIndentLevel = 0
        ## Bound here so that empty input does not leave it undefined.
        currentIndentLevel = 0
        ## NOTE(review): "\n-;" inside the class is a character range, so
        ## this also matches digits and most punctuation.  Kept as is
        ## because keyword detection relies on what it matches today.
        rx_keywordEnd = re.compile("[ \t\n-;(/:]+?")
        ## Match keywords.
        rx_increaser = re.compile("^(while|for|begin|forever|try|repeat|if)$", re.VERBOSE | re.I)
        rx_decreaser = re.compile("^(end|endwhile|endfor|until|endtry|endif)$", re.VERBOSE | re.I)
        rx_halfIncreaser = re.compile("^(exception|else|elseif)$", re.I)
        consecutiveEmptyLines = 0
        fetchingDeclarations = False
        declarations = []
        preserveEmptyLine = False
        out = []
        while self.totalLines > linePos:
            line = self.codeLines[linePos]
            linePos += 1
            if re.search(r'^\s*$', line):
                consecutiveEmptyLines += 1
            else:
                consecutiveEmptyLines = 0
            currentIndentLevel = nextIndentLevel
            if re.match("begin ", line):
                fetchingDeclarations = False
                out.append(self._formatDeclarations(declarations))
                declarations = []
            if fetchingDeclarations:
                declarations += [line]
                continue
            for charPos, currChar in enumerate(line):
                nextChar = line[charPos+1:charPos+2]
                ## An empty slice at column 0: the previous line's newline
                ## is never seen as prevChar.
                prevChar = line[max(charPos-1, 0):charPos]
                rest = line[charPos:]
                token = ""
                [prevChar,
                 currChar, nextChar, self.inString, self.inCharacter,
                 self.inMultiComment, self.inLineComment] = \
                    switchState(prevChar, currChar, nextChar, self.inString,
                                self.inCharacter, self.inMultiComment,
                                self.inLineComment, rest[:10])
                if self.untouchable():
                    continue
                ## ~~~~~~~~~~~~~~~~~~~~~[ Get next word ]~~~~~~~~~~~~~~~~~~~~~
                if currChar.lower() in keywordStarters and (prevChar in beforeKeywordChars or
                                                            prevChar == ""):
                    ## The word runs up to the first terminator, or to the
                    ## end of the line when the input stops without one.
                    endMatch = rx_keywordEnd.search(rest)
                    token = rest[:endMatch.start()] if endMatch else rest
                if re.search("(var|static)", token):
                    fetchingDeclarations = True
                if rx_increaser.search(token):
                    nextIndentLevel += 1
                elif rx_decreaser.search(token):
                    ## A closing keyword is itself printed one level out.
                    nextIndentLevel -= 1
                    currentIndentLevel = nextIndentLevel
                elif rx_halfIncreaser.search(token):
                    ## else/elseif/exception: only this line moves out.
                    currentIndentLevel -= 1
                if currChar == "(":
                    nextIndentLevel += 1
                elif currChar == ")":
                    if charPos == 0:
                        ## A line that starts with ")" aligns with its opener.
                        currentIndentLevel -= 1
                    nextIndentLevel -= 1
            ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            lim = 1 if preserveEmptyLine else 0
            preserveEmptyLine = False
            ## A line comment never extends past its own line.
            self.inLineComment = False
            if consecutiveEmptyLines <= lim:
                if re.search(r"^\s*//", line):
                    ## Full-line comment: honour the preserve-comments option
                    ## whether or not we are inside another construct.
                    if self.options.preserveComments:
                        out.append(line)
                    else:
                        out.append(self.options.indentChar * currentIndentLevel
                                   + re.sub(r"^\s*", "", line))
                elif self.untouchable():
                    ## Inside a multi-line comment or an open literal.
                    out.append(line)
                elif re.search(r"^\s*$", line):
                    ## Don't indent empty lines
                    out.append('\n')
                else:
                    out.append(self.options.indentChar * currentIndentLevel
                               + re.sub(r"^\s*", "", line))
            if re.search(r"\s*(#include|end)\W*", line):
                ## An empty line after an end of block will not be deleted.
                ## Placed here so that it doesn't affect this line
                preserveEmptyLine = True
        if declarations:
            ## No "begin " line followed the declaration section: emit what
            ## was collected instead of silently dropping it.
            out.append(self._formatDeclarations(declarations))
        self.formattedCode += ''.join(out)
        assert not currentIndentLevel, currentIndentLevel

    def displayResult(self):
        """Print the formatted code to standard output."""
        print(self.formattedCode)

    def writeToFile(self, fname = "formatted-output.hla", inPlace = True):
        """Write the formatted code to ``fname``.

        With ``inPlace`` true and a real input file, that file is overwritten
        instead.  Text is encoded as UTF-8 before the file is opened, so an
        encoding failure cannot truncate the target, and the file is written
        in binary mode so that line endings are stored exactly as formatted.
        """
        if inPlace and self.options.fileName != sys.stdin:
            fname = self.options.fileName
        data = self.formattedCode
        if not isinstance(data, bytes):
            data = data.encode("utf-8")
        with open(fname, "wb") as fd:
            fd.write(data)
        self.fileWritten = True
## Set to a true value to stop after compaction: nothing is formatted,
## printed or written.
DEBUG = 0


def main():
    """Format the file named on the command line, or standard input.

    The source is read in full, compacted with removeExtraSpace, split into
    lines and handed to HLAFormatter.  The result is printed and written
    back (in place when a file was given).  Returns None, which sys.exit
    treats as success.
    """
    opts = Options(sys.argv[1:])
    #print opts
    try:
        content = opts.codeSource.read()
    finally:
        opts.codeSource.close()
    if not isinstance(content, str):
        ## A file opened in binary mode yields bytes on Python 3.
        content = content.decode("utf-8")
    content = removeExtraSpace(content)
    codeLines = splitPreserve(content)
    formatter = HLAFormatter(codeLines, opts)
    #pprint.pprint(codeLines)
    if not DEBUG:
        formatter.format()
        ## Kept under the same guard: writing without formatting would
        ## overwrite the input file with an empty result.
        formatter.displayResult()
        formatter.writeToFile()
import sys | |
import os | |
import re | |
import pprint | |
## Script entry point: run the formatter and hand its result to the shell.
if __name__ == "__main__":
    exitStatus = main()
    sys.exit(exitStatus)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment