# benjamindoron/guid_converter_base10.py

## guid_converter_base10.py
#!/bin/env python
"""Convert the hexadecimal values of a GUID definition file to base 10.

Usage: guid_converter_base10.py <input> <output>

The input text is normalised (quotation style, comments, whitespace)
towards one `"Name": [ 1, 2, 3 ],` entry per line, every `0x`-prefixed
literal is rewritten in decimal, and each line that still looks suspect
is printed for manual validation before the result is written to <output>.
"""

import os
import re
import sys


compliance_test = re.compile(r"^\t\"[\w\d]+\": \[ [\d, ]+\],$")

# A comment runs from "# " up to the next comma or newline; the whitespace
# in front of it (newlines included) is swallowed together with it
comment_pattern = re.compile(r"\s*# [^,\n]+")

# Any line holding a brace is reduced to that brace alone
brace_line_pattern = re.compile(r".*([{}]).*")

# `" Name " : [` in any whitespace flavour
definition_pattern = re.compile(r"\"\s*([\w\d]+)\s*\"\s*:\s*\[")

# A hex literal glued directly to the preceding bracket or comma
unspaced_hex_pattern = re.compile(r"([\[,])0x")

# A `0x` literal that is not glued to a preceding word character or an
# opening quote, so identifiers and quoted names are never rewritten
hex_literal_pattern = re.compile(r"(?<![\w\"])0[xX][0-9A-Fa-f]+\b")

# Two or more consecutive capitals, or a quoted name made only of lowercase
consecutive_capitals_pattern = re.compile(r"\w*[A-Z]{2,}\w*")
lowercase_name_pattern = re.compile(r"\"[a-z]*\"")


def normalise_guid_text(text):
	"""Return *text* normalised towards the compliant one-entry-per-line form.

	The steps run in a fixed order because later ones rely on the
	quotation style and spacing produced by earlier ones.
	"""
	## Change quotation style
	text = text.replace('\'', '\"')

	## Strip comments
	text = comment_pattern.sub("", text)

	## Remove array name from update_edk2_guids.py python output
	text = brace_line_pattern.sub("\\1", text)

	## Normalise JSON definition whitespace
	text = definition_pattern.sub("\"\\1\": [", text.replace('  ', ' '))

	## Separate nybbles with whitespace
	text = unspaced_hex_pattern.sub("\\1 0x", text)

	## Separate final nybble with whitespace
	return text.replace('],', ' ],').replace('  ', ' ')


def hex_to_decimal(text):
	"""Return *text* with every `0x`-prefixed literal rewritten in base 10.

	The substitution is a single pass over the text, so a value that is
	already written in decimal is never re-read as hexadecimal and one
	conversion can never overwrite the result of another.
	"""
	return hex_literal_pattern.sub(lambda match: str(int(match.group(0), 16)), text)


def convert_guid_text(text):
	"""Return *text* normalised and with its hexadecimal literals in base 10."""
	return hex_to_decimal(normalise_guid_text(text))


def find_suspect_lines(text):
	"""Return `(message, line)` pairs for lines that need manual validation.

	Every line is tested against `compliance_test` first and for
	potentially problematic capitalisation second, so a line may be
	reported twice. Brace-only and blank lines do not match
	`compliance_test` and are therefore reported as well.
	"""
	findings = []
	for line in text.splitlines():
		# NOTE: `match()` operates from the beginning of a line
		if compliance_test.match(line) is None:
			findings.append(("The following line fails compliance testing, please validate!", line))
		if consecutive_capitals_pattern.search(line) is not None or lowercase_name_pattern.search(line) is not None:
			findings.append(("The following line has potentially problematic capitalisation, please validate!", line))
	return findings


def main(argv=None):
	"""Run the converter; *argv* defaults to `sys.argv`.

	Returns the process exit status: 1 when the argument count is wrong
	(after printing the usage line), 0 once the output file is written.
	Errors raised while opening, reading or writing the files propagate.
	"""
	if argv is None:
		argv = sys.argv

	# TODO: Consider offering to parse a directory of inputs
	if len(argv) != 3:
		print("Usage:", argv[0] if argv else "guid_converter_base10.py", "<input> <output>")
		return 1

	# Operating on separate files at present; TODO: add feature later
	with open(argv[1], 'r') as guidfile_input:
		guidfile_data = guidfile_input.read()

	guidfile_data = convert_guid_text(guidfile_data)

	for message, line in find_suspect_lines(guidfile_data):
		print(message)
		print(line)

	# Opened only once the result is ready, so a failure above never
	# leaves a truncated output file behind
	with open(argv[2], 'w') as guidfile_output:
		guidfile_output.write(guidfile_data)

	return 0


if __name__ == "__main__":
	sys.exit(main())