Skip to content

Instantly share code, notes, and snippets.

@tigerhawkvok
Created March 1, 2022 00:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tigerhawkvok/7c718afb34876d5c59b1e73f7f190f02 to your computer and use it in GitHub Desktop.
Save tigerhawkvok/7c718afb34876d5c59b1e73f7f190f02 to your computer and use it in GitHub Desktop.
Take in a dirty SQL file formatted who knows how and get it mostly formatted
#!python3
"""
Quick and dirty SQL beautifier
"""
FILEPATH = "buildWDTables.sql"
TABSIZE = 4
#######################################################
import re
import os
import shutil
with open(FILEPATH) as fh:
fileText = fh.read()
outFile = ".".join(FILEPATH.split(".")[:-1]) + "_orig." + FILEPATH.split(".").pop()
i = 0
while os.path.isfile(outFile):
i += 1
outFile = ".".join(FILEPATH.split(".")[:-1]) + f"_orig{i}." + FILEPATH.split(".").pop()
shutil.copy(FILEPATH, outFile)
fileText = re.sub(r"(from|join) +(#?[\[\]a-z\.]+) ([a-z0-9]+)\s*($|(with)?\s*\(nolock\))", r'\1 \2 AS "\3" \4', fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = re.sub(r"\s*(with)?\s*\(nolock\)", " WITH (NOLOCK)", fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = re.sub(r"as +([a-z_0-9]+)((?: +--.*?|(?: +with)? *\(nolock\))? *[,;]?)$", r'AS "\1"\2', fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = re.sub(r'as +"?([a-z_0-9]+)"?((?: +--.*?|(?: +with)? *\(nolock\))? *[,;]?)(on .*)$', 'AS "\\1"\\2\n\t\\3', fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = re.sub(",([^ ])", r", \1", fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = fileText.replace("<>", "!=")
# Quoting aliases
fileText = re.sub(r",\s*((?:[a-z\_0-9\\-]+\.)?\[?[a-z\_0-9\\-]+\]?)[ \t]+\[?([a-z\_0-9\\-]+)\]?(?: \t)*$", r', \1 AS "\2"', fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = re.sub(r"(\s*)((?:[a-z\_0-9\\-]+\.)?\[?[a-z\_0-9\\-]+\]?)[ \t]+\[?([a-z\_0-9\\-]+)\]?\s*,(?: \t)*$", r'\1\2 AS "\3",', fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = re.sub(r"(from|join)\s+((?:#?\[?[a-z\_0-9\-]+\]?\.?){1,3})[ \t]+\[?([a-z\_0-9\-]+)\]?[ \t]*$", r'\1 \2 AS "\3"', fileText, 0, re.IGNORECASE | re.MULTILINE)
# This may break poorly-formatted files
fileText = re.sub("from (#[a-z0-9_]+) +([a-z0-9_]+)[ \t]*$", 'FROM $1 AS "$2"', fileText, 0, re.IGNORECASE | re.MULTILINE)
while True:
fileTextNew = re.sub(r"^(?!20[0-9]{2})(.*?)([a-z0-9\(\)])(\+|-|/|\*|>=|<=|>|<|=|!=)([^ \-])", r"\1\2 \3 \4", fileText, 0, re.IGNORECASE | re.MULTILINE)
if fileTextNew == fileText:
break
fileText = fileTextNew
fileText = re.sub(r"'(\d+) / (\d+) / (\d+)'", r"'\1/\2/\3'", fileText, 0, re.IGNORECASE | re.MULTILINE)
def computeReplacement(matchObj):
"""Handle the case replacements"""
start, _ = matchObj.span()
lineLength = 0
isCommented = False
matchString = matchObj.group()
for line in matchObj.string.splitlines():
if lineLength + len(line) > start:
# Match is on this line
if line.find("--") > -1 and line.find("--") + lineLength < start:
# Line is commented
isCommented = True
if re.search(f"""['"][^'"]*?({re.escape(matchString)})[^'"]*?['"]""", line, re.IGNORECASE):
# Match is in a string
isCommented = True # Not exactly but kinda
break
lineLength += len(line) + 1
return matchString.upper() if not isCommented else matchString
fileText = re.sub(r"(?!--)((?:\s+|^)(?:over|partition|order|left|join|union|select|from|isnull|cast|float|int|varchar|getdate|datetime|is|null|case|when|else|then|as|with|nolock|outer|where|group|by|having|top|into|drop|table|sum|and|end|in|not|on|or|nullif|exists|exists|dateadd|count|distinct|log|is not null|left join|inner join)(?:\s+|\(|\)|$))", computeReplacement, fileText, 0, re.IGNORECASE | re.MULTILINE)
fileText = fileText.replace("\t", " " * TABSIZE)
fileText = re.sub("'([0-9]{4}) - ([0-9]{2}) - ([0-9]{2})", r"'\1-\2-\3", fileText, 0, re.IGNORECASE | re.MULTILINE)
with open(FILEPATH, "w", encoding= "utf-8", newline= "\n") as fh:
# Write it back out, fixing newlines, encoding, trailing spaces,
# and adding a terminal newline
fh.write("\n".join([line.rstrip() for line in fileText.splitlines()]) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment