Last active
March 26, 2020 17:36
-
-
Save zachselby/6507e4a8cb0190daf3543e3fc9e45331 to your computer and use it in GitHub Desktop.
Bash Script: Format Markdown Tables from Input Using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Source copied from: https://leancrew.com/all-this/2014/06/cleaning-up-my-markdown-table-cleanup-script/
# Usage - reading the table from a file on stdin:
# $ cat sample.md
#| Column 1 | Column 2 | Column 3 |
#|--|:--:|--:|
#| first | second | third |
#| column | column | column |
#| left | center | right |
#
# $ ./md-clean-table.py < sample.md
#| Column 1 | Column 2 | Column 3 |
#|:---------|:--------:|---------:|
#| first    |  second  |    third |
#| column   |  column  |   column |
#| left     |  center  |    right |
#
# Usage - piping a pasted table in from a here-document (<<EOF):
# $ (
# > cat <<EOF
# > | Column 1 | Column 2 | Column 3 |
# > |--|:--:|--:|
# > | first | second | third |
# > | column | column | column |
# > | left | center | right |
# > EOF
# > ) | ./md-clean-table.py
#| Column 1 | Column 2 | Column 3 |
#|:---------|:--------:|---------:|
#| first    |  second  |    third |
#| column   |  column  |   column |
#| left     |  center  |    right |
import sys | |
def just(string, type, n):
    """Justify a string to length n according to type.

    Parameters:
        string: the cell text to pad.
        type:   two-character alignment code: '::' centers, '-:' right-
                justifies and ':-' left-justifies. (The name shadows the
                builtin but is kept because it is part of the signature.)
        n:      the target width.

    Returns the padded string. A string already longer than n, or an
    unrecognized alignment code, yields the string unchanged.
    """
    if type == '::':
        return string.center(n)
    if type == '-:':
        return string.rjust(n)
    if type == ':-':
        return string.ljust(n)
    # Unknown alignment code: hand the text back untouched.
    return string
def _split_row(line):
    """Split one table line into its raw cell strings, minus the outer pipes."""
    line = line.strip()
    # startswith/endswith are safe on an empty string, unlike line[0]/line[-1],
    # so a lone '|' or an empty line cannot raise IndexError here.
    if line.startswith('|'):
        line = line[1:]
    if line.endswith('|'):
        line = line[:-1]
    return line.split('|')


def normtable(text):
    """Align the vertical bars in a Markdown text table.

    Parameters:
        text: the whole table as one string, one row per line, including
              the separator row (e.g. ``|--|:--:|--:|``).

    Returns the table with every cell padded so the bars line up and the
    separator row rebuilt to match the column widths, joined with newlines
    (no trailing newline).

    Notes:
        * Blank lines are dropped; they cannot belong to a table.
        * If no separator row is found, the text is returned unchanged.
        * The separator row fixes the number of columns. Shorter rows are
          padded with empty cells; cells beyond that count are discarded.
    """
    # Blank lines hold no cells. They must be dropped before the separator
    # search because an empty line's character set is a subset of anything
    # and would otherwise be mistaken for the separator row.
    lines = [line for line in text.splitlines() if line.strip()]

    # The separator row is the first line made up only of bars, colons,
    # periods, dashes and spaces.
    formatrow = None
    for i, line in enumerate(lines):
        if set(line).issubset('|:.- '):
            formatrow = i
            break
    if formatrow is None:
        # Not a table we can format; leave the input alone.
        return text

    # Take the separator row out of the content.
    formatline = lines.pop(formatrow)

    # Determine how each column is to be justified from the first and last
    # characters of its separator cell.
    justify = []
    for cell in _split_row(formatline):
        cell = cell.strip()  # tolerate padded cells such as '| :--: |'
        ends = cell[:1] + cell[-1:]  # slices, so an empty cell gives ''
        if ends in ('::', '-:'):
            justify.append(ends)
        else:
            justify.append(':-')

    # Assume the number of columns in the separator row is the number for
    # the entire table.
    columns = len(justify)

    # Extract the content into a matrix, with exactly one space at each end
    # of every cell as "bumpers", and pad rows that are too short.
    content = []
    for line in lines:
        cells = [' ' + cell.strip() + ' ' for cell in _split_row(line)]
        cells.extend([''] * (columns - len(cells)))
        content.append(cells)

    # Get the width of the content in each column. The minimum width is 2,
    # the shortest possible separator cell, which also matches an empty
    # cell with its two bumper spaces.
    widths = [2] * columns
    for row in content:
        for i in range(columns):
            widths[i] = max(widths[i], len(row[i]))

    # Pad every cell to its column width. zip() stops at the shortest
    # sequence, which is what drops cells beyond the separator's count.
    formatted = [
        '|' + '|'.join(just(s, t, n) for s, t, n in zip(row, justify, widths)) + '|'
        for row in content
    ]

    # Recreate the separator row with the appropriate column widths.
    formatline = '|' + '|'.join(
        t[0] + '-' * (n - 2) + t[-1] for t, n in zip(justify, widths)
    ) + '|'

    # Put the separator row back where it was found.
    formatted.insert(formatrow, formatline)
    return '\n'.join(formatted)
def main():
    """Read a table from stdin, format it, and write it to stdout as UTF-8."""
    # Work on raw bytes and decode/encode explicitly so the script behaves
    # the same on Python 2 and 3 and when stdout is a pipe. On Python 3 the
    # byte streams live on `.buffer`; on Python 2 the standard streams are
    # already byte-oriented.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    unformatted = stdin.read().decode('utf-8')
    # The trailing newline matches what the original print statement added.
    stdout.write((normtable(unformatted) + '\n').encode('utf-8'))


# Read the input, process, and print -- only when run as a script, so that
# importing this module does not block on stdin.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment