Skip to content

Instantly share code, notes, and snippets.

@pellicceama
Last active February 1, 2019 12:39
Show Gist options
  • Save pellicceama/676fc1fcdd325b01a65dde0f3013c432 to your computer and use it in GitHub Desktop.
Save pellicceama/676fc1fcdd325b01a65dde0f3013c432 to your computer and use it in GitHub Desktop.
Parses excel files using openpyx, infers column data types and prints them to the console
#!/usr/local/bin/python
import sys
from openpyxl import load_workbook
filename=sys.argv[1]
headers_row=sys.argv[2]
data_row=sys.argv[3]
print "Parsing Filename: ", filename
print "Parsing headers_row: ", headers_row
print "Parsing data_row: " , data_row
wb = load_workbook(filename = filename)
ws = wb.active
print "Number of columns: " , len(ws[headers_row])
A_UPPERCASE = ord('A')
ALPHABET_SIZE = 26
def _decompose(number):
# Generate digits from `number` in base alphabet, least significants
# bits first.
# Since A is 1 rather than 0 in base alphabet, we are dealing with
# `number - 1` at each iteration to be able to extract the proper digits.
while number:
number, remainder = divmod(number - 1, ALPHABET_SIZE)
yield remainder
def base_10_to_alphabet(number):
# Convert a decimal number to its base alphabet representation
return ''.join(
chr(A_UPPERCASE + part)
for part in _decompose(number)
)[::-1]
i = 1
while i <= len(ws[headers_row]):
letter = base_10_to_alphabet(i)
print 'Column: ' + str(ws[letter + headers_row].value) + ' Type: ' + ws[letter + data_row].data_type + ' Inferred: ' + str(str(ws[letter + data_row].value) != 'None') + ' Example: ' + str(ws[letter + data_row].value)
i += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment