Created
November 13, 2019 20:03
-
-
Save rochacbruno/059e1223f5b69d3b99b509a4962cf3aa to your computer and use it in GitHub Desktop.
Read fixed width text file in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
from operator import itemgetter | |
from pathlib import Path | |
def get_struct_unpacker(fieldspecs, istart, iwidth):
    """
    Build a struct-based unpacker for fixed-width records.

    fieldspecs is a list of [name, start, width, ...] arrays, where start is
    the 1-based position of the field's first byte and width is its length
    in bytes. istart and iwidth are the indexes of the start and width
    entries inside each fieldspec. The fieldspecs must already be ordered by
    start and must not overlap.

    The format string built internally looks like "6s2x14s1x13s": one "Ns"
    item per field, with "Nx" pad items covering any gap between fields.

    Returns the bound ``unpack_from`` method of the compiled struct.Struct;
    calling it with a bytes buffer yields one bytes value per field.

    Raises ValueError if a field starts before the end of the previous one
    (overlapping or unsorted fieldspecs, or a start below 1), since the
    resulting format would silently read the wrong bytes.
    """
    fmt_parts = []
    consumed = 0  # number of bytes already described by the format
    for fieldspec in fieldspecs:
        start = fieldspec[istart] - 1  # convert 1-based start to 0-based
        width = fieldspec[iwidth]
        if start < consumed:
            raise ValueError(
                "field starting at %r overlaps the previous field or is out "
                "of order; fieldspecs must be sorted by start and must not "
                "overlap" % (fieldspec[istart],)
            )
        if start > consumed:
            # Skip the unused bytes between the previous field and this one.
            fmt_parts.append(str(start - consumed) + "x")
        fmt_parts.append(str(width) + "s")
        consumed = start + width
    return struct.Struct("".join(fmt_parts)).unpack_from
# Layout of one record in data.txt. Start is the 1-based position of the
# field; Type is the callable used to convert the stripped text value.
fieldspecs = [
    # Name, Start, Width, Type
    ["ID", 1, 6, int],
    ["NAME", 9, 14, str],
    ["Twitter", 24, 13, str],
]
iname, istart, iwidth, itype = 0, 1, 2, 3  # field indexes

# get_struct_unpacker walks the specs in list order while tracking how many
# bytes it has consumed, so they have to be ordered by start position.
fieldspecs.sort(key=itemgetter(istart))
struct_unpacker = get_struct_unpacker(fieldspecs, istart, iwidth)

data = []
# The context manager closes the file even if a line fails to parse.
with Path('./data.txt').open() as data_file:
    for line in data_file:
        # NOTE: the line is encoded before slicing, so start/width are counted
        # in encoded bytes, not characters. A line shorter than the full
        # record makes unpack_from raise struct.error.
        raw_fields = struct_unpacker(line.encode())  # split line into field values
        line_data = {}
        # One raw value is produced per fieldspec, in the same order.
        for fieldspec, raw_value in zip(fieldspecs, raw_fields):
            cast = fieldspec[itype]
            line_data[fieldspec[iname]] = cast(raw_value.decode().strip())
        data.append(line_data)
print(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
123456 Bruno Rocha @rochacbruno | |
456789 Rodolfo Viana @rodolfoviana |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{'ID': 123456, 'NAME': 'Bruno Rocha', 'Twitter': '@rochacbruno'}, {'ID': 456789, 'NAME': 'Rodolfo Viana', 'Twitter': '@rodolfoviana'}] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm new to Python and would like to see how to take this result and insert it into an MSSQL table. I have a file with over 1,600 columns, and I can get the start point and width for each of them. If this code can read that file, what should I do or use to insert the data into a table? TIA