Skip to content

Instantly share code, notes, and snippets.

@Jhsmit
Last active April 30, 2020 10:47
Show Gist options
  • Save Jhsmit/3b066e1cecd27f769ee679094f7e14cd to your computer and use it in GitHub Desktop.
Save Jhsmit/3b066e1cecd27f769ee679094f7e14cd to your computer and use it in GitHub Desktop.
Generate a format string for saving NumPy record arrays to a text file
import numpy as np
import string
def _get_f_width(data, sign):
i = 1 if sign else 0
with np.testing.suppress_warnings() as sup:
sup.filter(RuntimeWarning)
w_pos = np.log10(np.nanmax(data)) + i
w_neg = np.log10(np.nanmax(-data)) + 1
w = np.nanmax([w_pos, w_neg]) + 1
return int(np.floor(w))
def fmt_export(arr, delimiter='\t', header=True, sig_fig=2, width='auto', justify='left', sign=False, pad=''):
    """Build a ``%``-style row format and a matching header line for a structured array.

    One conversion specification is produced per field of *arr*; the two
    returned strings are meant to be passed as ``fmt`` and ``header`` to
    ``np.savetxt``.

    Parameters
    ----------
    arr : numpy.ndarray
        Structured array (``arr.dtype.names`` must not be ``None``).
    delimiter : str
        String placed between columns, in both the format and the header.
    header : bool
        When true, a header line with the field names is built and, for
        ``width='auto'``, columns are widened to fit their names.
    sig_fig : int
        Precision used for the ``%g`` specifier of float/complex fields.
    width : 'auto' or int
        ``'auto'`` derives each column width from its data; an int forces
        that width on every column.
    justify : str
        ``'left'`` left-justifies columns; any other value right-justifies.
    sign : bool
        Add the ``+`` flag so positive numbers are written with a sign.
    pad : str
        ``'0'`` adds the zero-padding flag (and pads header names with
        ``'0'``); any other value pads with spaces.

    Returns
    -------
    fmt : str
        Delimiter-joined conversion specifications, one per field.
    hdr : str
        Delimiter-joined, padded field names; empty when *header* is false.

    Raises
    ------
    TypeError
        If *arr* is not a structured array or a field has an unsupported
        dtype kind.
    ValueError
        If *width* is neither ``'auto'`` nor an int.
    """
    names = arr.dtype.names
    if names is None:
        raise TypeError('arr must be a structured array with named fields')

    # Everything below is the same for every column, so compute it once.
    left = justify == 'left'
    zero_flag = '0' if pad == '0' else ''
    flags = ('-' if left else '') + ('+' if sign else '') + zero_flag
    align = str.ljust if left else str.rjust
    fill = zero_flag or ' '
    auto_width = width == 'auto'

    fmt_parts = []
    hdr_parts = []
    for j, name in enumerate(names):
        column = arr[name]
        kind = column.dtype.kind
        precision = ''

        if kind == 'b':
            specifier = 'i'
            # Widths of the texts 'True' (4) and 'False' (5).
            # NOTE(review): with the 'i' specifier booleans are written as
            # integers, so these widths look wider than needed - confirm intent.
            w = 4 if np.all(column) else 5
        elif kind in ('i', 'u'):
            specifier = 'i'
            w = _get_f_width(column, sign)
        elif kind in ('f', 'c'):
            specifier = 'g'
            precision = '.' + str(sig_fig)
            # Width in fixed notation: integer part plus significant digits.
            w_f = _get_f_width(column, sign) + sig_fig
            # Width in scientific notation: digits, exponent part, optional
            # sign, and +1 for a decimal point which is not always needed.
            sign_width = 1 if sign or np.any(column < 0) else 0
            w_s = sig_fig + 4 + sign_width + 1
            w = min(w_f, w_s) + 1
        elif kind in ('U', 'S', 'O'):
            specifier = 's'
            # default=0 keeps a column without rows from raising.
            w = max((len(str(item)) for item in column), default=0)
        else:
            raise TypeError(f'Invalid dtype kind {kind} for field {name}')

        if auto_width:
            col_w = w
        elif isinstance(width, int):
            col_w = width
        else:
            raise ValueError('Invalid width')

        if header:
            # The first header name is shortened by two characters to leave
            # room for the comment marker ('# ', np.savetxt's default) that
            # precedes the header line.
            comment_pad = 2 if j == 0 else 0
            if auto_width:
                col_w = max(col_w, len(name) + comment_pad)
            hdr_parts.append(align(name, col_w - comment_pad, fill))

        fmt_parts.append(f'%{flags}{col_w}{precision}{specifier}')

    return delimiter.join(fmt_parts), delimiter.join(hdr_parts)
# Generate testing data and test saving to txt
np.random.seed(43)  # fixed seed so the generated table is the same on every run
size = 10
dtype = [
    ('x', int),
    ('y1', int),
    ('y2', float),
    ('bools', bool),
    ('verylongnamewithshortcontent', 'U7'),
    ('bytes', 'S20'),
    ('objects', object),
]
data = np.empty(size, dtype=dtype)

# Every column is sized by `size`, so changing the row count needs one edit.
# The order of the random draws below is kept fixed: it determines the data.
data['x'] = np.arange(size)
data['y1'] = (np.logspace(1, 5, num=size) / (np.random.rand(size) - 0.5)).astype(int)
data['y2'] = 10000 * np.random.rand(size)
data['bools'] = np.random.rand(size) > 0.5
data['verylongnamewithshortcontent'][:] = 'a'
letters = list(string.ascii_letters)  # built once instead of once per row
data['bytes'] = [''.join(np.random.choice(letters, np.random.randint(1, 20))) for _ in range(size)]
data['objects'][:] = {'key1': 12423, 'key2': 'asdfjkl;'}

fmt, header = fmt_export(data, delimiter='\t', header=True, sign=False, justify='right', pad='', width='auto')
np.savetxt('test.txt', data, fmt=fmt, header=header)
# Reading back the file to numpy
def auto_read(file_path, delimiter='\t'):
    """Read a delimited text table into a structured array, taking names from a ``#`` header.

    If the first line of the file starts with ``#``, the rest of that line
    (minus one optional space after the marker) is split on *delimiter* and
    used as the field names. Otherwise no names are passed and
    ``np.genfromtxt`` assigns its defaults.

    Parameters
    ----------
    file_path : str or path-like
        File to read.
    delimiter : str
        Column separator, used for both the header and the data rows.

    Returns
    -------
    numpy.ndarray
        Result of ``np.genfromtxt`` with ``dtype=None`` (per-column type
        detection) and ``autostrip=True``.
    """
    with open(file_path, 'r') as f:
        first_line = f.readline()

    names = None
    if first_line.startswith('#'):
        text = first_line[1:].rstrip('\r\n')
        # Accept both '# name' and '#name': drop at most one space after the
        # marker instead of blindly cutting two characters.
        if text.startswith(' '):
            text = text[1:]
        names = text.split(delimiter)

    # NOTE(review): the first line is skipped even when it is not a '#'
    # header; if such a file starts directly with data, that row is dropped.
    # Confirm whether headerless files are expected here.
    return np.genfromtxt(
        file_path,
        dtype=None,
        names=names,
        skip_header=1,
        delimiter=delimiter,
        encoding=None,
        autostrip=True,
    )
# Read the file back with the header-aware reader and print both dtypes so
# they can be compared by eye. (A plain np.genfromtxt('test.txt') call used to
# precede this; its result was overwritten immediately, so it was dropped.)
read = auto_read('test.txt')
print(read.dtype)
print(data.dtype)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment