Skip to content

Instantly share code, notes, and snippets.

@BigPrject
Created April 25, 2025 16:03
Show Gist options
  • Save BigPrject/48b656a7efeda0c382ce800428164871 to your computer and use it in GitHub Desktop.
Save BigPrject/48b656a7efeda0c382ce800428164871 to your computer and use it in GitHub Desktop.
Fast conversion of a list of lists of strings to a NumPy array
import numpy as np
cimport numpy as np
from libc.stdint cimport uint16_t
from libc.stdlib cimport strtod
from cpython.unicode cimport PyUnicode_AsUTF8
cpdef np.ndarray[np.double_t, ndim=2] parse_list_list_str_to_floats(list[list[str]] data):
    """
    Parse a list of lists of strings into a 2-D NumPy array of floats.

    Each string is converted with the C library's ``strtod``. Leading
    whitespace (skipped by ``strtod`` itself) and trailing ASCII whitespace
    are tolerated; any other unparsed trailing characters are an error.

    Parameters:
        data (list of lists of strings): Input data, where each inner list
            contains strings. All inner lists must have the same length.

    Returns:
        np.ndarray : NumPy array of shape (n, m) with dtype float64, where
            n is ``len(data)`` and m is ``len(data[0])``. An empty outer
            list yields an array of shape (0, 0).

    Raises:
        ValueError: If the inner lists differ in length, or if a string is
            not a complete floating-point literal.
    """
    # Py_ssize_t is the type len() returns; a 16-bit counter would silently
    # wrap for inputs with more than 65535 rows or columns.
    cdef Py_ssize_t i, j
    cdef Py_ssize_t n = len(data)
    cdef Py_ssize_t m
    cdef np.ndarray[np.double_t, ndim=2] result
    cdef list[str] row
    cdef double value
    cdef str item
    cdef const char* s
    cdef char* endptr

    # Must be checked before touching data[0], which does not exist here.
    if n == 0:
        return np.empty((0, 0), dtype=np.float64)

    m = len(data[0])
    result = np.empty((n, m), dtype=np.double)

    for i in range(n):
        row = data[i]
        if len(row) != m:
            raise ValueError("All inner lists must have the same length.")
        for j in range(m):
            item = row[j]
            # The returned buffer is owned by `item`, which stays alive for
            # the rest of this iteration.
            s = PyUnicode_AsUTF8(item)
            value = strtod(s, &endptr)
            if endptr != s:
                # Skip trailing ASCII whitespace: space (32) and the
                # \t \n \v \f \r range (9-13).
                while endptr[0] == 32 or 9 <= endptr[0] <= 13:
                    endptr += 1
            # Reject both "nothing parsed" and "parsed only a prefix"
            # (e.g. "1.5abc"), which strtod alone would accept as 1.5.
            if endptr == s or endptr[0] != 0:
                raise ValueError(f"Invalid float value: '{item}' at position ({i}, {j}).")
            result[i, j] = value
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment