-
-
Save devdanzin/591394b798b38a332ec3ecdef0f6f082 to your computer and use it in GitHub Desktop.
A trivial Python C extension to map character positions to line numbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import line_tuple | |
# Sample input: three lines, each six visible characters plus a newline.
text = """\
line 1
line 2
line 3
"""

line_positions = line_tuple.build_line_tuple(text)

# Spot-check a few offsets: 0 and 5 fall on line 1, 10 falls on line 2.
for offset in (0, 5, 10):
    print(line_positions[offset])

print(len(line_positions))  # Length of the text
print(line_positions)  # (1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define PY_SSIZE_T_CLEAN | |
#include <Python.h> | |
/*
 * Return the index just past the line containing `start`: one past the next
 * '\n', or `length` when no newline remains.
 *
 * When `bytes` is non-NULL the search runs over that raw buffer; otherwise
 * `text` must be a str and the search runs over its code points.
 * Returns -1 with an exception set if the str lookup fails.
 */
static Py_ssize_t line_tuple_line_end(PyObject* text, const char* bytes,
                                      Py_ssize_t start, Py_ssize_t length) {
    if (bytes != NULL) {
        Py_ssize_t i = start;
        while (i < length && bytes[i] != '\n') {
            ++i;
        }
        return (i < length) ? i + 1 : length;
    }

    /* PyUnicode_FindChar returns -1 for "not found" and -2 for an error. */
    Py_ssize_t newline = PyUnicode_FindChar(text, '\n', start, length, 1);
    if (newline == -2) {
        return -1;
    }
    return (newline == -1) ? length : newline + 1;
}

/*
 * build_line_tuple(text) -> tuple of int
 *
 * Build a tuple with one entry per position in `text`; entry i is the
 * 1-based number of the line that position i belongs to.  A '\n' counts as
 * part of the line it terminates.
 *
 * For a str argument positions are code points, so indexing the result with
 * a Python string index works for non-ASCII text as well.  (Parsing with
 * "s#" would instead measure the UTF-8 encoding, giving one entry per byte.)
 * Bytes-like arguments are still accepted and are indexed per byte.
 *
 * Returns a new reference, or NULL with an exception set when the argument
 * is neither a str nor a bytes-like object, or when an allocation fails.
 */
static PyObject* build_line_tuple(PyObject* self, PyObject* args) {
    PyObject* text;
    PyObject* line_tuple = NULL;
    Py_buffer view;
    int have_view = 0;
    const char* bytes = NULL;
    Py_ssize_t text_length;
    Py_ssize_t line_number = 1;
    Py_ssize_t pos = 0;

    if (!PyArg_ParseTuple(args, "O", &text)) {
        return NULL;
    }

    if (PyUnicode_Check(text)) {
        text_length = PyUnicode_GetLength(text);
        if (text_length < 0) {
            return NULL;
        }
    } else {
        /* Raises TypeError for objects that do not export a buffer. */
        if (PyObject_GetBuffer(text, &view, PyBUF_SIMPLE) < 0) {
            return NULL;
        }
        have_view = 1;
        bytes = (const char*)view.buf;
        text_length = view.len;
    }

    /* For a zero length this is simply the empty tuple. */
    line_tuple = PyTuple_New(text_length);
    if (line_tuple == NULL) {
        goto done;
    }

    /* Fill one whole line per iteration so that only one int object is
     * created per line rather than one per position. */
    while (pos < text_length) {
        Py_ssize_t line_end = line_tuple_line_end(text, bytes, pos, text_length);
        PyObject* line_number_obj =
            (line_end < 0) ? NULL : PyLong_FromSsize_t(line_number);
        if (line_number_obj == NULL) {
            /* Slots not yet filled are NULL, which tuple deallocation
             * tolerates. */
            Py_CLEAR(line_tuple);
            goto done;
        }

        for (; pos < line_end; ++pos) {
            /* PyTuple_SET_ITEM steals a reference, so take one per slot. */
            Py_INCREF(line_number_obj);
            PyTuple_SET_ITEM(line_tuple, pos, line_number_obj);
        }

        Py_DECREF(line_number_obj);  /* drop our own reference */
        ++line_number;
    }

done:
    if (have_view) {
        PyBuffer_Release(&view);
    }
    return line_tuple;
}
/* Method table for the line_tuple module; the all-zero entry ends the list. */
static PyMethodDef LineTupleMethods[] = {
    {
        .ml_name = "build_line_tuple",
        .ml_meth = build_line_tuple,
        .ml_flags = METH_VARARGS,
        .ml_doc = "Build a line number tuple from text.",
    },
    {.ml_name = NULL, .ml_meth = NULL, .ml_flags = 0, .ml_doc = NULL},
};
/* Module definition: named "line_tuple", no docstring, m_size of -1, and
 * the functions listed in LineTupleMethods. */
static struct PyModuleDef linetuplemodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "line_tuple",
    .m_doc = NULL,
    .m_size = -1,
    .m_methods = LineTupleMethods,
};
/* Module initialisation entry point: creates the module object from
 * linetuplemodule and returns it (NULL if creation failed). */
PyMODINIT_FUNC PyInit_line_tuple(void) {
    PyObject* created = PyModule_Create(&linetuplemodule);
    return created;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _build_line_tuple(text) -> tuple[int, ...]: | |
"""Build a tuple where each index corresponds to a character position and the | |
value at that index corresponds to a line number. | |
""" | |
if not text: | |
return () | |
line_list: list[int] = [0] * len(text) | |
line_number = 1 | |
for pos, char in enumerate(text): | |
line_list[pos] = line_number | |
if char == '\n': | |
line_number += 1 | |
line_tuple = tuple(line_list) | |
return line_tuple |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup, Extension | |
# Build configuration for the compiled ``line_tuple`` module, whose only
# source file is line_tuple.c.
module = Extension(
    name="line_tuple",
    sources=["line_tuple.c"],
)

setup(
    name="line_tuple",
    version="0.0.1",
    description="C extension for line positions as a tuple",
    ext_modules=[module],
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment