-
-
Save crusaderky/cf0575cfeeee8faa1bb1b3480bc4a87a to your computer and use it in GitHub Desktop.
import sys | |
from ctypes import POINTER, py_object, Structure, c_ssize_t, c_void_p, sizeof | |
from typing import Any, Iterator, Optional, Sequence, Union | |
__all__ = ("OpStack", ) | |
# Leading members of CPython's ``PyFrameObject``, up to and including the two
# value-stack pointers.
# NOTE(review): this layout (f_valuestack immediately followed by f_stacktop)
# corresponds to the CPython 3.7-3.9 frame struct; confirm against the
# interpreter's frameobject.h before relying on it with any other version.
_FRAME_FIELDS = (
    # PyObject_VAR_HEAD
    ("ob_refcnt", c_ssize_t),
    ("ob_type", c_void_p),
    ("ob_size", c_ssize_t),
    # Kept opaque: these pointers are never dereferenced by this module.
    ("f_back", c_void_p),
    ("f_code", c_void_p),
    # Plain ``PyObject *`` members, hence py_object rather than
    # POINTER(py_object) (which would describe a ``PyObject **``).
    ("f_builtins", py_object),
    ("f_globals", py_object),
    ("f_locals", py_object),
    # ``PyObject **`` members: bottom of the value stack and next free slot.
    ("f_valuestack", POINTER(py_object)),
    ("f_stacktop", POINTER(py_object)),
)

# Interpreters compiled with Py_TRACE_REFS prepend two link pointers to every
# object header. Such builds expose ``sys.getobjects``; ``sys.flags.debug``
# only mirrors the ``-d`` command line switch and says nothing about the
# object layout. The tuple is extended *before* the class is created because
# ctypes refuses to reassign ``_fields_`` once it has been set.
if hasattr(sys, "getobjects"):
    _FRAME_FIELDS = (
        ("_ob_next", c_void_p),
        ("_ob_prev", c_void_p),
    ) + _FRAME_FIELDS


class Frame(Structure):
    """ctypes overlay for the beginning of a CPython frame object.

    Intended to be mapped over a live frame with
    ``Frame.from_address(id(frame))``; it does not own the memory and does
    not keep the frame alive.
    """

    _fields_ = _FRAME_FIELDS


# Size in bytes of one value-stack slot (a ``PyObject *``).
PTR_SIZE = sizeof(POINTER(py_object))
# Byte offsets of the two stack pointers inside the frame object.
F_VALUESTACK_OFFSET = Frame.f_valuestack.offset
F_STACKTOP_OFFSET = Frame.f_stacktop.offset
class OpStack(Sequence[Any]):
    """Read-only sequence view over the value (operand) stack of a frame.

    Index 0 is the bottom of the stack and index -1 is the top. The length
    is captured once, at construction time. The view reads the frame's
    memory directly through ctypes and holds no reference to the frame, so
    the caller must keep the frame object alive while using the view.
    """

    __slots__ = ("_frame", "_len")

    def __init__(self, frame: Any) -> None:
        """Map the view over *frame* and record the current stack depth.

        :param frame: a live CPython frame object (e.g. the ``frame``
            argument of a ``sys.settrace`` callback).
        """
        frame_addr = id(frame)
        self._frame = Frame.from_address(frame_addr)
        # c_void_p yields None for a NULL pointer, which lets us tell
        # "no stack pointer recorded" apart from a real address.
        bottom = c_void_p.from_address(frame_addr + F_VALUESTACK_OFFSET).value
        top = c_void_p.from_address(frame_addr + F_STACKTOP_OFFSET).value
        if bottom is None or top is None:
            # NOTE(review): CPython appears to leave f_stacktop NULL while a
            # frame is running outside of a trace callback; treat that as an
            # empty stack instead of computing a negative length.
            self._len = 0
        else:
            self._len = max(0, (top - bottom) // PTR_SIZE)

    def __repr__(self) -> str:
        """Return a multi-line listing of the stack, top element first."""
        if not self:
            return "<OpStack (empty)>"
        return "<OpStack ({})>\n- {}\n".format(
            len(self),
            "\n- ".join(repr(o) for o in reversed(self)),
        )

    def __len__(self) -> int:
        """Return the number of stack slots captured at construction."""
        return self._len

    def __getitem__(self, item: Union[int, slice]) -> Any:
        """Return one stack element, or a list of elements for a slice.

        Slices follow normal sequence semantics: out-of-range bounds are
        clamped and negative steps are supported.

        :raises IndexError: if an integer index is out of range.
        :raises TypeError: if *item* is neither an ``int`` nor a ``slice``.
        """
        size = self._len
        stack = self._frame.f_valuestack
        if isinstance(item, int):
            if not -size <= item < size:
                raise IndexError(item)
            # Normalise negative indices so only f_valuestack is needed.
            return stack[item + size if item < 0 else item]
        if isinstance(item, slice):
            # slice.indices() clamps the bounds and resolves None defaults
            # for either step direction.
            return [stack[i] for i in range(*item.indices(size))]
        raise TypeError(item)

    def __iter__(self) -> Iterator[Any]:
        """Yield the stack elements from bottom to top."""
        stack = self._frame.f_valuestack
        for i in range(self._len):
            yield stack[i]

    def __reversed__(self) -> Iterator[Any]:
        """Yield the stack elements from top to bottom."""
        stack = self._frame.f_valuestack
        for i in range(self._len - 1, -1, -1):
            yield stack[i]
sorry, I'm not sure what your problem is; I suggest you ask on stackoverflow
Could I ask where you are using this code?
I'm working on a POC to sandbox python code by validating functions and their parameters against a white/blacklist just before they are executed.
So for example open("my_local_file") could be authorised while open("/etc/passwd") could be rejected, and if you replace the hardcoded string with a dynamically generated python variable it will still work.
Cool!
I just wanted to give you an update that we opened up the project I have been working on using this code. It's been working really nicely, we are using it to trace which function calls are being made to libraries like numpy and scipy, and then inferring their APIs from that.
Your code: https://github.com/data-apis/python-record-api/blob/master/record_api/get_stack.py
How we are using it to handle different bytecodes: https://github.com/data-apis/python-record-api/blob/3c6d01972bbe57f8f84b510eb44184f492ec534a/record_api/core.py#L391-L499
Thanks again for publishing this!
`_ob_prev`, `_ob_next`, `f_builtins`, `f_globals`, and `f_locals` are `py_object`, not `POINTER(py_object)`, and you can use `cast(self._frame.f_valuestack, c_void_p).value` (or just set `f_valuestack` to a `c_void_p`) instead of computing `F_VALUESTACK_OFFSET` manually.
This is very cool. I was even able to detect function calls and hot-swap alternative functions into the stack before invocation!
@crusaderky : would you consider declaring a license here in the comments? MIT? 🙏🏻
Happy to release this code under the Apache 2.0 license
Thank you!
thanks!
Thank you so much for putting this code out here!
I had a question about this. I am trying to use `settrace` bytecode tracing to understand every function that is being called and with what args. However, when I am on a bytecode like `CALL_METHOD` or `CALL_FUNCTION`, the stack won't have enough items on it to call the bytecode operation. Do you have any ideas about this? Maybe I should use the parent frame?