diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 4d6d5ce9c5ea26..f7d3dcd440aaf1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1582,6 +1582,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_interpreters)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alphabet)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 20dcf81ccf15fa..22494b1798cc53 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -305,6 +305,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(alias) STRUCT_FOR_ID(align) STRUCT_FOR_ID(all) + STRUCT_FOR_ID(all_interpreters) STRUCT_FOR_ID(all_threads) STRUCT_FOR_ID(allow_code) STRUCT_FOR_ID(alphabet) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 1ce91dc51ea0b7..892c3cdd9623a2 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1580,6 +1580,7 @@ extern "C" { INIT_ID(alias), \ INIT_ID(align), \ INIT_ID(all), \ + INIT_ID(all_interpreters), \ INIT_ID(all_threads), \ INIT_ID(allow_code), \ INIT_ID(alphabet), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c7c23494845e01..f0fc3c4f5b0900 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1000,6 +1000,10 
@@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(all_interpreters);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(all_threads);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/test/test_get_gc_stats.py b/Lib/test/test_get_gc_stats.py
new file mode 100644
index 00000000000000..66a1e061f6b266
--- /dev/null
+++ b/Lib/test/test_get_gc_stats.py
@@ -0,0 +1,188 @@
+import os
+import textwrap
+import time
+import unittest
+
+from test.support import (
+    requires_gil_enabled,
+    requires_remote_subprocess_debugging,
+)
+from test.test_profiling.test_sampling_profiler.helpers import test_subprocess
+
+try:
+    import _remote_debugging  # noqa: F401
+except ImportError:
+    raise unittest.SkipTest(
+        "Test only runs when _remote_debugging is available"
+    )
+
+
+def get_interpreter_identifiers(gc_stats: list[dict[str, int | float]]) -> tuple[int, ...]:
+    """Return the sorted, de-duplicated "iid" values found in *gc_stats*."""
+    return tuple(sorted({s["iid"] for s in gc_stats}))
+
+
+def get_generations(gc_stats: list[dict[str, int | float]]) -> tuple[int, ...]:
+    """Return the sorted, de-duplicated "gen" values found in *gc_stats*."""
+    return tuple(sorted({s["gen"] for s in gc_stats}))
+
+
+def get_last_item(gc_stats: list[dict[str, int | float]],
+                  generation: int,
+                  iid: int) -> dict[str, int | float] | None:
+    """Return the entry with the greatest "ts_start" for *generation* and *iid*.
+
+    Returns None when *gc_stats* has no entry for that pair.
+    """
+    item = None
+    for s in gc_stats:
+        if s["gen"] == generation and s["iid"] == iid:
+            if item is None or item["ts_start"] < s["ts_start"]:
+                item = s
+
+    return item
+
+
+@requires_remote_subprocess_debugging()
+class TestGetGCStats(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls._main_iid = 0 # main interpreter ID
+        cls._script = '''
+            import concurrent.interpreters as interpreters
+            import gc
+            import time
+
+            source = """if True:
+                import gc
+
+                gc.collect(0)
+                gc.collect(1)
+                gc.collect(2)
+            """
+
+            if {0}:
+                interp = interpreters.create()
+                interp.exec(source)
+
+            gc.collect(0)
+            gc.collect(1)
+            gc.collect(2)
+
+            _test_sock.sendall(b"working")
+            objects = []
+            while True:
+                if len(objects) > 100:
+                    objects = []
+
+                # objects that GC will visit should increase
+                objects.append(object())
+
+                time.sleep(0.1)
+                if {0}:
+                    interp.exec(source)
+                gc.collect(0)
+                gc.collect(1)
+                gc.collect(2)
+            '''
+
+    def _collect_gc_stats(self, script:str, all_interpreters:bool):
+        """Run *script* and return (before_stats, after_stats) snapshots.
+
+        The second snapshot is re-read (up to 10 times, 0.5 s apart) until the
+        main interpreter's latest generation-2 entry has a newer "ts_stop".
+        """
+        with test_subprocess(script, wait_for_working=True) as subproc:
+            monitor = _remote_debugging.GCMonitor(subproc.process.pid, debug=True)
+            before_stats = monitor.get_gc_stats(all_interpreters=all_interpreters)
+            before = get_last_item(before_stats, 2, self._main_iid)
+            # Fail with a readable message instead of a TypeError on None below.
+            self.assertIsNotNone(before, before_stats)
+            for _ in range(10):
+                time.sleep(0.5)
+                after_stats = monitor.get_gc_stats(all_interpreters=all_interpreters)
+                after = get_last_item(after_stats, 2, self._main_iid)
+                self.assertIsNotNone(after, after_stats)
+                if after["ts_stop"] > before["ts_stop"]:
+                    break
+
+        return before_stats, after_stats
+
+    def _check_gc_stats(self, before, after):
+        """Check that the *after* entry shows GC progress relative to *before*."""
+        self.assertIsNotNone(before)
+        self.assertIsNotNone(after)
+
+        self.assertGreater(after["collections"], before["collections"], (before, after))
+        self.assertGreater(after["ts_start"], before["ts_start"], (before, after))
+        self.assertGreater(after["ts_stop"], before["ts_stop"], (before, after))
+        self.assertGreater(after["duration"], before["duration"], (before, after))
+
+        self.assertGreater(after["candidates"], before["candidates"], (before, after))
+
+        # may not grow
+        self.assertGreaterEqual(after["collected"], before["collected"], (before, after))
+        self.assertGreaterEqual(after["uncollectable"], before["uncollectable"], (before, after))
+
+    def _check_interpreter_gc_stats(self, before_stats, after_stats):
+        """Check both snapshots per interpreter and per generation."""
+        before_iids = get_interpreter_identifiers(before_stats)
+        after_iids = get_interpreter_identifiers(after_stats)
+
+        self.assertEqual(before_iids, after_iids)
+
+        self.assertEqual(get_generations(before_stats), (0, 1, 2))
+        self.assertEqual(get_generations(after_stats), (0, 1, 2))
+
+        for iid in after_iids:
+            with self.subTest(f"interpreter id={iid}"):
+                before_last_items = (get_last_item(before_stats, 0, iid),
+                                     get_last_item(before_stats, 1, iid),
+                                     get_last_item(before_stats, 2, iid))
+
+                after_last_items = (get_last_item(after_stats, 0, iid),
+                                    get_last_item(after_stats, 1, iid),
+                                    get_last_item(after_stats, 2, iid))
+
+                for before, after in zip(before_last_items, after_last_items):
+                    self._check_gc_stats(before, after)
+
+    def test_get_gc_stats_fields(self):
+        keys = sorted(("gen", "iid", "ts_start", "ts_stop", #"heap_size",
+                       "collections", "collected", "uncollectable", "candidates",
+                       "duration"))
+        monitor = _remote_debugging.GCMonitor(os.getpid(), debug=True)
+        stats = monitor.get_gc_stats(all_interpreters=False)
+        self.assertIsInstance(stats, list)
+        for item in stats:
+            self.assertIsInstance(item, dict)
+            self.assertEqual(sorted(item.keys()), keys)
+
+    @requires_gil_enabled()
+    def test_get_gc_stats_for_main_interpreter(self):
+        script = textwrap.dedent(self._script.format(False))
+        before_stats, after_stats = self._collect_gc_stats(script, False)
+
+        self._check_interpreter_gc_stats(before_stats, after_stats)
+
+    @requires_gil_enabled()
+    def test_get_gc_stats_for_main_interpreter_if_subinterpreter_exists(self):
+        script = textwrap.dedent(self._script.format(True))
+        before_stats, after_stats = self._collect_gc_stats(script, False)
+
+        self._check_interpreter_gc_stats(before_stats, after_stats)
+
+    @requires_gil_enabled()
+    def test_get_gc_stats_for_all_interpreters(self):
+        script = textwrap.dedent(self._script.format(True))
+        before_stats, after_stats = self._collect_gc_stats(script, True)
+
+        before_iids = get_interpreter_identifiers(before_stats)
+        after_iids = get_interpreter_identifiers(after_stats)
+
+        self.assertGreater(len(before_iids), 1)
+        self.assertGreater(len(after_iids), 1)
+        self.assertEqual(before_iids, after_iids)
+
+        self._check_interpreter_gc_stats(before_stats, after_stats)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-23-13-19.gh-issue-146527.P3Xv4Q.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-23-13-19.gh-issue-146527.P3Xv4Q.rst
new file mode 100644
index 00000000000000..3b566d3bfa3301
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-23-13-19.gh-issue-146527.P3Xv4Q.rst
@@ -0,0 +1,5 @@
+Add a ``get_gc_stats`` function to the :mod:`!_remote_debugging`
+module to allow reading GC statistics from an external Python process.
+Add a ``GCMonitor`` class to the :mod:`!_remote_debugging` module
+for cases where the full ``RemoteUnwinder`` functionality is not required.
+Patch by Sergey Miryanov.
diff --git a/Modules/Setup b/Modules/Setup
index 7d816ead8432ef..33737c21cb4066 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -285,7 +285,7 @@ PYTHONPATH=$(COREPYTHONPATH)
 
 #*shared*
 #_ctypes_test _ctypes/_ctypes_test.c
-#_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/threads.c _remote_debugging/asyncio.c
+#_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/threads.c _remote_debugging/asyncio.c _remote_debugging/interpreters.c
 #_testcapi _testcapimodule.c
 #_testimportmultiple _testimportmultiple.c
 #_testmultiphase _testmultiphase.c
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 0d520684c795d6..0305bf23cc3756 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -41,7 +41,7 @@
 @MODULE__PICKLE_TRUE@_pickle _pickle.c
 @MODULE__QUEUE_TRUE@_queue _queuemodule.c
 @MODULE__RANDOM_TRUE@_random _randommodule.c
-@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c
_remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c _remote_debugging/binary_io_writer.c _remote_debugging/binary_io_reader.c _remote_debugging/subprocess.c
+@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c _remote_debugging/binary_io_writer.c _remote_debugging/binary_io_reader.c _remote_debugging/subprocess.c _remote_debugging/interpreters.c
 @MODULE__STRUCT_TRUE@_struct _struct.c
 
 # build supports subinterpreters
diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h
index 07738d45e42d24..6ec02cb53ae2ad 100644
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@@ -262,6 +262,7 @@ typedef struct {
     PyTypeObject *AwaitedInfo_Type;
     PyTypeObject *BinaryWriter_Type;
     PyTypeObject *BinaryReader_Type;
+    PyTypeObject *GCMonitor_Type;  // type object created from GCMonitor_spec
 } RemoteDebuggingState;
 
 enum _ThreadState {
@@ -346,6 +347,12 @@ typedef struct {
     size_t count;
 } StackChunkList;
 
+typedef struct {  // everything iterate_interpreters() needs to read a remote runtime
+    proc_handle_t handle;                   // passed to _Py_RemoteDebug_ReadRemoteMemory()
+    uintptr_t runtime_start_address;        // base address the runtime_state offsets are added to
+    struct _Py_DebugOffsets debug_offsets;  // offsets filled in by _Py_RemoteDebug_ReadDebugOffsets()
+} RuntimeOffsets;
+
 /*
  * Context for frame chain traversal operations.
  */
@@ -376,6 +383,16 @@ typedef struct {
     int32_t tlbc_index;  // Thread-local bytecode index (free-threading)
 } CodeObjectContext;
 
+typedef struct {  // instance layout of _remote_debugging.GCMonitor
+    PyObject_HEAD
+    proc_handle_t handle;                   // opened in __init__, released in GCMonitor_dealloc
+    uintptr_t runtime_start_address;        // result of _Py_RemoteDebug_GetPyRuntimeAddress()
+    struct _Py_DebugOffsets debug_offsets;  // validated with validate_debug_offsets() in __init__
+    int debug;                              // the `debug` argument of __init__
+} GCMonitorObject;
+
+#define GCMonitor_CAST(op) ((GCMonitorObject *)(op))
+
 /* Function pointer types for iteration callbacks */
 typedef int (*thread_processor_func)(
     RemoteUnwinderObject *unwinder,
@@ -390,6 +407,14 @@ typedef int (*set_entry_processor_func)(
     void *context
 );
 
+typedef int (*interpreter_processor_func)(  // a negative return value stops iterate_interpreters()
+    RuntimeOffsets *offsets,
+    uintptr_t interpreter_state_addr,       // remote address of the interpreter state being visited
+    int64_t iid,                            // interpreter id read from the remote interpreter state
+    void *context                           // opaque pointer forwarded from iterate_interpreters()
+);
+
+
 /* ============================================================================
  * STRUCTSEQ DESCRIPTORS (extern declarations)
  * ============================================================================ */
@@ -588,6 +613,17 @@ extern void _Py_RemoteDebug_InitThreadsState(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st);
 extern int _Py_RemoteDebug_StopAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st);
 extern void _Py_RemoteDebug_ResumeAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st);
 
+/* ============================================================================
+ * INTERPRETER FUNCTION DECLARATIONS
+ * ============================================================================ */
+
+extern int  // 0 after visiting every interpreter, -1 on the first failure
+iterate_interpreters(
+    RuntimeOffsets *offsets,
+    interpreter_processor_func processor,
+    void *context
+);
+
 /* ============================================================================
  * ASYNCIO FUNCTION DECLARATIONS
  * ============================================================================ */
diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h
index 15df48fabb56b2..ed4733159724fa 100644
--- a/Modules/_remote_debugging/clinic/module.c.h
+++
b/Modules/_remote_debugging/clinic/module.c.h @@ -495,6 +495,182 @@ _remote_debugging_RemoteUnwinder_resume_threads(PyObject *self, PyObject *Py_UNU return return_value; } +PyDoc_STRVAR(_remote_debugging_GCMonitor___init____doc__, +"GCMonitor(pid, *, debug=False)\n" +"--\n" +"\n" +"Initialize a new GCMonitor object for monitoring GC events from remote process.\n" +"\n" +"Args:\n" +" pid: Process ID of the target Python process to monitor\n" +" debug: If True, chain exceptions to explain the sequence of events that\n" +" lead to the exception.\n" +"\n" +"The GCMonitor provides functionality to read GC statistics from a running\n" +"Python process.\n" +"\n" +"Raises:\n" +" PermissionError: If access to the target process is denied\n" +" OSError: If unable to attach to the target process or access its memory\n" +" RuntimeError: If unable to read debug information from the target process"); + +static int +_remote_debugging_GCMonitor___init___impl(GCMonitorObject *self, int pid, + int debug); + +static int +_remote_debugging_GCMonitor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(debug), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "debug", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "GCMonitor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 1; + int pid; + int debug = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + pid = PyLong_AsInt(fastargs[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + debug = PyObject_IsTrue(fastargs[1]); + if (debug < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _remote_debugging_GCMonitor___init___impl((GCMonitorObject *)self, pid, debug); + +exit: + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_GCMonitor_get_gc_stats__doc__, +"get_gc_stats($self, /, all_interpreters=False)\n" +"--\n" +"\n" +"Get garbage collector statistics from external Python process.\n" +"\n" +" all_interpreters\n" +" If True, return GC statistics from all interpreters.\n" +" If False, return only from main interpreter.\n" +"\n" +"Returns a list of dictionaries with GC statistics data.\n" +"\n" +"Returns:\n" +" List of dicts.\n" +" dict: A dictionary containing:\n" +" - gen:\n" +" - iid:\n" +" - ts_start:\n" +" - ts_stop:\n" +" - heap_size:\n" +" - collections:\n" +" - collected:\n" +" - uncollectable:\n" +" - candidates:\n" +" - duration:\n" +"\n" +"Raises:\n" +" RuntimeError:"); + +#define _REMOTE_DEBUGGING_GCMONITOR_GET_GC_STATS_METHODDEF \ + {"get_gc_stats", _PyCFunction_CAST(_remote_debugging_GCMonitor_get_gc_stats), METH_FASTCALL|METH_KEYWORDS, _remote_debugging_GCMonitor_get_gc_stats__doc__}, + +static PyObject * +_remote_debugging_GCMonitor_get_gc_stats_impl(GCMonitorObject *self, + int all_interpreters); + +static PyObject * +_remote_debugging_GCMonitor_get_gc_stats(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + 
PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(all_interpreters), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"all_interpreters", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_gc_stats", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + int all_interpreters = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + all_interpreters = PyObject_IsTrue(args[0]); + if (all_interpreters < 0) { + goto exit; + } +skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_GCMonitor_get_gc_stats_impl((GCMonitorObject *)self, all_interpreters); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(_remote_debugging_BinaryWriter___init____doc__, "BinaryWriter(filename, sample_interval_us, start_time_us, *,\n" " compression=0)\n" @@ -1296,4 +1472,97 @@ _remote_debugging_is_python_process(PyObject *module, PyObject *const *args, Py_ exit: return return_value; } -/*[clinic end generated code: output=34f50b18f317b9b6 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_remote_debugging_get_gc_stats__doc__, +"get_gc_stats($module, /, pid, *, all_interpreters=False)\n" +"--\n" +"\n" +"Get garbage collector statistics from external Python process.\n" +"\n" +" all_interpreters\n" +" If True, return GC statistics from all interpreters.\n" +" If False, return only from main interpreter.\n" +"\n" +"Returns:\n" +" List of dicts.\n" +" dict: A 
dictionary containing:\n" +" - gen:\n" +" - iid:\n" +" - ts_start:\n" +" - ts_stop:\n" +" - heap_size:\n" +" - collections:\n" +" - collected:\n" +" - uncollectable:\n" +" - candidates:\n" +" - duration:\n" +"\n" +"Raises:\n" +" RuntimeError:"); + +#define _REMOTE_DEBUGGING_GET_GC_STATS_METHODDEF \ + {"get_gc_stats", _PyCFunction_CAST(_remote_debugging_get_gc_stats), METH_FASTCALL|METH_KEYWORDS, _remote_debugging_get_gc_stats__doc__}, + +static PyObject * +_remote_debugging_get_gc_stats_impl(PyObject *module, int pid, + int all_interpreters); + +static PyObject * +_remote_debugging_get_gc_stats(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_interpreters), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "all_interpreters", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_gc_stats", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1;
+    int pid;
+    int all_interpreters = 0;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    pid = PyLong_AsInt(args[0]);
+    if (pid == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    all_interpreters = PyObject_IsTrue(args[1]);
+    if (all_interpreters < 0) {
+        goto exit;
+    }
+skip_optional_kwonly:
+    return_value = _remote_debugging_get_gc_stats_impl(module, pid, all_interpreters);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=63dc4f85d8a7eb55 input=a9049054013a1b77]*/
diff --git a/Modules/_remote_debugging/gc_stats.h b/Modules/_remote_debugging/gc_stats.h
new file mode 100644
index 00000000000000..c20772490ad9e3
--- /dev/null
+++ b/Modules/_remote_debugging/gc_stats.h
@@ -0,0 +1,165 @@
+/******************************************************************************
+ * Remote Debugging Module - GC Stats Functions
+ *
+ * This file contains functions for reading GC stats from interpreter state.
+ ******************************************************************************/
+
+#ifndef Py_REMOTE_DEBUGGING_GC_STATS_H
+#define Py_REMOTE_DEBUGGING_GC_STATS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "_remote_debugging.h"
+
+/* Context handed to get_gc_stats_from_interpreter_state() through the
+ * `context` pointer of iterate_interpreters(). */
+typedef struct {
+    PyObject *result;       // list that receives one dict per statistics slot
+    bool all_interpreters;  // if false, interpreters with iid > 0 are skipped
+} GetGCStatsContext;
+
+/*
+ * Append one dict per GC statistics slot found in `stats` to the list `result`.
+ *
+ * Generation 0 is taken from stats->young.items (GC_YOUNG_STATS_SIZE slots),
+ * generation N > 0 from stats->old[N-1].items (GC_OLD_STATS_SIZE slots).
+ * Each dict has the keys "gen", "iid", "ts_start", "ts_stop", "collections",
+ * "collected", "uncollectable", "candidates" and "duration".
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+read_gc_stats(struct gc_stats *stats, int64_t iid, PyObject *result)
+{
+/* Store the new reference `expr` in `item` under `key`.  `val` is reset to
+ * NULL after every store, so the `error` path never releases it twice. */
+#define SET_ITEM(key, expr) do {                                    \
+    val = (expr);                                                   \
+    if (val == NULL || PyDict_SetItemString(item, key, val) < 0) {  \
+        goto error;                                                 \
+    }                                                               \
+    Py_CLEAR(val);                                                  \
+} while (0)
+
+    PyObject *item = NULL;
+    PyObject *val = NULL;
+
+    for (unsigned long gen = 0; gen < NUM_GENERATIONS; gen++) {
+        struct gc_generation_stats *items;
+        int size;
+        if (gen == 0) {
+            items = (struct gc_generation_stats *)stats->young.items;
+            size = GC_YOUNG_STATS_SIZE;
+        }
+        else {
+            items = (struct gc_generation_stats *)stats->old[gen-1].items;
+            size = GC_OLD_STATS_SIZE;
+        }
+        for (int i = 0; i < size; i++) {
+            struct gc_generation_stats *stats_item = &items[i];
+            item = PyDict_New();
+            if (item == NULL) {
+                goto error;
+            }
+
+            SET_ITEM("gen", PyLong_FromUnsignedLong(gen));
+            SET_ITEM("iid", PyLong_FromInt64(iid));
+
+            SET_ITEM("ts_start", PyLong_FromInt64(stats_item->ts_start));
+            SET_ITEM("ts_stop", PyLong_FromInt64(stats_item->ts_stop));
+            // NOTE: "heap_size" is not exported (its key is disabled in the tests too).
+            // SET_ITEM("heap_size", PyLong_FromSsize_t(stats_item->heap_size));
+            SET_ITEM("collections", PyLong_FromSsize_t(stats_item->collections));
+            SET_ITEM("collected", PyLong_FromSsize_t(stats_item->collected));
+            SET_ITEM("uncollectable", PyLong_FromSsize_t(stats_item->uncollectable));
+            SET_ITEM("candidates", PyLong_FromSsize_t(stats_item->candidates));
+
+            SET_ITEM("duration", PyFloat_FromDouble(stats_item->duration));
+
+            int rc = PyList_Append(result, item);
+            Py_CLEAR(item);
+            if (rc < 0) {
+                goto error;
+            }
+        }
+    }
+
+#undef SET_ITEM
+
+    return 0;
+
+error:
+    Py_XDECREF(val);
+    Py_XDECREF(item);
+
+    return -1;
+}
+
+/*
+ * interpreter_processor_func callback: read the gc_stats block of the remote
+ * interpreter at `interpreter_state_addr` and append its slots to ctx->result.
+ *
+ * Interpreters with iid > 0 are skipped (returning 0) unless
+ * ctx->all_interpreters is set.  Returns 0 on success and -1 on failure.
+ */
+static int
+get_gc_stats_from_interpreter_state(RuntimeOffsets *offsets,
+                                    uintptr_t interpreter_state_addr,
+                                    int64_t iid,
+                                    void *context)
+{
+    GetGCStatsContext *ctx = (GetGCStatsContext *)context;
+    if (!ctx->all_interpreters && iid > 0) {
+        return 0;
+    }
+
+    uintptr_t gc_stats_addr = 0;
+    uintptr_t gc_stats_pointer_address = interpreter_state_addr
+        + offsets->debug_offsets.interpreter_state.gc
+        + offsets->debug_offsets.gc.generation_stats;
+    if (_Py_RemoteDebug_ReadRemoteMemory(&offsets->handle,
+                                         gc_stats_pointer_address,
+                                         sizeof(gc_stats_addr),
+                                         &gc_stats_addr) < 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to read GC state address");
+        return -1;
+    }
+    if (gc_stats_addr == 0) {
+        // Nothing to read: do not issue a remote read at address 0.
+        PyErr_SetString(PyExc_RuntimeError, "Remote GC stats pointer is NULL");
+        return -1;
+    }
+
+    // Zero-filled: the remote block may be smaller than the local struct and
+    // read_gc_stats() walks every local slot, so the unread tail must not be
+    // indeterminate.
+    struct gc_stats stats = {0};
+    uint64_t gc_stats_size = offsets->debug_offsets.gc.generation_stats_size;
+    if (gc_stats_size > sizeof(stats)) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "Remote gc_stats size (%llu) exceeds local size (%zu)",
+                     (unsigned long long)gc_stats_size, sizeof(stats));
+        return -1;
+    }
+    if (_Py_RemoteDebug_ReadRemoteMemory(&offsets->handle,
+                                         gc_stats_addr,
+                                         gc_stats_size,
+                                         &stats) < 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to read GC state");
+        return -1;
+    }
+
+    if (read_gc_stats(&stats, iid, ctx->result) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* Py_REMOTE_DEBUGGING_GC_STATS_H */
diff --git a/Modules/_remote_debugging/interpreters.c
b/Modules/_remote_debugging/interpreters.c
new file mode 100644
index 00000000000000..73b733cf399f5e
--- /dev/null
+++ b/Modules/_remote_debugging/interpreters.c
@@ -0,0 +1,95 @@
+/******************************************************************************
+ * Remote Debugging Module - Interpreters Functions
+ *
+ * This file contains the function for iterating over interpreters.
+ ******************************************************************************/
+
+#include "_remote_debugging.h"
+
+/* NOTE(review): the header names of the platform includes below were not
+ * legible in the reviewed patch; they are presumed -- confirm before merging. */
+#ifndef MS_WINDOWS
+#include <unistd.h>
+#endif
+
+#ifdef __linux__
+#include <dirent.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#endif
+
+/* ============================================================================
+ * INTERPRETERS ITERATION FUNCTION
+ * ============================================================================ */
+
+/*
+ * Walk the remote runtime's linked list of interpreter states and call
+ * `processor(offsets, interpreter_state_addr, iid, context)` for each one.
+ *
+ * The list head is read at offsets->runtime_start_address +
+ * runtime_state.interpreters_head; each node's id and next pointer are read
+ * at the interpreter_state.id and interpreter_state.next debug offsets.
+ *
+ * Returns 0 once the whole list has been visited.  Returns -1 as soon as a
+ * remote read fails, the list head is NULL, or `processor` returns a
+ * negative value; the remaining interpreters are then not visited.
+ */
+int
+iterate_interpreters(
+    RuntimeOffsets *offsets,
+    interpreter_processor_func processor,
+    void *context
+) {
+    uintptr_t interpreters_head_addr =
+        offsets->runtime_start_address
+        + (uintptr_t)offsets->debug_offsets.runtime_state.interpreters_head;
+    uintptr_t interpreter_id_offset =
+        (uintptr_t)offsets->debug_offsets.interpreter_state.id;
+    uintptr_t interpreter_next_offset =
+        (uintptr_t)offsets->debug_offsets.interpreter_state.next;
+
+    uintptr_t interpreter_state_addr = 0;
+    if (_Py_RemoteDebug_ReadRemoteMemory(&offsets->handle,
+                                         interpreters_head_addr,
+                                         sizeof(interpreter_state_addr),
+                                         &interpreter_state_addr) < 0) {
+        _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read interpreter state address");
+        return -1;
+    }
+
+    if (interpreter_state_addr == 0) {
+        _set_debug_exception_cause(PyExc_RuntimeError, "No interpreter state found");
+        return -1;
+    }
+
+    int64_t iid = 0;
+    static_assert(
+        sizeof((((PyInterpreterState*)NULL)->id)) == sizeof(iid),
+        "Size of PyInterpreterState.id does not match the local iid variable");
+    while (interpreter_state_addr != 0) {
+        if (_Py_RemoteDebug_ReadRemoteMemory(
+                &offsets->handle,
+                interpreter_state_addr + interpreter_id_offset,
+                sizeof(iid),
+                &iid) < 0) {
+            _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read interpreter ID");
+            return -1;
+        }
+
+        // Call the processor function for this interpreter
+        if (processor(offsets, interpreter_state_addr, iid, context) < 0) {
+            return -1;
+        }
+
+        if (_Py_RemoteDebug_ReadRemoteMemory(
+                &offsets->handle,
+                interpreter_state_addr + interpreter_next_offset,
+                sizeof(interpreter_state_addr),
+                &interpreter_state_addr) < 0) {
+            _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read next interpreter state");
+            return -1;
+        }
+    }
+
+    return 0;
+}
diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
index 32f2cbacf2143b..62d56da78ada7d 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -8,6 +8,7 @@
 #include "_remote_debugging.h"
 #include "binary_io.h"
 #include "debug_offsets_validation.h"
+#include "gc_stats.h"
 
 /* Forward declarations for clinic-generated code */
 typedef struct {
@@ -244,6 +245,31 @@ validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets)
     return _PyRemoteDebug_ValidateDebugOffsetsLayout(debug_offsets);
 }
 
+/*
+ * Collect GC statistics from the remote process described by `offsets`.
+ *
+ * Returns a new list of dicts (one per statistics slot), limited to
+ * interpreter id 0 unless `all_interpreters` is true, or NULL on failure.
+ */
+static PyObject *
+get_gc_stats(RuntimeOffsets *offsets, bool all_interpreters)
+{
+    PyObject *result = PyList_New(0);
+    if (result == NULL) {
+        return NULL;
+    }
+    GetGCStatsContext ctx = {
+        .result = result,
+        .all_interpreters = all_interpreters,
+    };
+    if (iterate_interpreters(offsets, get_gc_stats_from_interpreter_state, &ctx) < 0) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    return result;
+}
+
 /* ============================================================================
  * REMOTEUNWINDER CLASS IMPLEMENTATION
  * ============================================================================ */
@@ -1100,6 +1126,168 @@ static PyType_Spec RemoteUnwinder_spec = {
     .slots = RemoteUnwinder_slots,
 };
 
+/* ============================================================================
+ * GCMONITOR CLASS IMPLEMENTATION
+ * ============================================================================ */
+
+/*[clinic input]
+class _remote_debugging.GCMonitor "GCMonitorObject *" "&GCMonitor_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ebc229325a5e5154]*/
+
+/*[clinic input]
+@permit_long_summary
+@permit_long_docstring_body
+_remote_debugging.GCMonitor.__init__
+    pid: int
+    *
+    debug: bool = False
+
+Initialize a new GCMonitor object for monitoring GC events from remote process.
+
+Args:
+    pid: Process ID of the target Python process to monitor
+    debug: If True, chain exceptions to explain the sequence of events that
+           lead to the exception.
+
+The GCMonitor provides functionality to read GC statistics from a running
+Python process.
+
+Raises:
+    PermissionError: If access to the target process is denied
+    OSError: If unable to attach to the target process or access its memory
+    RuntimeError: If unable to read debug information from the target process
+[clinic start generated code]*/
+
+static int
+_remote_debugging_GCMonitor___init___impl(GCMonitorObject *self, int pid,
+                                          int debug)
+/*[clinic end generated code: output=2cdf351c2f6335db input=1185a48535b808be]*/
+{
+    self->debug = debug;
+
+    // __init__ may run again on an already initialized object: release the
+    // previous process handle first (same steps as GCMonitor_dealloc).
+    if (self->handle.pid != 0) {
+        _Py_RemoteDebug_ClearCache(&self->handle);
+        _Py_RemoteDebug_CleanupProcHandle(&self->handle);
+    }
+
+    if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
+        set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle");
+        return -1;
+    }
+
+    self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle);
+    if (self->runtime_start_address == 0) {
+        set_exception_cause(self, PyExc_RuntimeError, "Failed to get Python runtime address");
+        return -1;
+    }
+
+    if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle,
+                                         &self->runtime_start_address,
+                                         &self->debug_offsets) < 0)
+    {
+        set_exception_cause(self, PyExc_RuntimeError, "Failed to read debug offsets");
+        return -1;
+    }
+
+    // Validate that the debug offsets are valid
+    if (validate_debug_offsets(&self->debug_offsets) == -1) {
+        set_exception_cause(self, PyExc_RuntimeError, "Invalid debug offsets found");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*[clinic input]
+@critical_section
+_remote_debugging.GCMonitor.get_gc_stats
+
+    all_interpreters: bool = False
+        If True, return GC statistics from all interpreters.
+        If False, return only from main interpreter.
+
+Get garbage collector statistics from external Python process.
+
+Returns a list of dictionaries with GC statistics data.
+
+Returns:
+    List of dicts.
+    dict: A dictionary containing:
+        - gen:
+        - iid:
+        - ts_start:
+        - ts_stop:
+        - heap_size:
+        - collections:
+        - collected:
+        - uncollectable:
+        - candidates:
+        - duration:
+
+Raises:
+    RuntimeError:
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_GCMonitor_get_gc_stats_impl(GCMonitorObject *self,
+                                              int all_interpreters)
+/*[clinic end generated code: output=f73f365725224f7a input=f41cda6c30299fee]*/
+{
+    // get_gc_stats() works on a RuntimeOffsets, so hand it a shallow copy of
+    // this object's process handle, runtime address and debug offsets.
+    RuntimeOffsets offsets = {
+        .handle = self->handle,
+        .runtime_start_address = self->runtime_start_address,
+        .debug_offsets = self->debug_offsets,
+    };
+    return get_gc_stats(&offsets, all_interpreters);
+}
+
+static PyMethodDef GCMonitor_methods[] = {
+    _REMOTE_DEBUGGING_GCMONITOR_GET_GC_STATS_METHODDEF
+    {NULL, NULL}
+};
+
+/*
+ * tp_dealloc: release the remote process handle (if one was opened) and free
+ * the object through the type's tp_free.  The type is created from a
+ * PyType_Spec, so the instance's reference to it is dropped last.
+ */
+static void
+GCMonitor_dealloc(PyObject *op)
+{
+    GCMonitorObject *self = GCMonitor_CAST(op);
+    PyTypeObject *tp = Py_TYPE(self);
+
+    if (self->handle.pid != 0) {
+        _Py_RemoteDebug_ClearCache(&self->handle);
+        _Py_RemoteDebug_CleanupProcHandle(&self->handle);
+    }
+    tp->tp_free(self);
+    Py_DECREF(tp);
+}
+
+static PyType_Slot GCMonitor_slots[] = {
+    {Py_tp_doc, (void *)"GCMonitor(pid): Monitor GC events of a remote Python process."},
+    {Py_tp_methods, GCMonitor_methods},
+    {Py_tp_init, _remote_debugging_GCMonitor___init__},
+    {Py_tp_dealloc, GCMonitor_dealloc},
+    {0, NULL}
+};
+
+static PyType_Spec GCMonitor_spec = {
+    .name = "_remote_debugging.GCMonitor",
+    .basicsize = sizeof(GCMonitorObject),
+    .flags = (
+        Py_TPFLAGS_DEFAULT
+        | Py_TPFLAGS_IMMUTABLETYPE
+    ),
+    .slots = GCMonitor_slots,
+};
+
 /* Forward declarations for type specs defined later */
 static PyType_Spec BinaryWriter_spec;
 static PyType_Spec BinaryReader_spec;
@@ -1126,6 +1314,11 @@ _remote_debugging_exec(PyObject *m)
         return -1;
     }
 
+    CREATE_TYPE(m, st->GCMonitor_Type, &GCMonitor_spec);
+    if (PyModule_AddType(m, st->GCMonitor_Type) < 0) {
+        return -1;
+    }
+
     // Initialize structseq types
     st->TaskInfo_Type = PyStructSequence_NewType(&TaskInfo_desc);
     if (st->TaskInfo_Type == NULL) {
@@ -1242,6 +1435,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg)
     Py_VISIT(state->AwaitedInfo_Type);
     Py_VISIT(state->BinaryWriter_Type);
     Py_VISIT(state->BinaryReader_Type);
+    Py_VISIT(state->GCMonitor_Type);
     return 0;
 }
 
@@ -1259,6 +1453,7 @@ remote_debugging_clear(PyObject *mod)
     Py_CLEAR(state->AwaitedInfo_Type);
     Py_CLEAR(state->BinaryWriter_Type);
     Py_CLEAR(state->BinaryReader_Type);
+    Py_CLEAR(state->GCMonitor_Type);
     return 0;
 }
 
@@ -1837,10 +2032,84 @@ _remote_debugging_is_python_process_impl(PyObject *module, int pid)
     Py_RETURN_TRUE;
 }
 
+/*[clinic input]
+_remote_debugging.get_gc_stats
+
+    pid: int
+    *
+    all_interpreters: bool = False
+        If True, return GC statistics from all interpreters.
+        If False, return only from main interpreter.
+
+Get garbage collector statistics from external Python process.
+
+Returns:
+    List of dicts.
+    dict: A dictionary containing:
+        - gen:
+        - iid:
+        - ts_start:
+        - ts_stop:
+        - heap_size:
+        - collections:
+        - collected:
+        - uncollectable:
+        - candidates:
+        - duration:
+
+Raises:
+    RuntimeError:
+[clinic start generated code]*/
+
+static PyObject *
+_remote_debugging_get_gc_stats_impl(PyObject *module, int pid,
+                                    int all_interpreters)
+/*[clinic end generated code: output=d9dce5f7add149bb input=6a2afe531da4cfda]*/
+{
+    RuntimeOffsets offsets;
+    PyObject *result = NULL;
+
+    if (_Py_RemoteDebug_InitProcHandle(&offsets.handle, pid) < 0) {
+        _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize process handle");
+        return NULL;
+    }
+
+    offsets.runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&offsets.handle);
+    if (offsets.runtime_start_address == 0) {
+        _set_debug_exception_cause(PyExc_RuntimeError, "Failed to get Python runtime address");
+        goto cleanup;
+    }
+
+    if (_Py_RemoteDebug_ReadDebugOffsets(&offsets.handle,
+                                         &offsets.runtime_start_address,
+                                         &offsets.debug_offsets) < 0)
+    {
+        _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read debug offsets");
+        goto cleanup;
+    }
+
+    // Validate that the debug offsets are valid
+    if (validate_debug_offsets(&offsets.debug_offsets) == -1) {
+        _set_debug_exception_cause(PyExc_RuntimeError, "Invalid debug offsets found");
+        goto cleanup;
+    }
+
+    // get_gc_stats() returns NULL on failure, which is passed on to the caller.
+    result = get_gc_stats(&offsets, all_interpreters);
+
+cleanup:
+    // Reached on success and on failure: the handle opened above is released.
+    _Py_RemoteDebug_ClearCache(&offsets.handle);
+    _Py_RemoteDebug_CleanupProcHandle(&offsets.handle);
+
+    return result;
+}
+
 static PyMethodDef remote_debugging_methods[] = {
     _REMOTE_DEBUGGING_ZSTD_AVAILABLE_METHODDEF
     _REMOTE_DEBUGGING_GET_CHILD_PIDS_METHODDEF
     _REMOTE_DEBUGGING_IS_PYTHON_PROCESS_METHODDEF
+    _REMOTE_DEBUGGING_GET_GC_STATS_METHODDEF
     {NULL, NULL, 0, NULL},
 };
 
diff --git a/PCbuild/_remote_debugging.vcxproj b/PCbuild/_remote_debugging.vcxproj
index 0e86ce9f4c918c..688ac44d83d9ed 100644
---
a/PCbuild/_remote_debugging.vcxproj +++ b/PCbuild/_remote_debugging.vcxproj @@ -108,10 +108,12 @@ + + diff --git a/PCbuild/_remote_debugging.vcxproj.filters b/PCbuild/_remote_debugging.vcxproj.filters index 59d4d5c5c335fb..e3252a4eadde07 100644 --- a/PCbuild/_remote_debugging.vcxproj.filters +++ b/PCbuild/_remote_debugging.vcxproj.filters @@ -42,6 +42,9 @@ Source Files + + Source Files + @@ -50,6 +53,9 @@ Header Files + + Header Files + diff --git a/Python/gc.c b/Python/gc.c index 59bed10c1fb230..ece9d02b57c782 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1405,7 +1405,6 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats) memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats)); cur_stats->ts_start = stats->ts_start; - cur_stats->ts_stop = stats->ts_stop; cur_stats->collections += 1; cur_stats->collected += stats->collected; @@ -1413,6 +1412,7 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats) cur_stats->candidates += stats->candidates; cur_stats->duration += stats->duration; + cur_stats->ts_stop = stats->ts_stop; } /* This is the main function. Read this to understand how the