From 6a1b24b0742d7c7931a45a76a5741401522a011d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Jun 2025 15:22:17 -0700 Subject: [PATCH 1/2] Clear weakrefs created by finalizers. Weakrefs to unreachable garbage that are created during running of finalizers need to be cleared. This avoids exposing objects that have `tp_clear` called on them to Python-level code. --- Lib/test/test_gc.py | 20 ++++-- ...5-07-07-17-26-06.gh-issue-91636.GyHU72.rst | 3 + Python/gc.c | 64 +++++++++++++++++++ Python/gc_free_threading.c | 50 ++++++++++++++- 4 files changed, 127 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..85c43055d0dcec 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -262,9 +262,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,14 +295,18 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. + self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) @refcount_test diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst new file mode 100644 index 00000000000000..09c192f9c5657e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst @@ -0,0 +1,3 @@ +While performing garbage collection, clear weakrefs to unreachable objects +that are created during running of finalizers. If those weakrefs were are +not cleared, they could reveal unreachable objects. diff --git a/Python/gc.c b/Python/gc.c index 02135a3fb442d6..775b7cff9232b3 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1037,6 +1037,62 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) return num_freed; } +/* Clear all weakrefs to unreachable objects. When this returns, no object in + * `unreachable` is weakly referenced anymore. + */ +static void +clear_weakrefs(PyGC_Head *unreachable) +{ + PyGC_Head *gc; + PyGC_Head *next; + + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set must be cleared. If we + * allow its callback to execute inside delete_garbage(), it + * could expose objects that have tp_clear already called on + * them. Or, it could resurrect unreachable objects. One way + * this can happen is if some container objects do not implement + * tp_traverse. Then, wr_object can be outside the unreachable + * set but can be deallocated as a result of breaking the + * reference cycle. If we don't clear the weakref, the callback + * will run and potentially cause a crash. See bpo-38006 for + * one example. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs. + */ + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void debug_cycle(const char *msg, PyObject *op) { @@ -1751,6 +1807,14 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); + /* Clear weakrefs to objects in the unreachable set. No Python-level + * code must be allowed to access those unreachable objects. During + * delete_garbage(), finalizers outside the unreachable set might run + * and create new weakrefs. If those weakrefs were not cleared, they + * could reveal unreachable objects. + */ + clear_weakrefs(&final_unreachable); + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5aaa68c5b51f95..910b6435294451 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1494,7 +1494,7 @@ move_legacy_finalizer_reachable(struct collection_state *state) // Clear all weakrefs to unreachable objects. Weakrefs with callbacks are // enqueued in `wrcb_to_call`, but not invoked yet. static void -clear_weakrefs(struct collection_state *state) +handle_weakrefs(struct collection_state *state) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { @@ -1545,6 +1545,42 @@ clear_weakrefs(struct collection_state *state) } } +// Clear all weakrefs to unreachable objects. +static void +clear_weakrefs(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (PyWeakref_Check(op)) { + // Clear weakrefs that are themselves unreachable to ensure their + // callbacks will not be executed later from a `tp_clear()` + // inside delete_garbage(). That would be unsafe: it could + // resurrect a dead object or access a an already cleared object. + // See bpo-38006 for one example. + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs. + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + // _PyWeakref_ClearRef clears the weakref but leaves + // the callback pointer intact. Obscure: it also + // changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void call_weakref_callbacks(struct collection_state *state) { @@ -2211,7 +2247,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, interp->gc.long_lived_total = state->long_lived_total; // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state); + handle_weakrefs(state); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2222,11 +2258,19 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, call_weakref_callbacks(state); finalize_garbage(state); - // Handle any objects that may have resurrected after the finalization. _PyEval_StopTheWorld(interp); + // Handle any objects that may have resurrected after the finalization. err = handle_resurrected_objects(state); // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); + if (err == 0) { + // Clear weakrefs to objects in the unreachable set. No Python-level + // code must be allowed to access those unreachable objects. During + // delete_garbage(), finalizers outside the unreachable set might + // run and create new weakrefs. If those weakrefs were not cleared, + // they could reveal unreachable objects. + clear_weakrefs(state); + } _PyEval_StartTheWorld(interp); if (err < 0) { From 412bc2af33132aaf048a157636a20b0c909e1d94 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Jul 2025 09:28:17 -0700 Subject: [PATCH 2/2] Reduce code duplication. Add a boolean flag to indicate that callbacks should be enabled. This allows the same function to be used before and after finalizer execution. --- Python/gc.c | 77 ++++++++------------------------------ Python/gc_free_threading.c | 48 ++++-------------------- 2 files changed, 24 insertions(+), 101 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 775b7cff9232b3..88849a43680d2e 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -870,7 +870,7 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) * no object in `unreachable` is weakly referenced anymore. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) { PyGC_Head *gc; PyObject *op; /* generally FROM_GC(gc) */ @@ -879,7 +879,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyGC_Head *next; int num_freed = 0; - gc_list_init(&wrcb_to_call); + if (allow_callbacks) { + gc_list_init(&wrcb_to_call); + } /* Clear all weakrefs to the objects in unreachable. If such a weakref * also has a callback, move it into `wrcb_to_call` if the callback @@ -935,6 +937,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + + if (!allow_callbacks) { + continue; + } + if (wr->wr_callback == NULL) { /* no callback */ continue; @@ -987,6 +994,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) } } + if (!allow_callbacks) { + return 0; + } + /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ @@ -1037,62 +1048,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) return num_freed; } -/* Clear all weakrefs to unreachable objects. When this returns, no object in - * `unreachable` is weakly referenced anymore. - */ -static void -clear_weakrefs(PyGC_Head *unreachable) -{ - PyGC_Head *gc; - PyGC_Head *next; - - for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyWeakReference **wrlist; - - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { - continue; - } - - /* It supports weakrefs. Does it have any? - * - * This is never triggered for static types so we can avoid the - * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). - */ - wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs. - */ - for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - } - } -} - static void debug_cycle(const char *msg, PyObject *op) { @@ -1793,7 +1748,7 @@ gc_collect_region(PyThreadState *tstate, } /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); + stats->collected += handle_weakrefs(&unreachable, to, true); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1811,9 +1766,9 @@ gc_collect_region(PyThreadState *tstate, * code must be allowed to access those unreachable objects. During * delete_garbage(), finalizers outside the unreachable set might run * and create new weakrefs. If those weakrefs were not cleared, they - * could reveal unreachable objects. + * could reveal unreachable objects. Callbacks are not executed. */ - clear_weakrefs(&final_unreachable); + handle_weakrefs(&final_unreachable, NULL, false); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 910b6435294451..d46598b23b3b2f 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1492,9 +1492,9 @@ move_legacy_finalizer_reachable(struct collection_state *state) } // Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// enqueued in `wrcb_to_call`, but not invoked yet. +// optionally enqueued in `wrcb_to_call`, but not invoked yet. static void -handle_weakrefs(struct collection_state *state) +clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { @@ -1526,6 +1526,10 @@ handle_weakrefs(struct collection_state *state) _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + if (!enqueue_callbacks) { + continue; + } + // We do not invoke callbacks for weakrefs that are themselves // unreachable. This is partly for historical reasons: weakrefs // predate safe object finalization, and a weakref that is itself @@ -1545,42 +1549,6 @@ handle_weakrefs(struct collection_state *state) } } -// Clear all weakrefs to unreachable objects. -static void -clear_weakrefs(struct collection_state *state) -{ - PyObject *op; - WORKSTACK_FOR_EACH(&state->unreachable, op) { - if (PyWeakref_Check(op)) { - // Clear weakrefs that are themselves unreachable to ensure their - // callbacks will not be executed later from a `tp_clear()` - // inside delete_garbage(). That would be unsafe: it could - // resurrect a dead object or access a an already cleared object. - // See bpo-38006 for one example. - _PyWeakref_ClearRef((PyWeakReference *)op); - } - - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { - continue; - } - - // NOTE: This is never triggered for static types so we can avoid the - // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). - PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - - // `op` may have some weakrefs. March over the list, clear - // all the weakrefs. - for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { - // _PyWeakref_ClearRef clears the weakref but leaves - // the callback pointer intact. Obscure: it also - // changes *wrlist. - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - } - } -} - static void call_weakref_callbacks(struct collection_state *state) { @@ -2247,7 +2215,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, interp->gc.long_lived_total = state->long_lived_total; // Clear weakrefs and enqueue callbacks (but do not call them). - handle_weakrefs(state); + clear_weakrefs(state, true); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2269,7 +2237,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // delete_garbage(), finalizers outside the unreachable set might // run and create new weakrefs. If those weakrefs were not cleared, // they could reveal unreachable objects. - clear_weakrefs(state); + clear_weakrefs(state, false); } _PyEval_StartTheWorld(interp);