Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,21 @@ typedef struct {
uint8_t count;
uint8_t indices[MAX_RECORDED_VALUES];
} _PyOpcodeRecordEntry;

typedef struct {
uint8_t count;
uint8_t transform_mask;
uint8_t slots[MAX_RECORDED_VALUES];
} _PyOpcodeRecordSlotMap;

PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
PyAPI_DATA(const _PyOpcodeRecordSlotMap) _PyOpcode_RecordSlotMaps[256];

/* Convert a family-recorded value to the form a recorder uop expects.
* If no transform is needed, return the input value unchanged.
* Takes ownership of `value` and returns a new strong reference or NULL.
*/
PyAPI_FUNC(PyObject *) _PyOpcode_RecordTransformValue(int uop, PyObject *value);
#endif

#ifdef __cplusplus
Expand Down
13 changes: 13 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5849,6 +5849,19 @@ def testfunc(n):
self.assertNotIn("_LOAD_SUPER_ATTR_METHOD", uops)
self.assertEqual(uops.count("_GUARD_NOS_TYPE_VERSION"), 2)

def test_settrace_then_polymorphic_call_does_not_crash(self):
script_helper.assert_python_ok("-c", textwrap.dedent("""
import sys
sys.settrace(lambda *_: None)
sys.settrace(None)

class C:
def __init__(self, x):
pass

for i in 0, 1, 0, 1:
C(0) if i else str(0)
"""))

def global_identity(x):
return x
Expand Down
187 changes: 184 additions & 3 deletions Lib/test/test_generated_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -2074,19 +2074,33 @@ def tearDown(self) -> None:
pass
super().tearDown()

def generate_tables(self, input: str) -> str:
import io
def analyze_input(self, input: str):
with open(self.temp_input_filename, "w+") as f:
f.write(parser.BEGIN_MARKER)
f.write(input)
f.write(parser.END_MARKER)
with handle_stderr():
analysis = analyze_files([self.temp_input_filename])
return analyze_files([self.temp_input_filename])

def generate_tables(self, input: str) -> str:
import io
analysis = self.analyze_input(input)
buf = io.StringIO()
out = CWriter(buf, 0, False)
record_function_generator.generate_recorder_tables(analysis, out)
return buf.getvalue()

def get_slot_map_section(self, output: str) -> str:
return output.split(
"const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n",
1,
)[1].split("};\n\n", 1)[0]

def assert_slot_map_lines(self, output: str, *lines: str) -> None:
slot_map_section = self.get_slot_map_section(output)
for line in lines:
self.assertIn(line, slot_map_section)

def test_single_recording_uop_generates_count(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
Expand Down Expand Up @@ -2145,6 +2159,173 @@ def test_four_recording_uops_rejected(self):
with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
self.generate_tables(input)

def test_family_member_needs_transform_only_when_shape_changes(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
RECORD_VALUE(value);
}
tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
RECORD_VALUE(Py_TYPE(value));
}
op(_DO_STUFF, (value -- res)) {
res = value;
}
macro(OP_RAW) = _RECORD_TOS + _DO_STUFF;
macro(OP_RAW_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
family(OP_RAW, INLINE_CACHE_ENTRIES_OP_RAW) = { OP_RAW_SPECIALIZED };

macro(OP_TYPED) = _RECORD_TOS_TYPE + _DO_STUFF;
macro(OP_TYPED_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
family(OP_TYPED, INLINE_CACHE_ENTRIES_OP_TYPED) = { OP_TYPED_SPECIALIZED };
"""
output = self.generate_tables(input)
self.assert_slot_map_lines(
output,
"[OP_RAW] = {1, 1, {0}}",
"[OP_RAW_SPECIALIZED] = {1, 0, {0}}",
"[OP_TYPED] = {1, 0, {0}}",
"[OP_TYPED_SPECIALIZED] = {1, 0, {0}}",
)
Comment thread
markshannon marked this conversation as resolved.

def test_family_member_maps_positional_recorders_to_family_slots(self):
input = """
tier2 op(_RECORD_TOS, (sub -- sub)) {
RECORD_VALUE(sub);
}
tier2 op(_RECORD_NOS, (container, sub -- container, sub)) {
RECORD_VALUE(container);
}
op(_DO_STUFF, (container, sub -- res)) {
res = container;
}
macro(OP) = _RECORD_TOS + _RECORD_NOS + _DO_STUFF;
macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
output = self.generate_tables(input)
self.assert_slot_map_lines(
output,
"[OP] = {2, 0, {1, 0}}",
"[OP_SPECIALIZED] = {1, 0, {0}}",
)

def test_family_member_maps_non_positional_recorders_by_stack_shape(self):
input = """
tier2 op(_RECORD_CALLABLE, (callable, self, args[oparg] -- callable, self, args[oparg])) {
RECORD_VALUE(callable);
}
tier2 op(_RECORD_BOUND_METHOD, (callable, self, args[oparg] -- callable, self, args[oparg])) {
RECORD_VALUE(callable);
}
op(_DO_STUFF, (callable, self, args[oparg] -- res)) {
res = callable;
}
macro(OP) = _RECORD_CALLABLE + _DO_STUFF;
macro(OP_SPECIALIZED) = _RECORD_BOUND_METHOD + _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
output = self.generate_tables(input)
self.assert_slot_map_lines(
output,
"[OP] = {1, 1, {0}}",
"[OP_SPECIALIZED] = {1, 0, {0}}",
)

def test_family_head_records_union_of_member_recorders(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
RECORD_VALUE(value);
}
op(_DO_STUFF, (value -- res)) {
res = value;
}
macro(OP) = _DO_STUFF;
macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
output = self.generate_tables(input)
self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")

def test_family_detects_base_and_specialized_recording_difference(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
RECORD_VALUE(value);
}
tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
RECORD_VALUE(Py_TYPE(value));
}
op(_DO_STUFF, (value -- res)) {
res = value;
}
macro(OP) = _RECORD_TOS + _DO_STUFF;
macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
analysis = self.analyze_input(input)
output = self.generate_tables(input)
self.assertEqual(
record_function_generator.get_instruction_record_names(
analysis.instructions["OP"]
),
["_RECORD_TOS"],
)
self.assertEqual(
record_function_generator.get_instruction_record_names(
analysis.instructions["OP_SPECIALIZED"]
),
["_RECORD_TOS_TYPE"],
)
self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
self.assert_slot_map_lines(
output,
"[OP] = {1, 1, {0}}",
"[OP_SPECIALIZED] = {1, 0, {0}}",
)

def test_family_head_falls_back_for_missing_member_slots(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
RECORD_VALUE(value);
}
op(_DO_STUFF, (value -- res)) {
res = value;
}
macro(OP) = _RECORD_TOS + _DO_STUFF;
macro(OP_SPECIALIZED) = _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
output = self.generate_tables(input)
self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)

def test_family_mixed_slots_only_transform_changed_recorders(self):
input = """
tier2 op(_RECORD_TOS_TYPE, (left, right -- left, right)) {
RECORD_VALUE(Py_TYPE(right));
}
tier2 op(_RECORD_NOS_TYPE, (left, right -- left, right)) {
RECORD_VALUE(Py_TYPE(left));
}
tier2 op(_RECORD_NOS, (left, right -- left, right)) {
RECORD_VALUE(left);
}
op(_DO_STUFF, (left, right -- res)) {
res = left;
}
macro(OP) = _RECORD_TOS_TYPE + _RECORD_NOS_TYPE + _DO_STUFF;
macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
output = self.generate_tables(input)
self.assertIn("[OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}", output)
self.assert_slot_map_lines(
output,
"[OP] = {2, 2, {1, 0}}",
"[OP_SPECIALIZED] = {1, 0, {0}}",
)

class TestGeneratedAbstractCases(unittest.TestCase):
def setUp(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a crash in the JIT optimizer when specialized opcode families inherited incompatible recorded operand layouts.
51 changes: 49 additions & 2 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,44 @@ is_terminator(const _PyUOpInstruction *uop)
);
}

static PyObject *
record_trace_transform_to_type(PyObject *value)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like these to be generated from the recording uops, but that can wait for another PR.

{
PyObject *tp = Py_NewRef((PyObject *)Py_TYPE(value));
Py_DECREF(value);
return tp;
}

/* _RECORD_NOS_GEN_FUNC and _RECORD_3OS_GEN_FUNC record the raw receiver.
* If it is a generator, return its function object; otherwise return NULL.
*/
static PyObject *
record_trace_transform_gen_func(PyObject *value)
{
PyObject *func = NULL;
if (PyGen_Check(value)) {
_PyStackRef f = ((PyGenObject *)value)->gi_iframe.f_funcobj;
if (!PyStackRef_IsNull(f)) {
func = Py_NewRef(PyStackRef_AsPyObjectBorrow(f));
}
}
Py_DECREF(value);
return func;
}

/* _RECORD_BOUND_METHOD records the raw callable.
* Keep it only for bound methods; otherwise return NULL.
*/
static PyObject *
record_trace_transform_bound_method(PyObject *value)
{
if (Py_TYPE(value) == &PyMethod_Type) {
return value;
}
Py_DECREF(value);
return NULL;
}

/* Returns 1 on success (added to trace), 0 on trace end.
*/
// gh-142543: inlining this function causes stack overflows
Expand Down Expand Up @@ -831,6 +869,8 @@ _PyJit_translate_single_bytecode_to_trace(
// One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
trace->end -= 2;

const _PyOpcodeRecordSlotMap *record_slot_map = &_PyOpcode_RecordSlotMaps[opcode];

assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
assert(!_PyErr_Occurred(tstate));

Expand Down Expand Up @@ -1027,8 +1067,15 @@ _PyJit_translate_single_bytecode_to_trace(
}
}
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
tracer->prev_state.recorded_values[record_idx] = NULL;
assert(record_idx < record_slot_map->count);
uint8_t record_slot = record_slot_map->slots[record_idx];
assert(record_slot < tracer->prev_state.recorded_count);
PyObject *recorded_value = tracer->prev_state.recorded_values[record_slot];
tracer->prev_state.recorded_values[record_slot] = NULL;
if ((record_slot_map->transform_mask & (1u << record_idx)) &&
recorded_value != NULL) {
recorded_value = _PyOpcode_RecordTransformValue(uop, recorded_value);
}
Comment on lines +1074 to +1078
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot avoid storing the original values in the family record; we can only convert and release them as early as possible during the copy to uop process.

record_idx++;
operand = (uintptr_t)recorded_value;
}
Expand Down
Loading
Loading