Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions Include/internal/pycore_stackref.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,32 @@ _PyStackRef_FromPyObjectBorrow(PyObject *obj, const char *filename, int linenumb
}
#define PyStackRef_FromPyObjectBorrow(obj) _PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj), __FILE__, __LINE__)

/* Build the operand bits for BORROW variants: the address of obj, taken as
   an integer, with Py_TAG_REFCNT OR-ed in. */
static inline uintptr_t
PyStackRef_TagBorrow(PyObject *obj)
{
    uintptr_t bits = (uintptr_t)obj;
    bits |= Py_TAG_REFCNT;
    return bits;
}

/* Strip tag bits from a pre-tagged operand to recover the PyObject pointer. */
static inline PyObject *
PyStackRef_UntagBorrow(uintptr_t tagged)
{
return (PyObject *)(tagged & ~Py_TAG_BITS);
}

/* Create a stackref from a pre-tagged operand (tag bits already set).
Used by _LOAD_CONST_INLINE_BORROW variants where the operand is
tagged at trace creation time to avoid tagging on every execution. */
static inline _PyStackRef
_PyStackRef_FromPreTagged(uintptr_t tagged, const char *filename, int linenumber)
{
assert(tagged & Py_TAG_REFCNT);
PyObject *obj = (PyObject *)(tagged & ~Py_TAG_BITS);
return _Py_stackref_create(obj, Py_TAG_REFCNT, filename, linenumber);
}
#define PyStackRef_FromPreTagged(tagged) _PyStackRef_FromPreTagged((tagged), __FILE__, __LINE__)

static inline void
_PyStackRef_CLOSE(_PyStackRef ref, const char *filename, int linenumber)
{
Expand Down Expand Up @@ -617,6 +643,30 @@ PyStackRef_FromPyObjectBorrow(PyObject *obj)
return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT};
}

/* Return the address of obj as an integer with Py_TAG_REFCNT set, for use
   as the operand of BORROW variants. */
static inline uintptr_t
PyStackRef_TagBorrow(PyObject *obj)
{
    const uintptr_t address = (uintptr_t)obj;
    return address | Py_TAG_REFCNT;
}

/* Strip tag bits from a pre-tagged operand to recover the PyObject pointer. */
static inline PyObject *
PyStackRef_UntagBorrow(uintptr_t tagged)
{
return (PyObject *)(tagged & ~Py_TAG_BITS);
}

/* Create a stackref from a pre-tagged operand (tag bits already set).
Used by _LOAD_CONST_INLINE_BORROW variants where the operand is
tagged at trace creation time to avoid tagging on every execution. */
static inline _PyStackRef
PyStackRef_FromPreTagged(uintptr_t tagged)
{
assert(tagged & Py_TAG_REFCNT);
return (_PyStackRef){ .bits = tagged };
}

/* WARNING: This macro evaluates its argument more than once */
#ifdef _WIN32
#define PyStackRef_DUP(REF) \
Expand Down
11 changes: 11 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions Lib/test/test_generated_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,34 @@ def test_cache_effect(self):
"""
self.run_cases_test(input, output)

def test_pretagged_cache_effect(self):
# A cache entry written with the '^' marker (ptr/4^ below) is expected to be
# emitted as a uintptr_t local loaded with read_u64, and the instruction is
# expected to advance next_instr by 5.
input = """
inst(OP, (ptr/4^, value --)) {
DEAD(value);
}
"""
output = """
TARGET(OP) {
#if _Py_TAIL_CALL_INTERP
int opcode = OP;
(void)(opcode);
#endif
_Py_CODEUNIT* const this_instr = next_instr;
(void)this_instr;
frame->instr_ptr = next_instr;
next_instr += 5;
INSTRUCTION_STATS(OP);
_PyStackRef value;
value = stack_pointer[-1];
uintptr_t ptr = read_u64(&this_instr[1].cache);
(void)ptr;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
DISPATCH();
}
"""
# Feed the definition through the shared helper and compare with the expected text.
self.run_cases_test(input, output)

def test_suppress_dispatch(self):
input = """
label(somewhere) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Optimize _LOAD_CONST_INLINE_BORROW by pre-tagging operands at trace
creation. Patch by Donghee Na.
4 changes: 2 additions & 2 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -6103,8 +6103,8 @@ dummy_func(
value = PyStackRef_FromPyObjectNew(ptr);
}

tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
value = PyStackRef_FromPyObjectBorrow(ptr);
tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4^ -- value)) {
value = PyStackRef_FromPreTagged(ptr);
}

tier2 op(_START_EXECUTOR, (executor/4 --)) {
Expand Down
12 changes: 6 additions & 6 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 3 additions & 7 deletions Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,8 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
if (res == NULL) {
return NULL;
}
if (_Py_IsImmortal(res)) {
inst->opcode = _LOAD_CONST_INLINE_BORROW;
} else {
inst->opcode = _LOAD_CONST_INLINE;
}
inst->operand0 = (uint64_t)res;
inst->opcode = _Py_IsImmortal(res) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
inst->operand0 = _PyUop_PrepareOperand0(inst->opcode, (uint64_t)res);
return res;
}

Expand Down Expand Up @@ -233,7 +229,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
out->format = this_instr->format;
out->oparg = (oparg);
out->target = this_instr->target;
out->operand0 = (operand0);
out->operand0 = _PyUop_PrepareOperand0(opcode, (uint64_t)operand0);
out->operand1 = this_instr->operand1;
#ifdef Py_STATS
out->fitness = this_instr->fitness;
Expand Down
4 changes: 2 additions & 2 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,8 +891,8 @@ dummy_func(void) {
value = sym_new_const(ctx, ptr);
}

op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
op(_LOAD_CONST_INLINE_BORROW, (ptr/4^ -- value)) {
value = PyJitRef_Borrow(sym_new_const(ctx, PyStackRef_UntagBorrow(ptr)));
}

op(_POP_TOP_OPARG, (args[oparg] --)) {
Expand Down
4 changes: 2 additions & 2 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 21 additions & 3 deletions Tools/cases_generator/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,21 @@ def __str__(self) -> str:
class CacheEntry:
# A named cache entry: its name, its size, and whether it carries the
# '^' (pretagged) marker.
name: str
size: int
pretagged: bool = False

@classmethod
def from_parsed(cls, effect: parser.CacheEffect) -> "CacheEntry":
# Build an entry from a parsed cache effect, copying name, size and the
# pretagged flag. A pretagged effect whose size is not 4 is rejected by
# raising analysis_error(...) with the effect's first token.
if effect.pretagged and effect.size != 4:
raise analysis_error(
f"'^' (pretagged) marker requires size /4, "
f"got /{effect.size}",
effect.tokens[0],
)
return cls(effect.name, effect.size, effect.pretagged)

def __str__(self) -> str:
return f"{self.name}/{self.size}"
suffix = "^" if self.pretagged else ""
return f"{self.name}/{self.size}{suffix}"


@dataclass
Expand Down Expand Up @@ -433,7 +445,7 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
position = "First" if index == 0 else "Last"
msg = f"{position} cache entry in op is unused. Move to enclosing macro."
raise analysis_error(msg, cache.tokens[0])
return [CacheEntry(i.name, int(i.size)) for i in caches]
return [CacheEntry.from_parsed(i) for i in caches]


def find_variable_stores(node: parser.InstDef) -> list[lexer.Token]:
Expand Down Expand Up @@ -600,6 +612,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool:
"PyStackRef_CLOSE_SPECIALIZED",
"PyStackRef_DUP",
"PyStackRef_False",
"PyStackRef_FromPreTagged",
"PyStackRef_FromPyObjectBorrow",
"PyStackRef_FromPyObjectNew",
"PyStackRef_FromPyObjectSteal",
Expand Down Expand Up @@ -766,7 +779,7 @@ def escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[SimpleStmt, Esca
continue
#if not tkn.text.startswith(("Py", "_Py", "monitor")):
# continue
if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")):
if tkn.text.startswith(("sym_", "optimize_", "PyJitRef", "PyStackRef_Tag", "PyStackRef_Untag")):
# Optimize functions
continue
if tkn.text.endswith("Check"):
Expand Down Expand Up @@ -1111,6 +1124,11 @@ def desugar_inst(
# Move unused cache entries to the Instruction, removing them from the Uop.
for input in inst.inputs:
if isinstance(input, parser.CacheEffect) and input.name == "unused":
if input.pretagged:
raise analysis_error(
"'unused' cache slot cannot carry a '^' marker",
input.tokens[0],
)
parts.append(Skip(input.size))
else:
op_inputs.append(input)
Expand Down
15 changes: 14 additions & 1 deletion Tools/cases_generator/interpreter_definition.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ and a piece of C code describing its semantics:
NAME ["*"]

stream:
NAME "/" size
NAME "/" size ["^"]

size:
INTEGER
Expand Down Expand Up @@ -162,6 +162,10 @@ instruction stream. It returns a 16, 32 or 64 bit value.
If the name is `unused` the size can be any value and that many codeunits
will be skipped in the instruction stream.

A `/4` cache entry followed by `^` is read as a raw `uintptr_t` instead of a
`PyObject *`; the marker indicates that its bits are already tagged as a
borrowed `_PyStackRef`. The `^` marker is only allowed on `/4` entries and
may not be applied to `unused`.

By convention cache effects (`stream`) must precede the input effects.

The name `oparg` is pre-defined as a 32 bit value fetched from the instruction stream.
Expand Down Expand Up @@ -313,6 +317,15 @@ This might become (if it was an instruction):
}
```

### Pre-tagged cache effect
```C
op ( LOAD_CONST_INLINE_BORROW, (ptr/4^ -- value) ) {
value = PyStackRef_FromPreTagged(ptr);
}
```
The `^` marks `ptr` as a pre-tagged borrowed `_PyStackRef`; it is declared
`uintptr_t` and read with `read_u64`, rather than being declared `PyObject *`
and read with `read_obj`.

### More examples

For explanations see "Generating the interpreter" below.
Expand Down
5 changes: 4 additions & 1 deletion Tools/cases_generator/optimizer_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,10 @@ def write_uop(
idx = 0
for cache in uop.caches:
if cache.name != "unused":
if cache.size == 4:
if cache.pretagged:
type = "uintptr_t "
cast = "uintptr_t"
elif cache.size == 4:
type = cast = "PyObject *"
else:
type = f"uint{cache.size*16}_t "
Expand Down
9 changes: 6 additions & 3 deletions Tools/cases_generator/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ class Expression(Node):
class CacheEffect(Node):
name: str
size: int
pretagged: bool = False


@dataclass
Expand Down Expand Up @@ -449,16 +450,18 @@ def output(self) -> OutputEffect | None:

@contextual
def cache_effect(self) -> CacheEffect | None:
# IDENTIFIER '/' NUMBER
# IDENTIFIER '/' NUMBER ['^']
# The optional '^' marks the slot's bits as a pre-tagged _PyStackRef
# (see PyStackRef_FromPreTagged in pycore_stackref.h).
if tkn := self.expect(lx.IDENTIFIER):
if self.expect(lx.DIVIDE):
num = self.require(lx.NUMBER).text
try:
size = int(num)
except ValueError:
raise self.make_syntax_error(f"Expected integer, got {num!r}")
else:
return CacheEffect(tkn.text, size)
# The '^' marker is matched as the lexer's XOR token; absent means not pretagged.
pretagged = bool(self.expect(lx.XOR))
return CacheEffect(tkn.text, size, pretagged)
return None

@contextual
Expand Down
7 changes: 6 additions & 1 deletion Tools/cases_generator/tier1_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,12 @@ def write_uop(

for cache in uop.caches:
if cache.name != "unused":
if cache.size == 4:
if cache.pretagged:
# Read raw bits; read_obj would falsely declare PyObject* for
# what is actually a tagged uintptr_t.
type = "uintptr_t "
reader = "read_u64"
elif cache.size == 4:
type = "PyObject *"
reader = "read_obj"
else:
Expand Down
5 changes: 4 additions & 1 deletion Tools/cases_generator/tier2_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,10 @@ def write_uop(uop: Uop, emitter: Tier2Emitter, stack: Stack, cached_items: int =
for cache in uop.caches:
if cache.name != "unused":
bits = cache.size*16
if cache.size == 4:
if cache.pretagged:
type = "uintptr_t "
cast = "uintptr_t"
elif cache.size == 4:
type = cast = "PyObject *"
else:
type = f"uint{bits}_t "
Expand Down
Loading
Loading