Skip to content

Commit 7551b65

Browse files
inlining decision pass
1 parent f165138 commit 7551b65

File tree

1 file changed

+57
-41
lines changed

1 file changed

+57
-41
lines changed

Python/optimizer_analysis.c

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -211,46 +211,6 @@ abstractcontext_init(
211211
return 0;
212212
}
213213

214-
static int
215-
frame_is_inlineable(_Py_UOpsAbstractInterpContext *ctx,
216-
_Py_UOpsAbstractFrame *frame)
217-
{
218-
if (frame->push_frame == NULL || frame->pop_frame == NULL) {
219-
return 0;
220-
}
221-
PyFunctionObject *obj = (PyFunctionObject *)frame->push_frame->operand;
222-
if (obj == NULL) {
223-
return 0;
224-
}
225-
PyCodeObject *co = obj->func_code;
226-
if (co == NULL) {
227-
return 0;
228-
}
229-
// Ban closures
230-
if (co->co_ncellvars > 0 || co->co_nfreevars > 0) {
231-
DPRINTF(3, "inline_fail: closure\n");
232-
return 0;
233-
}
234-
// Ban generators, async, etc.
235-
int flags = co->co_flags;
236-
if ((flags & CO_COROUTINE) ||
237-
(flags & CO_GENERATOR) ||
238-
(flags & CO_ITERABLE_COROUTINE) ||
239-
(flags & CO_ASYNC_GENERATOR) ||
240-
// TODO we can support these in the future.
241-
(flags & CO_VARKEYWORDS) ||
242-
(flags & CO_VARARGS)) {
243-
DPRINTF(3, "inline_fail: generator/coroutine\n");
244-
return 0;
245-
}
246-
// Somewhat arbitrary, but if the stack is too big, we will copy a lot
247-
// more on deopt, making it not really worth it.
248-
if (co->co_stacksize > 32 || co->co_nlocalsplus > 32) {
249-
return 0;
250-
}
251-
return 1;
252-
}
253-
254214
static int
255215
ctx_frame_pop(
256216
_Py_UOpsAbstractInterpContext *ctx
@@ -777,9 +737,47 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
777737
}
778738
}
779739

740+
static int
741+
function_decide_inlineable(PyFunctionObject *func)
742+
{
743+
if (func == NULL) {
744+
return 0;
745+
}
746+
PyCodeObject *co = func->func_code;
747+
if (co == NULL) {
748+
return 0;
749+
}
750+
// Ban closures
751+
if (co->co_ncellvars > 0 || co->co_nfreevars > 0) {
752+
DPRINTF(2, "inline_fail: closure\n");
753+
return 0;
754+
}
755+
// Ban generators, async, etc.
756+
int flags = co->co_flags;
757+
if ((flags & CO_COROUTINE) ||
758+
(flags & CO_GENERATOR) ||
759+
(flags & CO_ITERABLE_COROUTINE) ||
760+
(flags & CO_ASYNC_GENERATOR) ||
761+
// TODO we can support these in the future.
762+
(flags & CO_VARKEYWORDS) ||
763+
(flags & CO_VARARGS)) {
764+
DPRINTF(2, "inline_fail: generator/coroutine/varargs/varkeywords\n");
765+
return 0;
766+
}
767+
// Somewhat arbitrary, but if the stack is too big, we will copy a lot
768+
// more on deopt, making it not really worth it.
769+
if (co->co_stacksize > 64) {
770+
DPRINTF(2, "inline_fail: stack too big");
771+
return 0;
772+
}
773+
return 1;
774+
}
775+
780776
static void
781777
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
782778
{
779+
_PyUOpInstruction *push_frame[MAX_ABSTRACT_FRAME_DEPTH];
780+
int frame_depth = 1;
783781
PyCodeObject *co = (PyCodeObject *)frame->f_executable;
784782
for (int pc = 0; pc < buffer_size; pc++) {
785783
int opcode = buffer[pc].opcode;
@@ -800,7 +798,20 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
800798
}
801799
break;
802800
}
803-
case _PUSH_FRAME:
801+
case _PUSH_FRAME: {
802+
push_frame[frame_depth] = &buffer[pc];
803+
frame_depth++;
804+
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
805+
if (func == NULL) {
806+
co = NULL;
807+
}
808+
else {
809+
assert(PyFunction_Check(func));
810+
co = (PyCodeObject *)func->func_code;
811+
}
812+
assert(frame_depth <= MAX_ABSTRACT_FRAME_DEPTH);
813+
break;
814+
}
804815
case _POP_FRAME:
805816
{
806817
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
@@ -811,6 +822,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
811822
assert(PyFunction_Check(func));
812823
co = (PyCodeObject *)func->func_code;
813824
}
825+
frame_depth--;
826+
function_decide_inlineable(func);
827+
assert(frame_depth >= 1);
814828
break;
815829
}
816830
case _JUMP_TO_TOP:
@@ -820,6 +834,8 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
820834
}
821835
}
822836

837+
838+
823839
// 0 - failure, no error raised, just fall back to Tier 1
824840
// -1 - failure, and raise error
825841
// 1 - optimizer success

0 commit comments

Comments
 (0)