Skip to content

Commit b137e4e

Browse files
authored
[mypyc] Speed up native-to-native imports within the same group (#21101)
When compiling multiple modules, mypyc generally creates one big shared library with all the code, and also tiny shim shared libraries for each compiled module so that Python import machinery can find the modules. This is inefficient at least on macOS, since each shared library that is loaded into the process seems to have a non-trivial cost, including each shim. On the first run, this cost is much higher, and the first mypy run after `pip install` can take 30s or more on macOS.

This PR addresses the slow imports on macOS by adding a custom implementation of native-to-native imports within the same compilation group that avoids using the shim. We directly construct the module object, populate `sys.modules`, and set an attribute in the parent package, without using Python import machinery.

This speeds up a minimal mypy run (`mypy -c 'import os'`) on macOS by up to 10x (first cold run after installation), but even small warm runs are significantly faster. The measurements were all over the place, but in at least one measurement the minimal warm run was over 1.5x faster with these changes. Impact on Linux should be small (an earlier version of this PR was slightly faster on Linux, but I didn't measure the current one). I haven't measured the impact on Windows.

Some notes about the implementation:

* Group similar imported names in `from <...> import` and try to generate a single call to import multiple names to avoid verbose IR.
* When importing non-native modules or native modules defined in another group, we still rely on Python import machinery.
* Various attributes are implicitly defined by Python when importing a module, and I set these attributes explicitly.
* I split module init into two parts, since the attributes mentioned above need to be set before running the module body.
* Avoid generating shims for some `__init__.py` files when compiling mypy as a micro-optimization.
I used Claude Code and Codex to implement much of the code in small increments (based on a manually written core implementation). I also iterated on the code quite significantly after the basic implementation was done.
1 parent 978b711 commit b137e4e

File tree

12 files changed

+1378
-56
lines changed

12 files changed

+1378
-56
lines changed

mypyc/build.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,12 @@ def construct_groups(
465465
groups = []
466466
used_sources = set()
467467
for files, name in separate:
468-
group_sources = [src for src in sources if src.path in files]
468+
normalized_files = {os.path.normpath(f) for f in files}
469+
group_sources = [
470+
src
471+
for src in sources
472+
if src.path is not None and os.path.normpath(src.path) in normalized_files
473+
]
469474
groups.append((group_sources, name))
470475
used_sources.update(group_sources)
471476
unused_sources = [src for src in sources if src not in used_sources]

mypyc/codegen/emitmodule.py

Lines changed: 75 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,12 @@ def generate_module_def(self, emitter: Emitter, module_name: str, module: Module
991991
self.emit_module_def_slots(emitter, module_prefix, module_name)
992992
self.emit_module_def_struct(emitter, module_name, module_prefix)
993993
self.emit_module_init_func(emitter, module_name, module_prefix)
994+
elif self.use_shared_lib:
995+
# Multi-phase init with shared lib: shims handle PyInit_*, but we
996+
# still need CPyInitOnly_* for same-group native imports, and the
997+
# PyModuleDef struct it depends on.
998+
self.emit_module_def_struct(emitter, module_name, module_prefix)
999+
self.emit_init_only_func(emitter, module_name, module_prefix)
9941000

9951001
def emit_module_def_slots(
9961002
self, emitter: Emitter, module_prefix: str, module_name: str
@@ -1052,8 +1058,12 @@ def emit_module_def_struct(
10521058
f'"{module_name}",',
10531059
"NULL, /* docstring */",
10541060
"0, /* size of per-interpreter state of the module */",
1055-
f"{module_prefix}module_methods,",
10561061
)
1062+
if self.multi_phase_init:
1063+
# Methods are added later via PyModule_AddFunctions in CPyExec_*.
1064+
emitter.emit_line("NULL, /* m_methods */")
1065+
else:
1066+
emitter.emit_line(f"{module_prefix}module_methods,")
10571067
if self.multi_phase_init and not self.use_shared_lib:
10581068
slots_name = f"{module_prefix}_slots"
10591069
emitter.emit_line(f"{slots_name}, /* m_slots */")
@@ -1098,7 +1108,9 @@ def emit_module_exec_func(
10981108
exec function for each module and these will be called by the shims
10991109
via Capsules.
11001110
"""
1101-
declaration = f"int CPyExec_{exported_name(module_name)}(PyObject *module)"
1111+
exec_name = f"CPyExec_{exported_name(module_name)}"
1112+
declaration = f"int {exec_name}(PyObject *module)"
1113+
emitter.context.declarations[exec_name] = HeaderDeclaration(declaration + ";")
11021114
module_static = self.module_internal_static_name(module_name, emitter)
11031115
emitter.emit_lines(declaration, "{")
11041116
emitter.emit_line("intern_strings();")
@@ -1183,30 +1195,57 @@ def emit_module_exec_func(
11831195
emitter.emit_line("return -1;")
11841196
emitter.emit_line("}")
11851197

1198+
def emit_init_only_func(self, emitter: Emitter, module_name: str, module_prefix: str) -> None:
1199+
"""Emit CPyInitOnly_* which creates the module object without executing the body.
1200+
1201+
This allows the caller to set up attributes like __file__ and __package__
1202+
before the module body runs. Used for same-group native imports.
1203+
"""
1204+
init_only_name = f"CPyInitOnly_{exported_name(module_name)}"
1205+
init_only_decl = f"PyObject *{init_only_name}(void)"
1206+
emitter.context.declarations[init_only_name] = HeaderDeclaration(init_only_decl + ";")
1207+
module_static = self.module_internal_static_name(module_name, emitter)
1208+
emitter.emit_lines(init_only_decl, "{")
1209+
emitter.emit_lines(
1210+
f"if ({module_static}) {{",
1211+
f"Py_INCREF({module_static});",
1212+
f"return {module_static};",
1213+
"}",
1214+
)
1215+
emitter.emit_lines(
1216+
f"{module_static} = PyModule_Create(&{module_prefix}module);",
1217+
f"return {module_static};",
1218+
)
1219+
emitter.emit_lines("}")
1220+
emitter.emit_line("")
1221+
11861222
def emit_module_init_func(
11871223
self, emitter: Emitter, module_name: str, module_prefix: str
11881224
) -> None:
11891225
if not self.use_shared_lib:
11901226
declaration = f"PyMODINIT_FUNC PyInit_{module_name}(void)"
11911227
else:
1192-
declaration = f"PyObject *CPyInit_{exported_name(module_name)}(void)"
1193-
emitter.emit_lines(declaration, "{")
1228+
n = f"CPyInit_{exported_name(module_name)}"
1229+
declaration = f"PyObject *{n}(void)"
1230+
emitter.context.declarations[n] = HeaderDeclaration(declaration + ";")
11941231

11951232
if self.multi_phase_init:
1233+
emitter.emit_lines(declaration, "{")
11961234
def_name = f"{module_prefix}module"
11971235
emitter.emit_line(f"return PyModuleDef_Init(&{def_name});")
11981236
emitter.emit_line("}")
11991237
return
12001238

12011239
exec_func = f"CPyExec_{exported_name(module_name)}"
12021240

1203-
# Store the module reference in a static and return it when necessary.
1204-
# This is separate from the *global* reference to the module that will
1205-
# be populated when it is imported by a compiled module. We want that
1206-
# reference to only be populated when the module has been successfully
1207-
# imported, whereas this we want to have to stop a circular import.
1241+
if self.use_shared_lib:
1242+
self.emit_init_only_func(emitter, module_name, module_prefix)
1243+
1244+
# Emit CPyInit_* / PyInit_* which creates the module and executes the body.
1245+
emitter.emit_lines(declaration, "{")
12081246
module_static = self.module_internal_static_name(module_name, emitter)
12091247

1248+
emitter.emit_line("PyObject* modname = NULL;")
12101249
emitter.emit_lines(
12111250
f"if ({module_static}) {{",
12121251
f"Py_INCREF({module_static});",
@@ -1219,9 +1258,35 @@ def emit_module_init_func(
12191258
f"if (unlikely({module_static} == NULL))",
12201259
" goto fail;",
12211260
)
1261+
# Register in sys.modules early so that circular imports via
1262+
# CPyImport_ImportNative can detect that this module is already
1263+
# being initialized and avoid re-executing the module body.
1264+
emitter.emit_line(f'modname = PyUnicode_FromString("{module_name}");')
1265+
emitter.emit_line("if (modname == NULL) CPyError_OutOfMemory();")
1266+
emitter.emit_line(
1267+
f"if (PyObject_SetItem(PyImport_GetModuleDict(), modname, {module_static}) < 0)"
1268+
)
1269+
emitter.emit_line(" goto fail;")
1270+
emitter.emit_line("Py_CLEAR(modname);")
12221271
emitter.emit_lines(f"if ({exec_func}({module_static}) != 0)", " goto fail;")
12231272
emitter.emit_line(f"return {module_static};")
1224-
emitter.emit_lines("fail:", "return NULL;")
1273+
emitter.emit_lines("fail:")
1274+
# Clean up on failure: remove from sys.modules and clear the static
1275+
# so that a subsequent import attempt will retry initialization.
1276+
emitter.emit_line("{")
1277+
emitter.emit_line(" PyObject *exc_type, *exc_val, *exc_tb;")
1278+
emitter.emit_line(" PyErr_Fetch(&exc_type, &exc_val, &exc_tb);")
1279+
emitter.emit_line(" if (modname == NULL) {")
1280+
emitter.emit_line(f' modname = PyUnicode_FromString("{module_name}");')
1281+
emitter.emit_line(" if (modname == NULL) CPyError_OutOfMemory();")
1282+
emitter.emit_line(" }")
1283+
emitter.emit_line(" PyObject_DelItem(PyImport_GetModuleDict(), modname);")
1284+
emitter.emit_line(" PyErr_Clear();")
1285+
emitter.emit_line(" Py_DECREF(modname);")
1286+
emitter.emit_line(f" Py_CLEAR({module_static});")
1287+
emitter.emit_line(" PyErr_Restore(exc_type, exc_val, exc_tb);")
1288+
emitter.emit_line("}")
1289+
emitter.emit_line("return NULL;")
12251290
emitter.emit_lines("}")
12261291

12271292
def generate_top_level_call(self, module: ModuleIR, emitter: Emitter) -> None:

mypyc/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import importlib.machinery
34
import sys
45
import sysconfig
56
from typing import Any, Final
@@ -97,6 +98,11 @@
9798
# we are on Python 3.13 or later.
9899
IS_FREE_THREADED: Final = bool(sysconfig.get_config_var("Py_GIL_DISABLED"))
99100

101+
# The file extension suffix for C extension modules on the current platform
102+
# (e.g. ".cpython-312-x86_64-linux-gnu.so" or ".pyd").
103+
_EXT_SUFFIXES: Final = importlib.machinery.EXTENSION_SUFFIXES
104+
EXT_SUFFIX: Final = _EXT_SUFFIXES[0] if _EXT_SUFFIXES else ".so"
105+
100106

101107
JsonDict = dict[str, Any]
102108

mypyc/irbuild/builder.py

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,15 @@
5858
)
5959
from mypy.util import module_prefix, split_target
6060
from mypy.visitor import ExpressionVisitor, StatementVisitor
61-
from mypyc.common import BITMAP_BITS, GENERATOR_ATTRIBUTE_PREFIX, SELF_NAME, TEMP_ATTR_NAME
61+
from mypyc.common import (
62+
BITMAP_BITS,
63+
EXT_SUFFIX,
64+
GENERATOR_ATTRIBUTE_PREFIX,
65+
MODULE_PREFIX,
66+
SELF_NAME,
67+
TEMP_ATTR_NAME,
68+
shared_lib_name,
69+
)
6270
from mypyc.crash import catch_errors
6371
from mypyc.errors import Errors
6472
from mypyc.ir.class_ir import ClassIR, NonExtClassInfo
@@ -76,6 +84,8 @@
7684
InitStatic,
7785
Integer,
7886
IntOp,
87+
LoadAddress,
88+
LoadGlobal,
7989
LoadStatic,
8090
MethodCall,
8191
Op,
@@ -96,6 +106,7 @@
96106
bitmap_rprimitive,
97107
bool_rprimitive,
98108
bytes_rprimitive,
109+
c_pointer_rprimitive,
99110
c_pyssize_t_rprimitive,
100111
dict_rprimitive,
101112
int_rprimitive,
@@ -106,6 +117,7 @@
106117
is_tagged,
107118
is_tuple_rprimitive,
108119
none_rprimitive,
120+
object_pointer_rprimitive,
109121
object_rprimitive,
110122
str_rprimitive,
111123
)
@@ -130,11 +142,17 @@
130142
)
131143
from mypyc.irbuild.util import bytes_from_str, is_constant
132144
from mypyc.irbuild.vec import vec_set_item
145+
from mypyc.namegen import exported_name
133146
from mypyc.options import CompilerOptions
134147
from mypyc.primitives.dict_ops import dict_get_item_op, dict_set_item_op
135148
from mypyc.primitives.generic_ops import iter_op, next_op, py_setattr_op
136149
from mypyc.primitives.list_ops import list_get_item_unsafe_op, list_pop_last, to_list
137-
from mypyc.primitives.misc_ops import check_unpack_count_op, get_module_dict_op, import_op
150+
from mypyc.primitives.misc_ops import (
151+
check_unpack_count_op,
152+
get_module_dict_op,
153+
import_op,
154+
native_import_op,
155+
)
138156
from mypyc.primitives.registry import CFunctionDescription, function_ops
139157
from mypyc.primitives.tuple_ops import tuple_get_item_unsafe_op
140158

@@ -461,15 +479,56 @@ def add_to_non_ext_dict(
461479
# doesn't cause contention.
462480
self.builder.set_immortal_if_free_threaded(val, line)
463481

464-
def gen_import(self, id: str, line: int) -> None:
465-
self.imports[id] = None
482+
def gen_import(self, module: str, line: int) -> None:
483+
self.imports[module] = None
466484

467485
needs_import, out = BasicBlock(), BasicBlock()
468-
self.check_if_module_loaded(id, line, needs_import, out)
486+
self.check_if_module_loaded(module, line, needs_import, out)
469487

470488
self.activate_block(needs_import)
471-
value = self.call_c(import_op, [self.load_str(id, line)], line)
472-
self.add(InitStatic(value, id, namespace=NAMESPACE_MODULE))
489+
if self.is_native_module(module) and self.is_same_group_module(module):
490+
# Use custom import machinery for native-to-native imports in the same group
491+
init_only_func = self.add(
492+
LoadGlobal(c_pointer_rprimitive, f"CPyInitOnly_{exported_name(module)}")
493+
)
494+
exec_func = self.add(
495+
LoadGlobal(c_pointer_rprimitive, f"CPyExec_{exported_name(module)}")
496+
)
497+
module_static = self.add(
498+
LoadAddress(
499+
object_pointer_rprimitive,
500+
f"{MODULE_PREFIX}{exported_name(module + '__internal')}",
501+
)
502+
)
503+
group_name = self.mapper.group_map.get(self.module_name)
504+
if group_name is not None:
505+
shared_lib_mod_name = shared_lib_name(group_name)
506+
mod_dict = self.call_c(get_module_dict_op, [], line)
507+
shared_lib_obj = self.primitive_op(
508+
dict_get_item_op, [mod_dict, self.load_str(shared_lib_mod_name, line)], line
509+
)
510+
shared_lib_file = self.py_get_attr(shared_lib_obj, "__file__", line)
511+
else:
512+
shared_lib_file = self.none_object(line)
513+
ext_suffix = self.load_str(EXT_SUFFIX, line)
514+
is_pkg = self.is_package_module(module)
515+
value = self.call_c(
516+
native_import_op,
517+
[
518+
self.load_str(module, line),
519+
init_only_func,
520+
exec_func,
521+
module_static,
522+
shared_lib_file,
523+
ext_suffix,
524+
Integer(1 if is_pkg else 0, c_pyssize_t_rprimitive),
525+
],
526+
line,
527+
)
528+
else:
529+
# Import using generic Python C API
530+
value = self.call_c(import_op, [self.load_str(module, line)], line)
531+
self.add(InitStatic(value, module, namespace=NAMESPACE_MODULE))
473532
self.goto_and_activate(out)
474533

475534
def check_if_module_loaded(
@@ -1099,6 +1158,20 @@ def is_native_module(self, module: str) -> bool:
10991158
"""Is the given module one compiled by mypyc?"""
11001159
return self.mapper.is_native_module(module)
11011160

1161+
def is_same_group_module(self, module: str) -> bool:
1162+
"""Is the given module in the same compilation group as the current module?
1163+
1164+
Modules in the same group share a compiled C extension and can reference
1165+
each other's C-level symbols directly. Modules in separate groups (separate
1166+
compilation mode) must use the Python import system instead.
1167+
"""
1168+
return self.mapper.group_map.get(module) == self.mapper.group_map.get(self.module_name)
1169+
1170+
def is_package_module(self, module: str) -> bool:
1171+
"""Is the given module a package (i.e., an __init__.py file)?"""
1172+
st = self.graph.get(module)
1173+
return st is not None and st.tree is not None and st.tree.is_package_init_file()
1174+
11021175
def is_native_ref_expr(self, expr: RefExpr) -> bool:
11031176
return self.mapper.is_native_ref_expr(expr)
11041177

0 commit comments

Comments
 (0)