Skip to content

Commit d0b5a5a

Browse files
[update_lib] fast date lookup for todo (#7299)
* [update_lib] fast date lookup for todo * add deps * Auto-format: ruff format --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent ca390dc commit d0b5a5a

File tree

2 files changed

+99
-32
lines changed

2 files changed

+99
-32
lines changed

scripts/update_lib/cmd_todo.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -597,17 +597,16 @@ def format_all_todo(
597597
else get_module_diff_stat(item["name"], cpython_prefix, lib_prefix)
598598
)
599599

600-
# Add last_updated to displayed test items (verbose only - slow)
601-
if verbose:
602-
for tests in test_by_lib.values():
603-
for test in tests:
604-
test["last_updated"] = get_test_last_updated(
605-
test["name"], cpython_prefix, lib_prefix
606-
)
607-
for test in no_lib_tests:
600+
# Add last_updated to displayed test items
601+
for tests in test_by_lib.values():
602+
for test in tests:
608603
test["last_updated"] = get_test_last_updated(
609604
test["name"], cpython_prefix, lib_prefix
610605
)
606+
for test in no_lib_tests:
607+
test["last_updated"] = get_test_last_updated(
608+
test["name"], cpython_prefix, lib_prefix
609+
)
611610

612611
# Format lib todo with embedded tests
613612
lines.extend(format_todo_list(lib_todo, test_by_lib, limit, verbose))

scripts/update_lib/deps.py

Lines changed: 92 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ def clear_import_graph_caches() -> None:
289289
"test_dictcomps.py",
290290
"test_dictviews.py",
291291
"test_userdict.py",
292+
"mapping_tests.py",
292293
],
293294
},
294295
"list": {
@@ -326,6 +327,10 @@ def clear_import_graph_caches() -> None:
326327
},
327328
"re": {
328329
"hard_deps": ["sre_compile.py", "sre_constants.py", "sre_parse.py"],
330+
"test": [
331+
"test_re.py",
332+
"re_tests.py",
333+
],
329334
},
330335
"weakref": {
331336
"hard_deps": ["_weakrefset.py"],
@@ -496,6 +501,8 @@ def clear_import_graph_caches() -> None:
496501
"test_syslog.py",
497502
"test_sys_setprofile.py",
498503
"test_sys_settrace.py",
504+
"test_audit.py",
505+
"audit-tests.py",
499506
],
500507
},
501508
"str": {
@@ -708,6 +715,12 @@ def clear_import_graph_caches() -> None:
708715
"_test_eintr.py",
709716
]
710717
},
718+
"curses": {
719+
"test": [
720+
"test_curses.py",
721+
"curses_tests.py",
722+
],
723+
},
711724
}
712725

713726

@@ -1109,6 +1122,83 @@ def _count_path_diff(path_a: pathlib.Path, path_b: pathlib.Path) -> int:
11091122
return 0
11101123

11111124

1125+
@functools.cache
1126+
def _bulk_last_updated() -> dict[str, str]:
1127+
"""Get last git commit dates for all paths under Lib/ in one git call.
1128+
1129+
Keys are Lib/-relative paths (e.g. "re/__init__.py", "test/test_os.py",
1130+
"os.py"), plus directory rollups (e.g. "re", "test/test_zoneinfo").
1131+
1132+
Returns:
1133+
Dict mapping Lib/-relative path to date string.
1134+
"""
1135+
file_map: dict[str, str] = {}
1136+
try:
1137+
result = subprocess.run(
1138+
["git", "log", "--format=%cd", "--date=short", "--name-only", "--", "Lib/"],
1139+
capture_output=True,
1140+
text=True,
1141+
timeout=30,
1142+
)
1143+
if result.returncode != 0:
1144+
return file_map
1145+
except Exception:
1146+
return file_map
1147+
1148+
current_date = None
1149+
for line in result.stdout.splitlines():
1150+
line = line.strip()
1151+
if not line:
1152+
continue
1153+
# Date lines are YYYY-MM-DD format
1154+
if len(line) == 10 and line[4] == "-" and line[7] == "-":
1155+
current_date = line
1156+
elif current_date and line.startswith("Lib/"):
1157+
# Strip "Lib/" prefix to get Lib-relative key
1158+
rel = line[4:]
1159+
if rel and rel not in file_map:
1160+
file_map[rel] = current_date
1161+
1162+
# Pre-compute directory rollups
1163+
dir_map: dict[str, str] = {}
1164+
for filepath, date in file_map.items():
1165+
parts = filepath.split("/")
1166+
for i in range(1, len(parts)):
1167+
dirpath = "/".join(parts[:i])
1168+
if dirpath not in dir_map or date > dir_map[dirpath]:
1169+
dir_map[dirpath] = date
1170+
1171+
dir_map.update(file_map)
1172+
return dir_map
1173+
1174+
1175+
@functools.cache
1176+
def _lib_prefix_stripped(lib_prefix: str) -> str:
1177+
"""Get the normalized prefix to strip from paths, with trailing /."""
1178+
# e.g. "Lib" -> "Lib/", "./Lib" -> "Lib/", "../Lib" -> "../Lib/"
1179+
return pathlib.Path(lib_prefix).as_posix().rstrip("/") + "/"
1180+
1181+
1182+
def _lookup_last_updated(paths: list[str], lib_prefix: str) -> str | None:
    """Return the latest cached date among ``paths``, or None if none is known.

    Each path is converted to POSIX form and has the normalized ``lib_prefix``
    removed when it starts with it (e.g. "Lib/test/test_os.py" ->
    "test/test_os.py", "../Lib/re" -> "re"); the remainder is the key looked
    up in the bulk date cache. Paths without a cached date are ignored.
    """
    known_dates = _bulk_last_updated()
    strip = _lib_prefix_stripped(lib_prefix)
    keys = (pathlib.Path(p).as_posix().removeprefix(strip) for p in paths)
    found = [date for date in map(known_dates.get, keys) if date]
    # Dates are YYYY-MM-DD strings, so the lexicographic max is the newest.
    return max(found, default=None)
1200+
1201+
11121202
def get_module_last_updated(
11131203
name: str, cpython_prefix: str, lib_prefix: str
11141204
) -> str | None:
@@ -1126,18 +1216,7 @@ def get_module_last_updated(
11261216
continue
11271217
if not local_paths:
11281218
return None
1129-
try:
1130-
result = subprocess.run(
1131-
["git", "log", "-1", "--format=%cd", "--date=short", "--"] + local_paths,
1132-
capture_output=True,
1133-
text=True,
1134-
timeout=10,
1135-
)
1136-
if result.returncode == 0 and result.stdout.strip():
1137-
return result.stdout.strip()
1138-
except Exception:
1139-
pass
1140-
return None
1219+
return _lookup_last_updated(local_paths, lib_prefix)
11411220

11421221

11431222
def get_module_diff_stat(name: str, cpython_prefix: str, lib_prefix: str) -> int:
@@ -1167,18 +1246,7 @@ def get_test_last_updated(
11671246
local_path = _get_local_test_path(cpython_path, lib_prefix)
11681247
if not local_path.exists():
11691248
return None
1170-
try:
1171-
result = subprocess.run(
1172-
["git", "log", "-1", "--format=%cd", "--date=short", "--", str(local_path)],
1173-
capture_output=True,
1174-
text=True,
1175-
timeout=10,
1176-
)
1177-
if result.returncode == 0 and result.stdout.strip():
1178-
return result.stdout.strip()
1179-
except Exception:
1180-
pass
1181-
return None
1249+
return _lookup_last_updated([str(local_path)], lib_prefix)
11821250

11831251

11841252
def get_test_dependencies(

0 commit comments

Comments
 (0)