Skip to content

Commit f36614a

Browse files
committed
[update_lib] fast date lookup for todo
1 parent 5eadae8 commit f36614a

File tree

2 files changed

+86
-32
lines changed

2 files changed

+86
-32
lines changed

scripts/update_lib/cmd_todo.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -597,17 +597,16 @@ def format_all_todo(
597597
else get_module_diff_stat(item["name"], cpython_prefix, lib_prefix)
598598
)
599599

600-
# Add last_updated to displayed test items (verbose only - slow)
601-
if verbose:
602-
for tests in test_by_lib.values():
603-
for test in tests:
604-
test["last_updated"] = get_test_last_updated(
605-
test["name"], cpython_prefix, lib_prefix
606-
)
607-
for test in no_lib_tests:
600+
# Add last_updated to displayed test items
601+
for tests in test_by_lib.values():
602+
for test in tests:
608603
test["last_updated"] = get_test_last_updated(
609604
test["name"], cpython_prefix, lib_prefix
610605
)
606+
for test in no_lib_tests:
607+
test["last_updated"] = get_test_last_updated(
608+
test["name"], cpython_prefix, lib_prefix
609+
)
611610

612611
# Format lib todo with embedded tests
613612
lines.extend(format_todo_list(lib_todo, test_by_lib, limit, verbose))

scripts/update_lib/deps.py

Lines changed: 79 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,83 @@ def _count_path_diff(path_a: pathlib.Path, path_b: pathlib.Path) -> int:
11091109
return 0
11101110

11111111

1112+
@functools.cache
1113+
def _bulk_last_updated() -> dict[str, str]:
1114+
"""Get last git commit dates for all paths under Lib/ in one git call.
1115+
1116+
Keys are Lib/-relative paths (e.g. "re/__init__.py", "test/test_os.py",
1117+
"os.py"), plus directory rollups (e.g. "re", "test/test_zoneinfo").
1118+
1119+
Returns:
1120+
Dict mapping Lib/-relative path to date string.
1121+
"""
1122+
file_map: dict[str, str] = {}
1123+
try:
1124+
result = subprocess.run(
1125+
["git", "log", "--format=%cd", "--date=short", "--name-only", "--",
1126+
"Lib/"],
1127+
capture_output=True,
1128+
text=True,
1129+
timeout=30,
1130+
)
1131+
if result.returncode != 0:
1132+
return file_map
1133+
except Exception:
1134+
return file_map
1135+
1136+
current_date = None
1137+
for line in result.stdout.splitlines():
1138+
line = line.strip()
1139+
if not line:
1140+
continue
1141+
# Date lines are YYYY-MM-DD format
1142+
if len(line) == 10 and line[4] == "-" and line[7] == "-":
1143+
current_date = line
1144+
elif current_date and line.startswith("Lib/"):
1145+
# Strip "Lib/" prefix to get Lib-relative key
1146+
rel = line[4:]
1147+
if rel and rel not in file_map:
1148+
file_map[rel] = current_date
1149+
1150+
# Pre-compute directory rollups
1151+
dir_map: dict[str, str] = {}
1152+
for filepath, date in file_map.items():
1153+
parts = filepath.split("/")
1154+
for i in range(1, len(parts)):
1155+
dirpath = "/".join(parts[:i])
1156+
if dirpath not in dir_map or date > dir_map[dirpath]:
1157+
dir_map[dirpath] = date
1158+
1159+
dir_map.update(file_map)
1160+
return dir_map
1161+
1162+
1163+
@functools.cache
1164+
def _lib_prefix_stripped(lib_prefix: str) -> str:
1165+
"""Get the normalized prefix to strip from paths, with trailing /."""
1166+
# e.g. "Lib" -> "Lib/", "../Lib" -> "../Lib/"
1167+
return lib_prefix.rstrip("/") + "/"
1168+
1169+
1170+
def _lookup_last_updated(paths: list[str], lib_prefix: str) -> str | None:
    """Look up the most recent date among paths from the bulk cache.

    Args:
        paths: Paths to look up, normally starting with ``lib_prefix``
            (e.g. "Lib/test/test_os.py", "../Lib/re"). Native separators
            are accepted.
        lib_prefix: Prefix to strip to obtain the Lib/-relative cache key
            (e.g. "Lib", "../Lib").

    Returns:
        The latest date string found for any of the paths, or None when
        none of them has an entry in the cache.
    """
    cache = _bulk_last_updated()
    # Cache keys always use "/" (they come from git output), while callers may
    # pass str(pathlib.Path), which uses "\\" on Windows. Normalise both the
    # prefix and each path so they compare in the same form; as_posix() also
    # drops a trailing separator and a leading "./".
    prefix = _lib_prefix_stripped(pathlib.PurePath(lib_prefix).as_posix())
    latest = None
    for p in paths:
        normalized = pathlib.PurePath(p).as_posix()
        # Strip lib_prefix to get Lib-relative key
        # e.g. "Lib/test/test_os.py" -> "test/test_os.py"
        #      "../Lib/re" -> "re"
        # Paths that do not start with the prefix are looked up unchanged.
        key = normalized.removeprefix(prefix)
        date = cache.get(key)
        # ISO YYYY-MM-DD strings order chronologically under plain comparison.
        if date and (latest is None or date > latest):
            latest = date
    return latest
1187+
1188+
11121189
def get_module_last_updated(
11131190
name: str, cpython_prefix: str, lib_prefix: str
11141191
) -> str | None:
@@ -1126,18 +1203,7 @@ def get_module_last_updated(
11261203
continue
11271204
if not local_paths:
11281205
return None
1129-
try:
1130-
result = subprocess.run(
1131-
["git", "log", "-1", "--format=%cd", "--date=short", "--"] + local_paths,
1132-
capture_output=True,
1133-
text=True,
1134-
timeout=10,
1135-
)
1136-
if result.returncode == 0 and result.stdout.strip():
1137-
return result.stdout.strip()
1138-
except Exception:
1139-
pass
1140-
return None
1206+
return _lookup_last_updated(local_paths, lib_prefix)
11411207

11421208

11431209
def get_module_diff_stat(name: str, cpython_prefix: str, lib_prefix: str) -> int:
@@ -1167,18 +1233,7 @@ def get_test_last_updated(
11671233
local_path = _get_local_test_path(cpython_path, lib_prefix)
11681234
if not local_path.exists():
11691235
return None
1170-
try:
1171-
result = subprocess.run(
1172-
["git", "log", "-1", "--format=%cd", "--date=short", "--", str(local_path)],
1173-
capture_output=True,
1174-
text=True,
1175-
timeout=10,
1176-
)
1177-
if result.returncode == 0 and result.stdout.strip():
1178-
return result.stdout.strip()
1179-
except Exception:
1180-
pass
1181-
return None
1236+
return _lookup_last_updated([str(local_path)], lib_prefix)
11821237

11831238

11841239
def get_test_dependencies(

0 commit comments

Comments
 (0)