Skip to content

Commit 10361b1

Browse files
authored
Merge branch 'master' into jessica.gamio/llmobs-session-id-mlos-646
2 parents 8fa5686 + 7270bfd commit 10361b1

206 files changed

Lines changed: 7935 additions & 3643 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/dependency_age.py

Lines changed: 228 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import os
66
import re
77
import sys
8+
import urllib.error
89
import urllib.parse
910
import urllib.request
11+
from email.utils import parsedate_to_datetime
1012
from dataclasses import dataclass
1113
from datetime import datetime, timedelta, timezone
1214
from pathlib import Path
@@ -15,10 +17,10 @@
1517

1618
GRADLE_VERSIONS_URL = "https://services.gradle.org/versions/all"
1719
MAVEN_SEARCH_URL = "https://search.maven.org/solrsearch/select"
20+
MAVEN_REPO_URL = "https://repo1.maven.org/maven2"
1821
DEFAULT_MIN_AGE_HOURS = 48
1922

2023

21-
2224
@dataclass(frozen=True)
2325
class Candidate:
2426
version: str
@@ -28,6 +30,7 @@ class Candidate:
2830
# Entry point for GitHub Actions workflows
2931
# select-gradle: get newest Gradle release that is at least MIN_DEPENDENCY_AGE_HOURS hours old
3032
# select-maven: get newest Maven artifact release that is at least MIN_DEPENDENCY_AGE_HOURS hours old
33+
# validate-lockfiles: check that each new coordinate in the Gradle lockfiles is at least MIN_DEPENDENCY_AGE_HOURS hours old
3134
def parse_args() -> argparse.Namespace:
3235
parser = argparse.ArgumentParser(description="Dependency age helpers for GitHub workflows.")
3336
subparsers = parser.add_subparsers(dest="command", required=True)
@@ -50,6 +53,15 @@ def parse_args() -> argparse.Namespace:
5053
help="Case-insensitive regex fragment used to exclude prerelease versions.",
5154
)
5255

56+
validate = subparsers.add_parser("validate-lockfiles", help="Validate age of new coordinates in Gradle lockfiles.")
57+
validate.add_argument("--baseline-dir", required=True)
58+
validate.add_argument("--current-dir", default=".")
59+
validate.add_argument("--metadata-file", help="JSON file mapping group:artifact:version to a timestamp override.")
60+
validate.add_argument("--repo-url", action="append", default=[])
61+
validate.add_argument("--min-age-hours", type=int, default=default_min_age_hours())
62+
validate.add_argument("--now")
63+
validate.add_argument("--github-output", default=None)
64+
5365
return parser.parse_args()
5466

5567

@@ -98,7 +110,7 @@ def parse_datetime(value: Any) -> datetime:
98110
except ValueError:
99111
pass
100112

101-
# ISO 8601: normalise Z and +HHMM +HH:MM for fromisoformat
113+
# ISO 8601: normalise Z and +HHMM -> +HH:MM for fromisoformat
102114
text = re.sub(r"([+-])(\d{2})(\d{2})$", r"\1\2:\3", text.replace("Z", "+00:00"))
103115
return datetime.fromisoformat(text).astimezone(timezone.utc)
104116

@@ -120,8 +132,12 @@ def emit_outputs(outputs: dict[str, Any], github_output: str | None) -> None:
120132
print(line)
121133
if github_output:
122134
with open(github_output, "a", encoding="utf-8") as handle:
123-
for line in lines:
124-
handle.write(f"{line}\n")
135+
for key, value in outputs.items():
136+
text = "" if value is None else str(value)
137+
if "\n" in text:
138+
handle.write(f"{key}<<__EOF__\n{text}\n__EOF__\n")
139+
else:
140+
handle.write(f"{key}={text}\n")
125141

126142

127143
# load JSON from file or URL
@@ -156,7 +172,6 @@ def select_gradle_release(args: argparse.Namespace) -> int:
156172

157173
return emit_selection_result(
158174
label="Gradle",
159-
cutoff=cutoff,
160175
github_output=args.github_output,
161176
candidates=candidates,
162177
not_found_reason=(
@@ -189,7 +204,6 @@ def select_maven_release(args: argparse.Namespace) -> int:
189204

190205
return emit_selection_result(
191206
label=f"{args.group_id}:{args.artifact_id}",
192-
cutoff=cutoff,
193207
github_output=args.github_output,
194208
candidates=candidates,
195209
not_found_reason=(
@@ -246,7 +260,7 @@ def load_maven_documents(
246260
return docs
247261

248262

249-
# parse a version string into a tuple of ints for numeric comparison (e.g. "3.9.11" → (3, 9, 11))
263+
# parse a version string into a sortable tuple for comparison; numeric segments sort before non-numeric
250264
def _version_sort_key(version: str) -> tuple:
251265
segments = []
252266
for segment in re.split(r"([.\-])", version):
@@ -272,7 +286,6 @@ def _version_sort_key(version: str) -> tuple:
272286
def emit_selection_result(
273287
*,
274288
label: str,
275-
cutoff: datetime,
276289
github_output: str | None,
277290
candidates: list[Candidate],
278291
not_found_reason: str,
@@ -336,12 +349,219 @@ def emit_selection_result(
336349
return 0
337350

338351

352+
# verify that every coordinate newly present in the Gradle lockfiles was published before the age cutoff
# a lockfile holding a too-new or unverifiable coordinate is restored to its baseline copy
# returns 1 only when the baseline has no lockfiles while the current tree does; otherwise returns 0
def validate_lockfiles(args: argparse.Namespace) -> int:
    too_new, unverified = "too_new", "unverified"

    cutoff = now_utc(args.now) - timedelta(hours=args.min_age_hours)
    baseline_dir, current_dir = Path(args.baseline_dir), Path(args.current_dir)
    metadata = load_metadata_overrides(args.metadata_file)
    repo_urls = args.repo_url or [MAVEN_REPO_URL]

    def has_lockfiles(root: Path) -> bool:
        return next(root.rglob("gradle.lockfile"), None) is not None

    # An empty baseline next to a populated current tree means the snapshot step most likely failed:
    # every coordinate would look "new", so the age check would be meaningless
    baseline_is_empty = not (baseline_dir.exists() and has_lockfiles(baseline_dir))
    if baseline_is_empty and has_lockfiles(current_dir):
        print("::error::Baseline has no lockfiles but current directory does — the snapshot step may have failed.")
        emit_outputs({"cutoff_at": format_datetime(cutoff), "reverted_files": 0}, args.github_output)
        return 1

    changed = changed_lockfile_coordinates(baseline_dir=baseline_dir, current_dir=current_dir)
    if not changed:
        print("No dependency version changes detected across Gradle lockfiles.")
        emit_outputs({"cutoff_at": format_datetime(cutoff), "reverted_files": 0}, args.github_output)
        return 0

    # group the changed coordinates by the lockfile they appear in
    changed_by_file: dict[str, list[str]] = {}
    for lockfile, coordinate in changed:
        changed_by_file.setdefault(lockfile, []).append(coordinate)

    # a coordinate may appear in several lockfiles; resolve its timestamp only once
    timestamp_cache: dict[str, tuple[datetime | None, str | None]] = {}
    violations_by_file: dict[str, list[tuple[str, str]]] = {}
    for lockfile, coordinates in sorted(changed_by_file.items()):
        for coordinate in coordinates:
            if coordinate not in timestamp_cache:
                timestamp_cache[coordinate] = resolve_gav_timestamp(
                    gav=coordinate, metadata=metadata, repo_urls=repo_urls
                )
            published_at, _reason = timestamp_cache[coordinate]
            if published_at is not None and not published_at > cutoff:
                print(f"Verified {coordinate} (published {format_datetime(published_at)}, cutoff {format_datetime(cutoff)})")
                continue
            kind = unverified if published_at is None else too_new
            violations_by_file.setdefault(lockfile, []).append((coordinate, kind))

    if violations_by_file:
        revert_lockfiles_to_baseline(
            violations_by_file=violations_by_file,
            baseline_dir=baseline_dir,
            current_dir=current_dir,
        )
        for lockfile, entries in sorted(violations_by_file.items()):
            for coordinate, kind in entries:
                problem = "Cannot verify age" if kind == unverified else "Too new"
                print(f"::warning file={lockfile}::{coordinate}: {problem}. Reverted lockfile to baseline.")

    reverted_files = len(violations_by_file)
    summary = build_validation_summary(violations_by_file=violations_by_file, min_age_hours=args.min_age_hours)
    emit_outputs(
        {"cutoff_at": format_datetime(cutoff), "reverted_files": reverted_files, "summary": summary},
        args.github_output,
    )
    print(f"Validated {len(changed)} changed coordinate(s) across {len(changed_by_file)} lockfile(s). {reverted_files} lockfile(s) reverted.")
    return 0
406+
407+
408+
# render a Markdown summary of the reverted dependencies, suitable for a PR description
# each coordinate is listed once, with the violation kind of its first occurrence; returns "" when nothing was reverted
def build_validation_summary(*, violations_by_file: dict[str, list[tuple[str, str]]], min_age_hours: int) -> str:
    if not violations_by_file:
        return ""

    explanations = {
        "too_new": f"Did not meet {min_age_hours}h dependency age requirement",
        "unverified": "Cannot verify age in Maven Central",
    }

    # dicts keep insertion order, so setdefault both deduplicates and preserves first-seen order
    first_kind_by_gav: dict[str, str] = {}
    for entries in violations_by_file.values():
        for gav, kind in entries:
            first_kind_by_gav.setdefault(gav, kind)

    header = [
        "## Dependency age policy",
        "",
        "The following dependencies were reverted:",
        "",
    ]
    bullets = [f"- `{gav}` — {explanations[kind]}" for gav, kind in first_kind_by_gav.items()]
    return "\n".join(header + bullets)
430+
431+
432+
# put every violating lockfile back to its baseline content so the file stays internally consistent
# a lockfile with no baseline copy is new, so it is deleted instead
def revert_lockfiles_to_baseline(
    *,
    violations_by_file: dict[str, list[tuple[str, str]]],
    baseline_dir: Path,
    current_dir: Path,
) -> None:
    for relative_path in sorted(violations_by_file):
        target = current_dir / relative_path
        source = baseline_dir / relative_path

        if not source.exists():
            target.unlink(missing_ok=True)
            print(f"Removed new lockfile {relative_path} (no baseline copy to restore).")
            continue

        baseline_text = source.read_text(encoding="utf-8")
        target.write_text(baseline_text, encoding="utf-8")
        print(f"Reverted {relative_path} to baseline.")
448+
449+
450+
# look up the publish timestamp for a group:artifact:version coordinate
451+
# uses a HEAD request against the POM file to read the Last-Modified header
452+
# tries each repo URL in order, falling back to the next on 404
453+
# returns (datetime, None) on success; (None, reason) when the timestamp cannot be determined
454+
def resolve_gav_timestamp(
455+
*,
456+
gav: str,
457+
metadata: dict[str, Any],
458+
repo_urls: list[str],
459+
) -> tuple[datetime | None, str | None]:
460+
if gav in metadata:
461+
return parse_metadata_override(gav, metadata[gav])
462+
463+
group_id, artifact_id, version = gav.split(":", 2)
464+
group_path = group_id.replace(".", "/")
465+
pom_path = f"{group_path}/{artifact_id}/{version}/{artifact_id}-{version}.pom"
466+
467+
for repo_url in repo_urls:
468+
result = _head_pom_timestamp(f"{repo_url}/{pom_path}")
469+
if result is not None:
470+
return result, None
471+
return None, f"{gav} was not found in any configured repository."
472+
473+
474+
# issue a HEAD request for a POM URL and return its Last-Modified timestamp converted to UTC
# returns None (never raises for request failures) when the timestamp cannot be obtained:
#   - the server answers 404 or 403
#   - the response has no Last-Modified header, or the header cannot be parsed
#   - any other HTTP or network error persists after one retry
def _head_pom_timestamp(pom_url: str) -> datetime | None:
    for _attempt in range(2):
        try:
            request = urllib.request.Request(pom_url, method="HEAD")
            with urllib.request.urlopen(request, timeout=30) as response:
                last_modified = response.headers.get("Last-Modified")
        except urllib.error.HTTPError as exc:
            if exc.code in (404, 403):
                # definitive answer from this repository; retrying will not help
                return None
            continue
        except (urllib.error.URLError, TimeoutError, OSError):
            # possibly transient network failure: try once more
            continue

        if not last_modified:
            return None
        try:
            parsed = parsedate_to_datetime(last_modified)
        except (TypeError, ValueError):
            # malformed header: treat as unverifiable instead of aborting the whole run
            return None
        if parsed.tzinfo is None:
            # a "-0000" offset yields a naive datetime that represents UTC;
            # astimezone() on a naive value would wrongly assume local time
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)
    return None
494+
495+
496+
# load optional metadata overrides from a JSON file (group:artifact:version -> timestamp)
497+
def load_metadata_overrides(path: str | None) -> dict[str, Any]:
498+
if not path:
499+
return {}
500+
return load_json(path, None)
501+
502+
503+
# parse a single metadata override value: a timestamp string/number, or a dict with a timestamp key
504+
def parse_metadata_override(gav: str, override: Any) -> tuple[datetime | None, str | None]:
505+
if isinstance(override, dict):
506+
for key in ("timestamp", "published_at", "timestamp_ms"):
507+
if key in override:
508+
try:
509+
return parse_datetime(override[key]), None
510+
except (ValueError, TypeError) as exc:
511+
return None, f"Metadata override for {gav} has an invalid timestamp: {exc}"
512+
return None, f"Metadata override for {gav} is missing a timestamp key (expected: timestamp, published_at, or timestamp_ms)."
513+
if isinstance(override, (int, float, str)):
514+
try:
515+
return parse_datetime(override), None
516+
except (ValueError, TypeError) as exc:
517+
return None, f"Metadata override for {gav} has an invalid timestamp: {exc}"
518+
return None, f"Unsupported metadata override format for {gav}."
519+
520+
521+
# compare the lockfiles under baseline_dir and current_dir
# returns (relative_path, gav) for every coordinate present in a current lockfile but absent from
# the same lockfile in the baseline, ordered by path and then by coordinate
def changed_lockfile_coordinates(*, baseline_dir: Path, current_dir: Path) -> list[tuple[str, str]]:
    baseline = collect_lockfiles(baseline_dir)
    current = collect_lockfiles(current_dir)
    nothing: set[str] = set()
    return [
        (relative_path, gav)
        for relative_path in sorted(baseline.keys() | current.keys())
        for gav in sorted(current.get(relative_path, nothing) - baseline.get(relative_path, nothing))
    ]
532+
533+
534+
# walk root recursively and parse every gradle.lockfile found
# returns a mapping of path-relative-to-root -> set of coordinates; empty when root does not exist
def collect_lockfiles(root: Path) -> dict[str, set[str]]:
    parsed: dict[str, set[str]] = {}
    if root.exists():
        for lockfile in root.rglob("gradle.lockfile"):
            parsed[str(lockfile.relative_to(root))] = parse_lockfile(lockfile)
    return parsed
542+
543+
544+
# read a lockfile and return its group:artifact:version coordinates
# blank lines and "#" comments are ignored; only the text before the first "=" is considered,
# and it is kept only when it contains exactly two ":" separators
def parse_lockfile(path: Path) -> set[str]:
    stripped = (raw.strip() for raw in path.read_text(encoding="utf-8").splitlines())
    keys = (entry.split("=", 1)[0] for entry in stripped if entry and not entry.startswith("#"))
    return {key for key in keys if key.count(":") == 2}
555+
556+
339557
# dispatch the parsed sub-command to its handler and return the handler's exit code
# raises ValueError for a command that has no handler
def main() -> int:
    args = parse_args()
    handlers = {
        "select-gradle": select_gradle_release,
        "select-maven": select_maven_release,
        "validate-lockfiles": validate_lockfiles,
    }
    handler = handlers.get(args.command)
    if handler is None:
        raise ValueError(f"Unsupported command: {args.command}")
    return handler(args)
346566

347567

0 commit comments

Comments
 (0)