55import os
66import re
77import sys
8+ import urllib .error
89import urllib .parse
910import urllib .request
11+ from email .utils import parsedate_to_datetime
1012from dataclasses import dataclass
1113from datetime import datetime , timedelta , timezone
1214from pathlib import Path
1517
# Remote endpoints and defaults shared by the dependency-age commands.
# NOTE(review): the consumers of the two URLs below are outside this excerpt —
# presumably select-gradle and select-maven respectively; confirm.
1618GRADLE_VERSIONS_URL = "https://services.gradle.org/versions/all"
1719MAVEN_SEARCH_URL = "https://search.maven.org/solrsearch/select"
# Repository base that validate-lockfiles queries when no --repo-url is supplied.
20+ MAVEN_REPO_URL = "https://repo1.maven.org/maven2"
# NOTE(review): presumably the fallback minimum age in hours behind default_min_age_hours() — confirm.
1821DEFAULT_MIN_AGE_HOURS = 48
1922
2023
21-
2224@dataclass (frozen = True )
2325class Candidate :
2426 version : str
@@ -28,6 +30,7 @@ class Candidate:
2830# Entry point for GitHub Actions workflows
2931# select-gradle: get newest Gradle release that is at least MIN_DEPENDENCY_AGE_HOURS hours old
3032# select-maven: get newest Maven artifact release that is at least MIN_DEPENDENCY_AGE_HOURS hours old
33+ # validate-lockfiles: check that each new coordinate in the Gradle lockfiles is at least MIN_DEPENDENCY_AGE_HOURS hours old
3134def parse_args () -> argparse .Namespace :
3235 parser = argparse .ArgumentParser (description = "Dependency age helpers for GitHub workflows." )
3336 subparsers = parser .add_subparsers (dest = "command" , required = True )
@@ -50,6 +53,15 @@ def parse_args() -> argparse.Namespace:
5053 help = "Case-insensitive regex fragment used to exclude prerelease versions." ,
5154 )
5255
56+ validate = subparsers .add_parser ("validate-lockfiles" , help = "Validate age of new coordinates in Gradle lockfiles." )
57+ validate .add_argument ("--baseline-dir" , required = True )
58+ validate .add_argument ("--current-dir" , default = "." )
59+ validate .add_argument ("--metadata-file" , help = "JSON file mapping group:artifact:version to a timestamp override." )
60+ validate .add_argument ("--repo-url" , action = "append" , default = [])
61+ validate .add_argument ("--min-age-hours" , type = int , default = default_min_age_hours ())
62+ validate .add_argument ("--now" )
63+ validate .add_argument ("--github-output" , default = None )
64+
5365 return parser .parse_args ()
5466
5567
@@ -98,7 +110,7 @@ def parse_datetime(value: Any) -> datetime:
98110 except ValueError :
99111 pass
100112
101- # ISO 8601: normalise Z and +HHMM → +HH:MM for fromisoformat
113+ # ISO 8601: normalise Z and +HHMM -> +HH:MM for fromisoformat
102114 text = re .sub (r"([+-])(\d{2})(\d{2})$" , r"\1\2:\3" , text .replace ("Z" , "+00:00" ))
103115 return datetime .fromisoformat (text ).astimezone (timezone .utc )
104116
@@ -120,8 +132,12 @@ def emit_outputs(outputs: dict[str, Any], github_output: str | None) -> None:
120132 print (line )
121133 if github_output :
122134 with open (github_output , "a" , encoding = "utf-8" ) as handle :
123- for line in lines :
124- handle .write (f"{ line } \n " )
135+ for key , value in outputs .items ():
136+ text = "" if value is None else str (value )
137+ if "\n " in text :
138+ handle .write (f"{ key } <<__EOF__\n { text } \n __EOF__\n " )
139+ else :
140+ handle .write (f"{ key } ={ text } \n " )
125141
126142
127143# load JSON from file or URL
@@ -156,7 +172,6 @@ def select_gradle_release(args: argparse.Namespace) -> int:
156172
157173 return emit_selection_result (
158174 label = "Gradle" ,
159- cutoff = cutoff ,
160175 github_output = args .github_output ,
161176 candidates = candidates ,
162177 not_found_reason = (
@@ -189,7 +204,6 @@ def select_maven_release(args: argparse.Namespace) -> int:
189204
190205 return emit_selection_result (
191206 label = f"{ args .group_id } :{ args .artifact_id } " ,
192- cutoff = cutoff ,
193207 github_output = args .github_output ,
194208 candidates = candidates ,
195209 not_found_reason = (
@@ -246,7 +260,7 @@ def load_maven_documents(
246260 return docs
247261
248262
249- # parse a version string into a tuple of ints for numeric comparison (e.g. "3.9.11" → (3, 9, 11))
263+ # parse a version string into a sortable tuple for comparison; numeric segments sort before non-numeric
250264def _version_sort_key (version : str ) -> tuple :
251265 segments = []
252266 for segment in re .split (r"([.\-])" , version ):
@@ -272,7 +286,6 @@ def _version_sort_key(version: str) -> tuple:
272286def emit_selection_result (
273287 * ,
274288 label : str ,
275- cutoff : datetime ,
276289 github_output : str | None ,
277290 candidates : list [Candidate ],
278291 not_found_reason : str ,
@@ -336,12 +349,219 @@ def emit_selection_result(
336349 return 0
337350
338351
# check that every new coordinate in the Gradle lockfiles is at least min_age_hours old
# a lockfile containing a coordinate that is too new, or whose publish time cannot be determined,
# is restored to its baseline copy (or removed when it has no baseline copy)
# outputs: cutoff_at and reverted_files always; summary only when changed coordinates were checked
# returns 1 when the baseline has no lockfiles but the current directory does, otherwise 0
def validate_lockfiles(args: argparse.Namespace) -> int:
    cutoff = now_utc(args.now) - timedelta(hours=args.min_age_hours)
    baseline_dir = Path(args.baseline_dir)
    current_dir = Path(args.current_dir)
    metadata = load_metadata_overrides(args.metadata_file)
    repo_urls = args.repo_url if args.repo_url else [MAVEN_REPO_URL]

    # Guard against a silent snapshot failure: if baseline is empty but current has lockfiles,
    # every coordinate would appear "new" and the age check would be meaningless
    baseline_has_lockfiles = baseline_dir.exists() and any(baseline_dir.rglob("gradle.lockfile"))
    current_has_lockfiles = any(current_dir.rglob("gradle.lockfile"))
    if not baseline_has_lockfiles and current_has_lockfiles:
        print("::error::Baseline has no lockfiles but current directory does — the snapshot step may have failed.")
        emit_outputs({"cutoff_at": format_datetime(cutoff), "reverted_files": 0}, args.github_output)
        return 1

    changed = changed_lockfile_coordinates(baseline_dir=baseline_dir, current_dir=current_dir)
    if not changed:
        print("No dependency version changes detected across Gradle lockfiles.")
        emit_outputs({"cutoff_at": format_datetime(cutoff), "reverted_files": 0}, args.github_output)
        return 0

    changed_by_file: dict[str, list[str]] = {}
    for relative_path, gav in changed:
        changed_by_file.setdefault(relative_path, []).append(gav)

    # the same coordinate usually appears in several lockfiles; resolve each one only once
    timestamp_cache: dict[str, tuple[datetime | None, str | None]] = {}
    # violation kinds; build_validation_summary keys its messages on these exact strings
    too_new = "too_new"
    unverified = "unverified"
    violations_by_file: dict[str, list[tuple[str, str]]] = {}
    for relative_path, gavs in sorted(changed_by_file.items()):
        for gav in gavs:
            if gav not in timestamp_cache:
                timestamp_cache[gav] = resolve_gav_timestamp(gav=gav, metadata=metadata, repo_urls=repo_urls)
            published_at, reason = timestamp_cache[gav]
            if published_at is None:
                # surface why the lookup failed (bad override vs. not found) instead of dropping it
                print(f"Could not verify {gav}: {reason}")
                violations_by_file.setdefault(relative_path, []).append((gav, unverified))
            elif published_at > cutoff:
                violations_by_file.setdefault(relative_path, []).append((gav, too_new))
            else:
                print(f"Verified {gav} (published {format_datetime(published_at)}, cutoff {format_datetime(cutoff)})")

    if violations_by_file:
        revert_lockfiles_to_baseline(violations_by_file=violations_by_file, baseline_dir=baseline_dir, current_dir=current_dir)
        for relative_path, entries in sorted(violations_by_file.items()):
            for gav, kind in entries:
                print(f"::warning file={relative_path}::{gav}: {'Cannot verify age' if kind == unverified else 'Too new'}. Reverted lockfile to baseline.")

    reverted_files = len(violations_by_file)
    summary = build_validation_summary(violations_by_file=violations_by_file, min_age_hours=args.min_age_hours)
    emit_outputs({"cutoff_at": format_datetime(cutoff), "reverted_files": reverted_files, "summary": summary}, args.github_output)
    print(f"Validated {len(changed)} changed coordinate(s) across {len(changed_by_file)} lockfile(s). {reverted_files} lockfile(s) reverted.")
    # reverting is the policy working as intended, so it is not reported as a failure
    return 0
406+
407+
# build summary of reverted dependencies for PR descriptions
# returns "" when there are no violations; otherwise a Markdown section with one bullet per
# unique coordinate, using the kind recorded at the coordinate's first occurrence
def build_validation_summary(*, violations_by_file: dict[str, list[tuple[str, str]]], min_age_hours: int) -> str:
    if not violations_by_file:
        return ""
    summary_messages = {
        "too_new": f"Did not meet {min_age_hours}h dependency age requirement",
        "unverified": "Cannot verify age in Maven Central",
    }
    lines = [
        "## Dependency age policy",
        "",
        "The following dependencies were reverted:",
        "",
    ]
    # deduplicate: a coordinate shared by several lockfiles is listed once
    seen: set[str] = set()
    for entries in violations_by_file.values():
        for gav, kind in entries:
            if gav in seen:
                continue
            seen.add(gav)
            lines.append(f"- `{gav}` — {summary_messages[kind]}")
    return "\n".join(lines)
430+
431+
# restore each violating lockfile to its baseline copy to keep the file consistent
# only the keys of violations_by_file (paths relative to both directories) are used
# a lockfile that has no baseline copy is new, so it is deleted instead
def revert_lockfiles_to_baseline(
    *,
    violations_by_file: dict[str, list[tuple[str, str]]],
    baseline_dir: Path,
    current_dir: Path,
) -> None:
    for relative_path in sorted(violations_by_file):
        current_path = current_dir / relative_path
        baseline_path = baseline_dir / relative_path
        if baseline_path.exists():
            # copy bytes, not text: a text round-trip translates line endings, so a CRLF
            # baseline would come back altered and show up as a spurious change
            current_path.write_bytes(baseline_path.read_bytes())
            print(f"Reverted {relative_path} to baseline.")
        else:
            current_path.unlink(missing_ok=True)
            print(f"Removed new lockfile {relative_path} (no baseline copy to restore).")
448+
449+
450+ # look up the publish timestamp for a group:artifact:version coordinate
451+ # uses a HEAD request against the POM file to read the Last-Modified header
452+ # tries each repo URL in order, falling back to the next on 404
453+ # returns (datetime, None) on success; (None, reason) when the timestamp cannot be determined
454+ def resolve_gav_timestamp (
455+ * ,
456+ gav : str ,
457+ metadata : dict [str , Any ],
458+ repo_urls : list [str ],
459+ ) -> tuple [datetime | None , str | None ]:
460+ if gav in metadata :
461+ return parse_metadata_override (gav , metadata [gav ])
462+
463+ group_id , artifact_id , version = gav .split (":" , 2 )
464+ group_path = group_id .replace ("." , "/" )
465+ pom_path = f"{ group_path } /{ artifact_id } /{ version } /{ artifact_id } -{ version } .pom"
466+
467+ for repo_url in repo_urls :
468+ result = _head_pom_timestamp (f"{ repo_url } /{ pom_path } " )
469+ if result is not None :
470+ return result , None
471+ return None , f"{ gav } was not found in any configured repository."
472+
473+
# issue a HEAD request for a POM URL and return its Last-Modified timestamp converted to UTC
# returns None whenever no timestamp can be established:
#   - the server answers 404 or 403 (not retried)
#   - the Last-Modified header is missing or cannot be parsed
#   - any other HTTP error or a network/timeout error persists after one retry
# HTTP and network errors are never raised to the caller
def _head_pom_timestamp(pom_url: str) -> datetime | None:
    for _attempt in range(2):
        try:
            request = urllib.request.Request(pom_url, method="HEAD")
            with urllib.request.urlopen(request, timeout=30) as response:
                last_modified = response.headers.get("Last-Modified")
        except urllib.error.HTTPError as exc:
            if exc.code in (404, 403):
                return None
            continue  # other HTTP status: retry once, then fall through to None
        except OSError:
            # URLError and TimeoutError are OSError subclasses, so this covers network failures
            continue

        if not last_modified:
            return None
        try:
            published_at = parsedate_to_datetime(last_modified)
        except (TypeError, ValueError):
            # malformed header value: treat as "no timestamp" rather than aborting the run
            return None
        if published_at.tzinfo is None:
            # a "-0000" zone yields a naive datetime; HTTP dates are GMT, so pin it to UTC
            # instead of letting astimezone() interpret it as local time
            published_at = published_at.replace(tzinfo=timezone.utc)
        return published_at.astimezone(timezone.utc)
    return None
494+
495+
496+ # load optional metadata overrides from a JSON file (group:artifact:version -> timestamp)
497+ def load_metadata_overrides (path : str | None ) -> dict [str , Any ]:
498+ if not path :
499+ return {}
500+ return load_json (path , None )
501+
502+
503+ # parse a single metadata override value: a timestamp string/number, or a dict with a timestamp key
504+ def parse_metadata_override (gav : str , override : Any ) -> tuple [datetime | None , str | None ]:
505+ if isinstance (override , dict ):
506+ for key in ("timestamp" , "published_at" , "timestamp_ms" ):
507+ if key in override :
508+ try :
509+ return parse_datetime (override [key ]), None
510+ except (ValueError , TypeError ) as exc :
511+ return None , f"Metadata override for { gav } has an invalid timestamp: { exc } "
512+ return None , f"Metadata override for { gav } is missing a timestamp key (expected: timestamp, published_at, or timestamp_ms)."
513+ if isinstance (override , (int , float , str )):
514+ try :
515+ return parse_datetime (override ), None
516+ except (ValueError , TypeError ) as exc :
517+ return None , f"Metadata override for { gav } has an invalid timestamp: { exc } "
518+ return None , f"Unsupported metadata override format for { gav } ."
519+
520+
# diff baseline and current lockfile directories; return (relative_path, gav) for each added or changed coordinate
# results are ordered by lockfile path, then by coordinate; removed coordinates are not reported
def changed_lockfile_coordinates(*, baseline_dir: Path, current_dir: Path) -> list[tuple[str, str]]:
    baseline_lockfiles = collect_lockfiles(baseline_dir)
    current_lockfiles = collect_lockfiles(current_dir)

    changed: list[tuple[str, str]] = []
    # a lockfile present only in the baseline has nothing added, so walking the current
    # lockfiles alone gives the same result as walking the union of both
    for relative_path in sorted(current_lockfiles):
        added = current_lockfiles[relative_path] - baseline_lockfiles.get(relative_path, set())
        changed.extend((relative_path, gav) for gav in sorted(added))
    return changed
532+
533+
# recursively find all gradle.lockfile paths under root and parse them into sets of coordinates
# keys are the lockfile paths relative to root; a missing root gives an empty mapping
def collect_lockfiles(root: Path) -> dict[str, set[str]]:
    found: dict[str, set[str]] = {}
    if root.exists():
        for lockfile in root.rglob("gradle.lockfile"):
            found[str(lockfile.relative_to(root))] = parse_lockfile(lockfile)
    return found
542+
543+
# parse a lockfile into a set of group:artifact:version coordinates (skipping comments and empty lines)
# the coordinate is the text before the first "="; entries without exactly two colons are ignored
def parse_lockfile(path: Path) -> set[str]:
    coordinates: set[str] = set()
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if entry == "" or entry.startswith("#"):
            continue
        coordinate, _, _ = entry.partition("=")
        if coordinate.count(":") == 2:
            coordinates.add(coordinate)
    return coordinates
555+
556+
# dispatch the parsed sub-command to its handler and return the handler's exit code
# raises ValueError for a command that has no handler
def main() -> int:
    args = parse_args()
    handlers = {
        "select-gradle": select_gradle_release,
        "select-maven": select_maven_release,
        "validate-lockfiles": validate_lockfiles,
    }
    handler = handlers.get(args.command)
    if handler is None:
        raise ValueError(f"Unsupported command: {args.command}")
    return handler(args)
346566
347567
0 commit comments