@@ -578,33 +578,53 @@ def _eq_mtime(t1, t2):
578578 # dostime only stores even seconds, so be lenient
579579 return abs (t1 - t2 ) <= 1
580580
581+
581582# Given the contents of a .py[co] file, unmarshal the data
582583# and return the code object. Return None if the magic word doesn't
583- # match (we do this instead of raising an exception as we fall back
584+ # match, or if the recorded .py[co] metadata does not match the source,
585+ # (we do this instead of raising an exception as we fall back
584586# to .py if available and we don't want to mask other errors).
585- def _unmarshal_code (pathname , data , mtime ):
586- if len ( data ) < 16 :
587- raise ZipImportError ( 'bad pyc data' )
588-
589- if data [: 4 ] != _bootstrap_external . MAGIC_NUMBER :
590- _bootstrap . _verbose_message ( '{!r} has bad magic' , pathname )
591- return None # signal caller to try alternative
592-
593- flags = _unpack_uint32 ( data [ 4 : 8 ])
594- if flags != 0 :
595- # Hash-based pyc. We currently refuse to handle checked hash-based
596- # pycs. We could validate hash-based pycs against the source, but it
597- # seems likely that most people putting hash-based pycs in a zipfile
598- # will use unchecked ones.
587+ def _unmarshal_code (self , pathname , fullpath , fullname , data ):
588+ exc_details = {
589+ 'name' : fullname ,
590+ 'path' : fullpath ,
591+ }
592+
593+ try :
594+ flags = _bootstrap_external . _classify_pyc ( data , fullname , exc_details )
595+ except ImportError :
596+ return None
597+
598+ hash_based = flags & 0b1 != 0
599+ if hash_based :
600+ check_source = flags & 0b10 != 0
599601 if (_imp .check_hash_based_pycs != 'never' and
600- (flags != 0x1 or _imp .check_hash_based_pycs == 'always' )):
601- return None
602- elif mtime != 0 and not _eq_mtime (_unpack_uint32 (data [8 :12 ]), mtime ):
603- _bootstrap ._verbose_message ('{!r} has bad mtime' , pathname )
604- return None # signal caller to try alternative
602+ (check_source or _imp .check_hash_based_pycs == 'always' )):
603+ source_bytes = _get_pyc_source (self , fullpath )
604+ if source_bytes is not None :
605+ source_hash = _imp .source_hash (
606+ _bootstrap_external ._RAW_MAGIC_NUMBER ,
607+ source_bytes ,
608+ )
609+
610+ try :
611+ _bootstrap_external ._validate_hash_pyc (
612+ data , source_hash , fullname , exc_details )
613+ except ImportError :
614+ return None
615+ else :
616+ source_mtime , source_size = \
617+ _get_mtime_and_size_of_source (self , fullpath )
618+
619+ if source_mtime :
620+ # We don't use _bootstrap_external._validate_timestamp_pyc
621+ # to allow for a more lenient timestamp check.
622+ if (not _eq_mtime (_unpack_uint32 (data [8 :12 ]), source_mtime ) or
623+ _unpack_uint32 (data [12 :16 ]) != source_size ):
624+ _bootstrap ._verbose_message (
625+ f'bytecode is stale for { fullname !r} ' )
626+ return None
605627
606- # XXX the pyc's size field is ignored; timestamp collisions are probably
607- # unimportant with zip files.
608628 code = marshal .loads (data [16 :])
609629 if not isinstance (code , _code_type ):
610630 raise TypeError (f'compiled module { pathname !r} is not a code object' )
@@ -639,9 +659,9 @@ def _parse_dostime(d, t):
639659 - 1 , - 1 , - 1 ))
640660
641661# Given a path to a .pyc file in the archive, return the
642- # modification time of the matching .py file, or 0 if no source
643- # is available.
644- def _get_mtime_of_source (self , path ):
662+ # modification time of the matching .py file and its size,
663+ # or (0, 0) if no source is available.
664+ def _get_mtime_and_size_of_source (self , path ):
645665 try :
646666 # strip 'c' or 'o' from *.py[co]
647667 assert path [- 1 :] in ('c' , 'o' )
@@ -651,9 +671,27 @@ def _get_mtime_of_source(self, path):
651671 # with an embedded pyc time stamp
652672 time = toc_entry [5 ]
653673 date = toc_entry [6 ]
654- return _parse_dostime (date , time )
674+ uncompressed_size = toc_entry [3 ]
675+ return _parse_dostime (date , time ), uncompressed_size
655676 except (KeyError , IndexError , TypeError ):
656- return 0
677+ return 0 , 0
678+
679+
680+ # Given a path to a .pyc file in the archive, return the
681+ # contents of the matching .py file, or None if no source
682+ # is available.
683+ def _get_pyc_source (self , path ):
684+ # strip 'c' or 'o' from *.py[co]
685+ assert path [- 1 :] in ('c' , 'o' )
686+ path = path [:- 1 ]
687+
688+ try :
689+ toc_entry = self ._files [path ]
690+ except KeyError :
691+ return None
692+ else :
693+ return _get_data (self .archive , toc_entry )
694+
657695
658696# Get the code object associated with the module specified by
659697# 'fullname'.
@@ -670,8 +708,7 @@ def _get_module_code(self, fullname):
670708 modpath = toc_entry [0 ]
671709 data = _get_data (self .archive , toc_entry )
672710 if isbytecode :
673- mtime = _get_mtime_of_source (self , fullpath )
674- code = _unmarshal_code (modpath , data , mtime )
711+ code = _unmarshal_code (self , modpath , fullpath , fullname , data )
675712 else :
676713 code = _compile_source (modpath , data )
677714 if code is None :
0 commit comments