Skip to content

Commit 58ab4b5

Browse files
Issue #26293: Fixed writing ZIP files that do not start at the beginning of the
file. Offsets in the ZIP file are now relative to the start of the archive, in conformance with the specification.
1 parent 08995a3 commit 58ab4b5

File tree

3 files changed

+62
-13
lines changed

3 files changed

+62
-13
lines changed

Lib/test/test_zipfile.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,49 @@ def test_append_to_non_zip_file(self):
344344
f.seek(len(data))
345345
with zipfile.ZipFile(f, "r") as zipfp:
346346
self.assertEqual(zipfp.namelist(), [TESTFN])
347+
self.assertEqual(zipfp.read(TESTFN), self.data)
348+
with open(TESTFN2, 'rb') as f:
349+
self.assertEqual(f.read(len(data)), data)
350+
zipfiledata = f.read()
351+
with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
352+
self.assertEqual(zipfp.namelist(), [TESTFN])
353+
self.assertEqual(zipfp.read(TESTFN), self.data)
354+
355+
def test_read_concatenated_zip_file(self):
356+
with io.BytesIO() as bio:
357+
with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
358+
zipfp.write(TESTFN, TESTFN)
359+
zipfiledata = bio.getvalue()
360+
data = b'I am not a ZipFile!'*10
361+
with open(TESTFN2, 'wb') as f:
362+
f.write(data)
363+
f.write(zipfiledata)
364+
365+
with zipfile.ZipFile(TESTFN2) as zipfp:
366+
self.assertEqual(zipfp.namelist(), [TESTFN])
367+
self.assertEqual(zipfp.read(TESTFN), self.data)
368+
369+
def test_append_to_concatenated_zip_file(self):
370+
with io.BytesIO() as bio:
371+
with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
372+
zipfp.write(TESTFN, TESTFN)
373+
zipfiledata = bio.getvalue()
374+
data = b'I am not a ZipFile!'*1000000
375+
with open(TESTFN2, 'wb') as f:
376+
f.write(data)
377+
f.write(zipfiledata)
378+
379+
with zipfile.ZipFile(TESTFN2, 'a') as zipfp:
380+
self.assertEqual(zipfp.namelist(), [TESTFN])
381+
zipfp.writestr('strfile', self.data)
382+
383+
with open(TESTFN2, 'rb') as f:
384+
self.assertEqual(f.read(len(data)), data)
385+
zipfiledata = f.read()
386+
with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
387+
self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile'])
388+
self.assertEqual(zipfp.read(TESTFN), self.data)
389+
self.assertEqual(zipfp.read('strfile'), self.data)
347390

348391
def test_ignores_newline_at_end(self):
349392
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:

Lib/zipfile.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
772772
# set the modified flag so central directory gets written
773773
# even if no files are added to the archive
774774
self._didModify = True
775+
self._start_disk = self.fp.tell()
775776
elif key == 'a':
776777
try:
777778
# See if file is a zip file
@@ -785,6 +786,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
785786
# set the modified flag so central directory gets written
786787
# even if no files are added to the archive
787788
self._didModify = True
789+
self._start_disk = self.fp.tell()
788790
else:
789791
raise RuntimeError('Mode must be "r", "w" or "a"')
790792
except:
@@ -815,17 +817,18 @@ def _RealGetContents(self):
815817
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
816818
self._comment = endrec[_ECD_COMMENT] # archive comment
817819

818-
# "concat" is zero, unless zip was concatenated to another file
819-
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
820+
# self._start_disk: Position of the start of ZIP archive
821+
# It is zero, unless ZIP was concatenated to another file
822+
self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
820823
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
821824
# If Zip64 extension structures are present, account for them
822-
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
825+
self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
823826

824827
if self.debug > 2:
825-
inferred = concat + offset_cd
826-
print "given, inferred, offset", offset_cd, inferred, concat
828+
inferred = self._start_disk + offset_cd
829+
print "given, inferred, offset", offset_cd, inferred, self._start_disk
827830
# self.start_dir: Position of start of central directory
828-
self.start_dir = offset_cd + concat
831+
self.start_dir = offset_cd + self._start_disk
829832
fp.seek(self.start_dir, 0)
830833
data = fp.read(size_cd)
831834
fp = cStringIO.StringIO(data)
@@ -855,7 +858,7 @@ def _RealGetContents(self):
855858
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
856859

857860
x._decodeExtra()
858-
x.header_offset = x.header_offset + concat
861+
x.header_offset = x.header_offset + self._start_disk
859862
x.filename = x._decodeFilename()
860863
self.filelist.append(x)
861864
self.NameToInfo[x.filename] = x
@@ -1198,7 +1201,7 @@ def write(self, filename, arcname=None, compress_type=None):
11981201
raise RuntimeError('Compressed size larger than uncompressed size')
11991202
# Seek backwards and write file header (which will now include
12001203
# correct CRC and file sizes)
1201-
position = self.fp.tell() # Preserve current position in file
1204+
position = self.fp.tell() # Preserve current position in file
12021205
self.fp.seek(zinfo.header_offset, 0)
12031206
self.fp.write(zinfo.FileHeader(zip64))
12041207
self.fp.seek(position, 0)
@@ -1284,11 +1287,10 @@ def close(self):
12841287
file_size = zinfo.file_size
12851288
compress_size = zinfo.compress_size
12861289

1287-
if zinfo.header_offset > ZIP64_LIMIT:
1288-
extra.append(zinfo.header_offset)
1290+
header_offset = zinfo.header_offset - self._start_disk
1291+
if header_offset > ZIP64_LIMIT:
1292+
extra.append(header_offset)
12891293
header_offset = 0xffffffffL
1290-
else:
1291-
header_offset = zinfo.header_offset
12921294

12931295
extra_data = zinfo.extra
12941296
if extra:
@@ -1332,7 +1334,7 @@ def close(self):
13321334
# Write end-of-zip-archive record
13331335
centDirCount = len(self.filelist)
13341336
centDirSize = pos2 - pos1
1335-
centDirOffset = pos1
1337+
centDirOffset = pos1 - self._start_disk
13361338
requires_zip64 = None
13371339
if centDirCount > ZIP_FILECOUNT_LIMIT:
13381340
requires_zip64 = "Files count"

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ Core and Builtins
4949
Library
5050
-------
5151

52+
- Issue #26293: Fixed writing ZIP files that do not start at the beginning of
53+
the file. Offsets in the ZIP file are now relative to the start of the archive,
54+
in conformance with the specification.
55+
5256
- Fix possible integer overflows and crashes in the mmap module with unusual
5357
usage patterns.
5458

0 commit comments

Comments
 (0)