Skip to content

Commit 57750be

Browse files
bmwiedemann authored and ned-deily committed
bpo-30693: zip+tarfile: sort directory listing (#2263)
tarfile and zipfile now sort directory listing to generate tar and zip archives in a more reproducible way. See also https://reproducible-builds.org/docs/stable-inputs/ on that topic.
1 parent 209108b commit 57750be

File tree

7 files changed

+39
-6
lines changed

7 files changed

+39
-6
lines changed

Doc/library/tarfile.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,8 @@ be finalized; only the internally used file object will be closed. See the
451451
(directory, fifo, symbolic link, etc.). If given, *arcname* specifies an
452452
alternative name for the file in the archive. Directories are added
453453
recursively by default. This can be avoided by setting *recursive* to
454-
:const:`False`. If *filter* is given, it
454+
:const:`False`. Recursion adds entries in sorted order.
455+
If *filter* is given, it
455456
should be a function that takes a :class:`TarInfo` object argument and
456457
returns the changed :class:`TarInfo` object. If it instead returns
457458
:const:`None` the :class:`TarInfo` object will be excluded from the
@@ -460,6 +461,9 @@ be finalized; only the internally used file object will be closed. See the
460461
.. versionchanged:: 3.2
461462
Added the *filter* parameter.
462463

464+
.. versionchanged:: 3.7
465+
Recursion adds entries in sorted order.
466+
463467

464468
.. method:: TarFile.addfile(tarinfo, fileobj=None)
465469

Doc/library/zipfile.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the
491491
:file:`\*.pyc` are added at the top level. If the directory is a
492492
package directory, then all :file:`\*.pyc` are added under the package
493493
name as a file path, and if any subdirectories are package directories,
494-
all of these are added recursively.
494+
all of these are added recursively in sorted order.
495495

496496
*basename* is intended for internal use only.
497497

@@ -524,6 +524,9 @@ The :class:`PyZipFile` constructor takes the same parameters as the
524524
.. versionchanged:: 3.6.2
525525
The *pathname* parameter accepts a :term:`path-like object`.
526526

527+
.. versionchanged:: 3.7
528+
Recursion sorts directory entries.
529+
527530

528531
.. _zipinfo-objects:
529532

Lib/tarfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1943,7 +1943,7 @@ def add(self, name, arcname=None, recursive=True, *, filter=None):
19431943
elif tarinfo.isdir():
19441944
self.addfile(tarinfo)
19451945
if recursive:
1946-
for f in os.listdir(name):
1946+
for f in sorted(os.listdir(name)):
19471947
self.add(os.path.join(name, f), os.path.join(arcname, f),
19481948
recursive, filter=filter)
19491949

Lib/test/test_tarfile.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,30 @@ def test_directory_size(self):
11291129
finally:
11301130
support.rmdir(path)
11311131

1132+
# mock the following:
1133+
# os.listdir: so we know that files are in the wrong order
1134+
@unittest.mock.patch('os.listdir')
1135+
def test_ordered_recursion(self, mock_listdir):
1136+
path = os.path.join(TEMPDIR, "directory")
1137+
os.mkdir(path)
1138+
open(os.path.join(path, "1"), "a").close()
1139+
open(os.path.join(path, "2"), "a").close()
1140+
mock_listdir.return_value = ["2", "1"]
1141+
try:
1142+
tar = tarfile.open(tmpname, self.mode)
1143+
try:
1144+
tar.add(path)
1145+
paths = []
1146+
for m in tar.getmembers():
1147+
paths.append(os.path.split(m.name)[-1])
1148+
self.assertEqual(paths, ["directory", "1", "2"]);
1149+
finally:
1150+
tar.close()
1151+
finally:
1152+
support.unlink(os.path.join(path, "1"))
1153+
support.unlink(os.path.join(path, "2"))
1154+
support.rmdir(path)
1155+
11321156
def test_gettarinfo_pathlike_name(self):
11331157
with tarfile.open(tmpname, self.mode) as tar:
11341158
path = pathlib.Path(TEMPDIR) / "file"

Lib/zipfile.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1940,7 +1940,7 @@ def writepy(self, pathname, basename="", filterfunc=None):
19401940
if self.debug:
19411941
print("Adding", arcname)
19421942
self.write(fname, arcname)
1943-
dirlist = os.listdir(pathname)
1943+
dirlist = sorted(os.listdir(pathname))
19441944
dirlist.remove("__init__.py")
19451945
# Add all *.py files and package subdirectories
19461946
for filename in dirlist:
@@ -1965,7 +1965,7 @@ def writepy(self, pathname, basename="", filterfunc=None):
19651965
# This is NOT a package directory, add its files at top level
19661966
if self.debug:
19671967
print("Adding files from directory", pathname)
1968-
for filename in os.listdir(pathname):
1968+
for filename in sorted(os.listdir(pathname)):
19691969
path = os.path.join(pathname, filename)
19701970
root, ext = os.path.splitext(filename)
19711971
if ext == ".py":
@@ -2116,7 +2116,7 @@ def addToZip(zf, path, zippath):
21162116
elif os.path.isdir(path):
21172117
if zippath:
21182118
zf.write(path, zippath)
2119-
for nm in os.listdir(path):
2119+
for nm in sorted(os.listdir(path)):
21202120
addToZip(zf,
21212121
os.path.join(path, nm), os.path.join(zippath, nm))
21222122
# else: ignore
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The ZipFile class now recurses directories in a reproducible way.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The TarFile class now recurses directories in a reproducible way.

0 commit comments

Comments
 (0)