Skip to content

Commit b01e594

Browse files
[3.14] gh-146581: Fix vulnerability in shutil.unpack_archive() for ZIP files on Windows (GH-146591) (GH-149064)
Use ZipFile.extractall() to sanitize file names and extract files. Files with invalid names (e.g. absolute paths) are now skipped. Files containing ".." in the name are no longer skipped. (cherry picked from commit fc829e8) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 0cd8123 commit b01e594

4 files changed

Lines changed: 89 additions & 28 deletions

File tree

Lib/shutil.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,27 +1314,9 @@ def _unpack_zipfile(filename, extract_dir):
13141314
if not zipfile.is_zipfile(filename):
13151315
raise ReadError("%s is not a zip file" % filename)
13161316

1317-
zip = zipfile.ZipFile(filename)
1318-
try:
1319-
for info in zip.infolist():
1320-
name = info.filename
1321-
1322-
# don't extract absolute paths or ones with .. in them
1323-
if name.startswith('/') or '..' in name:
1324-
continue
1325-
1326-
targetpath = os.path.join(extract_dir, *name.split('/'))
1327-
if not targetpath:
1328-
continue
1329-
1330-
_ensure_directory(targetpath)
1331-
if not name.endswith('/'):
1332-
# file
1333-
with zip.open(name, 'r') as source, \
1334-
open(targetpath, 'wb') as target:
1335-
copyfileobj(source, target)
1336-
finally:
1337-
zip.close()
1317+
with zipfile.ZipFile(filename) as zip:
1318+
zip._ignore_invalid_names = True
1319+
zip.extractall(extract_dir)
13381320

13391321
def _unpack_tarfile(filename, extract_dir, *, filter=None):
13401322
"""Unpack tar/tar.gz/tar.bz2/tar.xz/tar.zst `filename` to `extract_dir`

Lib/test/test_shutil.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,8 +2110,6 @@ def test_make_zipfile_rootdir_nodir(self):
21102110
def check_unpack_archive(self, format, **kwargs):
21112111
self.check_unpack_archive_with_converter(
21122112
format, lambda path: path, **kwargs)
2113-
self.check_unpack_archive_with_converter(
2114-
format, FakePath, **kwargs)
21152113
self.check_unpack_archive_with_converter(format, FakePath, **kwargs)
21162114

21172115
def check_unpack_archive_with_converter(self, format, converter, **kwargs):
@@ -2168,6 +2166,71 @@ def test_unpack_archive_zip(self):
21682166
with self.assertRaises(TypeError):
21692167
self.check_unpack_archive('zip', filter='data')
21702168

2169+
def test_unpack_archive_zip_badpaths(self):
2170+
srcdir = self.mkdtemp()
2171+
zipname = os.path.join(srcdir, 'test.zip')
2172+
abspath = os.path.join(srcdir, 'abspath')
2173+
with zipfile.ZipFile(zipname, 'w') as zf:
2174+
zf.writestr(abspath, 'badfile')
2175+
zf.writestr(os.sep + abspath, 'badfile')
2176+
zf.writestr('/abspath', 'badfile')
2177+
zf.writestr('C:/abspath', 'badfile')
2178+
zf.writestr('D:\\abspath', 'badfile')
2179+
zf.writestr('E:abspath', 'badfile')
2180+
zf.writestr('F:/G:/abspath', 'badfile')
2181+
zf.writestr('//server/share/abspath', 'badfile')
2182+
zf.writestr('\\\\server2\\share\\abspath', 'badfile')
2183+
zf.writestr('../relpath', 'badfile')
2184+
zf.writestr(os.pardir + os.sep + 'relpath2', 'badfile')
2185+
zf.writestr('good/file', 'goodfile')
2186+
zf.writestr('good..file', 'goodfile')
2187+
2188+
dstdir = os.path.join(self.mkdtemp(), 'dst')
2189+
unpack_archive(zipname, dstdir)
2190+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good', 'file')))
2191+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good..file')))
2192+
self.assertFalse(os.path.exists(abspath))
2193+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'abspath')))
2194+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'G_')))
2195+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'server')))
2196+
if os.name != 'nt':
2197+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'C:', 'abspath')))
2198+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'D:\\abspath')))
2199+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'E:abspath')))
2200+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'F:', 'G:', 'abspath')))
2201+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '\\\\server2\\share\\abspath')))
2202+
if os.pardir == '..':
2203+
self.assertFalse(os.path.exists(os.path.join(dstdir, '..', 'relpath')))
2204+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath')))
2205+
else:
2206+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '..', 'relpath')))
2207+
self.assertFalse(os.path.exists(os.path.join(dstdir, os.pardir, 'relpath2')))
2208+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath2')))
2209+
2210+
dstdir2 = os.path.join(self.mkdtemp(), 'dst')
2211+
os.mkdir(dstdir2)
2212+
with os_helper.change_cwd(dstdir2):
2213+
unpack_archive(zipname, '')
2214+
self.assertTrue(os.path.isfile(os.path.join('good', 'file')))
2215+
self.assertTrue(os.path.isfile('good..file'))
2216+
self.assertFalse(os.path.exists(abspath))
2217+
self.assertFalse(os.path.exists('abspath'))
2218+
self.assertFalse(os.path.exists('C_'))
2219+
self.assertFalse(os.path.exists('server'))
2220+
if os.name != 'nt':
2221+
self.assertTrue(os.path.isfile(os.path.join('C:', 'abspath')))
2222+
self.assertTrue(os.path.isfile('D:\\abspath'))
2223+
self.assertTrue(os.path.isfile('E:abspath'))
2224+
self.assertTrue(os.path.isfile(os.path.join('F:', 'G:', 'abspath')))
2225+
self.assertTrue(os.path.isfile('\\\\server2\\share\\abspath'))
2226+
if os.pardir == '..':
2227+
self.assertFalse(os.path.exists(os.path.join('..', 'relpath')))
2228+
self.assertFalse(os.path.exists('relpath'))
2229+
else:
2230+
self.assertTrue(os.path.isfile(os.path.join('..', 'relpath')))
2231+
self.assertFalse(os.path.exists(os.path.join(os.pardir, 'relpath2')))
2232+
self.assertFalse(os.path.exists('relpath2'))
2233+
21712234
def test_unpack_registry(self):
21722235

21732236
formats = get_unpack_formats()

Lib/zipfile/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,7 @@ class ZipFile:
14101410

14111411
fp = None # Set here since __del__ checks it
14121412
_windows_illegal_name_trans_table = None
1413+
_ignore_invalid_names = False
14131414

14141415
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
14151416
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
@@ -1890,21 +1891,31 @@ def _extract_member(self, member, targetpath, pwd):
18901891

18911892
# build the destination pathname, replacing
18921893
# forward slashes to platform specific separators.
1893-
arcname = member.filename.replace('/', os.path.sep)
1894-
1895-
if os.path.altsep:
1894+
arcname = member.filename
1895+
if os.path.sep != '/':
1896+
arcname = arcname.replace('/', os.path.sep)
1897+
if os.path.altsep and os.path.altsep != '/':
18961898
arcname = arcname.replace(os.path.altsep, os.path.sep)
18971899
# interpret absolute pathname as relative, remove drive letter or
18981900
# UNC path, redundant separators, "." and ".." components.
1899-
arcname = os.path.splitdrive(arcname)[1]
1901+
drive, root, arcname = os.path.splitroot(arcname)
1902+
if self._ignore_invalid_names and (drive or root):
1903+
return None
1904+
if self._ignore_invalid_names and os.path.pardir in arcname.split(os.path.sep):
1905+
return None
19001906
invalid_path_parts = ('', os.path.curdir, os.path.pardir)
19011907
arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
19021908
if x not in invalid_path_parts)
19031909
if os.path.sep == '\\':
19041910
# filter illegal characters on Windows
1905-
arcname = self._sanitize_windows_name(arcname, os.path.sep)
1911+
arcname2 = self._sanitize_windows_name(arcname, os.path.sep)
1912+
if self._ignore_invalid_names and arcname2 != arcname:
1913+
return None
1914+
arcname = arcname2
19061915

19071916
if not arcname and not member.is_dir():
1917+
if self._ignore_invalid_names:
1918+
return None
19081919
raise ValueError("Empty filename.")
19091920

19101921
targetpath = os.path.join(targetpath, arcname)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix vulnerability in :func:`shutil.unpack_archive` for ZIP files on Windows
2+
which allowed to write files outside of the destination tree if the patch in
3+
the archive contains a Windows drive prefix. Now such invalid paths will be
4+
skipped. Files containing ".." in the name (like "foo..bar") are no longer
5+
skipped.

0 commit comments

Comments
 (0)