Skip to content

Commit ab5ef98

Browse files
[3.13] gh-146581: Fix vulnerability in shutil.unpack_archive() for ZIP files on Windows (GH-146591) (GH-149065)
Use ZipFile.extractall() to sanitize file names and extract files. Files with invalid names (e.g. absolute paths) are now skipped. Files containing ".." in the name are no longer skipped. (cherry picked from commit fc829e8) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent a724c9f commit ab5ef98

4 files changed

Lines changed: 89 additions & 28 deletions

File tree

Lib/shutil.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,27 +1246,9 @@ def _unpack_zipfile(filename, extract_dir):
12461246
if not zipfile.is_zipfile(filename):
12471247
raise ReadError("%s is not a zip file" % filename)
12481248

1249-
zip = zipfile.ZipFile(filename)
1250-
try:
1251-
for info in zip.infolist():
1252-
name = info.filename
1253-
1254-
# don't extract absolute paths or ones with .. in them
1255-
if name.startswith('/') or '..' in name:
1256-
continue
1257-
1258-
targetpath = os.path.join(extract_dir, *name.split('/'))
1259-
if not targetpath:
1260-
continue
1261-
1262-
_ensure_directory(targetpath)
1263-
if not name.endswith('/'):
1264-
# file
1265-
with zip.open(name, 'r') as source, \
1266-
open(targetpath, 'wb') as target:
1267-
copyfileobj(source, target)
1268-
finally:
1269-
zip.close()
1249+
with zipfile.ZipFile(filename) as zip:
1250+
zip._ignore_invalid_names = True
1251+
zip.extractall(extract_dir)
12701252

12711253
def _unpack_tarfile(filename, extract_dir, *, filter=None):
12721254
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`

Lib/test/test_shutil.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,8 +2114,6 @@ def test_make_zipfile_rootdir_nodir(self):
21142114
def check_unpack_archive(self, format, **kwargs):
21152115
self.check_unpack_archive_with_converter(
21162116
format, lambda path: path, **kwargs)
2117-
self.check_unpack_archive_with_converter(
2118-
format, FakePath, **kwargs)
21192117
self.check_unpack_archive_with_converter(format, FakePath, **kwargs)
21202118

21212119
def check_unpack_archive_with_converter(self, format, converter, **kwargs):
@@ -2171,6 +2169,71 @@ def test_unpack_archive_zip(self):
21712169
with self.assertRaises(TypeError):
21722170
self.check_unpack_archive('zip', filter='data')
21732171

2172+
def test_unpack_archive_zip_badpaths(self):
2173+
srcdir = self.mkdtemp()
2174+
zipname = os.path.join(srcdir, 'test.zip')
2175+
abspath = os.path.join(srcdir, 'abspath')
2176+
with zipfile.ZipFile(zipname, 'w') as zf:
2177+
zf.writestr(abspath, 'badfile')
2178+
zf.writestr(os.sep + abspath, 'badfile')
2179+
zf.writestr('/abspath', 'badfile')
2180+
zf.writestr('C:/abspath', 'badfile')
2181+
zf.writestr('D:\\abspath', 'badfile')
2182+
zf.writestr('E:abspath', 'badfile')
2183+
zf.writestr('F:/G:/abspath', 'badfile')
2184+
zf.writestr('//server/share/abspath', 'badfile')
2185+
zf.writestr('\\\\server2\\share\\abspath', 'badfile')
2186+
zf.writestr('../relpath', 'badfile')
2187+
zf.writestr(os.pardir + os.sep + 'relpath2', 'badfile')
2188+
zf.writestr('good/file', 'goodfile')
2189+
zf.writestr('good..file', 'goodfile')
2190+
2191+
dstdir = os.path.join(self.mkdtemp(), 'dst')
2192+
unpack_archive(zipname, dstdir)
2193+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good', 'file')))
2194+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good..file')))
2195+
self.assertFalse(os.path.exists(abspath))
2196+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'abspath')))
2197+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'G_')))
2198+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'server')))
2199+
if os.name != 'nt':
2200+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'C:', 'abspath')))
2201+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'D:\\abspath')))
2202+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'E:abspath')))
2203+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'F:', 'G:', 'abspath')))
2204+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '\\\\server2\\share\\abspath')))
2205+
if os.pardir == '..':
2206+
self.assertFalse(os.path.exists(os.path.join(dstdir, '..', 'relpath')))
2207+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath')))
2208+
else:
2209+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '..', 'relpath')))
2210+
self.assertFalse(os.path.exists(os.path.join(dstdir, os.pardir, 'relpath2')))
2211+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath2')))
2212+
2213+
dstdir2 = os.path.join(self.mkdtemp(), 'dst')
2214+
os.mkdir(dstdir2)
2215+
with os_helper.change_cwd(dstdir2):
2216+
unpack_archive(zipname, '')
2217+
self.assertTrue(os.path.isfile(os.path.join('good', 'file')))
2218+
self.assertTrue(os.path.isfile('good..file'))
2219+
self.assertFalse(os.path.exists(abspath))
2220+
self.assertFalse(os.path.exists('abspath'))
2221+
self.assertFalse(os.path.exists('C_'))
2222+
self.assertFalse(os.path.exists('server'))
2223+
if os.name != 'nt':
2224+
self.assertTrue(os.path.isfile(os.path.join('C:', 'abspath')))
2225+
self.assertTrue(os.path.isfile('D:\\abspath'))
2226+
self.assertTrue(os.path.isfile('E:abspath'))
2227+
self.assertTrue(os.path.isfile(os.path.join('F:', 'G:', 'abspath')))
2228+
self.assertTrue(os.path.isfile('\\\\server2\\share\\abspath'))
2229+
if os.pardir == '..':
2230+
self.assertFalse(os.path.exists(os.path.join('..', 'relpath')))
2231+
self.assertFalse(os.path.exists('relpath'))
2232+
else:
2233+
self.assertTrue(os.path.isfile(os.path.join('..', 'relpath')))
2234+
self.assertFalse(os.path.exists(os.path.join(os.pardir, 'relpath2')))
2235+
self.assertFalse(os.path.exists('relpath2'))
2236+
21742237
def test_unpack_registry(self):
21752238

21762239
formats = get_unpack_formats()

Lib/zipfile/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,6 +1340,7 @@ class ZipFile:
13401340

13411341
fp = None # Set here since __del__ checks it
13421342
_windows_illegal_name_trans_table = None
1343+
_ignore_invalid_names = False
13431344

13441345
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
13451346
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
@@ -1824,21 +1825,31 @@ def _extract_member(self, member, targetpath, pwd):
18241825

18251826
# build the destination pathname, replacing
18261827
# forward slashes to platform specific separators.
1827-
arcname = member.filename.replace('/', os.path.sep)
1828-
1829-
if os.path.altsep:
1828+
arcname = member.filename
1829+
if os.path.sep != '/':
1830+
arcname = arcname.replace('/', os.path.sep)
1831+
if os.path.altsep and os.path.altsep != '/':
18301832
arcname = arcname.replace(os.path.altsep, os.path.sep)
18311833
# interpret absolute pathname as relative, remove drive letter or
18321834
# UNC path, redundant separators, "." and ".." components.
1833-
arcname = os.path.splitdrive(arcname)[1]
1835+
drive, root, arcname = os.path.splitroot(arcname)
1836+
if self._ignore_invalid_names and (drive or root):
1837+
return None
1838+
if self._ignore_invalid_names and os.path.pardir in arcname.split(os.path.sep):
1839+
return None
18341840
invalid_path_parts = ('', os.path.curdir, os.path.pardir)
18351841
arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
18361842
if x not in invalid_path_parts)
18371843
if os.path.sep == '\\':
18381844
# filter illegal characters on Windows
1839-
arcname = self._sanitize_windows_name(arcname, os.path.sep)
1845+
arcname2 = self._sanitize_windows_name(arcname, os.path.sep)
1846+
if self._ignore_invalid_names and arcname2 != arcname:
1847+
return None
1848+
arcname = arcname2
18401849

18411850
if not arcname and not member.is_dir():
1851+
if self._ignore_invalid_names:
1852+
return None
18421853
raise ValueError("Empty filename.")
18431854

18441855
targetpath = os.path.join(targetpath, arcname)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix vulnerability in :func:`shutil.unpack_archive` for ZIP files on Windows
2+
which allowed to write files outside of the destination tree if the patch in
3+
the archive contains a Windows drive prefix. Now such invalid paths will be
4+
skipped. Files containing ".." in the name (like "foo..bar") are no longer
5+
skipped.

0 commit comments

Comments
 (0)