Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/14483.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed ``bin_xml_escape`` in junitxml incorrectly escaping supplementary plane characters (U+10000 and above, including emoji). The ``illegal_xml_re`` regex wrote that range with ``\u`` escapes, which consume only four hex digits, instead of eight-digit ``\U`` escapes.
4 changes: 1 addition & 3 deletions src/_pytest/junitxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ def repl(matchobj: re.Match[str]) -> str:
# The spec range of valid chars is:
# Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
# For an unknown(?) reason, we disallow #x7F (DEL) as well.
illegal_xml_re = (
"[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\u10000-\u10ffff]"
)
illegal_xml_re = "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]"
return re.sub(illegal_xml_re, repl, str(arg))


Expand Down
10 changes: 8 additions & 2 deletions testing/test_junitxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,8 +1122,7 @@ def test_invalid_xml_escape() -> None:
0xFFFE,
0x0FFFF,
) # , 0x110000)
valid = (0x9, 0xA, 0x20)
# 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
valid = (0x9, 0xA, 0x20, 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)

for i in invalid:
got = bin_xml_escape(chr(i))
Expand All @@ -1136,6 +1135,13 @@ def test_invalid_xml_escape() -> None:
assert chr(i) == bin_xml_escape(chr(i))


def test_bin_xml_escape_supplementary_plane() -> None:
assert bin_xml_escape(chr(0x1F600)) == chr(0x1F600)
assert bin_xml_escape("test_😀") == "test_😀"
assert bin_xml_escape("test_𠀀") == "test_𠀀"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kinda whitespace is this

We may need to rename the function as we ought to avoid leaving likeness as is

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I'll fold the supplementary plane checks into the existing test and drop the separate function. Pushing a fix shortly.

assert bin_xml_escape("test_𝄞") == "test_𝄞"


def test_logxml_path_expansion(tmp_path: Path, monkeypatch: MonkeyPatch) -> None:
home_tilde = Path(os.path.expanduser("~")).joinpath("test.xml")
xml_tilde = LogXML(Path("~", "test.xml"), None)
Expand Down
Loading