diff --git a/0001-3.12-gh-116042-Fix-location-for-SyntaxErrors-of-inva.patch b/0001-3.12-gh-116042-Fix-location-for-SyntaxErrors-of-inva.patch new file mode 100644 index 0000000000000000000000000000000000000000..49e07b4ca49d31281287745e944eeb6d0bb9aaea --- /dev/null +++ b/0001-3.12-gh-116042-Fix-location-for-SyntaxErrors-of-inva.patch @@ -0,0 +1,251 @@ +From 5e8a9eb13d5fb9258084138d59d02e786d253557 Mon Sep 17 00:00:00 2001 +From: Pablo Galindo Salgado +Date: Thu, 13 Feb 2025 01:42:24 +0000 +Subject: [PATCH] [3.12] gh-116042: Fix location for SyntaxErrors of invalid + escapes in the tokenizer (GH-116049) (#130065) + +(cherry picked from commit 56eda256336310a08d4beb75b998488cb359444b) +--- + Lib/test/test_cmd_line_script.py | 2 +- + Lib/test/test_string_literals.py | 39 +++++++++++--- + ...-02-13-00-28-43.gh-issue-116042.861juq.rst | 2 + + Parser/pegen_errors.c | 4 +- + Parser/string_parser.c | 53 ++++++++++++++++--- + 5 files changed, 82 insertions(+), 18 deletions(-) + create mode 100644 Misc/NEWS.d/next/Core and Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst + +diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py +index 1b588826010..7109e3d164e 100644 +--- a/Lib/test/test_cmd_line_script.py ++++ b/Lib/test/test_cmd_line_script.py +@@ -652,7 +652,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self): + self.assertEqual( + stderr.splitlines()[-3:], + [ b' foo = """\\q"""', +- b' ^^^^^^^^', ++ b' ^^', + b'SyntaxError: invalid escape sequence \'\\q\'' + ], + ) +diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py +index 371e819..dcef448 100644 +--- a/Lib/test/test_string_literals.py ++++ b/Lib/test/test_string_literals.py +@@ -118,7 +118,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") + self.assertEqual(w[0].filename, '') +- self.assertEqual(w[0].lineno, 1) ++ self.assertEqual(w[0].lineno, 2) + 
+ with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) +@@ -128,7 +128,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.filename, '') +- self.assertEqual(exc.lineno, 1) ++ self.assertEqual(exc.lineno, 2) + self.assertEqual(exc.offset, 1) + + # Check that the warning is raised ony once if there are syntax errors +@@ -155,7 +155,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(str(w[0].message), + r"invalid octal escape sequence '\407'") + self.assertEqual(w[0].filename, '') +- self.assertEqual(w[0].lineno, 1) ++ self.assertEqual(w[0].lineno, 2) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) +@@ -165,9 +165,32 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.filename, '') +- self.assertEqual(exc.lineno, 1) ++ self.assertEqual(exc.lineno, 2) + self.assertEqual(exc.offset, 1) + ++ def test_invalid_escape_locations_with_offset(self): ++ with warnings.catch_warnings(record=True) as w: ++ warnings.simplefilter('error', category=SyntaxWarning) ++ with self.assertRaises(SyntaxError) as cm: ++ eval("\"'''''''''''''''''''''invalid\ Escape\"") ++ exc = cm.exception ++ self.assertEqual(w, []) ++ self.assertEqual(exc.msg, r"invalid escape sequence '\ '") ++ self.assertEqual(exc.filename, '') ++ self.assertEqual(exc.lineno, 1) ++ self.assertEqual(exc.offset, 30) ++ ++ with warnings.catch_warnings(record=True) as w: ++ warnings.simplefilter('error', category=SyntaxWarning) ++ with self.assertRaises(SyntaxError) as cm: ++ eval("\"''Incorrect \ logic?\"") ++ exc = cm.exception ++ self.assertEqual(w, []) ++ self.assertEqual(exc.msg, r"invalid escape sequence '\ '") ++ self.assertEqual(exc.filename, '') ++ self.assertEqual(exc.lineno, 1) ++ 
self.assertEqual(exc.offset, 14) ++ + def test_eval_str_raw(self): + self.assertEqual(eval(""" r'x' """), 'x') + self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') +@@ -207,7 +230,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") + self.assertEqual(w[0].filename, '') +- self.assertEqual(w[0].lineno, 1) ++ self.assertEqual(w[0].lineno, 2) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) +@@ -217,7 +240,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.filename, '') +- self.assertEqual(exc.lineno, 1) ++ self.assertEqual(exc.lineno, 2) + + def test_eval_bytes_invalid_octal_escape(self): + for i in range(0o400, 0o1000): +@@ -231,7 +254,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(str(w[0].message), + r"invalid octal escape sequence '\407'") + self.assertEqual(w[0].filename, '') +- self.assertEqual(w[0].lineno, 1) ++ self.assertEqual(w[0].lineno, 2) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) +@@ -241,7 +264,7 @@ class TestLiterals(unittest.TestCase): + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.filename, '') +- self.assertEqual(exc.lineno, 1) ++ self.assertEqual(exc.lineno, 2) + + def test_eval_bytes_raw(self): + self.assertEqual(eval(""" br'x' """), b'x') +diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst b/Misc/NEWS.d/next/Core and Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst +new file mode 100644 +index 00000000000..098804fa92e +--- /dev/null ++++ b/Misc/NEWS.d/next/Core and Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst +@@ -0,0 +1,2 @@ ++Fix location for SyntaxErrors of invalid escapes in 
the tokenizer. Patch by ++Pablo Galindo +diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c +index 72f13498976..d1cb91d2998 100644 +--- a/Parser/pegen_errors.c ++++ b/Parser/pegen_errors.c +@@ -350,8 +350,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, + assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); + + if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { +- Py_ssize_t size = p->tok->inp - p->tok->buf; +- error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); ++ Py_ssize_t size = p->tok->inp - p->tok->line_start; ++ error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace"); + } + else if (p->tok->fp == NULL || p->tok->fp == stdin) { + error_line = get_error_line_from_tokenizer_buffers(p, lineno); +diff --git a/Parser/string_parser.c b/Parser/string_parser.c +index 164f715e153..751b56d0ee0 100644 +--- a/Parser/string_parser.c ++++ b/Parser/string_parser.c +@@ -9,7 +9,7 @@ + //// STRING HANDLING FUNCTIONS //// + + static int +-warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t) ++warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t) + { + if (p->call_invalid_rules) { + // Do not report warnings if we are in the second pass of the parser +@@ -38,8 +38,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token + else { + category = PyExc_DeprecationWarning; + } ++ ++ // Calculate the lineno and the col_offset of the invalid escape sequence ++ const char *start = buffer; ++ const char *end = first_invalid_escape; ++ int lineno = t->lineno; ++ int col_offset = t->col_offset; ++ while (start < end) { ++ if (*start == '\n') { ++ lineno++; ++ col_offset = 0; ++ } ++ else { ++ col_offset++; ++ } ++ start++; ++ } ++ ++ // Count the number of quotes in the token ++ char first_quote = 0; ++ if (lineno == t->lineno) { ++ int quote_count = 0; ++ char* tok = 
PyBytes_AsString(t->bytes); ++ for (int i = 0; i < PyBytes_Size(t->bytes); i++) { ++ if (tok[i] == '\'' || tok[i] == '\"') { ++ if (quote_count == 0) { ++ first_quote = tok[i]; ++ } ++ if (tok[i] == first_quote) { ++ quote_count++; ++ } ++ } else { ++ break; ++ } ++ } ++ ++ col_offset += quote_count; ++ } ++ + if (PyErr_WarnExplicitObject(category, msg, p->tok->filename, +- t->lineno, NULL, NULL) < 0) { ++ lineno, NULL, NULL) < 0) { + if (PyErr_ExceptionMatches(category)) { + /* Replace the Syntax/DeprecationWarning exception with a SyntaxError + to get a more accurate error report */ +@@ -50,11 +88,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token + error location, if p->known_err_token is not set. */ + p->known_err_token = t; + if (octal) { +- RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", +- first_invalid_escape); ++ RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, ++ "invalid octal escape sequence '\\%.3s'", first_invalid_escape); + } + else { +- RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); ++ RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, ++ "invalid escape sequence '\\%c'", c); + } + } + Py_DECREF(msg); +@@ -148,7 +187,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) + // HACK: later we can simply pass the line no, since we don't preserve the tokens + // when we are decoding the string but we preserve the line numbers. + if (v != NULL && first_invalid_escape != NULL && t != NULL) { +- if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) { ++ if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { + /* We have not decref u before because first_invalid_escape points + inside u. 
*/ + Py_XDECREF(u); +@@ -170,7 +209,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) + } + + if (first_invalid_escape != NULL) { +- if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) { ++ if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { + Py_DECREF(result); + return NULL; + } +-- +2.39.3 + diff --git a/0001-3.12-gh-120155-Fix-Coverity-issue-in-parse_string-GH.patch b/0001-3.12-gh-120155-Fix-Coverity-issue-in-parse_string-GH.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf7e31340fa508759022af64071a712877601397 --- /dev/null +++ b/0001-3.12-gh-120155-Fix-Coverity-issue-in-parse_string-GH.patch @@ -0,0 +1,37 @@ +From 5290e405c171dbf541b1cd8549d69d53d04a6bc0 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Tue, 25 Jun 2024 19:40:08 +0200 +Subject: [PATCH] [3.12] gh-120155: Fix Coverity issue in parse_string() + (GH-120997) (#121006) + +gh-120155: Fix Coverity issue in parse_string() (GH-120997) +(cherry picked from commit 769aea332940f03c3e5b1ad9badd6635c1ac992a) + +Co-authored-by: Victor Stinner +--- + Parser/string_parser.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/Parser/string_parser.c b/Parser/string_parser.c +index 65c320c2173..164f715e153 100644 +--- a/Parser/string_parser.c ++++ b/Parser/string_parser.c +@@ -226,9 +226,14 @@ _PyPegen_parse_string(Parser *p, Token *t) + PyErr_BadInternalCall(); + return NULL; + } ++ + /* Skip the leading quote char. */ + s++; + len = strlen(s); ++ // gh-120155: 's' contains at least the trailing quote, ++ // so the code '--len' below is safe. 
++ assert(len >= 1); ++ + if (len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "string to parse is too long"); + return NULL; +-- +2.39.3 + diff --git a/CVE-2025-4516-3.12-gh-133767-Fix-use-after-free-in-the-unicode-esc.patch b/CVE-2025-4516-3.12-gh-133767-Fix-use-after-free-in-the-unicode-esc.patch new file mode 100644 index 0000000000000000000000000000000000000000..2808f0e1eb9a9c70125727d59d835728fd4a324b --- /dev/null +++ b/CVE-2025-4516-3.12-gh-133767-Fix-use-after-free-in-the-unicode-esc.patch @@ -0,0 +1,522 @@ +From 4398b788ffc1f954a2c552da285477d42a571292 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Mon, 26 May 2025 06:33:22 +0300 +Subject: [PATCH] [3.12] gh-133767: Fix use-after-free in the unicode-escape + decoder with an error handler (GH-129648) (GH-133944) (#134337) + +If the error handler is used, a new bytes object is created to set as +the object attribute of UnicodeDecodeError, and that bytes object then +replaces the original data. A pointer to the decoded data will become invalid +after destroying that temporary bytes object. So we need another way to return +the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). + +_PyBytes_DecodeEscape() does not have such an issue, because it does not +use the error handlers registry, but it should be changed for compatibility +with _PyUnicode_DecodeUnicodeEscapeInternal(). 
+(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e) +(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d) +--- + Include/cpython/bytesobject.h | 4 ++ + Include/cpython/unicodeobject.h | 13 ++++ + Lib/test/test_codeccallbacks.py | 39 +++++++++++- + Lib/test/test_codecs.py | 52 +++++++++++++--- + ...-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 + + Objects/bytesobject.c | 54 ++++++++++------ + Objects/unicodeobject.c | 61 +++++++++++++------ + Parser/string_parser.c | 26 +++++--- + 8 files changed, 194 insertions(+), 57 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst + +diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h +index e982031c107..eef607a5760 100644 +--- a/Include/cpython/bytesobject.h ++++ b/Include/cpython/bytesobject.h +@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex( + int use_bytearray); + + /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */ ++PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, ++ const char *, ++ int *, const char **); ++// Export for binary compatibility. + PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t, + const char *, const char **); + +diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h +index f177cd9e2af..cf389286860 100644 +--- a/Include/cpython/unicodeobject.h ++++ b/Include/cpython/unicodeobject.h +@@ -684,6 +684,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful( + ); + /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape + chars. 
*/ ++PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( ++ const char *string, /* Unicode-Escape encoded string */ ++ Py_ssize_t length, /* size of string */ ++ const char *errors, /* error handling */ ++ Py_ssize_t *consumed, /* bytes consumed */ ++ int *first_invalid_escape_char, /* on return, if not -1, contain the first ++ invalid escaped char (<= 0xff) or invalid ++ octal escape (> 0xff) in string. */ ++ const char **first_invalid_escape_ptr); /* on return, if not NULL, may ++ point to the first invalid escaped ++ char in string. ++ May be NULL if errors is not NULL. */ ++// Export for binary compatibility. + PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ +diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py +index 4991330489d..d85f609d806 100644 +--- a/Lib/test/test_codeccallbacks.py ++++ b/Lib/test/test_codeccallbacks.py +@@ -1,6 +1,7 @@ + import codecs + import html.entities + import itertools ++import re + import sys + import unicodedata + import unittest +@@ -1124,7 +1125,7 @@ def test_bug828737(self): + text = 'abcghi'*n + text.translate(charmap) + +- def test_mutatingdecodehandler(self): ++ def test_mutating_decode_handler(self): + baddata = [ + ("ascii", b"\xff"), + ("utf-7", b"++"), +@@ -1159,6 +1160,42 @@ def mutating(exc): + for (encoding, data) in baddata: + self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") + ++ def test_mutating_decode_handler_unicode_escape(self): ++ decode = codecs.unicode_escape_decode ++ def mutating(exc): ++ if isinstance(exc, UnicodeDecodeError): ++ r = data.get(exc.object[:exc.end]) ++ if r is not None: ++ exc.object = r[0] + exc.object[exc.end:] ++ return ('\u0404', r[1]) ++ raise AssertionError("don't know how to handle %r" % exc) ++ ++ codecs.register_error('test.mutating2', mutating) ++ data = { ++ br'\x0': (b'\\', 0), ++ br'\x3': (b'xxx\\', 3), ++ 
br'\x5': (b'x\\', 1), ++ } ++ def check(input, expected, msg): ++ with self.assertWarns(DeprecationWarning) as cm: ++ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input))) ++ self.assertIn(msg, str(cm.warning)) ++ ++ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'") ++ check(br'\x0n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'") ++ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'") ++ ++ check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'") ++ check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'") ++ check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'") ++ ++ check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'") ++ check(br'\x5n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'") ++ check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'") ++ + # issue32583 + def test_crashing_decode_handler(self): + # better generating one more character to fill the extra space slot +diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py +index f683f069ae1..2e64a52acba 100644 +--- a/Lib/test/test_codecs.py ++++ b/Lib/test/test_codecs.py +@@ -1196,23 +1196,39 @@ def test_escape(self): + check(br"[\1010]", b"[A0]") + check(br"[\x41]", b"[A]") + check(br"[\x410]", b"[A0]") ++ ++ def test_warnings(self): ++ decode = codecs.escape_decode ++ check = coding_checker(self, decode) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtvx': +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % i): + check(b"\\" + b, b"\\" + b) +- with self.assertWarns(DeprecationWarning): ++ with 
self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % (i-32)): + check(b"\\" + b.upper(), b"\\" + b.upper()) +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\8'"): + check(br"\8", b"\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", b"\\9") +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\\xfa'") as cm: + check(b"\\\xfa", b"\\\xfa") + for i in range(0o400, 0o1000): +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid octal escape sequence '\\%o'" % i): + check(rb'\%o' % i, bytes([i & 0o377])) + ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\z'"): ++ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4)) ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid octal escape sequence '\\501'"): ++ self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6)) ++ + def test_errors(self): + decode = codecs.escape_decode + self.assertRaises(ValueError, decode, br"\x") +@@ -2479,24 +2495,40 @@ def test_escape_decode(self): + check(br"[\x410]", "[A0]") + check(br"\u20ac", "\u20ac") + check(br"\U0001d120", "\U0001d120") ++ ++ def test_decode_warnings(self): ++ decode = codecs.unicode_escape_decode ++ check = coding_checker(self, decode) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtuvx': +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % i): + check(b"\\" + b, "\\" + chr(i)) + if b.upper() not in b'UN': +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\%c'" % (i-32)): + check(b"\\" + b.upper(), "\\" + chr(i-32)) +- with self.assertWarns(DeprecationWarning): ++ with 
self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\8'"): + check(br"\8", "\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", "\\9") +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\\xfa'") as cm: + check(b"\\\xfa", "\\\xfa") + for i in range(0o400, 0o1000): +- with self.assertWarns(DeprecationWarning): ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid octal escape sequence '\\%o'" % i): + check(rb'\%o' % i, chr(i)) + ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid escape sequence '\\z'"): ++ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4)) ++ with self.assertWarnsRegex(DeprecationWarning, ++ r"invalid octal escape sequence '\\501'"): ++ self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6)) ++ + def test_decode_errors(self): + decode = codecs.unicode_escape_decode + for c, d in (b'x', 2), (b'u', 4), (b'U', 4): +diff --git a/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst +new file mode 100644 +index 00000000000..39d2f1e1a89 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst +@@ -0,0 +1,2 @@ ++Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error ++handler. +diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c +index f3a978c86c3..dae84127a7d 100644 +--- a/Objects/bytesobject.c ++++ b/Objects/bytesobject.c +@@ -1048,10 +1048,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, + } + + /* Unescape a backslash-escaped string. 
*/ +-PyObject *_PyBytes_DecodeEscape(const char *s, ++PyObject *_PyBytes_DecodeEscape2(const char *s, + Py_ssize_t len, + const char *errors, +- const char **first_invalid_escape) ++ int *first_invalid_escape_char, ++ const char **first_invalid_escape_ptr) + { + int c; + char *p; +@@ -1065,7 +1066,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + return NULL; + writer.overallocate = 1; + +- *first_invalid_escape = NULL; ++ *first_invalid_escape_char = -1; ++ *first_invalid_escape_ptr = NULL; + + end = s + len; + while (s < end) { +@@ -1103,9 +1105,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + c = (c<<3) + *s++ - '0'; + } + if (c > 0377) { +- if (*first_invalid_escape == NULL) { +- *first_invalid_escape = s-3; /* Back up 3 chars, since we've +- already incremented s. */ ++ if (*first_invalid_escape_char == -1) { ++ *first_invalid_escape_char = c; ++ /* Back up 3 chars, since we've already incremented s. */ ++ *first_invalid_escape_ptr = s - 3; + } + } + *p++ = c; +@@ -1146,9 +1149,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + break; + + default: +- if (*first_invalid_escape == NULL) { +- *first_invalid_escape = s-1; /* Back up one char, since we've +- already incremented s. */ ++ if (*first_invalid_escape_char == -1) { ++ *first_invalid_escape_char = (unsigned char)s[-1]; ++ /* Back up one char, since we've already incremented s. */ ++ *first_invalid_escape_ptr = s - 1; + } + *p++ = '\\'; + s--; +@@ -1162,23 +1166,37 @@ PyObject *_PyBytes_DecodeEscape(const char *s, + return NULL; + } + ++// Export for binary compatibility. 
++PyObject *_PyBytes_DecodeEscape(const char *s, ++ Py_ssize_t len, ++ const char *errors, ++ const char **first_invalid_escape) ++{ ++ int first_invalid_escape_char; ++ return _PyBytes_DecodeEscape2( ++ s, len, errors, ++ &first_invalid_escape_char, ++ first_invalid_escape); ++} ++ + PyObject *PyBytes_DecodeEscape(const char *s, + Py_ssize_t len, + const char *errors, + Py_ssize_t Py_UNUSED(unicode), + const char *Py_UNUSED(recode_encoding)) + { +- const char* first_invalid_escape; +- PyObject *result = _PyBytes_DecodeEscape(s, len, errors, +- &first_invalid_escape); ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors, ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + if (result == NULL) + return NULL; +- if (first_invalid_escape != NULL) { +- unsigned char c = *first_invalid_escape; +- if ('4' <= c && c <= '7') { ++ if (first_invalid_escape_char != -1) { ++ if (first_invalid_escape_char > 0xff) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, +- "invalid octal escape sequence '\\%.3s'", +- first_invalid_escape) < 0) ++ "invalid octal escape sequence '\\%o'", ++ first_invalid_escape_char) < 0) + { + Py_DECREF(result); + return NULL; +@@ -1187,7 +1205,7 @@ PyObject *PyBytes_DecodeEscape(const char *s, + else { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", +- c) < 0) ++ first_invalid_escape_char) < 0) + { + Py_DECREF(result); + return NULL; +diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c +index 05562ad9927..5accbd6d1dd 100644 +--- a/Objects/unicodeobject.c ++++ b/Objects/unicodeobject.c +@@ -6046,13 +6046,15 @@ PyUnicode_AsUTF16String(PyObject *unicode) + /* --- Unicode Escape Codec ----------------------------------------------- */ + + PyObject * +-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, ++_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, + Py_ssize_t size, + const char *errors, + 
Py_ssize_t *consumed, +- const char **first_invalid_escape) ++ int *first_invalid_escape_char, ++ const char **first_invalid_escape_ptr) + { + const char *starts = s; ++ const char *initial_starts = starts; + _PyUnicodeWriter writer; + const char *end; + PyObject *errorHandler = NULL; +@@ -6061,7 +6063,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, + PyInterpreterState *interp = _PyInterpreterState_Get(); + + // so we can remember if we've seen an invalid escape char or not +- *first_invalid_escape = NULL; ++ *first_invalid_escape_char = -1; ++ *first_invalid_escape_ptr = NULL; + + if (size == 0) { + if (consumed) { +@@ -6149,9 +6152,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, + } + } + if (ch > 0377) { +- if (*first_invalid_escape == NULL) { +- *first_invalid_escape = s-3; /* Back up 3 chars, since we've +- already incremented s. */ ++ if (*first_invalid_escape_char == -1) { ++ *first_invalid_escape_char = ch; ++ if (starts == initial_starts) { ++ /* Back up 3 chars, since we've already incremented s. */ ++ *first_invalid_escape_ptr = s - 3; ++ } + } + } + WRITE_CHAR(ch); +@@ -6252,9 +6258,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, + goto error; + + default: +- if (*first_invalid_escape == NULL) { +- *first_invalid_escape = s-1; /* Back up one char, since we've +- already incremented s. */ ++ if (*first_invalid_escape_char == -1) { ++ *first_invalid_escape_char = c; ++ if (starts == initial_starts) { ++ /* Back up one char, since we've already incremented s. */ ++ *first_invalid_escape_ptr = s - 1; ++ } + } + WRITE_ASCII_CHAR('\\'); + WRITE_CHAR(c); +@@ -6293,24 +6302,40 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, + return NULL; + } + ++// Export for binary compatibility. 
++PyObject * ++_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, ++ Py_ssize_t size, ++ const char *errors, ++ Py_ssize_t *consumed, ++ const char **first_invalid_escape) ++{ ++ int first_invalid_escape_char; ++ return _PyUnicode_DecodeUnicodeEscapeInternal2( ++ s, size, errors, consumed, ++ &first_invalid_escape_char, ++ first_invalid_escape); ++} ++ + PyObject * + _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) + { +- const char *first_invalid_escape; +- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, + consumed, +- &first_invalid_escape); ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + if (result == NULL) + return NULL; +- if (first_invalid_escape != NULL) { +- unsigned char c = *first_invalid_escape; +- if ('4' <= c && c <= '7') { ++ if (first_invalid_escape_char != -1) { ++ if (first_invalid_escape_char > 0xff) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, +- "invalid octal escape sequence '\\%.3s'", +- first_invalid_escape) < 0) ++ "invalid octal escape sequence '\\%o'", ++ first_invalid_escape_char) < 0) + { + Py_DECREF(result); + return NULL; +@@ -6319,7 +6344,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, + else { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", +- c) < 0) ++ first_invalid_escape_char) < 0) + { + Py_DECREF(result); + return NULL; +diff --git a/Parser/string_parser.c b/Parser/string_parser.c +index 8607885f2e4..c4c41b07f6b 100644 +--- a/Parser/string_parser.c ++++ b/Parser/string_parser.c +@@ -181,15 +181,18 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) + len = p - buf; + s = buf; + +- const char *first_invalid_escape; +- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, 
NULL, &first_invalid_escape); ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL, ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + + // HACK: later we can simply pass the line no, since we don't preserve the tokens + // when we are decoding the string but we preserve the line numbers. +- if (v != NULL && first_invalid_escape != NULL && t != NULL) { +- if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { +- /* We have not decref u before because first_invalid_escape points +- inside u. */ ++ if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) { ++ if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) { ++ /* We have not decref u before because first_invalid_escape_ptr ++ points inside u. */ + Py_XDECREF(u); + Py_DECREF(v); + return NULL; +@@ -202,14 +205,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) + static PyObject * + decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) + { +- const char *first_invalid_escape; +- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); ++ int first_invalid_escape_char; ++ const char *first_invalid_escape_ptr; ++ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL, ++ &first_invalid_escape_char, ++ &first_invalid_escape_ptr); + if (result == NULL) { + return NULL; + } + +- if (first_invalid_escape != NULL) { +- if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { ++ if (first_invalid_escape_ptr != NULL) { ++ if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) { + Py_DECREF(result); + return NULL; + } +-- +2.39.3 + diff --git a/python3.12.spec b/python3.12.spec index 205fc30a42777c6fdd560cb31a52c29f13b93094..507c58c2b1ba63fe2d2f40424bbab5cb5728976e 100644 --- a/python3.12.spec +++ b/python3.12.spec @@ -65,7 +65,7 @@ Summary: 
Version %{pybasever} of the Python interpreter Name: python%{pybasever} Version: %{src_version} -Release: 15%{?dist} +Release: 16%{?dist} License: Python-2.0.1 URL: https://www.python.org/ @@ -95,6 +95,11 @@ Patch0014: CVE-2025-4435.patch # https://github.com/python/cpython/pull/135464 Patch0015: CVE-2025-6069.patch +# CVE-2025-4516 pre-patch +Patch0016: 0001-3.12-gh-120155-Fix-Coverity-issue-in-parse_string-GH.patch +Patch0017: 0001-3.12-gh-116042-Fix-location-for-SyntaxErrors-of-inva.patch +Patch0018: CVE-2025-4516-3.12-gh-133767-Fix-use-after-free-in-the-unicode-esc.patch + Patch3000: 00251-change-user-install-location.patch Patch3001: 00371-revert-bpo-1596321-fix-threading-_shutdown-for-the-main-thread-gh-28549-gh-28589.patch Patch3002: 00415-cve-2023-27043-gh-102988-reject-malformed-addresses-in-email-parseaddr-111116.patch @@ -1084,6 +1089,12 @@ LD_LIBRARY_PATH=$(pwd)/normal $(pwd)/normal/python -m test.regrtest \ %endif %changelog +* Wed Jul 9 2025 Shuo Wang - 3.12.2-16 +- fix CVE-2025-4516 +- gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130065) +- gh-120155: Fix Coverity issue in parse_string() (GH-120997) (#121006) +- gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944) (#134337) + * Wed Jun 25 2025 cunshunxia - 3.12.2-15 - fix CVE-2025-4435. - Also addresses CVEs 2024-12718, 2025-4138, 2025-4330, and 2025-4517