diff --git a/0001-3.12-gh-118643-Fix-AttributeError-in-the-email-modul.patch b/0001-3.12-gh-118643-Fix-AttributeError-in-the-email-modul.patch new file mode 100644 index 0000000000000000000000000000000000000000..0c212f63031ae71d3671427df2883d06ee1bf4a3 --- /dev/null +++ b/0001-3.12-gh-118643-Fix-AttributeError-in-the-email-modul.patch @@ -0,0 +1,97 @@ +From 8c96850161da23ad2b37551d2a89c7d4716fe024 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Wed, 22 May 2024 13:08:05 +0200 +Subject: [PATCH] [3.12] gh-118643: Fix AttributeError in the email module + (GH-119099) (GH-119390) + +Fix regression introduced in gh-100884: AttributeError when re-fold a long +address list. + +Also fix more cases of incorrect encoding of the address separator in the +address list missed in gh-100884. +(cherry picked from commit 858b9e85fcdd495947c9e892ce6e3734652c48f2) + +Co-authored-by: Serhiy Storchaka +--- + Lib/email/_header_value_parser.py | 15 ++++++++++++--- + Lib/test/test_email/test__header_value_parser.py | 12 ++++++++++-- + ...2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst | 2 ++ + 3 files changed, 24 insertions(+), 5 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst + +diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py +index 6148801460c..ab3c3031ef5 100644 +--- a/Lib/email/_header_value_parser.py ++++ b/Lib/email/_header_value_parser.py +@@ -956,6 +956,7 @@ class _InvalidEwError(errors.HeaderParseError): + DOT = ValueTerminal('.', 'dot') + ListSeparator = ValueTerminal(',', 'list-separator') + ListSeparator.as_ew_allowed = False ++ListSeparator.syntactic_break = False + RouteComponentMarker = ValueTerminal('@', 'route-component-marker') + + # +@@ -2844,7 +2845,9 @@ def _refold_parse_tree(parse_tree, *, policy): + if not hasattr(part, 'encode'): + # It's not a Terminal, do each piece individually. 
+ parts = list(part) + parts +- else: ++ want_encoding = False ++ continue ++ elif part.as_ew_allowed: + # It's a terminal, wrap it as an encoded word, possibly + # combining it with previously encoded words if allowed. + if (last_ew is not None and +@@ -2858,8 +2861,14 @@ def _refold_parse_tree(parse_tree, *, policy): + # so clear it now. + leading_whitespace = '' + last_charset = charset +- want_encoding = False +- continue ++ want_encoding = False ++ continue ++ else: ++ # It's a terminal which should be kept non-encoded ++ # (e.g. a ListSeparator). ++ last_ew = None ++ want_encoding = False ++ # fall through + + if len(tstr) <= maxlen - len(lines[-1]): + lines[-1] += tstr +diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py +index 56a1e3a3de5..5413319a414 100644 +--- a/Lib/test/test_email/test__header_value_parser.py ++++ b/Lib/test/test_email/test__header_value_parser.py +@@ -3077,9 +3077,17 @@ def test_address_list_with_unicode_names_in_quotes(self): + ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') + + def test_address_list_with_list_separator_after_fold(self): +- to = '0123456789' * 8 + '@foo, ä ' ++ a = 'x' * 66 + '@example.com' ++ to = f'{a}, "Hübsch Kaktus" ' + self._test(parser.get_address_list(to)[0], +- '0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= \n') ++ f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus \n') ++ ++ a = '.' * 79 ++ to = f'"{a}" , "Hübsch Kaktus" ' ++ self._test(parser.get_address_list(to)[0], ++ f'{a}\n' ++ ' , =?utf-8?q?H=C3=BCbsch?= Kaktus ' ++ '\n') + + # XXX Need tests with comments on various sides of a unicode token, + # and with unicode tokens in the comments. 
Spaces inside the quotes +diff --git a/Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst b/Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst +new file mode 100644 +index 00000000000..e86a49af74c +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-05-16-17-31-46.gh-issue-118643.hAWH4C.rst +@@ -0,0 +1,2 @@ ++Fix an AttributeError in the :mod:`email` module when re-fold a long address ++list. Also fix more cases of incorrect encoding of the address separator in the address list. +-- +2.39.3 + diff --git a/0001-3.12-gh-92081-Fix-for-email.generator.Generator-with.patch b/0001-3.12-gh-92081-Fix-for-email.generator.Generator-with.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4b3f842681f49172131423f7bc06ae4ecd04161 --- /dev/null +++ b/0001-3.12-gh-92081-Fix-for-email.generator.Generator-with.patch @@ -0,0 +1,254 @@ +From ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Mon, 20 May 2024 22:10:49 +0200 +Subject: [PATCH] [3.12] gh-92081: Fix for email.generator.Generator with + whitespace between encoded words. (GH-92281) (#119246) + +* Fix for email.generator.Generator with whitespace between encoded words. + +email.generator.Generator currently does not handle whitespace between +encoded words correctly when the encoded words span multiple lines. The +current generator will create an encoded word for each line. If the end +of the line happens to correspond with the end real word in the +plaintext, the generator will place an unencoded space at the start of +the subsequent lines to represent the whitespace between the plaintext +words. + +A compliant decoder will strip all the whitespace from between two +encoded words which leads to missing spaces in the round-tripped +output. 
+ +The fix for this is to make sure that whitespace between two encoded +words ends up inside of one or the other of the encoded words. This +fix places the space inside of the second encoded word. + +A second problem happens with continuation lines. A continuation line that +starts with whitespace and is followed by a non-encoded word is fine because +the newline between such continuation lines is defined as condensing to +a single space character. When the continuation line starts with whitespace +followed by an encoded word, however, the RFCs specify that the word is run +together with the encoded word on the previous line. This is because normal +words are folded on syntactic breaks but encoded words are not. + +The solution to this is to add the whitespace to the start of the encoded word +on the continuation line. + +Test cases are from GH-92081 + +* Rename a variable so it's not confused with the final variable. +(cherry picked from commit a6fdb31b6714c9f3c65fefbb3fe388b2b139a75f) + +Co-authored-by: Toshio Kuratomi +--- + Lib/email/_header_value_parser.py | 48 ++++++++++++++++--- + Lib/test/test_email/test_generator.py | 35 ++++++++++++++ + Lib/test/test_email/test_headerregistry.py | 3 +- + ...3-04-26-22-24-17.gh-issue-92081.V8xMot.rst | 1 + + 4 files changed, 79 insertions(+), 8 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst + +diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py +index d1b4c7df4f4..6148801460c 100644 +--- a/Lib/email/_header_value_parser.py ++++ b/Lib/email/_header_value_parser.py +@@ -2784,11 +2784,15 @@ def _refold_parse_tree(parse_tree, *, policy): + # max_line_length 0/None means no limit, ie: infinitely long. 
+ maxlen = policy.max_line_length or sys.maxsize + encoding = 'utf-8' if policy.utf8 else 'us-ascii' +- lines = [''] +- last_ew = None ++ lines = [''] # Folded lines to be output ++ leading_whitespace = '' # When we have whitespace between two encoded ++ # words, we may need to encode the whitespace ++ # at the beginning of the second word. ++ last_ew = None # Points to the last encoded character if there's an ew on ++ # the line + last_charset = None + wrap_as_ew_blocked = 0 +- want_encoding = False ++ want_encoding = False # This is set to True if we need to encode this part + end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked') + parts = list(parse_tree) + while parts: +@@ -2812,10 +2816,12 @@ def _refold_parse_tree(parse_tree, *, policy): + # 'charset' property on the policy. + charset = 'utf-8' + want_encoding = True ++ + if part.token_type == 'mime-parameters': + # Mime parameter folding (using RFC2231) is extra special. + _fold_mime_parameters(part, lines, maxlen, encoding) + continue ++ + if want_encoding and not wrap_as_ew_blocked: + if not part.as_ew_allowed: + want_encoding = False +@@ -2847,21 +2853,38 @@ def _refold_parse_tree(parse_tree, *, policy): + last_charset == 'utf-8' and charset != 'us-ascii')): + last_ew = None + last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, +- part.ew_combine_allowed, charset) ++ part.ew_combine_allowed, charset, leading_whitespace) ++ # This whitespace has been added to the lines in _fold_as_ew() ++ # so clear it now. ++ leading_whitespace = '' + last_charset = charset + want_encoding = False + continue ++ + if len(tstr) <= maxlen - len(lines[-1]): + lines[-1] += tstr + continue ++ + # This part is too long to fit. The RFC wants us to break at + # "major syntactic breaks", so unless we don't consider this + # to be one, check if it will fit on the next line by itself. 
++ leading_whitespace = '' + if (part.syntactic_break and + len(tstr) + 1 <= maxlen): + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): ++ # We're going to fold the data onto a new line here. Due to ++ # the way encoded strings handle continuation lines, we need to ++ # be prepared to encode any whitespace if the next line turns ++ # out to start with an encoded word. + lines.append(newline + tstr) ++ ++ whitespace_accumulator = [] ++ for char in lines[-1]: ++ if char not in WSP: ++ break ++ whitespace_accumulator.append(char) ++ leading_whitespace = ''.join(whitespace_accumulator) + last_ew = None + continue + if not hasattr(part, 'encode'): +@@ -2885,9 +2908,10 @@ def _refold_parse_tree(parse_tree, *, policy): + else: + # We can't fold it onto the next line either... + lines[-1] += tstr ++ + return policy.linesep.join(lines) + policy.linesep + +-def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset): ++def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace): + """Fold string to_encode into lines as encoded word, combining if allowed. + Return the new value for last_ew, or None if ew_combine_allowed is False. + +@@ -2902,7 +2926,7 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset): + to_encode = str( + get_unstructured(lines[-1][last_ew:] + to_encode)) + lines[-1] = lines[-1][:last_ew] +- if to_encode[0] in WSP: ++ elif to_encode[0] in WSP: + # We're joining this to non-encoded text, so don't encode + # the leading blank. + leading_wsp = to_encode[0] +@@ -2910,6 +2934,7 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset): + if (len(lines[-1]) == maxlen): + lines.append(_steal_trailing_WSP_if_exists(lines)) + lines[-1] += leading_wsp ++ + trailing_wsp = '' + if to_encode[-1] in WSP: + # Likewise for the trailing space. 
+@@ -2929,11 +2954,20 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset): + + while to_encode: + remaining_space = maxlen - len(lines[-1]) +- text_space = remaining_space - chrome_len ++ text_space = remaining_space - chrome_len - len(leading_whitespace) + if text_space <= 0: + lines.append(' ') + continue + ++ # If we are at the start of a continuation line, prepend whitespace ++ # (we only want to do this when the line starts with an encoded word ++ # but if we're folding in this helper function, then we know that we ++ # are going to be writing out an encoded word.) ++ if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace: ++ encoded_word = _ew.encode(leading_whitespace, charset=encode_as) ++ lines[-1] += encoded_word ++ leading_whitespace = '' ++ + to_encode_word = to_encode[:text_space] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space +diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py +index 3ebcb684d00..bfff1051262 100644 +--- a/Lib/test/test_email/test_generator.py ++++ b/Lib/test/test_email/test_generator.py +@@ -281,6 +281,41 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase): + ioclass = io.BytesIO + typ = lambda self, x: x.encode('ascii') + ++ def test_defaults_handle_spaces_between_encoded_words_when_folded(self): ++ source = ("Уведомление о принятии в работу обращения для" ++ " подключения услуги") ++ expected = ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n' ++ ' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n' ++ ' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii') ++ msg = EmailMessage() ++ msg['Subject'] = source ++ s = io.BytesIO() ++ g = BytesGenerator(s) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), expected) ++ ++ def test_defaults_handle_spaces_at_start_of_subject(self): ++ source = " 
Уведомление" ++ expected = b"Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtQ==?=\n\n" ++ msg = EmailMessage() ++ msg['Subject'] = source ++ s = io.BytesIO() ++ g = BytesGenerator(s) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), expected) ++ ++ def test_defaults_handle_spaces_at_start_of_continuation_line(self): ++ source = " ф ффффффффффффффффффф ф ф" ++ expected = (b"Subject: " ++ b"=?utf-8?b?0YQg0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YQ=?=\n" ++ b" =?utf-8?b?INGEINGE?=\n\n") ++ msg = EmailMessage() ++ msg['Subject'] = source ++ s = io.BytesIO() ++ g = BytesGenerator(s) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), expected) ++ + def test_cte_type_7bit_handles_unknown_8bit(self): + source = ("Subject: Maintenant je vous présente mon " + "collègue\n\n").encode('utf-8') +diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py +index bb7ca8dfd8c..5a608a033c7 100644 +--- a/Lib/test/test_email/test_headerregistry.py ++++ b/Lib/test/test_email/test_headerregistry.py +@@ -7,6 +7,7 @@ + from test.test_email import TestEmailBase, parameterize + from email import headerregistry + from email.headerregistry import Address, Group ++from email.header import decode_header + from test.support import ALWAYS_EQ + + +@@ -1648,7 +1649,7 @@ def test_address_display_names(self): + 'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. ' + 'Suspendisse pôtenti. Aliquam nibh. 
Suspendisse pôtenti.', + '=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c' +- '=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse' ++ '=C3=B4nsectetuer?=\n =?utf-8?q?_adipiscing=2E_Suspendisse' + '_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8' + '?q?p=C3=B4tenti=2E?=', + ), +diff --git a/Misc/NEWS.d/next/Library/2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst b/Misc/NEWS.d/next/Library/2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst +new file mode 100644 +index 00000000000..0302e957b88 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst +@@ -0,0 +1 @@ ++Fix missing spaces in email headers when the spaces are mixed with encoded 8-bit characters. +-- +2.39.3 + diff --git a/CVE-2025-1795-3.12-gh-100884-email-_header_value_parser-don-t-enco.patch b/CVE-2025-1795-3.12-gh-100884-email-_header_value_parser-don-t-enco.patch new file mode 100644 index 0000000000000000000000000000000000000000..e1f58009082e50e9dfa92c51d91033e0264ad734 --- /dev/null +++ b/CVE-2025-1795-3.12-gh-100884-email-_header_value_parser-don-t-enco.patch @@ -0,0 +1,70 @@ +From 9148b77e0af91cdacaa7fe3dfac09635c3fe9a74 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Sat, 17 Feb 2024 14:00:39 +0100 +Subject: [PATCH] [3.12] gh-100884: email/_header_value_parser: don't encode + list separators (GH-100885) (GH-115592) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +ListSeparator should not be encoded. This could happen when a long line +pushes its separator to the next line, which would have been encoded. 
+(cherry picked from commit 09fab93c3d857496c0bd162797fab816c311ee48) + +Co-authored-by: Thomas Weißschuh +--- + Lib/email/_header_value_parser.py | 3 ++- + Lib/test/test_email/test__header_value_parser.py | 5 +++++ + .../Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst | 2 ++ + 3 files changed, 9 insertions(+), 1 deletion(-) + create mode 100644 Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst + +diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py +index 5b653f66c18..e4a342d446f 100644 +--- a/Lib/email/_header_value_parser.py ++++ b/Lib/email/_header_value_parser.py +@@ -949,6 +949,7 @@ class _InvalidEwError(errors.HeaderParseError): + # up other parse trees. Maybe should have tests for that, too. + DOT = ValueTerminal('.', 'dot') + ListSeparator = ValueTerminal(',', 'list-separator') ++ListSeparator.as_ew_allowed = False + RouteComponentMarker = ValueTerminal('@', 'route-component-marker') + + # +@@ -2022,7 +2023,7 @@ def get_address_list(value): + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value: # Must be a , at this point. 
+- address_list.append(ValueTerminal(',', 'list-separator')) ++ address_list.append(ListSeparator) + value = value[1:] + return address_list, value + +diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py +index bdb0e55f210..f7e80749c45 100644 +--- a/Lib/test/test_email/test__header_value_parser.py ++++ b/Lib/test/test_email/test__header_value_parser.py +@@ -2985,6 +2985,11 @@ def test_address_list_with_unicode_names_in_quotes(self): + '=?utf-8?q?H=C3=BCbsch?= Kaktus ,\n' + ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') + ++ def test_address_list_with_list_separator_after_fold(self): ++ to = '0123456789' * 8 + '@foo, ä ' ++ self._test(parser.get_address_list(to)[0], ++ '0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= \n') ++ + # XXX Need tests with comments on various sides of a unicode token, + # and with unicode tokens in the comments. Spaces inside the quotes + # currently don't do the right thing. +diff --git a/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst +new file mode 100644 +index 00000000000..2a388178810 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst +@@ -0,0 +1,2 @@ ++email: fix misfolding of comma in address-lists over multiple lines in ++combination with unicode encoding. 
+-- +2.39.3 + diff --git a/python3.12.spec b/python3.12.spec index 5c1b9f4df27369b582262d79e3e12292cc1f9365..b5b0b83197c174a07037a3b7c4ae6dca47bf5bb3 100644 --- a/python3.12.spec +++ b/python3.12.spec @@ -65,7 +65,7 @@ Summary: Version %{pybasever} of the Python interpreter Name: python%{pybasever} Version: %{src_version} -Release: 12%{?dist} +Release: 13%{?dist} License: Python-2.0.1 URL: https://www.python.org/ @@ -86,6 +86,10 @@ Patch0006: CVE-2024-6923.patch Patch0007: CVE-2024-9287-3.12-gh-124651-Quote-template-strings-in-venv-activa.patch Patch0008: CVE-2024-12254-3.12-gh-127655-Ensure-_SelectorSocketTransport.write.patch Patch0009: CVE-2025-0938.patch +Patch0010: CVE-2025-1795-3.12-gh-100884-email-_header_value_parser-don-t-enco.patch +# Fix regression introduced by the CVE-2025-1795 fix; Patch0012 is a prerequisite for Patch0013 +Patch0012: 0001-3.12-gh-92081-Fix-for-email.generator.Generator-with.patch +Patch0013: 0001-3.12-gh-118643-Fix-AttributeError-in-the-email-modul.patch Patch3000: 00251-change-user-install-location.patch Patch3001: 00371-revert-bpo-1596321-fix-threading-_shutdown-for-the-main-thread-gh-28549-gh-28589.patch @@ -1076,6 +1080,12 @@ LD_LIBRARY_PATH=$(pwd)/normal $(pwd)/normal/python -m test.regrtest \ %endif %changelog +* Mon Mar 10 2025 Shuo Wang - 3.12.2-13 +- fix CVE-2025-1795 +- email/_header_value_parser: don't encode list separators (GH-100885) (GH-115592) +- Fix for email.generator.Generator with whitespace between encoded words. (GH-92281) (#119246) +- Fix regression introduced in gh-100884: AttributeError when re-fold a long address list. + * Tue Feb 25 2025 cunshunxia - 3.12.2-12 - fix CVE-2025-0938.