Skip to content
Open
9 changes: 9 additions & 0 deletions Doc/library/email.errors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ The following exception classes are defined in the :mod:`!email.errors` module:
headers.


.. exception:: InvalidMailboxError()

Raised when serializing a message with an address header that contains
a mailbox incompatible with the policy in use.
(See :attr:`email.policy.EmailPolicy.utf8`.)

.. versionadded:: 3.15


.. exception:: MessageDefect()

This is the base class for all defects found when parsing email messages.
Expand Down
15 changes: 13 additions & 2 deletions Doc/library/email.policy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,22 @@ added matters. To illustrate::
.. attribute:: utf8

If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in
headers by encoding them as "encoded words". If ``True``, follow
:rfc:`6532` and use ``utf-8`` encoding for headers. Messages
headers by encoding them as :rfc:`2047` "encoded words". If ``True``,
follow :rfc:`6532` and use ``utf-8`` encoding for headers. Messages
formatted in this way may be passed to SMTP servers that support
the ``SMTPUTF8`` extension (:rfc:`6531`).

When ``False``, the generator will raise an
:exc:`~email.errors.InvalidMailboxError` if any address header includes
a mailbox ("addr-spec") with non-ASCII characters. To use a mailbox with
an internationalized domain name, first encode the domain using the
third-party :pypi:`idna` or :pypi:`uts46` module or with
:mod:`encodings.idna`. It is not possible to use a non-ASCII username
("local-part") in a mailbox when ``utf8=False``.

.. versionchanged:: 3.15
Raises :exc:`~email.errors.InvalidMailboxError`. (Earlier versions
incorrectly applied :rfc:`2047` to non-ASCII addr-specs.)

.. attribute:: refold_source

Expand Down
11 changes: 11 additions & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2885,6 +2885,17 @@ def _refold_parse_tree(parse_tree, *, policy):
last_word_is_ew = False
continue

if want_encoding and part.token_type == 'addr-spec':
# RFC2047 forbids encoded-word in any part of an addr-spec.
if charset == 'unknown-8bit':
# Non-ASCII addr-spec came from parsed message; leave unchanged.
want_encoding = False
else:
raise errors.InvalidMailboxError(
f"Non-ASCII mailbox '{part}' is invalid"
" under current policy setting (utf8=False)"
)

if want_encoding and not wrap_as_ew_blocked:
if not part.as_ew_allowed:
want_encoding = False
Expand Down
4 changes: 4 additions & 0 deletions Lib/email/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ class HeaderWriteError(MessageError):
"""Error while writing headers."""


class InvalidMailboxError(MessageError, ValueError):
    """A mailbox was not compatible with the policy in use.

    Raised while serializing a message when an address header contains
    a mailbox that the active policy cannot represent, e.g. a non-ASCII
    addr-spec under a policy with utf8=False.
    """


# These are parsing defects which the parser was able to work around.
class MessageDefect(ValueError):
"""Base class for a message defect."""
Expand Down
59 changes: 57 additions & 2 deletions Lib/test/test_email/test_generator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import re
import textwrap
import unittest
import random
Expand Down Expand Up @@ -295,6 +296,29 @@ def test_keep_long_encoded_newlines(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), self.typ(expected))

def test_non_ascii_addr_spec_raises(self):
    # An addr-spec may never contain an RFC 2047 encoded-word, so
    # generating a non-ASCII mailbox with utf8=False has to fail.
    # (test_non_ascii_addr_spec_preserved covers the parsed-message case.)
    ascii_only_policy = self.policy.clone(utf8=False)
    generator = self.genclass(self.ioclass(), policy=ascii_only_policy)
    for address in (
        'wők@example.com',
        'wok@exàmple.com',
        'wők@exàmple.com',
        '"Name, for display" <wők@example.com>',
        'Näyttönimi <wők@example.com>',
    ):
        with self.subTest(address=address):
            message = EmailMessage()
            message['To'] = address
            addr_spec = message['To'].addresses[0].addr_spec
            # Lookaheads keep the check independent of word order: the
            # message must mention "non-ascii", the offending addr-spec,
            # and the policy's utf8 setting.
            pattern = (
                fr"(?i)(?=.*non-ascii)(?=.*{re.escape(addr_spec)})(?=.*policy.*utf8)"
            )
            with self.assertRaisesRegex(
                email.errors.InvalidMailboxError, pattern
            ):
                generator.flatten(message)
def _test_boundary_detection(self, linesep):
# Generate a boundary token in the same way as _make_boundary
token = random.randrange(sys.maxsize)
Expand Down Expand Up @@ -515,12 +539,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self):

def test_smtputf8_policy(self):
msg = EmailMessage()
msg['From'] = "Páolo <főo@bar.com>"
msg['From'] = "Páolo <főo@bàr.com>"
msg['To'] = 'Dinsdale'
msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
msg.set_content("oh là là, know what I mean, know what I mean?")
expected = textwrap.dedent("""\
From: Páolo <főo@bar.com>
From: Páolo <főo@bàr.com>
To: Dinsdale
Subject: Nudge nudge, wink, wink \u1F609
Content-Type: text/plain; charset="utf-8"
Expand Down Expand Up @@ -555,6 +579,37 @@ def test_smtp_policy(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)

def test_non_ascii_addr_spec_preserved(self):
    # Flattening a parsed message must not rewrite a (defective)
    # non-ASCII addr-spec that was present in the original bytes.
    # (test_non_ascii_addr_spec_raises covers the utf8=False error case.)
    raw_header = (
        'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>'
    ).encode()
    parsed = message_from_bytes(raw_header, policy=policy.default)
    buffer = io.BytesIO()
    BytesGenerator(buffer, policy=policy.default).flatten(parsed)
    self.assertEqual(
        buffer.getvalue(),
        b'To: j\xc3\xb6rg@example.com,\n'
        b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n'
        b'\n',
    )

def test_idna_encoding_preserved(self):
    # A domain that is already IDNA-encoded must reach the output
    # unchanged; nothing should try to decode it back to Unicode.
    punycode_domain = '☕.example'.encode('idna').decode()  # IDNA 2003
    message = EmailMessage()
    message["To"] = Address(username='jörg', domain=punycode_domain)
    smtputf8_policy = policy.default.clone(utf8=True)
    buffer = io.BytesIO()
    BytesGenerator(buffer, policy=smtputf8_policy).flatten(message)
    self.assertEqual(
        buffer.getvalue(),
        'To: jörg@xn--53h.example\n\n'.encode(),
    )


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
The :mod:`email` module no longer incorrectly uses :rfc:`2047` encoding for
a mailbox with non-ASCII characters in its domain. Under a policy with
:attr:`~email.policy.EmailPolicy.utf8` set ``False``, attempting to serialize
such a message will now raise an :exc:`~email.errors.InvalidMailboxError`.
Either apply an appropriate IDNA encoding to convert the domain to ASCII before
serialization, or use :data:`email.policy.SMTPUTF8` (or another policy with
``utf8=True``) to correctly pass through the internationalized domain name
as Unicode characters.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
The :mod:`email` module no longer incorrectly uses :rfc:`2047` encoding for
a mailbox with non-ASCII characters in its local-part. Under a policy with
:attr:`~email.policy.EmailPolicy.utf8` set ``False``, attempting to serialize
such a message will now raise an :exc:`~email.errors.InvalidMailboxError`.
There is no valid 7-bit encoding for an internationalized local-part. Use
:data:`email.policy.SMTPUTF8` (or another policy with ``utf8=True``) to
correctly pass through the local-part as Unicode characters.
Loading