diff --git a/Lib/encodings/viscii.py b/Lib/encodings/viscii.py
new file mode 100644
index 00000000000000..063c59bbcb7458
--- /dev/null
+++ b/Lib/encodings/viscii.py
@@ -0,0 +1,311 @@
+"""Python Character Mapping Codec viscii generated from 'python-mappings/VISCII.TXT' with gencodec.py."""  # "
+
+import codecs
+
+### Codec APIs -- each method is one call into the codecs charmap helpers, using the tables defined below
+
+
+class Codec(codecs.Codec):  # stateless: neither method reads or writes anything on self
+    def encode(self, input, errors="strict"):  # maps input through encoding_table; errors is forwarded unchanged
+        return codecs.charmap_encode(input, errors, encoding_table)
+
+    def decode(self, input, errors="strict"):  # maps input through decoding_table; errors is forwarded unchanged
+        return codecs.charmap_decode(input, errors, decoding_table)
+
+
+class IncrementalEncoder(codecs.IncrementalEncoder):  # no state is kept between calls
+    def encode(self, input, final=False):  # final is unused; [0] keeps only the first item of the helper's result
+        return codecs.charmap_encode(input, self.errors, encoding_table)[0]
+
+
+class IncrementalDecoder(codecs.IncrementalDecoder):  # no state is kept between calls
+    def decode(self, input, final=False):  # final is unused; [0] keeps only the first item of the helper's result
+        return codecs.charmap_decode(input, self.errors, decoding_table)[0]
+
+
+class StreamWriter(Codec, codecs.StreamWriter):  # Codec is listed first, so its encode()/decode() are the ones used
+    pass
+
+
+class StreamReader(Codec, codecs.StreamReader):  # Codec is listed first, so its encode()/decode() are the ones used
+    pass
+
+
+### encodings module API -- getregentry() bundles the classes above into one CodecInfo
+
+
+def getregentry():  # returns a fresh CodecInfo named "viscii" on every call
+    return codecs.CodecInfo(
+        name="viscii",
+        encode=Codec().encode,  # bound method of a throwaway Codec instance
+        decode=Codec().decode,  # bound method of a second throwaway Codec instance
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
+
+
+### Decoding Table -- a single str: the adjacent literals are concatenated, one character per byte value
+
+decoding_table = (  # entries marked with a letter instead of a control name are the slots VISCII repurposes
+    "\x00"  # 0x00 -> NUL
+    "\x01"  # 0x01 -> SOH
+    "\u1eb2"  # 0x02 -> Ẳ
+    "\x03"  # 0x03 -> ETX
+    "\x04"  # 0x04 -> EOT
+    "\u1eb4"  # 0x05 -> Ẵ
+    "\u1eaa"  # 0x06 -> Ẫ
+    "\x07"  # 0x07 -> BEL
+    "\x08"  # 0x08 -> BS
+    "\t"  # 0x09 -> TAB
+    "\n"  # 0x0A -> LF
+    "\x0b"  # 0x0B -> VT
+    "\x0c"  # 0x0C -> FF
+    "\r"  # 0x0D -> CR
+    "\x0e"  # 0x0E -> SO
+    "\x0f"  # 0x0F -> SI
+    "\x10"  # 0x10 -> DLE
+    "\x11"  # 0x11 -> DC1
+    "\x12"  # 0x12 -> DC2
+    "\x13"  # 0x13 -> DC3
+    "\u1ef6"  # 0x14 -> Ỷ
+    "\x15"  # 0x15 -> NAK
+    "\x16"  # 0x16 -> SYN
+    "\x17"  # 0x17 -> ETB
+    "\x18"  # 0x18 -> CAN
+    "\u1ef8"  # 0x19 -> Ỹ
+    "\x1a"  # 0x1A -> SUB
+    "\x1b"  # 0x1B -> ESC
+ "\x1c" # 0x1C -> FS + "\x1d" # 0x1D -> GS + "\u1ef4" # 0x1E -> Ỵ + "\x1f" # 0x1F -> US + " " # 0x20 -> SPACE + "!" # 0x21 -> ! + '"' # 0x22 -> " + "#" # 0x23 -> # + "$" # 0x24 -> $ + "%" # 0x25 -> % + "&" # 0x26 -> & + "'" # 0x27 -> ' + "(" # 0x28 -> ( + ")" # 0x29 -> ) + "*" # 0x2A -> * + "+" # 0x2B -> + + "," # 0x2C -> , + "-" # 0x2D -> - + "." # 0x2E -> . + "/" # 0x2F -> / + "0" # 0x30 -> 0 + "1" # 0x31 -> 1 + "2" # 0x32 -> 2 + "3" # 0x33 -> 3 + "4" # 0x34 -> 4 + "5" # 0x35 -> 5 + "6" # 0x36 -> 6 + "7" # 0x37 -> 7 + "8" # 0x38 -> 8 + "9" # 0x39 -> 9 + ":" # 0x3A -> : + ";" # 0x3B -> ; + "<" # 0x3C -> < + "=" # 0x3D -> = + ">" # 0x3E -> > + "?" # 0x3F -> ? + "@" # 0x40 -> @ + "A" # 0x41 -> A + "B" # 0x42 -> B + "C" # 0x43 -> C + "D" # 0x44 -> D + "E" # 0x45 -> E + "F" # 0x46 -> F + "G" # 0x47 -> G + "H" # 0x48 -> H + "I" # 0x49 -> I + "J" # 0x4A -> J + "K" # 0x4B -> K + "L" # 0x4C -> L + "M" # 0x4D -> M + "N" # 0x4E -> N + "O" # 0x4F -> O + "P" # 0x50 -> P + "Q" # 0x51 -> Q + "R" # 0x52 -> R + "S" # 0x53 -> S + "T" # 0x54 -> T + "U" # 0x55 -> U + "V" # 0x56 -> V + "W" # 0x57 -> W + "X" # 0x58 -> X + "Y" # 0x59 -> Y + "Z" # 0x5A -> Z + "[" # 0x5B -> [ + "\\" # 0x5C -> \ + "]" # 0x5D -> ] + "^" # 0x5E -> ^ + "_" # 0x5F -> _ + "`" # 0x60 -> ` + "a" # 0x61 -> a + "b" # 0x62 -> b + "c" # 0x63 -> c + "d" # 0x64 -> d + "e" # 0x65 -> e + "f" # 0x66 -> f + "g" # 0x67 -> g + "h" # 0x68 -> h + "i" # 0x69 -> i + "j" # 0x6A -> j + "k" # 0x6B -> k + "l" # 0x6C -> l + "m" # 0x6D -> m + "n" # 0x6E -> n + "o" # 0x6F -> o + "p" # 0x70 -> p + "q" # 0x71 -> q + "r" # 0x72 -> r + "s" # 0x73 -> s + "t" # 0x74 -> t + "u" # 0x75 -> u + "v" # 0x76 -> v + "w" # 0x77 -> w + "x" # 0x78 -> x + "y" # 0x79 -> y + "z" # 0x7A -> z + "{" # 0x7B -> { + "|" # 0x7C -> | + "}" # 0x7D -> } + "~" # 0x7E -> ~ + "\x7f" # 0x7F -> DEL + "\u1ea0" # 0x80 -> Ạ + "\u1eae" # 0x81 -> Ắ + "\u1eb0" # 0x82 -> Ằ + "\u1eb6" # 0x83 -> Ặ + "\u1ea4" # 0x84 -> Ấ + "\u1ea6" # 0x85 -> Ầ + "\u1ea8" # 0x86 -> Ẩ + "\u1eac" 
# 0x87 -> Ậ + "\u1ebc" # 0x88 -> Ẽ + "\u1eb8" # 0x89 -> Ẹ + "\u1ebe" # 0x8A -> Ế + "\u1ec0" # 0x8B -> Ề + "\u1ec2" # 0x8C -> Ể + "\u1ec4" # 0x8D -> Ễ + "\u1ec6" # 0x8E -> Ệ + "\u1ed0" # 0x8F -> Ố + "\u1ed2" # 0x90 -> Ồ + "\u1ed4" # 0x91 -> Ổ + "\u1ed6" # 0x92 -> Ỗ + "\u1ed8" # 0x93 -> Ộ + "\u1ee2" # 0x94 -> Ợ + "\u1eda" # 0x95 -> Ớ + "\u1edc" # 0x96 -> Ờ + "\u1ede" # 0x97 -> Ở + "\u1eca" # 0x98 -> Ị + "\u1ece" # 0x99 -> Ỏ + "\u1ecc" # 0x9A -> Ọ + "\u1ec8" # 0x9B -> Ỉ + "\u1ee6" # 0x9C -> Ủ + "\u0168" # 0x9D -> Ũ + "\u1ee4" # 0x9E -> Ụ + "\u1ef2" # 0x9F -> Ỳ + "\xd5" # 0xA0 -> Õ + "\u1eaf" # 0xA1 -> ắ + "\u1eb1" # 0xA2 -> ằ + "\u1eb7" # 0xA3 -> ặ + "\u1ea5" # 0xA4 -> ấ + "\u1ea7" # 0xA5 -> ầ + "\u1ea9" # 0xA6 -> ẩ + "\u1ead" # 0xA7 -> ậ + "\u1ebd" # 0xA8 -> ẽ + "\u1eb9" # 0xA9 -> ẹ + "\u1ebf" # 0xAA -> ế + "\u1ec1" # 0xAB -> ề + "\u1ec3" # 0xAC -> ể + "\u1ec5" # 0xAD -> ễ + "\u1ec7" # 0xAE -> ệ + "\u1ed1" # 0xAF -> ố + "\u1ed3" # 0xB0 -> ồ + "\u1ed5" # 0xB1 -> ổ + "\u1ed7" # 0xB2 -> ỗ + "\u1ee0" # 0xB3 -> Ỡ + "\u01a0" # 0xB4 -> Ơ + "\u1ed9" # 0xB5 -> ộ + "\u1edd" # 0xB6 -> ờ + "\u1edf" # 0xB7 -> ở + "\u1ecb" # 0xB8 -> ị + "\u1ef0" # 0xB9 -> Ự + "\u1ee8" # 0xBA -> Ứ + "\u1eea" # 0xBB -> Ừ + "\u1eec" # 0xBC -> Ử + "\u01a1" # 0xBD -> ơ + "\u1edb" # 0xBE -> ớ + "\u01af" # 0xBF -> Ư + "\xc0" # 0xC0 -> À + "\xc1" # 0xC1 -> Á + "\xc2" # 0xC2 -> Â + "\xc3" # 0xC3 -> Ã + "\u1ea2" # 0xC4 -> Ả + "\u0102" # 0xC5 -> Ă + "\u1eb3" # 0xC6 -> ẳ + "\u1eb5" # 0xC7 -> ẵ + "\xc8" # 0xC8 -> È + "\xc9" # 0xC9 -> É + "\xca" # 0xCA -> Ê + "\u1eba" # 0xCB -> Ẻ + "\xcc" # 0xCC -> Ì + "\xcd" # 0xCD -> Í + "\u0128" # 0xCE -> Ĩ + "\u1ef3" # 0xCF -> ỳ + "\u0110" # 0xD0 -> Đ + "\u1ee9" # 0xD1 -> ứ + "\xd2" # 0xD2 -> Ò + "\xd3" # 0xD3 -> Ó + "\xd4" # 0xD4 -> Ô + "\u1ea1" # 0xD5 -> ạ + "\u1ef7" # 0xD6 -> ỷ + "\u1eeb" # 0xD7 -> ừ + "\u1eed" # 0xD8 -> ử + "\xd9" # 0xD9 -> Ù + "\xda" # 0xDA -> Ú + "\u1ef9" # 0xDB -> ỹ + "\u1ef5" # 0xDC -> ỵ + "\xdd" # 0xDD -> Ý + "\u1ee1" # 0xDE -> ỡ + "\u01b0" # 0xDF 
-> ư + "\xe0" # 0xE0 -> à + "\xe1" # 0xE1 -> á + "\xe2" # 0xE2 -> â + "\xe3" # 0xE3 -> ã + "\u1ea3" # 0xE4 -> ả + "\u0103" # 0xE5 -> ă + "\u1eef" # 0xE6 -> ữ + "\u1eab" # 0xE7 -> ẫ + "\xe8" # 0xE8 -> è + "\xe9" # 0xE9 -> é + "\xea" # 0xEA -> ê + "\u1ebb" # 0xEB -> ẻ + "\xec" # 0xEC -> ì + "\xed" # 0xED -> í + "\u0129" # 0xEE -> ĩ + "\u1ec9" # 0xEF -> ỉ + "\u0111" # 0xF0 -> đ + "\u1ef1" # 0xF1 -> ự + "\xf2" # 0xF2 -> ò + "\xf3" # 0xF3 -> ó + "\xf4" # 0xF4 -> ô + "\xf5" # 0xF5 -> õ + "\u1ecf" # 0xF6 -> ỏ + "\u1ecd" # 0xF7 -> ọ + "\u1ee5" # 0xF8 -> ụ + "\xf9" # 0xF9 -> ù + "\xfa" # 0xFA -> ú + "\u0169" # 0xFB -> ũ + "\u1ee7" # 0xFC -> ủ + "\xfd" # 0xFD -> ý + "\u1ee3" # 0xFE -> ợ + "\u1eee" # 0xFF -> Ữ +) + +### Encoding table +encoding_table = codecs.charmap_build(decoding_table) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 79c8a7ef886482..758cef8b243fe4 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2137,6 +2137,7 @@ def test_basic(self): "utf_16_le", "utf_7", "utf_8", + "viscii", ] if hasattr(codecs, "mbcs_encode"): diff --git a/Misc/NEWS.d/next/Library/2026-04-22-16-25-33.gh-issue-148865.pbhHbY.rst b/Misc/NEWS.d/next/Library/2026-04-22-16-25-33.gh-issue-148865.pbhHbY.rst new file mode 100644 index 00000000000000..c59cad4f4d6b1d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-22-16-25-33.gh-issue-148865.pbhHbY.rst @@ -0,0 +1,2 @@ +Add the ``viscii`` codec, so :mod:`codecs` now supports the Vietnamese VISCII +charset (:rfc:`1456`). diff --git a/Tools/unicode/python-mappings/VISCII.TXT b/Tools/unicode/python-mappings/VISCII.TXT new file mode 100644 index 00000000000000..0b7e8ed71e3ec2 --- /dev/null +++ b/Tools/unicode/python-mappings/VISCII.TXT @@ -0,0 +1,264 @@ +# Name: VISCII (RFC1456) to Unicode +# Date: 2026-04-22 +# Authors: Henry Jones <2020henryijones@gmail.com> +# +# See RFC 1456 for details. This is a Vietnamese-specific encoding which +# intentionally replaces 6 control characters (0x02, 0x05, 0x06, 0x14, +# 0x19 and 0x1E) with printable characters. 
+ +0x00 0x0000 # NUL +0x01 0x0001 # SOH +0x02 0x1EB2 # Ẳ +0x03 0x0003 # ETX +0x04 0x0004 # EOT +0x05 0x1EB4 # Ẵ +0x06 0x1EAA # Ẫ +0x07 0x0007 # BEL +0x08 0x0008 # BS +0x09 0x0009 # TAB +0x0A 0x000A # LF +0x0B 0x000B # VT +0x0C 0x000C # FF +0x0D 0x000D # CR +0x0E 0x000E # SO +0x0F 0x000F # SI +0x10 0x0010 # DLE +0x11 0x0011 # DC1 +0x12 0x0012 # DC2 +0x13 0x0013 # DC3 +0x14 0x1EF6 # Ỷ +0x15 0x0015 # NAK +0x16 0x0016 # SYN +0x17 0x0017 # ETB +0x18 0x0018 # CAN +0x19 0x1EF8 # Ỹ +0x1A 0x001A # SUB +0x1B 0x001B # ESC +0x1C 0x001C # FS +0x1D 0x001D # GS +0x1E 0x1EF4 # Ỵ +0x1F 0x001F # US +0x20 0x0020 # SPACE +0x21 0x0021 # ! +0x22 0x0022 # " +0x23 0x0023 # # +0x24 0x0024 # $ +0x25 0x0025 # % +0x26 0x0026 # & +0x27 0x0027 # ' +0x28 0x0028 # ( +0x29 0x0029 # ) +0x2A 0x002A # * +0x2B 0x002B # + +0x2C 0x002C # , +0x2D 0x002D # - +0x2E 0x002E # . +0x2F 0x002F # / +0x30 0x0030 # 0 +0x31 0x0031 # 1 +0x32 0x0032 # 2 +0x33 0x0033 # 3 +0x34 0x0034 # 4 +0x35 0x0035 # 5 +0x36 0x0036 # 6 +0x37 0x0037 # 7 +0x38 0x0038 # 8 +0x39 0x0039 # 9 +0x3A 0x003A # : +0x3B 0x003B # ; +0x3C 0x003C # < +0x3D 0x003D # = +0x3E 0x003E # > +0x3F 0x003F # ? 
+0x40 0x0040 # @ +0x41 0x0041 # A +0x42 0x0042 # B +0x43 0x0043 # C +0x44 0x0044 # D +0x45 0x0045 # E +0x46 0x0046 # F +0x47 0x0047 # G +0x48 0x0048 # H +0x49 0x0049 # I +0x4A 0x004A # J +0x4B 0x004B # K +0x4C 0x004C # L +0x4D 0x004D # M +0x4E 0x004E # N +0x4F 0x004F # O +0x50 0x0050 # P +0x51 0x0051 # Q +0x52 0x0052 # R +0x53 0x0053 # S +0x54 0x0054 # T +0x55 0x0055 # U +0x56 0x0056 # V +0x57 0x0057 # W +0x58 0x0058 # X +0x59 0x0059 # Y +0x5A 0x005A # Z +0x5B 0x005B # [ +0x5C 0x005C # \ +0x5D 0x005D # ] +0x5E 0x005E # ^ +0x5F 0x005F # _ +0x60 0x0060 # ` +0x61 0x0061 # a +0x62 0x0062 # b +0x63 0x0063 # c +0x64 0x0064 # d +0x65 0x0065 # e +0x66 0x0066 # f +0x67 0x0067 # g +0x68 0x0068 # h +0x69 0x0069 # i +0x6A 0x006A # j +0x6B 0x006B # k +0x6C 0x006C # l +0x6D 0x006D # m +0x6E 0x006E # n +0x6F 0x006F # o +0x70 0x0070 # p +0x71 0x0071 # q +0x72 0x0072 # r +0x73 0x0073 # s +0x74 0x0074 # t +0x75 0x0075 # u +0x76 0x0076 # v +0x77 0x0077 # w +0x78 0x0078 # x +0x79 0x0079 # y +0x7A 0x007A # z +0x7B 0x007B # { +0x7C 0x007C # | +0x7D 0x007D # } +0x7E 0x007E # ~ +0x7F 0x007F # DEL +0x80 0x1EA0 # Ạ +0x81 0x1EAE # Ắ +0x82 0x1EB0 # Ằ +0x83 0x1EB6 # Ặ +0x84 0x1EA4 # Ấ +0x85 0x1EA6 # Ầ +0x86 0x1EA8 # Ẩ +0x87 0x1EAC # Ậ +0x88 0x1EBC # Ẽ +0x89 0x1EB8 # Ẹ +0x8A 0x1EBE # Ế +0x8B 0x1EC0 # Ề +0x8C 0x1EC2 # Ể +0x8D 0x1EC4 # Ễ +0x8E 0x1EC6 # Ệ +0x8F 0x1ED0 # Ố +0x90 0x1ED2 # Ồ +0x91 0x1ED4 # Ổ +0x92 0x1ED6 # Ỗ +0x93 0x1ED8 # Ộ +0x94 0x1EE2 # Ợ +0x95 0x1EDA # Ớ +0x96 0x1EDC # Ờ +0x97 0x1EDE # Ở +0x98 0x1ECA # Ị +0x99 0x1ECE # Ỏ +0x9A 0x1ECC # Ọ +0x9B 0x1EC8 # Ỉ +0x9C 0x1EE6 # Ủ +0x9D 0x0168 # Ũ +0x9E 0x1EE4 # Ụ +0x9F 0x1EF2 # Ỳ +0xA0 0x00D5 # Õ +0xA1 0x1EAF # ắ +0xA2 0x1EB1 # ằ +0xA3 0x1EB7 # ặ +0xA4 0x1EA5 # ấ +0xA5 0x1EA7 # ầ +0xA6 0x1EA9 # ẩ +0xA7 0x1EAD # ậ +0xA8 0x1EBD # ẽ +0xA9 0x1EB9 # ẹ +0xAA 0x1EBF # ế +0xAB 0x1EC1 # ề +0xAC 0x1EC3 # ể +0xAD 0x1EC5 # ễ +0xAE 0x1EC7 # ệ +0xAF 0x1ED1 # ố +0xB0 0x1ED3 # ồ +0xB1 0x1ED5 # ổ +0xB2 0x1ED7 # ỗ +0xB3 0x1EE0 # Ỡ +0xB4 0x01A0 # Ơ +0xB5 
0x1ED9 # ộ +0xB6 0x1EDD # ờ +0xB7 0x1EDF # ở +0xB8 0x1ECB # ị +0xB9 0x1EF0 # Ự +0xBA 0x1EE8 # Ứ +0xBB 0x1EEA # Ừ +0xBC 0x1EEC # Ử +0xBD 0x01A1 # ơ +0xBE 0x1EDB # ớ +0xBF 0x01AF # Ư +0xC0 0x00C0 # À +0xC1 0x00C1 # Á +0xC2 0x00C2 # Â +0xC3 0x00C3 # Ã +0xC4 0x1EA2 # Ả +0xC5 0x0102 # Ă +0xC6 0x1EB3 # ẳ +0xC7 0x1EB5 # ẵ +0xC8 0x00C8 # È +0xC9 0x00C9 # É +0xCA 0x00CA # Ê +0xCB 0x1EBA # Ẻ +0xCC 0x00CC # Ì +0xCD 0x00CD # Í +0xCE 0x0128 # Ĩ +0xCF 0x1EF3 # ỳ +0xD0 0x0110 # Đ +0xD1 0x1EE9 # ứ +0xD2 0x00D2 # Ò +0xD3 0x00D3 # Ó +0xD4 0x00D4 # Ô +0xD5 0x1EA1 # ạ +0xD6 0x1EF7 # ỷ +0xD7 0x1EEB # ừ +0xD8 0x1EED # ử +0xD9 0x00D9 # Ù +0xDA 0x00DA # Ú +0xDB 0x1EF9 # ỹ +0xDC 0x1EF5 # ỵ +0xDD 0x00DD # Ý +0xDE 0x1EE1 # ỡ +0xDF 0x01B0 # ư +0xE0 0x00E0 # à +0xE1 0x00E1 # á +0xE2 0x00E2 # â +0xE3 0x00E3 # ã +0xE4 0x1EA3 # ả +0xE5 0x0103 # ă +0xE6 0x1EEF # ữ +0xE7 0x1EAB # ẫ +0xE8 0x00E8 # è +0xE9 0x00E9 # é +0xEA 0x00EA # ê +0xEB 0x1EBB # ẻ +0xEC 0x00EC # ì +0xED 0x00ED # í +0xEE 0x0129 # ĩ +0xEF 0x1EC9 # ỉ +0xF0 0x0111 # đ +0xF1 0x1EF1 # ự +0xF2 0x00F2 # ò +0xF3 0x00F3 # ó +0xF4 0x00F4 # ô +0xF5 0x00F5 # õ +0xF6 0x1ECF # ỏ +0xF7 0x1ECD # ọ +0xF8 0x1EE5 # ụ +0xF9 0x00F9 # ù +0xFA 0x00FA # ú +0xFB 0x0169 # ũ +0xFC 0x1EE7 # ủ +0xFD 0x00FD # ý +0xFE 0x1EE3 # ợ +0xFF 0x1EEE # Ữ