changeset: 71247:16cbd84de848 user: Victor Stinner date: Fri Jul 08 01:45:13 2011 +0200 files: Doc/whatsnew/3.3.rst Lib/test/test_codecencodings_cn.py Lib/test/test_codecencodings_hk.py Lib/test/test_codecencodings_jp.py Lib/test/test_codecencodings_kr.py Lib/test/test_codecencodings_tw.py Lib/test/test_codecmaps_tw.py Misc/NEWS Modules/cjkcodecs/_codecs_cn.c Modules/cjkcodecs/_codecs_hk.c Modules/cjkcodecs/_codecs_jp.c Modules/cjkcodecs/_codecs_kr.c Modules/cjkcodecs/_codecs_tw.c description: Issue #12016: Multibyte CJK decoders now resynchronize faster They only ignore the first byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'. diff -r 43fd627cc060 -r 16cbd84de848 Doc/whatsnew/3.3.rst --- a/Doc/whatsnew/3.3.rst Fri Jul 08 01:10:28 2011 +0200 +++ b/Doc/whatsnew/3.3.rst Fri Jul 08 01:45:13 2011 +0200 @@ -68,6 +68,29 @@ * Stub +codecs +------ + +Multibyte CJK decoders now resynchronize faster. They only ignore the first +byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', +'replace') gives '�\n' instead of '�'. + +(http://bugs.python.org/issue12016) + +Don't reset incremental encoders of CJK codecs at each call to their encode() +method anymore. For example: :: + + $ ./python -q + >>> import codecs + >>> encoder = codecs.getincrementalencoder('hz')('strict') + >>> b''.join(encoder.encode(x) for x in '\u52ff\u65bd\u65bc\u4eba\u3002 Bye.') + b'~{NpJ)l6HK!#~} Bye.' + +This example gives b'~{Np~}~{J)~}~{l6~}~{HK~}~{!#~} Bye.' with older Python +versions. + +(http://bugs.python.org/issue12100) + faulthandler ------------ diff -r 43fd627cc060 -r 16cbd84de848 Lib/test/test_codecencodings_cn.py --- a/Lib/test/test_codecencodings_cn.py Fri Jul 08 01:10:28 2011 +0200 +++ b/Lib/test/test_codecencodings_cn.py Fri Jul 08 01:45:13 2011 +0200 @@ -15,8 +15,8 @@ # invalid bytes (b"abc\x81\x81\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"), - (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"), + (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"), (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"), (b"\xc1\x64", "strict", None), ) @@ -28,8 +28,8 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"), (b"\x83\x34\x83\x31", "strict", None), ("\u30fb", "strict", None), @@ -42,11 +42,14 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"), - (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"), + (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"), ("\u30fb", "strict", b"\x819\xa79"), + (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'), + (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'), + (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'), ) has_iso10646 = True @@ -74,9 +77,11 @@ '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002' 'Bye.\n'), # invalid bytes - (b'ab~cd', 'replace', 'ab\uFFFDd'), + (b'ab~cd', 'replace', 'ab\uFFFDcd'), (b'ab\xffcd', 'replace', 'ab\uFFFDcd'), (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'), + (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'), + (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"), ) def test_main(): diff -r 43fd627cc060 -r 16cbd84de848 Lib/test/test_codecencodings_hk.py --- a/Lib/test/test_codecencodings_hk.py Fri Jul 08 01:10:28 2011 +0200 +++ b/Lib/test/test_codecencodings_hk.py Fri Jul 08 01:45:13 2011 +0200 @@ -15,8 +15,8 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), ) diff -r 43fd627cc060 -r 16cbd84de848 Lib/test/test_codecencodings_jp.py --- a/Lib/test/test_codecencodings_jp.py Fri Jul 08 01:10:28 2011 +0200 +++ b/Lib/test/test_codecencodings_jp.py Fri Jul 08 01:45:13 2011 +0200 @@ -15,50 +15,57 @@ # invalid bytes (b"abc\x81\x00\x81\x00\x82\x84", "strict", None), (b"abc\xf8", "strict", None), - (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"), - (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), - (b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"), + (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"), + (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"), + (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"), + (b"ab\xEBxy", "replace", "ab\uFFFDxy"), + (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"), + (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'), # sjis vs cp932 (b"\\\x7e", "replace", "\\\x7e"), (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"), ) +euc_commontests = ( + # invalid bytes + (b"abc\x80\x80\xc1\xc4", "strict", None), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"), + (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), + (b"abc\xc8", "strict", None), + (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"), + (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"), + (b"\xc1\x64", "strict", None), + (b"\xa1\xc0", "strict", "\uff3c"), + (b"\xa1\xc0\\", "strict", "\uff3c\\"), + (b"\x8eXY", "replace", "\ufffdXY"), +) + +class Test_EUC_JIS_2004(test_multibytecodec_support.TestBase, + unittest.TestCase): + encoding = 'euc_jis_2004' + tstring = test_multibytecodec_support.load_teststring('euc_jisx0213') + codectests = euc_commontests + xmlcharnametest = ( + "\xab\u211c\xbb = \u2329\u1234\u232a", + b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" + ) + class Test_EUC_JISX0213(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'euc_jisx0213' tstring = test_multibytecodec_support.load_teststring('euc_jisx0213') - codectests = ( - # invalid bytes - (b"abc\x80\x80\xc1\xc4", "strict", None), - (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"), - (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), - (b"abc\x8f\x83\x83", "replace", "abc\ufffd"), - (b"\xc1\x64", "strict", None), - (b"\xa1\xc0", "strict", "\uff3c"), - ) + codectests = euc_commontests xmlcharnametest = ( "\xab\u211c\xbb = \u2329\u1234\u232a", b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" ) -eucjp_commontests = ( - (b"abc\x80\x80\xc1\xc4", "strict", None), - (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"), - (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), - (b"abc\x8f\x83\x83", "replace", "abc\ufffd"), - (b"\xc1\x64", "strict", None), -) - class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'euc_jp' tstring = test_multibytecodec_support.load_teststring('euc_jp') - codectests = eucjp_commontests + ( - (b"\xa1\xc0\\", "strict", "\uff3c\\"), + codectests = euc_commontests + ( ("\xa5", "strict", b"\x5c"), ("\u203e", "strict", b"\x7e"), ) @@ -66,8 +73,6 @@ shiftjis_commonenctests = ( (b"abc\x80\x80\x82\x84", "strict", None), (b"abc\xf8", "strict", None), - (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"), - (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), ) @@ -75,20 +80,41 @@ encoding = 'shift_jis' tstring = test_multibytecodec_support.load_teststring('shift_jis') codectests = shiftjis_commonenctests + ( + (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), + (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), + (b"\\\x7e", "strict", "\\\x7e"), (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"), + (b"abc\x81\x39", "replace", "abc\ufffd9"), + (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"), + (b"abc\xFF\x58", "replace", "abc\ufffdX"), + ) + +class Test_SJIS_2004(test_multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'shift_jis_2004' + tstring = test_multibytecodec_support.load_teststring('shift_jis') + codectests = shiftjis_commonenctests + ( + (b"\\\x7e", "strict", "\xa5\u203e"), + (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"), + (b"abc\xEA\xFC", "strict", "abc\u64bf"), + (b"\x81\x39xy", "replace", "\ufffd9xy"), + (b"\xFF\x58xy", "replace", "\ufffdXxy"), + (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"), + (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"), + (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'), + ) + xmlcharnametest = ( + "\xab\u211c\xbb = \u2329\u1234\u232a", + b"\x85Gℜ\x85Q = ⟨ሴ⟩" ) class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'shift_jisx0213' tstring = test_multibytecodec_support.load_teststring('shift_jisx0213') - codectests = ( - # invalid bytes - (b"abc\x80\x80\x82\x84", "strict", None), - (b"abc\xf8", "strict", None), - (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"), - (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), - (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), + codectests = shiftjis_commonenctests + ( + (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), + (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), + # sjis vs cp932 (b"\\\x7e", "replace", "\xa5\u203e"), (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"), diff -r 43fd627cc060 -r 16cbd84de848 Lib/test/test_codecencodings_kr.py --- a/Lib/test/test_codecencodings_kr.py Fri Jul 08 01:10:28 2011 +0200 +++ b/Lib/test/test_codecencodings_kr.py Fri Jul 08 01:45:13 2011 +0200 @@ -15,8 +15,8 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"), ) @@ -27,8 +27,8 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"), # composed make-up sequence errors @@ -40,13 +40,14 @@ (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None), (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"), (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"), - (b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"), + (b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'), (b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None), (b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None), (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None), - (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", "\ufffd"), - (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", "\ufffd"), - (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", "\ufffd"), + (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'), + (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'), + (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'), + (b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'), (b"\xc1\xc4", "strict", "\uc894"), ) @@ -57,9 +58,13 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"), + (b"\xD8abc", "replace", "\uFFFDabc"), + (b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"), + (b"\x84bxy", "replace", "\uFFFDbxy"), + (b"\x8CBxy", "replace", "\uFFFDBxy"), ) def test_main(): diff -r 43fd627cc060 -r 16cbd84de848 Lib/test/test_codecencodings_tw.py --- a/Lib/test/test_codecencodings_tw.py Fri Jul 08 01:10:28 2011 +0200 +++ b/Lib/test/test_codecencodings_tw.py Fri Jul 08 01:45:13 2011 +0200 @@ -15,8 +15,8 @@ # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), ) diff -r 43fd627cc060 -r 16cbd84de848 Lib/test/test_codecmaps_tw.py --- a/Lib/test/test_codecmaps_tw.py Fri Jul 08 01:10:28 2011 +0200 +++ b/Lib/test/test_codecmaps_tw.py Fri Jul 08 01:45:13 2011 +0200 @@ -23,6 +23,9 @@ (b'\xa2\xcc', '\u5341'), (b'\xa2\xce', '\u5345'), ] + codectests = ( + (b"\xFFxy", "replace", "\ufffdxy"), + ) def test_main(): support.run_unittest(__name__) diff -r 43fd627cc060 -r 16cbd84de848 Misc/NEWS --- a/Misc/NEWS Fri Jul 08 01:10:28 2011 +0200 +++ b/Misc/NEWS Fri Jul 08 01:45:13 2011 +0200 @@ -219,6 +219,10 @@ Library ------- +- Issue #12016: Multibyte CJK decoders now resynchronize faster. They only + ignore the first byte of an invalid byte sequence. For example, + b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'. + - Issue #12459: time.sleep() now raises a ValueError if the sleep length is negative, instead of an infinite sleep on Windows or raising an IOError on Linux for example, to have the same behaviour on all platforms. diff -r 43fd627cc060 -r 16cbd84de848 Modules/cjkcodecs/_codecs_cn.c --- a/Modules/cjkcodecs/_codecs_cn.c Fri Jul 08 01:10:28 2011 +0200 +++ b/Modules/cjkcodecs/_codecs_cn.c Fri Jul 08 01:45:13 2011 +0200 @@ -85,7 +85,7 @@ TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -141,7 +141,7 @@ REQUIRE_INBUF(2) GBK_DECODE(c, IN2, **outbuf) - else return 2; + else return 1; NEXT(2, 1) } @@ -267,7 +267,7 @@ c3 = IN3; c4 = IN4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) - return 4; + return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; @@ -292,12 +292,12 @@ continue; } } - return 4; + return 1; } GBK_DECODE(c, c2, **outbuf) else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); - else return 2; + else return 1; NEXT(2, 1) } @@ -400,7 +400,7 @@ else if (c2 == '\n') ; /* line-continuation */ else - return 2; + return 1; NEXT(2, 0); continue; } @@ -419,7 +419,7 @@ NEXT(2, 1) } else - return 2; + return 1; } } diff -r 43fd627cc060 -r 16cbd84de848 Modules/cjkcodecs/_codecs_hk.c --- a/Modules/cjkcodecs/_codecs_hk.c Fri Jul 08 01:10:28 2011 +0200 +++ b/Modules/cjkcodecs/_codecs_hk.c Fri Jul 08 01:45:13 2011 +0200 @@ -161,7 +161,7 @@ case 0x8864: WRITE2(0x00ca, 0x030c); break; case 0x88a3: WRITE2(0x00ea, 0x0304); break; case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; + default: return 1; } NEXT(2, 2) /* all decoded codepoints are pairs, above. */ diff -r 43fd627cc060 -r 16cbd84de848 Modules/cjkcodecs/_codecs_jp.c --- a/Modules/cjkcodecs/_codecs_jp.c Fri Jul 08 01:10:28 2011 +0200 +++ b/Modules/cjkcodecs/_codecs_jp.c Fri Jul 08 01:45:13 2011 +0200 @@ -112,7 +112,7 @@ TRYMAP_DEC(cp932ext, **outbuf, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -120,7 +120,7 @@ c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else return 2; + else return 1; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || @@ -128,10 +128,10 @@ OUT1(0xe000 + 188 * (c - 0xf0) + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) else - return 2; + return 1; } else - return 2; + return 1; NEXT(2, 1) } @@ -256,7 +256,7 @@ NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -274,7 +274,7 @@ continue; } else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; - else return 3; + else return 1; NEXT(3, 1) } else { @@ -300,7 +300,7 @@ NEXT(2, 2) continue; } - else return 2; + else return 1; NEXT(2, 1) } } @@ -388,7 +388,7 @@ NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -401,7 +401,7 @@ NEXT(3, 1) } else - return 3; + return 1; } else { unsigned char c2; @@ -417,7 +417,7 @@ #endif TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ; - else return 2; + else return 1; NEXT(2, 1) } } @@ -502,7 +502,7 @@ REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -522,10 +522,10 @@ continue; } else - return 2; + return 1; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } @@ -645,7 +645,7 @@ REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -671,7 +671,7 @@ NEXT_OUT(2) } else - return 2; + return 1; NEXT_IN(2) } else { /* Plane 2 */ @@ -689,13 +689,13 @@ continue; } else - return 2; + return 1; NEXT(2, 1) } continue; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } diff -r 43fd627cc060 -r 16cbd84de848 Modules/cjkcodecs/_codecs_kr.c --- a/Modules/cjkcodecs/_codecs_kr.c Fri Jul 08 01:10:28 2011 +0200 +++ b/Modules/cjkcodecs/_codecs_kr.c Fri Jul 08 01:45:13 2011 +0200 @@ -123,7 +123,7 @@ if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) - return 8; + return 1; c = (*inbuf)[3]; if (0xa1 <= c && c <= 0xbe) @@ -143,7 +143,7 @@ jong = NONE; if (cho == NONE || jung == NONE || jong == NONE) - return 8; + return 1; OUT1(0xac00 + cho*588 + jung*28 + jong); NEXT(8, 1) @@ -152,7 +152,7 @@ NEXT(2, 1) } else - return 2; + return 1; } return 0; @@ -208,7 +208,7 @@ REQUIRE_INBUF(2) TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } @@ -375,7 +375,7 @@ i_jong = johabidx_jongseong[c_jong]; if (i_cho == NONE || i_jung == NONE || i_jong == NONE) - return 2; + return 1; /* we don't use U+1100 hangul jamo yet. */ if (i_cho == FILL) { @@ -391,7 +391,7 @@ OUT1(0x3100 | johabjamo_jungseong[c_jung]) else - return 2; + return 1; } } else { if (i_jung == FILL) { @@ -399,7 +399,7 @@ OUT1(0x3100 | johabjamo_choseong[c_cho]) else - return 2; + return 1; } else OUT1(0xac00 + @@ -414,7 +414,7 @@ c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || (c2 & 0x7f) == 0x7f || (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) - return 2; + return 1; else { unsigned char t1, t2; @@ -425,7 +425,7 @@ t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; TRYMAP_DEC(ksx1001, **outbuf, t1, t2); - else return 2; + else return 1; NEXT(2, 1) } } diff -r 43fd627cc060 -r 16cbd84de848 Modules/cjkcodecs/_codecs_tw.c --- a/Modules/cjkcodecs/_codecs_tw.c Fri Jul 08 01:10:28 2011 +0200 +++ b/Modules/cjkcodecs/_codecs_tw.c Fri Jul 08 01:45:13 2011 +0200 @@ -55,7 +55,7 @@ TRYMAP_DEC(big5, **outbuf, c, IN2) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -109,7 +109,7 @@ TRYMAP_DEC(cp950ext, **outbuf, c, IN2); else TRYMAP_DEC(big5, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) }