changeset: 70349:cb9867dab15e parent: 70346:ca92fa2fe5c9 parent: 70348:7f2ab2f95a04 user: Victor Stinner date: Tue May 24 22:24:11 2011 +0200 files: Misc/NEWS Modules/cjkcodecs/multibytecodec.c description: (Merge 3.2) Issue #12100: Don't reset incremental encoders of CJK codecs at each call to their encode() method anymore, but continue to call the reset() method if the final argument is True. diff -r ca92fa2fe5c9 -r cb9867dab15e Lib/test/test_multibytecodec.py --- a/Lib/test/test_multibytecodec.py Tue May 24 21:32:40 2011 +0200 +++ b/Lib/test/test_multibytecodec.py Tue May 24 22:24:11 2011 +0200 @@ -256,6 +256,36 @@ # Any ISO 2022 codec will cause the segfault myunichr(x).encode('iso_2022_jp', 'ignore') +class TestStateful(unittest.TestCase): + text = '\u4E16\u4E16' + encoding = 'iso-2022-jp' + expected = b'\x1b$B@$@$' + expected_reset = b'\x1b$B@$@$\x1b(B' + + def test_encode(self): + self.assertEqual(self.text.encode(self.encoding), self.expected_reset) + + def test_incrementalencoder(self): + encoder = codecs.getincrementalencoder(self.encoding)() + output = b''.join( + encoder.encode(char) + for char in self.text) + self.assertEqual(output, self.expected) + + def test_incrementalencoder_final(self): + encoder = codecs.getincrementalencoder(self.encoding)() + last_index = len(self.text) - 1 + output = b''.join( + encoder.encode(char, index == last_index) + for index, char in enumerate(self.text)) + self.assertEqual(output, self.expected_reset) + +class TestHZStateful(TestStateful): + text = '\u804a\u804a' + encoding = 'hz' + expected = b'~{ADAD' + expected_reset = b'~{ADAD~}' + def test_main(): support.run_unittest(__name__) diff -r ca92fa2fe5c9 -r cb9867dab15e Misc/NEWS --- a/Misc/NEWS Tue May 24 21:32:40 2011 +0200 +++ b/Misc/NEWS Tue May 24 22:24:11 2011 +0200 @@ -161,6 +161,10 @@ Library ------- +- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to + their encode() method anymore, but continue to call the reset() method if the + final argument is True. + - Issue #12049: Add RAND_bytes() and RAND_pseudo_bytes() functions to the ssl module. diff -r ca92fa2fe5c9 -r cb9867dab15e Modules/cjkcodecs/multibytecodec.c --- a/Modules/cjkcodecs/multibytecodec.c Tue May 24 21:32:40 2011 +0200 +++ b/Modules/cjkcodecs/multibytecodec.c Tue May 24 22:24:11 2011 +0200 @@ -479,7 +479,7 @@ MultibyteEncodeBuffer buf; Py_ssize_t finalsize, r = 0; - if (datalen == 0) + if (datalen == 0 && !(flags & MBENC_RESET)) return PyBytes_FromStringAndSize(NULL, 0); buf.excobj = NULL; @@ -515,7 +515,7 @@ break; } - if (codec->encreset != NULL) + if (codec->encreset != NULL && (flags & MBENC_RESET)) for (;;) { Py_ssize_t outleft; @@ -785,8 +785,8 @@ inbuf_end = inbuf + datalen; r = multibytecodec_encode(ctx->codec, &ctx->state, - (const Py_UNICODE **)&inbuf, - datalen, ctx->errors, final ? MBENC_FLUSH : 0); + (const Py_UNICODE **)&inbuf, datalen, + ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); if (r == NULL) { /* recover the original pending buffer */ if (origpending > 0)