diff -r 48228fb874c1 Python/fileutils.c
--- a/Python/fileutils.c	Mon Nov 05 09:34:46 2012 +0200
+++ b/Python/fileutils.c	Mon Nov 05 22:59:50 2012 +0100
@@ -7,6 +7,10 @@
 #include <langinfo.h>
 #endif
 
+#ifdef __APPLE__
+extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
+#endif
+
 PyObject *
 _Py_device_encoding(int fd)
 {
@@ -244,6 +248,76 @@ char*
     return result;
 }
 
+/* Encode a filename to the filesystem encoding with the
+   surrogateescape error handler: see _Py_wchar2char().
+
+   Return a pointer to a newly allocated byte string (use PyMem_Free() to free
+   the memory), or NULL on encoding or memory allocation error. On the
+   __APPLE__ path, a Python exception raised during the conversion is cleared
+   before NULL is returned. */
+static char*
+_Py_encode_filename(const wchar_t *path)
+{
+#ifdef __APPLE__
+    Py_ssize_t len;
+    PyObject *unicode, *bytes = NULL;
+    char *cpath;
+
+    unicode = PyUnicode_FromWideChar(path, wcslen(path));
+    if (unicode == NULL) {
+        /* Nothing to release here: unicode is NULL. */
+        PyErr_Clear();
+        return NULL;
+    }
+
+    bytes = PyUnicode_EncodeFSDefault(unicode);
+    Py_DECREF(unicode);
+    if (bytes == NULL) {
+        PyErr_Clear();
+        return NULL;
+    }
+
+    len = PyBytes_GET_SIZE(bytes);
+    cpath = PyMem_Malloc(len + 1);
+    if (cpath == NULL) {
+        Py_DECREF(bytes);
+        return NULL;
+    }
+    /* Copy len bytes plus the trailing null byte. */
+    memcpy(cpath, PyBytes_AsString(bytes), len + 1);
+    Py_DECREF(bytes);
+    return cpath;
+#else
+    return _Py_wchar2char(path, NULL);
+#endif
+}
+
+/* Decode a filename from the filesystem encoding with the surrogateescape
+   error handler: see _Py_char2wchar().
+
+   Return a pointer to a newly allocated wide character string (use
+   PyMem_Free() to free the memory) and write the number of written wide
+   characters excluding the null character into *len if len is not NULL, or
+   return NULL on error. On the __APPLE__ path *len is left untouched on
+   error; otherwise error reporting through *len is delegated to
+   _Py_char2wchar(). */
+static wchar_t*
+_Py_decode_filename(const char *path, size_t *len)
+{
+#ifdef __APPLE__
+    wchar_t *wpath;
+    wpath = _Py_DecodeUTF8_surrogateescape(path, strlen(path));
+    if (wpath == NULL)
+        return NULL;
+    /* len is optional: only store the length when the caller asked. */
+    if (len != NULL)
+        *len = wcslen(wpath);
+    return wpath;
+#else
+    return _Py_char2wchar(path, len);
+#endif
+}
+
 /* In principle, this should use HAVE__WSTAT, and _wstat
    should be detected by autoconf. However, no current
    POSIX system provides that function, so testing for
@@ -260,7 +334,7 @@ int
 {
     int err;
     char *fname;
-    fname = _Py_wchar2char(path, NULL);
+    fname = _Py_encode_filename(path);
     if (fname == NULL) {
         errno = EINVAL;
         return -1;
@@ -319,7 +393,7 @@ FILE *
         errno = EINVAL;
         return NULL;
     }
-    cpath = _Py_wchar2char(path, NULL);
+    cpath = _Py_encode_filename(path);
     if (cpath == NULL)
         return NULL;
     f = fopen(cpath, cmode);
@@ -384,7 +458,7 @@ int
     int res;
     size_t r1;
 
-    cpath = _Py_wchar2char(path, NULL);
+    cpath = _Py_encode_filename(path);
     if (cpath == NULL) {
         errno = EINVAL;
         return -1;
@@ -398,7 +472,7 @@ int
         return -1;
     }
     cbuf[res] = '\0'; /* buf will be null terminated */
-    wbuf = _Py_char2wchar(cbuf, &r1);
+    wbuf = _Py_decode_filename(cbuf, &r1);
     if (wbuf == NULL) {
         errno = EINVAL;
         return -1;
@@ -429,7 +503,7 @@ wchar_t*
     wchar_t *wresolved_path;
     char *res;
     size_t r;
-    cpath = _Py_wchar2char(path, NULL);
+    cpath = _Py_encode_filename(path);
     if (cpath == NULL) {
         errno = EINVAL;
         return NULL;
@@ -439,7 +513,7 @@ wchar_t*
     if (res == NULL)
         return NULL;
 
-    wresolved_path = _Py_char2wchar(cresolved_path, &r);
+    wresolved_path = _Py_decode_filename(cresolved_path, &r);
     if (wresolved_path == NULL) {
         errno = EINVAL;
         return NULL;
@@ -471,7 +545,7 @@ wchar_t*
 
     if (getcwd(fname, PATH_MAX) == NULL)
         return NULL;
-    wname = _Py_char2wchar(fname, &len);
+    wname = _Py_decode_filename(fname, &len);
     if (wname == NULL)
         return NULL;
     if (size <= len) {