From 6ec10d8501848a2f01bb66f775f77e1de8b37b9d Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Fri, 22 Jan 2021 12:02:02 +0100 Subject: [PATCH 1/3] Fix #70091: Phar does not mark UTF-8 filenames in ZIP archives The default encoding of filenames in a ZIP archive is IBM Code Page 437. Phar, however, only supports UTF-8 filenames. Therefore we have to mark non-ASCII filenames as being stored in UTF-8 by setting the general purpose bit 11 (the language encoding flag). The effect of not setting this bit for non-ASCII filenames can be seen in popular tools like 7-Zip and UnZip, but not when extracting the archives via ext/phar (which is agnostic to the filename encoding), or via ext/zip (which guesses the encoding). Thus we add a somewhat brittle low-level test case. --- ext/phar/tests/bug70091.phpt | 60 ++++++++++++++++++++++++++++++++++++ ext/phar/zip.c | 16 ++++++++++ 2 files changed, 76 insertions(+) create mode 100644 ext/phar/tests/bug70091.phpt diff --git a/ext/phar/tests/bug70091.phpt b/ext/phar/tests/bug70091.phpt new file mode 100644 index 0000000000000..94c2874d070f5 --- /dev/null +++ b/ext/phar/tests/bug70091.phpt @@ -0,0 +1,60 @@ +--TEST-- +Bug #70091 (Phar does not mark UTF-8 filenames in ZIP archives) +--SKIPIF-- + +--FILE-- +addFromString('föö', ''); +$phar->addFromString('foo', ''); +unset($phar); + +$stream = fopen(__DIR__ . 
'/bug70091.zip', 'r'); + +$data = fread($stream, 8); +var_dump(unpack('H8sig/@6/nflags', $data)); + +fseek($stream, 53); +$data = fread($stream, 8); +var_dump(unpack('H8sig/@6/nflags', $data)); + +fseek($stream, 104); +$data = fread($stream, 10); +var_dump(unpack('H8sig/@8/nflags', $data)); + +fseek($stream, 173); +$data = fread($stream, 10); +var_dump(unpack('H8sig/@8/nflags', $data)); +?> +--EXPECT-- +array(2) { + ["sig"]=> + string(8) "504b0304" + ["flags"]=> + int(8) +} +array(2) { + ["sig"]=> + string(8) "504b0304" + ["flags"]=> + int(0) +} +array(2) { + ["sig"]=> + string(8) "504b0102" + ["flags"]=> + int(8) +} +array(2) { + ["sig"]=> + string(8) "504b0102" + ["flags"]=> + int(0) +} +--CLEAN-- + diff --git a/ext/phar/zip.c b/ext/phar/zip.c index c52e87647d11a..78bdeedee08b5 100644 --- a/ext/phar/zip.c +++ b/ext/phar/zip.c @@ -810,6 +810,14 @@ int phar_open_or_create_zip(char *fname, size_t fname_len, char *alias, size_t a } /* }}} */ +static zend_bool is_ascii(const unsigned char *str, uint32_t len) +{ + while (--len) { + if (*str++ >= 0x80) return 0; + } + return 1; +} + struct _phar_zip_pass { php_stream *filefp; php_stream *centralfp; @@ -829,6 +837,7 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{ zend_off_t offset; int not_really_modified = 0; p = (struct _phar_zip_pass*) arg; + uint16_t general_purpose_flags; if (entry->is_mounted) { return ZEND_HASH_APPLY_KEEP; @@ -878,6 +887,13 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{ memcpy(central.datestamp, local.datestamp, sizeof(local.datestamp)); PHAR_SET_16(central.filename_len, entry->filename_len + (entry->is_dir ? 1 : 0)); PHAR_SET_16(local.filename_len, entry->filename_len + (entry->is_dir ? 
1 : 0)); + if (!is_ascii(entry->filename, entry->filename_len)) { + // set language encoding flag + general_purpose_flags = PHAR_GET_16(central.flags); + PHAR_SET_16(central.flags, general_purpose_flags | (1 << 11)); + general_purpose_flags = PHAR_GET_16(local.flags); + PHAR_SET_16(local.flags, general_purpose_flags | (1 << 11)); + } PHAR_SET_32(central.offset, php_stream_tell(p->filefp)); /* do extra field for perms later */ From 0809a9c37d9ee687b1dcb48b892aa1be200952bc Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Tue, 26 Jan 2021 16:07:15 +0100 Subject: [PATCH 2/3] Unconditionally set UTF-8 flag --- ext/phar/zip.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/ext/phar/zip.c b/ext/phar/zip.c index 78bdeedee08b5..b6e50d572a191 100644 --- a/ext/phar/zip.c +++ b/ext/phar/zip.c @@ -810,14 +810,6 @@ int phar_open_or_create_zip(char *fname, size_t fname_len, char *alias, size_t a } /* }}} */ -static zend_bool is_ascii(const unsigned char *str, uint32_t len) -{ - while (--len) { - if (*str++ >= 0x80) return 0; - } - return 1; -} - struct _phar_zip_pass { php_stream *filefp; php_stream *centralfp; @@ -887,13 +879,11 @@ static int phar_zip_changed_apply_int(phar_entry_info *entry, void *arg) /* {{{ memcpy(central.datestamp, local.datestamp, sizeof(local.datestamp)); PHAR_SET_16(central.filename_len, entry->filename_len + (entry->is_dir ? 1 : 0)); PHAR_SET_16(local.filename_len, entry->filename_len + (entry->is_dir ? 
1 : 0)); - if (!is_ascii(entry->filename, entry->filename_len)) { - // set language encoding flag - general_purpose_flags = PHAR_GET_16(central.flags); - PHAR_SET_16(central.flags, general_purpose_flags | (1 << 11)); - general_purpose_flags = PHAR_GET_16(local.flags); - PHAR_SET_16(local.flags, general_purpose_flags | (1 << 11)); - } + // set language encoding flag (all filenames have to be UTF-8 anyway) + general_purpose_flags = PHAR_GET_16(central.flags); + PHAR_SET_16(central.flags, general_purpose_flags | (1 << 11)); + general_purpose_flags = PHAR_GET_16(local.flags); + PHAR_SET_16(local.flags, general_purpose_flags | (1 << 11)); PHAR_SET_32(central.offset, php_stream_tell(p->filefp)); /* do extra field for perms later */ From 05bfab463b54f32efa81cb97545893480432a3e6 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Tue, 26 Jan 2021 16:50:04 +0100 Subject: [PATCH 3/3] Fix test case --- ext/phar/tests/bug70091.phpt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/phar/tests/bug70091.phpt b/ext/phar/tests/bug70091.phpt index 94c2874d070f5..893b2eec4e045 100644 --- a/ext/phar/tests/bug70091.phpt +++ b/ext/phar/tests/bug70091.phpt @@ -40,7 +40,7 @@ array(2) { ["sig"]=> string(8) "504b0304" ["flags"]=> - int(0) + int(8) } array(2) { ["sig"]=> @@ -52,7 +52,7 @@ array(2) { ["sig"]=> string(8) "504b0102" ["flags"]=> - int(0) + int(8) } --CLEAN--