From f34d8289b1e88c000308ea559abb7a6dd56af487 Mon Sep 17 00:00:00 2001
From: auouymous <au@qzx.com>
Date: Mon, 29 Jul 2024 17:35:37 -0600
Subject: [PATCH] Limit the length of UTF-8 filenames.

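sanitize_filename() only counts the number of characters and not the
bytes of their multi-byte UTF-8 sequences, so an encoded filename can
still exceed max_length bytes. This patch encodes the string to UTF-8
bytes and then removes whole trailing characters, one single- or
multi-byte sequence at a time, until the encoded length is no longer
above the limit.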
---
 src/gpodder/util.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gpodder/util.py b/src/gpodder/util.py
index 9747209bc..0690af65b 100644
--- a/src/gpodder/util.py
+++ b/src/gpodder/util.py
@@ -1653,9 +1653,19 @@ def sanitize_filename(filename, max_length):
     >>> sanitize_filename('Cool feed (ogg)', 1)
     'C'
     """
-    if max_length > 0 and len(filename) > max_length:
+    if max_length > 0 and len(filename.encode('utf-8')) > max_length:
         logger.info('Limiting file/folder name "%s" to %d characters.', filename, max_length)
-        filename = filename[:max_length]
+        filename = filename.encode('utf-8')
+        length = len(filename)
+        while length > max_length:
+            # strip continuation bytes
+            while (filename[-1] & 0xC0) == 0x80:
+                filename = filename[:-1]
+                length -= 1
+            # strip leader byte
+            filename = filename[:-1]
+            length -= 1
+        filename = filename.decode('utf-8')
 
     # see #361 - at least slash must be removed
     filename = re.sub(r"[\"*/:<>?\\|]", "_", filename)