@@ -782,9 +782,27 @@ static int write_to_fd(int fd, const char *content, ssize_t len)
782782
783783 return 0 ;
784784}
/*
 * Process-wide lock guarding try_copy_file_range. The same mutex is also
 * handed to the work-item iterator in execute_in_threads().
 */
static pthread_mutex_t mutex_thread_access = PTHREAD_MUTEX_INITIALIZER;

/* Cleared by disable_copy_file_range(); starts out enabled. */
static bool try_copy_file_range = true;

/*
 * Report whether copy_file_range() should still be attempted.
 *
 * The flag is read while holding mutex_thread_access so that a concurrent
 * disable_copy_file_range() is observed consistently.
 *
 * Returns the current value of try_copy_file_range.
 */
static bool is_copy_file_range_available(void)
{
	pthread_mutex_lock(&mutex_thread_access);
	const bool available = try_copy_file_range;
	pthread_mutex_unlock(&mutex_thread_access);

	return available;
}
796+
797+ static void disable_copy_file_range (void )
798+ {
799+ pthread_mutex_lock (& mutex_thread_access );
800+ try_copy_file_range = false;
801+ pthread_mutex_unlock (& mutex_thread_access );
802+ }
785803
786804#define BUFSIZE 8192
787- static int copy_file_data (int sfd , int dfd )
805+ static int copy_file_data_classic (int sfd , int dfd )
788806{
789807 char buffer [BUFSIZE ];
790808 ssize_t bytes_read ;
@@ -807,6 +825,97 @@ static int copy_file_data(int sfd, int dfd)
807825 return 0 ;
808826}
809827
/*
 * Copy the content of sfd to dfd with copy_file_range(2).
 *
 * The number of bytes to copy is taken from fstat(sfd).st_size, and both
 * descriptors' current file offsets are used (NULL offset pointers).
 *
 * Returns 0 once st_size bytes have been copied (or when the size is 0).
 * Returns -1 with errno set on failure. If copy_file_range() reports 0 bytes
 * while data is still expected, errno is set to EINVAL so the caller can fall
 * back to a read/write copy.
 */
static int copy_file_data_range(int sfd, int dfd)
{
	struct stat st;
	off_t remaining;

	if (fstat(sfd, &st) == -1)
		return -1;

	remaining = st.st_size;

	while (remaining > 0) {
		ssize_t copied =
			copy_file_range(sfd, NULL, dfd, NULL, remaining, 0);

		if (copied < 0) {
			/*
			 * Retry when interrupted. A `continue` inside the
			 * former do/while jumped to the loop condition, which
			 * was false for a negative result, so the function
			 * returned success without finishing the copy.
			 */
			if (errno == EINTR)
				continue;
			return -1;
		}

		if (copied == 0) {
			// This is an implementation problem in copy_file_range.
			// Report an error so that a classic copy can be retried.
			// https://github.com/rust-lang/rust/blob/0e5f5207881066973486e6a480fa46cfa22947e9/library/std/src/sys/pal/unix/kernel_copy.rs#L622
			// fallback to work around several kernel bugs where copy_file_range will fail to
			// copy any bytes and return 0 instead of an error if
			// - reading virtual files from the proc filesystem which appear to have 0 size
			//   but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
			// - copying from an overlay filesystem in docker. reported to occur on fedora 32.
			errno = EINVAL; // EINVAL Either fd_in or fd_out is not a regular file.
			return -1;
		}

		remaining -= copied;
	}

	return 0;
}
867+
/*
 * Copy the content of sfd to dfd, preferring copy_file_range() and falling
 * back to copy_file_data_classic() when that is unavailable or unsuitable.
 *
 * Background on the fallback strategy:
 * https://github.com/rust-lang/rust/blob/0e5f5207881066973486e6a480fa46cfa22947e9/library/std/src/sys/pal/unix/kernel_copy.rs#L622
 * https://gitlab.gnome.org/GNOME/libglnx/-/blob/202b294e6079e23242e65e0426f8639841d1210b/glnx-fdio.c#L846
 * https://github.com/systemd/systemd/blob/e71b40fd0026c0884ca26eb4f0a9fbe4d9285cfa/src/shared/copy.c#L338
 * https://lwn.net/Articles/846403/
 *
 * Returns 0 on success, or -1 with errno set by whichever copy path failed.
 */
static int copy_file_data(int sfd, int dfd)
{
	if (is_copy_file_range_available()) {
		// Write was successful
		if (copy_file_data_range(sfd, dfd) == 0)
			return 0;

		// Capture errno right away so the decision below cannot be
		// affected by anything the locking helper does.
		const int err = errno;

		// Try the classic fallback if either:
		// - Kernel version is < 4.5 (ENOSYS¹)
		// - Files are mounted on different fs (EXDEV)
		// - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
		// - copy_file_range file is immutable or syscall is blocked by seccomp¹ (EPERM)
		// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
		// - the writer fd was opened with O_APPEND (EBADF²)
		// and no bytes were written successfully yet. (All these errnos should
		// not be returned if something was already written, but they happen in
		// the wild, see rust-lang/rust#91152.)
		//
		// ¹ these cases should be detected by the initial probe but we handle them here
		//   anyway in case syscall interception changes during runtime
		// ² actually invalid file descriptors would cause this too, but in that case
		//   the fallback code path is expected to encounter the same error again
		const bool disable_for_run = (err == ENOSYS || err == EXDEV);
		const bool fallback_this_file =
			(err == EOPNOTSUPP || err == EPERM || err == EINVAL ||
			 err == EBADF);

		if (!disable_for_run && !fallback_this_file) {
			// Any other failure is reported to the caller unchanged.
			errno = err;
			return -1;
		}

		// Disable copy_file_range for the entire run because the rest
		// of the files in this run will be on a similar file system.
		// For the other errnos only this file uses the classic copy;
		// copy_file_range could still work for the next file.
		if (disable_for_run)
			disable_copy_file_range();
	}

	return copy_file_data_classic(sfd, dfd);
}
918+
810919static int copy_file_with_dirs_if_needed (const char * src , const char * dst_base ,
811920 const char * dst , bool try_enable_fsverity )
812921{
@@ -1020,7 +1129,7 @@ static int construct_compute_data(struct lcfs_node_s *node,
10201129}
10211130
10221131struct work_item_iterator {
1023- pthread_mutex_t mutex_node_iterator ;
1132+ pthread_mutex_t * mutex_node_iterator ;
10241133 int current_item ;
10251134 int errorcode ;
10261135 bool cancel_request ;
@@ -1035,26 +1144,26 @@ static struct work_item *get_next_work_item(struct work_collection *collection,
10351144 bool cancel = false;
10361145 struct work_item * ret = NULL ;
10371146
1038- pthread_mutex_lock (& ( iterator -> mutex_node_iterator ) );
1147+ pthread_mutex_lock (iterator -> mutex_node_iterator );
10391148 if (iterator -> cancel_request )
10401149 cancel = true;
10411150 else if (iterator -> current_item < collection -> count ) {
10421151 ret = & (collection -> items [iterator -> current_item ]);
10431152 iterator -> current_item ++ ;
10441153 }
1045- pthread_mutex_unlock (& ( iterator -> mutex_node_iterator ) );
1154+ pthread_mutex_unlock (iterator -> mutex_node_iterator );
10461155 return cancel ? NULL : ret ;
10471156}
10481157
10491158static void request_cancel (struct work_item_iterator * iterator , int errorcode )
10501159{
1051- pthread_mutex_lock (& ( iterator -> mutex_node_iterator ) );
1160+ pthread_mutex_lock (iterator -> mutex_node_iterator );
10521161 // Record only the first cancels error code
10531162 if (!iterator -> cancel_request ) {
10541163 iterator -> cancel_request = true;
10551164 iterator -> errorcode = errorcode ;
10561165 }
1057- pthread_mutex_unlock (& ( iterator -> mutex_node_iterator ) );
1166+ pthread_mutex_unlock (iterator -> mutex_node_iterator );
10581167}
10591168
10601169typedef int (* THREAD_PROCESS_PROC )(struct work_item * , void * );
@@ -1109,12 +1218,7 @@ static int execute_in_threads(const int requested_threads,
11091218 THREAD_PROCESS_PROC proc , void * data )
11101219{
11111220 struct work_item_iterator iterator ;
1112- int ret = pthread_mutex_init (& iterator .mutex_node_iterator , NULL );
1113- if (0 != ret ) {
1114- errno = ret ;
1115- return -1 ;
1116- }
1117-
1221+ iterator .mutex_node_iterator = & mutex_thread_access ;
11181222 iterator .current_item = 0 ;
11191223 iterator .errorcode = 0 ;
11201224 iterator .cancel_request = false;
@@ -1125,6 +1229,7 @@ static int execute_in_threads(const int requested_threads,
11251229 thread_info .collection = collection ;
11261230 thread_info .iterator = & iterator ;
11271231
1232+ int ret = -1 ;
11281233 cleanup_free pthread_t * threads = NULL ;
11291234 const int thread_count = requested_threads - 1 ;
11301235 if (thread_count >= 1 ) {
@@ -1201,7 +1306,6 @@ static int fill_store(const int thread_count, struct lcfs_node_s *node,
12011306 int ret = execute_in_threads (thread_count , & collection , process_copy ,
12021307 (void * )digest_store_path );
12031308 cleanup_work_items (& collection );
1204-
12051309 return ret ;
12061310}
12071311
@@ -1239,7 +1343,7 @@ static void usage(const char *argv0)
12391343 " --from-file The source is a dump file, not a directory\n"
12401344 " --min-version=N Use this minimal format version (default=%d)\n"
12411345 " --max-version=N Use this maxium format version (default=%d)\n"
1242- " --threads=N Use this to calculate digest and copy files in threads (default=%d)\n" ,
1346+ " --threads=N Use this to override the default number of threads used to calculate digest and copy files (default=%d)\n" ,
12431347 bin , LCFS_DEFAULT_VERSION_MIN , LCFS_DEFAULT_VERSION_MAX ,
12441348 get_cpu_count ());
12451349}
0 commit comments