Tasks #68431: Strided multi-fuse client write stalls - CephFS - Ceph

Actions

Copy link

Tasks #68431

closed

Tasks #63293: Implement fscrypt in libcephfs and cephfs-fuse

Tasks #64160: RMW race detection

Strided multi-fuse client write stalls

Added by Christopher Hoffman over 1 year ago. Updated over 1 year ago.

Status:

Resolved

Priority:

Normal

Assignee:

Christopher Hoffman

Category:

Target version:

% Done:

Reviewed:

Affected Versions:

Component(FS):

Labels (FS):

Pull request ID:

Tags (freeform):

Merge Commit:

Fixed In:

Released In:

Upkeep Timestamp:

Description

When there are two fuse clients doing strided writes inside a single fscrypt block, file access stalls.

=============== 0 to - 256 size: 256 ==============
2024-10-07 17:10:49,660.660 INFO:__main__:> set -ex
dd of=/tmp/tmpzxzr7ds3/mnt.0/dir//file.log
=============== 256 to - 512 size: 256 ==============
2024-10-07 17:10:49,675.675 INFO:__main__:> set -ex
dd of=/tmp/tmpzxzr7ds3/mnt.1/dir//file.log bs=1 seek=256
=============== 512 to - 768 size: 256 ==============
2024-10-07 17:10:50,300.300 INFO:__main__:> set -ex
dd of=/tmp/tmpzxzr7ds3/mnt.0/dir//file.log bs=1 seek=512
=============== 768 to - 1024 size: 256 ==============
2024-10-07 17:10:50,888.888 INFO:__main__:> set -ex
dd of=/tmp/tmpzxzr7ds3/mnt.1/dir//file.log bs=1 seek=768

Related issues 1 (0 open — 1 closed)

Actions

Copy link

Updated by Christopher Hoffman over 1 year ago

Status changed from In Progress to Resolved
Parent task changed from #66577 to #63293

In the write path, the wrong size was used (effective size vs. real size).

commit 614ca0eb9fb29ae73879634b55ebd2dab0927fdf (HEAD -> wip-fscrypt)
Author: Christopher Hoffman <choffman@redhat.com>
Date:   Tue Oct 22 17:34:27 2024 +0000

    client: Various fixes to fix multi-fuse client

    Provide various fixes in which size used in
    multi-fuse client tests.

    Fixes: https://tracker.ceph.com/issues/68431
    Signed-off-by: Christopher Hoffman <choffman@redhat.com>

diff --git a/src/client/Client.cc b/src/client/Client.cc
index d426e4365a3..550d11e2631 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -904,17 +904,17 @@ void Client::trim_dentry(Dentry *dn)
 void Client::update_inode_file_size(Inode *in, int issued, uint64_t size,
                                    uint64_t truncate_seq, uint64_t truncate_size)
 {
-  uint64_t prior_size = in->size;
+  uint64_t prior_size = in->effective_size();

   // In the case of a pending trunc size that is smaller than orig size
   // (i.e. truncating from 8M to 4M) passed truncate_seq will be larger
   // than inode truncate_seq. This shows passed size is latest.
   if (truncate_seq > in->truncate_seq ||
       (truncate_seq == in->truncate_seq && size > in->effective_size())) {
-    ldout(cct, 10) << "size " << in->size << " -> " << size << dendl;
+    ldout(cct, 10) << "size " << in->effective_size() << " -> " << size << dendl;
     if (in->is_fscrypt_enabled()) {
       in->set_effective_size(size);
-      size = fscrypt_block_from_ofs(size);
+      size = fscrypt_next_block_start(size);
     }
     in->size = size;
     in->reported_size = size;
@@ -8273,8 +8273,8 @@ int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,
     return -CEPHFS_EROFS;
   }
   if ((mask & CEPH_SETATTR_SIZE) &&
-      (uint64_t)stx->stx_size > in->size &&
-      is_quota_bytes_exceeded(in, (uint64_t)stx->stx_size - in->size,
+      (uint64_t)stx->stx_size > in->effective_size() &&
+      is_quota_bytes_exceeded(in, (uint64_t)stx->stx_size - in->effective_size(),
                              perms)) {
     return -CEPHFS_EDQUOT;
   }
@@ -8426,7 +8426,6 @@ int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,

     if (in->fscrypt_ctx &&
         (!(mask & CEPH_SETATTR_FSCRYPT_FILE))) {
-      stx_size = fscrypt_next_block_start(stx_size);
       ldout(cct,10) << "fscrypt: set file size: orig stx_size=" << stx->stx_size <<" new stx_size=" << stx_size << dendl;

       alt_aux.resize(sizeof(stx->stx_size));
@@ -8447,7 +8446,9 @@ int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,
         !(mask & CEPH_SETATTR_KILL_SGUID) &&
         stx_size >= in->size) {
       if (stx_size > in->size) {
-        in->size = in->reported_size = stx_size;
+        in->reported_size = stx_size;
+        in->set_effective_size(stx_size);
+        in->size = fscrypt_next_block_start(stx_size);
         in->cap_dirtier_uid = perms.uid();
         in->cap_dirtier_gid = perms.gid();
         in->mark_caps_dirty(CEPH_CAP_FILE_EXCL);
@@ -10617,8 +10618,8 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp,
       req->head.args.open.mask = DEBUG_GETATTR_CAPS;
     else
       req->head.args.open.mask = 0;
-    ldout(cct, 10) << "_open size=" << in->size << dendl;
-    req->head.args.open.old_size = in->size;   // for O_TRUNC
+    ldout(cct, 10) << "_open size=" << in->effective_size() << dendl;
+    req->head.args.open.old_size = in->effective_size();   // for O_TRUNC
     req->set_inode(in);
     result = make_request(req, perms);

@@ -11028,9 +11029,9 @@ void Client::C_Read_Sync_NonBlocking::finish(int r)

   // short read?
   if (r >= 0 && r < wanted) {
-    if (pos < in->size) {
+    if (pos < in->effective_size()) {
       // zero up to known EOF
-      int64_t some = in->size - pos;
+      int64_t some = in->effective_size() - pos;
       if (some > left)
         some = left;
       auto z = buffer::ptr_node::create(some);
@@ -11051,7 +11052,7 @@ void Client::C_Read_Sync_NonBlocking::finish(int r)
     }

     // eof?  short read.
-    if ((uint64_t)pos >= in->size)
+    if ((uint64_t)pos >= in->effective_size())
       goto success;

     wanted = left;
@@ -11261,7 +11262,7 @@ retry:
       }

       // eof?  short read.
-      if ((uint64_t)offset < in->size)
+      if ((uint64_t)offset < in->effective_size())
        goto retry;
     }
   }
@@ -11429,7 +11430,6 @@ int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
   std::vector<ObjectCacher::ObjHole> holes;
   r = objectcacher->file_read_ex(&in->oset, &in->layout, in->snapid,
                                  read_start, read_len, bl, 0, &holes, io_finish.get());
- 
   if (onfinish != nullptr) {
     // put the cap ref since we're releasing C_Read_Async_Finisher
     put_cap_ref(in, CEPH_CAP_FILE_CACHE);

Actions

Copy link