batcheval: ExportRequest does not return to the client when over the ElasticCPU limit if TargetBytes is unset #96684
Closed
Labels
A-disaster-recovery · C-bug (Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior.) · T-disaster-recovery
Description
Recently, we added an elastic CPU limiter to mvccExportToWriter. This limiter checks if an ExportRequest is over its allotted CPU tokens, and if it is, returns from the method with a resume key. This return from mvccExportToWriter is expected to result in the return of an ExportResponse (along with the resume key) to the client. Signaling the RPC to return is a way of allowing the scheduler to take the goroutine off the CPU, thereby allowing other processes waiting on the CPU to be admitted.
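The intended control flow can be sketched as follows. This is a minimal, self-contained simplification with hypothetical names (`exportChunk`, a key-count "budget" standing in for CPU tokens), not the actual `mvccExportToWriter` code: when the budget is spent mid-span, the function returns early with a resume position instead of continuing.

```go
package main

import "fmt"

// exportChunk is a hypothetical stand-in for mvccExportToWriter: it processes
// keys until either the span is exhausted or the (simulated) elastic CPU
// budget is spent, in which case it returns early with a resume index so the
// RPC can return to the client and the goroutine can be descheduled.
func exportChunk(keys []string, start, cpuBudget int) (processed []string, resume int) {
	for i := start; i < len(keys); i++ {
		if i-start >= cpuBudget {
			// Over the CPU token budget: stop here and report where to resume.
			return processed, i
		}
		processed = append(processed, keys[i])
	}
	return processed, -1 // -1: no resume key, span fully exported
}

func main() {
	keys := []string{"a", "b", "c", "d", "e"}
	done, resume := exportChunk(keys, 0, 2)
	fmt.Println(done, resume) // [a b] 2
}
```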
Today, this return to the client is conditional on breaking out of this loop:
cockroach/pkg/kv/kvserver/batcheval/cmd_export.go
Lines 172 to 307 in 260f4fa
```go
for start := args.Key; start != nil; {
	destFile := &storage.MemFile{}
	opts := storage.MVCCExportOptions{
		StartKey:           storage.MVCCKey{Key: start, Timestamp: resumeKeyTS},
		EndKey:             args.EndKey,
		StartTS:            args.StartTime,
		EndTS:              h.Timestamp,
		ExportAllRevisions: exportAllRevisions,
		TargetSize:         targetSize,
		MaxSize:            maxSize,
		MaxIntents:         maxIntents,
		StopMidKey:         args.SplitMidKey,
	}
	var summary roachpb.BulkOpSummary
	var resume storage.MVCCKey
	var fingerprint uint64
	var err error
	if args.ExportFingerprint {
		// Default to stripping the tenant prefix from keys, and checksum from
		// values before fingerprinting so that the fingerprint is tenant
		// agnostic.
		opts.FingerprintOptions = storage.MVCCExportFingerprintOptions{
			StripTenantPrefix:  true,
			StripValueChecksum: true,
		}
		var hasRangeKeys bool
		summary, resume, fingerprint, hasRangeKeys, err = storage.MVCCExportFingerprint(ctx,
			cArgs.EvalCtx.ClusterSettings(), reader, opts, destFile)
		if err != nil {
			return result.Result{}, maybeAnnotateExceedMaxSizeError(err)
		}
		// If no range keys were encountered during fingerprinting then we zero
		// out the underlying SST file as there is no use in sending an empty file
		// part of the ExportResponse. This frees up the memory used by the empty
		// SST file.
		if !hasRangeKeys {
			destFile = &storage.MemFile{}
		}
	} else {
		summary, resume, err = storage.MVCCExportToSST(ctx, cArgs.EvalCtx.ClusterSettings(), reader,
			opts, destFile)
		if err != nil {
			return result.Result{}, maybeAnnotateExceedMaxSizeError(err)
		}
	}
	data := destFile.Data()
	// NB: This should only happen in two cases:
	//
	// 1. There was nothing to export for this span.
	//
	// 2. We hit a resource constraint that led to an
	//    early exit and thus have a resume key despite
	//    not having data.
	if summary.DataSize == 0 {
		if resume.Key != nil {
			start = resume.Key
			resumeKeyTS = resume.Timestamp
			continue
		} else {
			break
		}
	}
	span := roachpb.Span{Key: start}
	if resume.Key != nil {
		span.EndKey = resume.Key
	} else {
		span.EndKey = args.EndKey
	}
	var exported roachpb.ExportResponse_File
	if args.ExportFingerprint {
		// A fingerprinting ExportRequest does not need to return the
		// BulkOpSummary or the exported Span. This is because we do not expect
		// the sender of a fingerprint ExportRequest to use anything but the
		// `Fingerprint` for point-keys and the SST file that contains the
		// rangekeys we encountered during ExportRequest evaluation.
		exported = roachpb.ExportResponse_File{
			EndKeyTS:    resume.Timestamp,
			SST:         data,
			Fingerprint: fingerprint,
		}
	} else {
		exported = roachpb.ExportResponse_File{
			Span:     span,
			EndKeyTS: resume.Timestamp,
			Exported: summary,
			SST:      data,
		}
	}
	reply.Files = append(reply.Files, exported)
	start = resume.Key
	resumeKeyTS = resume.Timestamp
	if h.TargetBytes > 0 {
		curSizeOfExportedSSTs += summary.DataSize
		// There could be a situation where the size of exported SSTs is larger
		// than the TargetBytes. In such a scenario, we want to report back
		// TargetBytes as the size of the processed SSTs otherwise the DistSender
		// will error out with an "exceeded limit". In every other case we want to
		// report back the actual size so that the DistSender can shrink the limit
		// for subsequent range requests.
		// This is semantically OK for two reasons:
		//
		// - DistSender does not parallelize requests with TargetBytes > 0.
		//
		// - DistSender uses NumBytes to shrink the limit for subsequent requests.
		//   By returning TargetBytes, no more requests will be processed (and there
		//   are no parallel running requests) which is what we expect.
		//
		// The ResumeSpan is what is used as the source of truth by the caller
		// issuing the request, and that contains accurate information about what
		// is left to be exported.
		targetSize := h.TargetBytes
		if curSizeOfExportedSSTs < targetSize {
			targetSize = curSizeOfExportedSSTs
		}
		reply.NumBytes = targetSize
		// NB: This condition means that we will allow another SST to be created
		// even if we have less room in our TargetBytes than the target size of
		// the next SST. In the worst case this could lead to us exceeding our
		// TargetBytes by SST target size + overage.
		if reply.NumBytes == h.TargetBytes {
			if resume.Key != nil {
				reply.ResumeSpan = &roachpb.Span{
					Key:    resume.Key,
					EndKey: args.EndKey,
				}
				reply.ResumeReason = roachpb.RESUME_BYTE_LIMIT
			}
			break
		}
	}
}
```
The `break` in the loop is, however, inside the conditional `if h.TargetBytes > 0 {`. ExportRequests are not always sent with TargetBytes set, so even when the elastic limiter indicates we are over the CPU limit, we do not break out of this loop and instead immediately retry the same export. This likely leads to excessive thrashing, since the scheduler is never given a chance to offload the goroutine before the retry. There is no correlation between TargetBytes being set and paginating because of a CPU limit, so we should not rely on the `break` inside that conditional.
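One way to see the shape of a fix: the decision to stop iterating should account for the CPU-limit resume independently of TargetBytes. The sketch below uses hypothetical, simplified names (`resumeInfo`, `shouldStop`) and is not the actual patch; it only illustrates hoisting the CPU-limit check out of the `TargetBytes > 0` conditional.

```go
package main

import "fmt"

// resumeInfo is a hypothetical, simplified view of one export iteration's
// outcome: the resume key (nil when the span is exhausted) and whether the
// elastic CPU limiter forced the early return.
type resumeInfo struct {
	key          []byte
	overCPULimit bool
}

// shouldStop decides whether the evaluation loop should return to the client
// instead of looping again. Unlike the code above, the CPU-limit check does
// not depend on TargetBytes being set.
func shouldStop(targetBytes, exportedBytes int64, r resumeInfo) bool {
	if r.key == nil {
		return true // nothing left to export
	}
	if r.overCPULimit {
		// Always return, so the scheduler gets a chance to take the
		// goroutine off the CPU before the export is retried.
		return true
	}
	return targetBytes > 0 && exportedBytes >= targetBytes
}

func main() {
	// TargetBytes unset (0), but the CPU limiter fired: we still stop.
	fmt.Println(shouldStop(0, 0, resumeInfo{key: []byte("k"), overCPULimit: true})) // true
	// TargetBytes unset and no CPU pressure: keep looping.
	fmt.Println(shouldStop(0, 0, resumeInfo{key: []byte("k")})) // false
}
```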
Jira issue: CRDB-24277