Skip to content

keyspan: merging iter's span bounds can change when child iter is repositioned #1663

@nicktrav

Description

@nicktrav

Currently, the keyspan.MergingIter assumes that spans returned by child iters are stable. It relies on this assumption when storing the start key.

m.start = m.heapRoot().key
// There may be many entries all with the same user key. Spans in other
// levels may also start or end at this same user key. For eg:
// L1: [a, c) [c, d)
// L2: [c, e)
// If we're positioned at L1's end(c) end boundary, we want to advance
// to the first bound > c.
m.nextEntry()
for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot().key, m.start) == 0 {
m.nextEntry()
}
if len(m.heap.items) == 0 || m.err != nil {
break
}
// The current entry at the top of the heap is the first key > m.start.
// It must become the end bound for the span we will return to the user.
// In the above example, the root of the heap is L1's end(d).
m.end = m.heapRoot().key

#1662 diverges from this assumption where a DefragmentingIter is wrapped by a MergingIter. As the defragmenting iter returns Spans that are only valid until the next positioning method, when the MergingIter pulls from the heap to find the end key, the DefragmentingIter can be repositioned, causing the start key to be updated.

Full reproduction:

package pebble

import (
	"bytes"
	"strings"
	"testing"

	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/keyspan"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
)

func Test(t *testing.T) {
	equal := func(_ base.Compare, a, b keyspan.Span) bool { return true }
	transform := func(cmp base.Compare, in keyspan.Span, out *keyspan.Span) error {
		out.Start, out.End = in.Start, in.End
		out.Keys = append(out.Keys[:0], in.Keys...)
		return nil
	}

	var keys = []string{
		"a b",
		"b c",
		"c d",
		"x y",
		"y z",
	}

	// Create a table.
	fs := vfs.NewMem()
	f, err := fs.Create("test.sst")
	require.NoError(t, err)
	w := sstable.NewWriter(f, sstable.WriterOptions{TableFormat: sstable.TableFormatMax})

	for _, k := range keys {
		parts := strings.Split(k, " ")
		start, end := []byte(parts[0]), []byte(parts[1])
		err = w.Add(base.MakeInternalKey(start, 0, base.InternalKeyKindRangeDelete), end)
		require.NoError(t, err)
	}
	err = w.Close()
	require.NoError(t, err)

	// Construct an iterator over range keys in the table.
	f, err = fs.Open("test.sst")
	require.NoError(t, err)
	r, err := sstable.NewReader(f, sstable.ReaderOptions{})
	require.NoError(t, err)

	iter, err := r.NewRawRangeDelIter()
	require.NoError(t, err)

	cmp := testkeys.Comparer.Compare
	dIter := &keyspan.DefragmentingIter{}
	dIter.Init(cmp, iter, equal, keyspan.StaticDefragmentReducer)
	iter = dIter

	_ = transform
	// Using a merging iter results in a panic.
	//mIter := &keyspan.MergingIter{}
	//mIter.Init(cmp, transform, iter)
	//iter = mIter

	var b bytes.Buffer
	for s := iter.First(); s.Valid(); s = iter.Next() {
		b.WriteString(s.String() + "\n")
	}
	err = iter.Close()
	require.NoError(t, err)

	const want = `
a-d:{(#0,RANGEDEL)}
x-z:{(#0,RANGEDEL)}
`
	require.Equal(t, strings.TrimSpace(want), strings.TrimSpace(b.String()))
}
$ go test -tags invariants -run Test$ .

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions