Skip to content

Commit 0095b06

Browse files
authored
Merge pull request #213 from hiddeco/feat/readerat-fs
osfs: Add experimental WithMmap for read-only Opens
2 parents 13ded9e + 0dea49c commit 0095b06

16 files changed

Lines changed: 856 additions & 22 deletions

embedfs/concurrent_readat_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package embedfs
2+
3+
import (
4+
"embed"
5+
"sync"
6+
"testing"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
//go:embed testdata/concurrent.bin
12+
var concurrentFS embed.FS
13+
14+
// TestFileConcurrentReadAt asserts that concurrent ReadAt calls on a
15+
// shared embedfs file handle are safe under -race and return correct
16+
// per-byte results. embedfs wraps bytes.Reader, which is already
17+
// concurrent-safe for ReadAt; this test pins the billy.File.ReadAt
18+
// contract so future changes to the wrapper cannot regress it.
19+
func TestFileConcurrentReadAt(t *testing.T) {
20+
t.Parallel()
21+
22+
want, err := concurrentFS.ReadFile("testdata/concurrent.bin")
23+
require.NoError(t, err)
24+
require.Equal(t, 4096, len(want))
25+
26+
fs := New(&concurrentFS)
27+
f, err := fs.Open("testdata/concurrent.bin")
28+
require.NoError(t, err)
29+
t.Cleanup(func() { _ = f.Close() })
30+
31+
const workers = 8
32+
const iters = 200
33+
const bufLen = 64
34+
35+
var wg sync.WaitGroup
36+
wg.Add(workers)
37+
for w := range workers {
38+
go func() {
39+
defer wg.Done()
40+
buf := make([]byte, bufLen)
41+
for i := range iters {
42+
off := int64((w*131 + i*257) % (len(want) - bufLen))
43+
n, err := f.ReadAt(buf, off)
44+
require.NoError(t, err)
45+
require.Equal(t, bufLen, n)
46+
require.Equal(t, want[off:off+int64(n)], buf[:n])
47+
}
48+
}()
49+
}
50+
wg.Wait()
51+
}

embedfs/embed_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ func TestReadDir(t *testing.T) {
283283
name: "testdataDir w/ path",
284284
path: "testdata",
285285
fs: &testdataDir,
286-
want: []string{"empty.txt", "empty2.txt"},
286+
want: []string{"concurrent.bin", "empty.txt", "empty2.txt"},
287287
},
288288
{
289289
name: "testdataDir return no dir names",

embedfs/testdata/concurrent.bin

4 KB
Binary file not shown.

fs.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,12 @@ type File interface {
177177
Name() string
178178
io.Writer
179179
io.WriterAt
180+
// ReadAt must be safe for concurrent calls from multiple
181+
// goroutines on the same handle, matching the [io.ReaderAt]
182+
// contract documented in package io. Callers performing
183+
// concurrent random-access reads on a shared File rely on
184+
// this; backings that cannot honour it must not satisfy
185+
// [billy.File].
180186
io.ReaderAt
181187
io.Seeker
182188
// Truncate the file.

memfs/concurrent_readat_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package memfs
2+
3+
import (
4+
"sync"
5+
"testing"
6+
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
// TestFileConcurrentReadAt asserts that concurrent ReadAt calls on a
11+
// shared memfs file handle are safe under -race and return correct
12+
// per-byte results. memfs is the in-memory billy backing that go-git
13+
// and other consumers rely on for tests; the billy.File.ReadAt
14+
// contract documents concurrent-safety as required, and this test
15+
// pins memfs's compliance.
16+
func TestFileConcurrentReadAt(t *testing.T) {
17+
t.Parallel()
18+
19+
const size = 64 * 1024
20+
want := make([]byte, size)
21+
for i := range want {
22+
want[i] = byte(i % 251)
23+
}
24+
25+
fs := New()
26+
f, err := fs.Create("data")
27+
require.NoError(t, err)
28+
_, err = f.Write(want)
29+
require.NoError(t, err)
30+
require.NoError(t, f.Close())
31+
32+
rf, err := fs.Open("data")
33+
require.NoError(t, err)
34+
t.Cleanup(func() { _ = rf.Close() })
35+
36+
const workers = 8
37+
const iters = 200
38+
var wg sync.WaitGroup
39+
wg.Add(workers)
40+
for w := range workers {
41+
go func() {
42+
defer wg.Done()
43+
buf := make([]byte, 1024)
44+
for i := range iters {
45+
off := int64((w*131 + i*257) % (len(want) - len(buf)))
46+
n, err := rf.ReadAt(buf, off)
47+
require.NoError(t, err)
48+
require.Equal(t, len(buf), n)
49+
require.Equal(t, want[off:off+int64(n)], buf[:n])
50+
}
51+
}()
52+
}
53+
wg.Wait()
54+
}

osfs/mmap_file.go

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
//go:build darwin || linux
2+
3+
package osfs
4+
5+
import (
6+
"errors"
7+
"io"
8+
"math"
9+
"os"
10+
"runtime"
11+
"sync"
12+
13+
"golang.org/x/sys/unix"
14+
)
15+
16+
// mmapFile is a billy.File backed by a read-only memory map. It is
17+
// returned from BoundOS/RootOS.OpenFile when the filesystem was
18+
// constructed with [WithMmap] and the file is opened without write
19+
// flags. Read and Seek track a real cursor over the mapped bytes,
20+
// ReadAt is concurrent-safe (multiple goroutines may call it in
21+
// parallel against the same handle) and serialised against Close
22+
// via an RWMutex so munmap cannot run while a read is in flight.
23+
// Write/WriteAt/Truncate return [os.ErrPermission] — the file is
24+
// read-only by construction.
25+
type mmapFile struct {
26+
f *os.File
27+
data []byte
28+
name string
29+
30+
mu sync.RWMutex
31+
cursor int64
32+
closed bool
33+
}
34+
35+
// newMmapFile maps f read-only and returns an [*mmapFile] that owns
36+
// f. On success the returned handle is responsible for closing the
37+
// underlying [*os.File] via [(*mmapFile).Close].
38+
//
39+
// If mmap is unavailable for this particular file (zero size, size
40+
// beyond platform int, mmap rejected by the kernel for pipes/devices
41+
// etc.) the function returns [errMmapUnavailable] without closing f
42+
// so the caller can fall back to a regular [*file] wrapper.
43+
//
44+
// Any other error (e.g. fstat failing) is propagated as-is and f is
45+
// closed before returning — the caller must not use it.
46+
func newMmapFile(f *os.File, name string) (*mmapFile, error) {
47+
info, err := f.Stat()
48+
if err != nil {
49+
_ = f.Close()
50+
return nil, err
51+
}
52+
53+
size := info.Size()
54+
if size <= 0 || size > int64(math.MaxInt) {
55+
// unix.Mmap rejects size 0, and 32-bit platforms can't
56+
// represent very large mappings as an int. Either case
57+
// is fine for the regular fd wrapper.
58+
return nil, errMmapUnavailable
59+
}
60+
61+
data, err := unix.Mmap(int(f.Fd()), 0, int(size), unix.PROT_READ, unix.MAP_SHARED)
62+
if err != nil {
63+
// Many failure modes here are legitimate (pipes, devices,
64+
// FS quirks). Defer to the fd wrapper.
65+
return nil, errMmapUnavailable
66+
}
67+
68+
m := &mmapFile{f: f, data: data, name: name}
69+
// Belt and braces for callers that forget to Close: the runtime
70+
// will munmap and close the fd when m becomes unreachable. Close
71+
// clears this finalizer on the orderly path.
72+
runtime.SetFinalizer(m, (*mmapFile).Close)
73+
return m, nil
74+
}
75+
76+
func (m *mmapFile) Name() string { return m.name }
77+
78+
// Stat returns the underlying *os.File's FileInfo unchanged so that
79+
// f.Stat().Name() matches the basename returned by the fd-backed
80+
// *file across both backings.
81+
func (m *mmapFile) Stat() (os.FileInfo, error) {
82+
return m.f.Stat()
83+
}
84+
85+
// Read implements [io.Reader]. It holds the write lock because it
86+
// mutates the shared cursor; concurrent Read+Read would otherwise
87+
// race on m.cursor even though both could read m.data under RLock.
88+
// Random-access callers should use ReadAt, which is the parallel API.
89+
func (m *mmapFile) Read(p []byte) (int, error) {
90+
if len(p) == 0 {
91+
return 0, nil
92+
}
93+
m.mu.Lock()
94+
defer m.mu.Unlock()
95+
if m.closed {
96+
return 0, os.ErrClosed
97+
}
98+
if m.cursor >= int64(len(m.data)) {
99+
return 0, io.EOF
100+
}
101+
n := copy(p, m.data[m.cursor:])
102+
m.cursor += int64(n)
103+
return n, nil
104+
}
105+
106+
func (m *mmapFile) ReadAt(p []byte, off int64) (int, error) {
107+
if len(p) == 0 {
108+
return 0, nil
109+
}
110+
m.mu.RLock()
111+
defer m.mu.RUnlock()
112+
if m.closed {
113+
return 0, os.ErrClosed
114+
}
115+
if off < 0 {
116+
return 0, &os.PathError{Op: "readat", Path: m.name, Err: errors.New("negative offset")}
117+
}
118+
if off >= int64(len(m.data)) {
119+
return 0, io.EOF
120+
}
121+
n := copy(p, m.data[off:])
122+
if n < len(p) {
123+
return n, io.EOF
124+
}
125+
return n, nil
126+
}
127+
128+
func (m *mmapFile) Seek(offset int64, whence int) (int64, error) {
129+
m.mu.Lock()
130+
defer m.mu.Unlock()
131+
if m.closed {
132+
return 0, os.ErrClosed
133+
}
134+
var abs int64
135+
switch whence {
136+
case io.SeekStart:
137+
abs = offset
138+
case io.SeekCurrent:
139+
abs = m.cursor + offset
140+
case io.SeekEnd:
141+
abs = int64(len(m.data)) + offset
142+
default:
143+
return 0, &os.PathError{Op: "seek", Path: m.name, Err: errors.New("invalid whence")}
144+
}
145+
if abs < 0 {
146+
return 0, &os.PathError{Op: "seek", Path: m.name, Err: errors.New("negative position")}
147+
}
148+
m.cursor = abs
149+
return abs, nil
150+
}
151+
152+
func (m *mmapFile) Write(p []byte) (int, error) {
153+
return 0, &os.PathError{Op: "write", Path: m.name, Err: os.ErrPermission}
154+
}
155+
156+
func (m *mmapFile) WriteAt(p []byte, off int64) (int, error) {
157+
return 0, &os.PathError{Op: "writeat", Path: m.name, Err: os.ErrPermission}
158+
}
159+
160+
func (m *mmapFile) Truncate(size int64) error {
161+
return &os.PathError{Op: "truncate", Path: m.name, Err: os.ErrPermission}
162+
}
163+
164+
func (m *mmapFile) Close() error {
165+
m.mu.Lock()
166+
defer m.mu.Unlock()
167+
if m.closed {
168+
return os.ErrClosed
169+
}
170+
m.closed = true
171+
runtime.SetFinalizer(m, nil)
172+
173+
munmapErr := unix.Munmap(m.data)
174+
m.data = nil
175+
closeErr := m.f.Close()
176+
return errors.Join(munmapErr, closeErr)
177+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//go:build !darwin && !linux && !wasm
2+
3+
package osfs
4+
5+
import (
6+
"testing"
7+
8+
"github.com/go-git/go-billy/v6"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func assertMmapBackingWhenAvailable(t *testing.T, f billy.File) {
13+
t.Helper()
14+
// mmap is unavailable on this platform; WithMmap is honoured as
15+
// best-effort and falls through to the fd-backed wrapper.
16+
_, ok := f.(*file)
17+
require.True(t, ok, "platform has no mmap path, expected *file, got %T", f)
18+
}

osfs/mmap_file_assert_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//go:build darwin || linux
2+
3+
package osfs
4+
5+
import (
6+
"testing"
7+
8+
"github.com/go-git/go-billy/v6"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func assertMmapBackingWhenAvailable(t *testing.T, f billy.File) {
13+
t.Helper()
14+
_, ok := f.(*mmapFile)
15+
require.True(t, ok, "WithMmap should select *mmapFile on this platform, got %T", f)
16+
}
17+
18+
// Ensure *mmapFile satisfies billy.File at compile-time on platforms
19+
// where it has a real implementation.
20+
var _ billy.File = (*mmapFile)(nil)

osfs/mmap_file_other.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//go:build !darwin && !linux && !js
2+
3+
package osfs
4+
5+
import "os"
6+
7+
// mmapFile is a stub on platforms without mmap support. The
8+
// real implementation lives in mmap_file.go (darwin || linux);
9+
// here the type exists solely so the wrapOpenedFile return
10+
// path that constructs it still type-checks. newMmapFile
11+
// always returns [errMmapUnavailable], so a value of this type
12+
// is never actually constructed at runtime — the method bodies
13+
// below are unreachable.
14+
type mmapFile struct{}
15+
16+
func newMmapFile(_ *os.File, _ string) (*mmapFile, error) {
17+
return nil, errMmapUnavailable
18+
}
19+
20+
func (m *mmapFile) Name() string { return "" }
21+
func (m *mmapFile) Stat() (os.FileInfo, error) { return nil, os.ErrInvalid }
22+
func (m *mmapFile) Read(p []byte) (int, error) { return 0, os.ErrInvalid }
23+
func (m *mmapFile) ReadAt(p []byte, off int64) (int, error) { return 0, os.ErrInvalid }
24+
func (m *mmapFile) Write(p []byte) (int, error) { return 0, os.ErrInvalid }
25+
func (m *mmapFile) WriteAt(p []byte, off int64) (int, error) { return 0, os.ErrInvalid }
26+
func (m *mmapFile) Seek(offset int64, whence int) (int64, error) { return 0, os.ErrInvalid }
27+
func (m *mmapFile) Truncate(size int64) error { return os.ErrInvalid }
28+
func (m *mmapFile) Close() error { return os.ErrInvalid }

0 commit comments

Comments
 (0)