Skip to content

Commit cbf3d43

Browse files
committed
drive: fix missing items when listing using --fast-list / ListR
This is caused by a bug in Google Drive where, in some circumstances, querying for "(A in parents) or (B in parents)" returns nothing, whereas querying for "A in parents" and "B in parents" separately works fine. This has been reported here: https://issuetracker.google.com/issues/149522397. This workaround detects the condition by checking whether a listing for more than one directory at once returns nothing; if it does, each directory is retried individually. This can produce a false positive if the user has multiple empty directories that are queried at once. The consequence is that ListR is disabled for a while, until the directories are found to be genuinely empty, at which point it is re-enabled. Fixes #3114 and Fixes #4289
1 parent e7bd392 commit cbf3d43

1 file changed

Lines changed: 74 additions & 16 deletions

File tree

backend/drive/drive.go

Lines changed: 74 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"strconv"
2525
"strings"
2626
"sync"
27+
"sync/atomic"
2728
"text/template"
2829
"time"
2930

@@ -68,6 +69,8 @@ const (
6869
minChunkSize = 256 * fs.KibiByte
6970
defaultChunkSize = 8 * fs.MebiByte
7071
partialFields = "id,name,size,md5Checksum,trashed,modifiedTime,createdTime,mimeType,parents,webViewLink,shortcutDetails"
72+
listRGrouping = 50 // number of IDs to search at once when using ListR
73+
listRInputBuffer = 1000 // size of input buffer when using ListR
7174
)
7275

7376
// Globals
@@ -558,6 +561,9 @@ type Fs struct {
558561
isTeamDrive bool // true if this is a team drive
559562
fileFields googleapi.Field // fields to fetch file info with
560563
m configmap.Mapper
564+
grouping int32 // number of IDs to search at once in ListR - read with atomic
565+
listRmu *sync.Mutex // protects listRempties
566+
listRempties map[string]struct{} // IDs of supposedly empty directories which triggered grouping disable
561567
}
562568

563569
type baseObject struct {
@@ -1079,11 +1085,14 @@ func NewFs(name, path string, m configmap.Mapper) (fs.Fs, error) {
10791085
}
10801086

10811087
f := &Fs{
1082-
name: name,
1083-
root: root,
1084-
opt: *opt,
1085-
pacer: newPacer(opt),
1086-
m: m,
1088+
name: name,
1089+
root: root,
1090+
opt: *opt,
1091+
pacer: newPacer(opt),
1092+
m: m,
1093+
grouping: listRGrouping,
1094+
listRmu: new(sync.Mutex),
1095+
listRempties: make(map[string]struct{}),
10871096
}
10881097
f.isTeamDrive = opt.TeamDriveID != ""
10891098
f.fileFields = f.getFileFields()
@@ -1634,15 +1643,17 @@ func (s listRSlices) Less(i, j int) bool {
16341643
// In each cycle it will read up to grouping entries from the in channel without blocking.
16351644
// If an error occurs it will be send to the out channel and then return. Once the in channel is closed,
16361645
// nil is send to the out channel and the function returns.
1637-
func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan listREntry, out chan<- error, cb func(fs.DirEntry) error, grouping int) {
1646+
func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in chan listREntry, out chan<- error, cb func(fs.DirEntry) error) {
16381647
var dirs []string
16391648
var paths []string
1649+
var grouping int32
16401650

16411651
for dir := range in {
16421652
dirs = append(dirs[:0], dir.id)
16431653
paths = append(paths[:0], dir.path)
1654+
grouping = atomic.LoadInt32(&f.grouping)
16441655
waitloop:
1645-
for i := 1; i < grouping; i++ {
1656+
for i := int32(1); i < grouping; i++ {
16461657
select {
16471658
case d, ok := <-in:
16481659
if !ok {
@@ -1655,13 +1666,15 @@ func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan list
16551666
}
16561667
listRSlices{dirs, paths}.Sort()
16571668
var iErr error
1669+
foundItems := false
16581670
_, err := f.list(ctx, dirs, "", false, false, false, func(item *drive.File) bool {
16591671
// shared with me items have no parents when at the root
16601672
if f.opt.SharedWithMe && len(item.Parents) == 0 && len(paths) == 1 && paths[0] == "" {
16611673
item.Parents = dirs
16621674
}
16631675
for _, parent := range item.Parents {
16641676
var i int
1677+
foundItems = true
16651678
earlyExit := false
16661679
// If only one item in paths then no need to search for the ID
16671680
// assuming google drive is doing its job properly.
@@ -1702,6 +1715,53 @@ func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan list
17021715
}
17031716
return false
17041717
})
1718+
// Found no items in more than one directory. Retry these as
1719+
// individual directories This is to work around a bug in google
1720+
// drive where (A in parents) or (B in parents) returns nothing
1721+
// sometimes. See #3114, #4289 and
1722+
// https://issuetracker.google.com/issues/149522397
1723+
if len(dirs) > 1 && !foundItems {
1724+
if atomic.SwapInt32(&f.grouping, 1) != 1 {
1725+
fs.Logf(f, "Disabling ListR to work around bug in drive as multi listing (%d) returned no entries", len(dirs))
1726+
}
1727+
var recycled = make([]listREntry, len(dirs))
1728+
f.listRmu.Lock()
1729+
for i := range dirs {
1730+
recycled[i] = listREntry{id: dirs[i], path: paths[i]}
1731+
// Make a note of these dirs - if they all turn
1732+
// out to be empty then we can re-enable grouping
1733+
f.listRempties[dirs[i]] = struct{}{}
1734+
}
1735+
f.listRmu.Unlock()
1736+
// recycle these in the background so we don't deadlock
1737+
// the listR runners if they all get here
1738+
wg.Add(len(recycled))
1739+
go func() {
1740+
for _, entry := range recycled {
1741+
in <- entry
1742+
}
1743+
fs.Debugf(f, "Recycled %d entries", len(recycled))
1744+
}()
1745+
}
1746+
// If using a grouping of 1 and dir was empty then check to see if it
1747+
// is part of the group that caused grouping to be disabled.
1748+
if grouping == 1 && len(dirs) == 1 && !foundItems {
1749+
f.listRmu.Lock()
1750+
if _, found := f.listRempties[dirs[0]]; found {
1751+
// Remove the ID
1752+
delete(f.listRempties, dirs[0])
1753+
// If no empties left => all the directories that
1754+
// triggered the grouping being set to 1 were actually
1755+
// empty so must have made a mistake
1756+
if len(f.listRempties) == 0 {
1757+
if atomic.SwapInt32(&f.grouping, listRGrouping) != listRGrouping {
1758+
fs.Logf(f, "Re-enabling ListR as previous detection was in error")
1759+
}
1760+
}
1761+
}
1762+
f.listRmu.Unlock()
1763+
}
1764+
17051765
for range dirs {
17061766
wg.Done()
17071767
}
@@ -1736,11 +1796,6 @@ func (f *Fs) listRRunner(ctx context.Context, wg *sync.WaitGroup, in <-chan list
17361796
// Don't implement this unless you have a more efficient way
17371797
// of listing recursively that doing a directory traversal.
17381798
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
1739-
const (
1740-
grouping = 50
1741-
inputBuffer = 1000
1742-
)
1743-
17441799
err = f.dirCache.FindRoot(ctx, false)
17451800
if err != nil {
17461801
return err
@@ -1753,7 +1808,7 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
17531808

17541809
mu := sync.Mutex{} // protects in and overflow
17551810
wg := sync.WaitGroup{}
1756-
in := make(chan listREntry, inputBuffer)
1811+
in := make(chan listREntry, listRInputBuffer)
17571812
out := make(chan error, fs.Config.Checkers)
17581813
list := walk.NewListRHelper(callback)
17591814
overflow := []listREntry{}
@@ -1766,6 +1821,9 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
17661821
job := listREntry{actualID(d.ID()), d.Remote()}
17671822
select {
17681823
case in <- job:
1824+
// Adding the wg after we've entered the item is
1825+
// safe here because we know when the callback
1826+
// is called we are holding a waitgroup.
17691827
wg.Add(1)
17701828
default:
17711829
overflow = append(overflow, job)
@@ -1779,7 +1837,7 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
17791837
in <- listREntry{directoryID, dir}
17801838

17811839
for i := 0; i < fs.Config.Checkers; i++ {
1782-
go f.listRRunner(ctx, &wg, in, out, cb, grouping)
1840+
go f.listRRunner(ctx, &wg, in, out, cb)
17831841
}
17841842
go func() {
17851843
// wait until the all directories are processed
@@ -1789,8 +1847,8 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
17891847
mu.Lock()
17901848
l := len(overflow)
17911849
// only fill half of the channel to prevent entries being put into overflow again
1792-
if l > inputBuffer/2 {
1793-
l = inputBuffer / 2
1850+
if l > listRInputBuffer/2 {
1851+
l = listRInputBuffer / 2
17941852
}
17951853
wg.Add(l)
17961854
for _, d := range overflow[:l] {

0 commit comments

Comments
 (0)