Skip to content

Commit ca1cbfe

Browse files
fmeumcopybara-github
authored andcommitted
Speculative fix for repo contents cache race
The error message observed in #26713 is consistent with `FileChannel#open` failing because the given path doesn't exist. This could happen if one Bazel process observed that `!entryDir.isDirectory()`, which is true if the path doesn't exist, and proceeded to delete the path while another process had just created the directory and is now opening the channel. Since the entry dir is never expected to be an existing non-directory unless the cache has been corrupted, this logic can be removed. Another possible source of `IOException` during normal operation is on an interrupt (such as the user hitting Ctrl+C). Instead, follow Skyframe best practices by surfacing this as an `InterruptedException` instead of a `FileLockInterruptionException`. Also document that concurrent use on the same path is not supported (it results in an `OverlappingFileLockException` if the lock is already held, regardless of whether that is in shared or exclusive mode) and why the current usages are safe. Fixes #26713 Closes #26914. PiperOrigin-RevId: 805338728 Change-Id: Ie808ebe6113b935180b93c21679d5398aa168802
1 parent 9656b8b commit ca1cbfe

4 files changed

Lines changed: 30 additions & 11 deletions

File tree

src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
396396
Profiler.instance()
397397
.profile(ProfilerTask.REPO_CACHE_GC_WAIT, "waiting to acquire repo cache lock")) {
398398
repositoryCache.getRepoContentsCache().acquireSharedLock();
399-
} catch (IOException e) {
399+
} catch (IOException | InterruptedException e) {
400400
throw new AbruptExitException(
401401
detailedExitCode(
402402
"could not acquire lock on repo contents cache", Code.BAD_REPO_CONTENTS_CACHE),

src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,10 @@ private Path ensureTrashDir() throws IOException {
157157
*/
158158
public Path moveToCache(
159159
Path fetchedRepoDir, Path fetchedRepoMarkerFile, String predeclaredInputHash)
160-
throws IOException {
160+
throws IOException, InterruptedException {
161161
Preconditions.checkState(path != null);
162162

163163
Path entryDir = path.getRelative(predeclaredInputHash);
164-
if (!entryDir.isDirectory()) {
165-
entryDir.delete();
166-
}
167164
String counter = getNextCounterInDir(entryDir);
168165
Path cacheRecordedInputsFile = entryDir.getChild(counter + RECORDED_INPUTS_SUFFIX);
169166
Path cacheRepoDir = entryDir.getChild(counter);
@@ -188,8 +185,12 @@ public Path moveToCache(
188185
return cacheRepoDir;
189186
}
190187

191-
private static String getNextCounterInDir(Path entryDir) throws IOException {
188+
private static String getNextCounterInDir(Path entryDir)
189+
throws IOException, InterruptedException {
192190
Path counterFile = entryDir.getRelative("counter");
191+
// This use of FileSystemLock.get is safe since the predeclared input hash is part of entryDir's
192+
// path and in particular includes the canonical repository name. This ensures that the same
193+
// lock file will not be acquired concurrently by multiple threads, which isn't supported.
193194
try (var lock = FileSystemLock.get(entryDir.getRelative("lock"), LockMode.EXCLUSIVE)) {
194195
int c = 0;
195196
if (counterFile.exists()) {
@@ -205,7 +206,7 @@ private static String getNextCounterInDir(Path entryDir) throws IOException {
205206
}
206207
}
207208

208-
public void acquireSharedLock() throws IOException {
209+
public void acquireSharedLock() throws IOException, InterruptedException {
209210
Preconditions.checkState(path != null);
210211
Preconditions.checkState(sharedLock == null, "this process already has the shared lock");
211212
sharedLock = FileSystemLock.get(path.getRelative(LOCK_PATH), LockMode.SHARED);

src/main/java/com/google/devtools/build/lib/util/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ java_library(
4646
srcs = ["FileSystemLock.java"],
4747
deps = [
4848
":string_encoding",
49+
"//src/main/java/com/google/devtools/build/lib/concurrent:thread_safety",
4950
"//src/main/java/com/google/devtools/build/lib/vfs",
5051
"//third_party:guava",
5152
],

src/main/java/com/google/devtools/build/lib/util/FileSystemLock.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
// limitations under the License.
1414
package com.google.devtools.build.lib.util;
1515

16-
1716
import com.google.common.annotations.VisibleForTesting;
17+
import com.google.devtools.build.lib.concurrent.ThreadSafety;
1818
import com.google.devtools.build.lib.vfs.Path;
1919
import java.io.IOException;
2020
import java.nio.channels.FileChannel;
2121
import java.nio.channels.FileLock;
22+
import java.nio.channels.FileLockInterruptionException;
2223
import java.nio.file.StandardOpenOption;
2324

2425
/**
@@ -71,9 +72,12 @@ private static FileChannel prepareChannel(Path path) throws IOException {
7172
* Tries to acquires a lock on the given path with the given mode. Throws an exception if the lock
7273
* is already held by another process.
7374
*
75+
* <p>This method must not be called concurrently from multiple threads with the same path.
76+
*
7477
* @throws LockAlreadyHeldException if the lock is already exclusively held by another process
7578
* @throws IOException if another error occurred
7679
*/
80+
@ThreadSafety.ThreadHostile
7781
public static FileSystemLock tryGet(Path path, LockMode mode) throws IOException {
7882
FileChannel channel = prepareChannel(path);
7983
FileLock lock = channel.tryLock(0, Long.MAX_VALUE, mode == LockMode.SHARED);
@@ -83,10 +87,23 @@ public static FileSystemLock tryGet(Path path, LockMode mode) throws IOException
8387
return new FileSystemLock(channel, lock);
8488
}
8589

86-
/** Acquires a lock on the given path with the given mode. Blocks until the lock is acquired. */
87-
public static FileSystemLock get(Path path, LockMode mode) throws IOException {
90+
/**
91+
* Acquires a lock on the given path with the given mode. Blocks until the lock is acquired.
92+
*
93+
* <p>This method must not be called concurrently from multiple threads with the same path.
94+
*/
95+
@ThreadSafety.ThreadHostile
96+
public static FileSystemLock get(Path path, LockMode mode)
97+
throws IOException, InterruptedException {
8898
FileChannel channel = prepareChannel(path);
89-
FileLock lock = channel.lock(0, Long.MAX_VALUE, mode == LockMode.SHARED);
99+
FileLock lock;
100+
try {
101+
lock = channel.lock(0, Long.MAX_VALUE, mode == LockMode.SHARED);
102+
} catch (FileLockInterruptionException e) {
103+
Thread.interrupted(); // clear interrupt bit
104+
channel.close();
105+
throw new InterruptedException();
106+
}
90107
return new FileSystemLock(channel, lock);
91108
}
92109

0 commit comments

Comments
 (0)