|
13 | 13 | */ |
14 | 14 | package io.trino.plugin.lance.internal; |
15 | 15 |
|
| 16 | +import com.google.common.cache.CacheBuilder; |
| 17 | +import com.google.common.cache.CacheLoader; |
16 | 18 | import com.google.inject.Inject; |
| 19 | +import io.airlift.log.Logger; |
| 20 | +import io.trino.cache.NonEvictableLoadingCache; |
| 21 | +import io.trino.cache.SafeCaches; |
17 | 22 | import io.trino.plugin.lance.LanceColumnHandle; |
18 | 23 | import io.trino.plugin.lance.LanceConfig; |
19 | 24 | import io.trino.plugin.lance.LanceNamespaceProperties; |
|
38 | 43 | import java.util.LinkedHashMap; |
39 | 44 | import java.util.List; |
40 | 45 | import java.util.Map; |
| 46 | +import java.util.Objects; |
41 | 47 | import java.util.Set; |
| 48 | +import java.util.concurrent.ExecutionException; |
| 49 | +import java.util.concurrent.TimeUnit; |
42 | 50 | import java.util.stream.Collectors; |
43 | 51 |
|
44 | 52 | import static com.google.common.collect.ImmutableList.toImmutableList; |
45 | 53 |
|
46 | 54 | public class LanceReader |
47 | 55 | implements Closeable |
48 | 56 | { |
| 57 | + private static final Logger log = Logger.get(LanceReader.class); |
| 58 | + |
49 | 59 | // TODO: support multiple schemas |
50 | 60 | public static final String SCHEMA = "default"; |
51 | 61 | private static final String TABLE_PATH_SUFFIX = ".lance"; |
52 | 62 | private static final BufferAllocator allocator = new RootAllocator( |
53 | 63 | RootAllocator.configBuilder().from(RootAllocator.defaultConfig()).maxAllocation(4 * 1024 * 1024).build()); |
54 | 64 |
|
| 65 | + // Cache for dataset metadata (fragments) - shared across all LanceReader instances per worker JVM |
| 66 | + // Maximum 100 entries, expires 1 hour after last access (same as lance-spark) |
| 67 | + private static final NonEvictableLoadingCache<CacheKey, Map<Integer, Fragment>> FRAGMENT_CACHE = |
| 68 | + SafeCaches.buildNonEvictableCache( |
| 69 | + CacheBuilder.newBuilder() |
| 70 | + .maximumSize(100) |
| 71 | + .expireAfterAccess(1, TimeUnit.HOURS), |
| 72 | + new CacheLoader<>() |
| 73 | + { |
| 74 | + @Override |
| 75 | + public Map<Integer, Fragment> load(CacheKey key) |
| 76 | + { |
| 77 | + log.debug("Loading fragments for table: %s", key.getTablePath()); |
| 78 | + Dataset dataset = Dataset.open(key.getTablePath(), allocator); |
| 79 | + return dataset.getFragments().stream() |
| 80 | + .collect(Collectors.toMap(Fragment::getId, f -> f)); |
| 81 | + } |
| 82 | + }); |
| 83 | + |
| 84 | + // Cache for schema metadata - shared across all LanceReader instances per worker JVM |
| 85 | + private static final NonEvictableLoadingCache<CacheKey, Schema> SCHEMA_CACHE = |
| 86 | + SafeCaches.buildNonEvictableCache( |
| 87 | + CacheBuilder.newBuilder() |
| 88 | + .maximumSize(100) |
| 89 | + .expireAfterAccess(1, TimeUnit.HOURS), |
| 90 | + new CacheLoader<>() |
| 91 | + { |
| 92 | + @Override |
| 93 | + public Schema load(CacheKey key) |
| 94 | + { |
| 95 | + log.debug("Loading schema for table: %s", key.getTablePath()); |
| 96 | + try (Dataset dataset = Dataset.open(key.getTablePath(), allocator)) { |
| 97 | + return dataset.getSchema(); |
| 98 | + } |
| 99 | + } |
| 100 | + }); |
| 101 | + |
55 | 102 | private final String root; |
56 | 103 | private final LanceNamespace namespace; |
57 | 104 |
|
@@ -138,17 +185,46 @@ public List<Fragment> getFragments(LanceTableHandle tableHandle) |
138 | 185 | return getFragments(getTablePath(tableHandle.getTableName())); |
139 | 186 | } |
140 | 187 |
|
| 188 | + /** |
| 189 | + * Get a specific fragment by ID from the cache. |
| 190 | + * This is useful for workers that need to access a specific fragment for data reading. |
| 191 | + * |
| 192 | + * @param tablePath the path to the lance table |
| 193 | + * @param fragmentId the fragment ID to retrieve |
| 194 | + * @return the Fragment object, or null if not found |
| 195 | + */ |
| 196 | + public static Fragment getFragment(String tablePath, int fragmentId) |
| 197 | + { |
| 198 | + try { |
| 199 | + CacheKey key = new CacheKey(tablePath); |
| 200 | + Map<Integer, Fragment> fragments = FRAGMENT_CACHE.get(key); |
| 201 | + return fragments.get(fragmentId); |
| 202 | + } |
| 203 | + catch (ExecutionException e) { |
| 204 | + throw new RuntimeException("Failed to get fragment from cache for table: " + tablePath, e); |
| 205 | + } |
| 206 | + } |
| 207 | + |
141 | 208 | private static List<Fragment> getFragments(String tablePath) |
142 | 209 | { |
143 | | - try (Dataset dataset = Dataset.open(tablePath, allocator)) { |
144 | | - return dataset.getFragments(); |
| 210 | + try { |
| 211 | + CacheKey key = new CacheKey(tablePath); |
| 212 | + Map<Integer, Fragment> fragmentMap = FRAGMENT_CACHE.get(key); |
| 213 | + return List.copyOf(fragmentMap.values()); |
| 214 | + } |
| 215 | + catch (ExecutionException e) { |
| 216 | + throw new RuntimeException("Failed to get fragments from cache for table: " + tablePath, e); |
145 | 217 | } |
146 | 218 | } |
147 | 219 |
|
148 | 220 | private static Schema getSchema(String tablePath) |
149 | 221 | { |
150 | | - try (Dataset dataset = Dataset.open(tablePath, allocator)) { |
151 | | - return dataset.getSchema(); |
| 222 | + try { |
| 223 | + CacheKey key = new CacheKey(tablePath); |
| 224 | + return SCHEMA_CACHE.get(key); |
| 225 | + } |
| 226 | + catch (ExecutionException e) { |
| 227 | + throw new RuntimeException("Failed to get schema from cache for table: " + tablePath, e); |
152 | 228 | } |
153 | 229 | } |
154 | 230 |
|
@@ -176,4 +252,39 @@ public void close() |
176 | 252 | } |
177 | 253 | } |
178 | 254 | } |
| 255 | + |
| 256 | + /** |
| 257 | + * Cache key for dataset metadata caching. |
| 258 | + * Uses table path as the unique identifier. |
| 259 | + */ |
| 260 | + private static class CacheKey |
| 261 | + { |
| 262 | + private final String tablePath; |
| 263 | + |
| 264 | + CacheKey(String tablePath) |
| 265 | + { |
| 266 | + this.tablePath = tablePath; |
| 267 | + } |
| 268 | + |
| 269 | + public String getTablePath() |
| 270 | + { |
| 271 | + return tablePath; |
| 272 | + } |
| 273 | + |
| 274 | + @Override |
| 275 | + public boolean equals(Object o) |
| 276 | + { |
| 277 | + if (o == null || getClass() != o.getClass()) { |
| 278 | + return false; |
| 279 | + } |
| 280 | + CacheKey cacheKey = (CacheKey) o; |
| 281 | + return Objects.equals(tablePath, cacheKey.tablePath); |
| 282 | + } |
| 283 | + |
| 284 | + @Override |
| 285 | + public int hashCode() |
| 286 | + { |
| 287 | + return Objects.hash(tablePath); |
| 288 | + } |
| 289 | + } |
179 | 290 | } |
0 commit comments