1 | 1 | // Copyright © 2022 Apple Inc. |
2 | 2 |
| 3 | +#pragma once |
| 4 | + |
| 5 | +#include <ATen/mps/MPSAllocatorInterface.h> |
3 | 6 | #include <ATen/mps/MPSStream.h> |
4 | 7 | #include <cstdio> |
5 | 8 | #include <mutex> |
9 | 12 |
10 | 13 | // This implementation is based on CUDACachingAllocator.
11 | 14 | // It utilizes Metal Heaps to improve buffer allocation performance.
| 15 | +// Do not include this header. Use MPSAllocatorInterface.h instead. |
12 | 16 | // TODO: Unify the logic with CUDACachingAllocator and remove redundant code. |
13 | 17 | namespace at { |
14 | 18 | namespace mps { |
15 | | - |
16 | | -class IMpsAllocatorCallback { |
17 | | - public: |
18 | | - enum class EventType { |
19 | | - ALLOCATED, // buffer got allocated to be used immediately |
20 | | - RECYCLED, // buffer pulled from free list to be reused |
21 | | - FREED, // buffer put to free list for future recycling |
22 | | - RELEASED, // buffer memory released |
23 | | - }; |
24 | | - virtual ~IMpsAllocatorCallback() = default; |
25 | | - virtual void executeMPSAllocatorCallback(void* ptr, EventType event) = 0; |
26 | | -}; |
27 | | - |
28 | | -// MPS allocator will execute every registered callback when a block of memory is freed. |
29 | | -C10_DECLARE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback); |
30 | | -#define REGISTER_MPS_ALLOCATOR_CALLBACK(name, ...) \ |
31 | | - C10_REGISTER_CLASS(MPSAllocatorCallbacksRegistry, name, __VA_ARGS__); |
32 | | - |
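The callback interface and registry removed above now live in MPSAllocatorInterface.h, but their usage is unchanged. A minimal sketch of registering a callback against that interface (the `LoggingCallback` struct and the `logger` registry key are hypothetical; the interface, enum, and macro names come from the removed block):

```objc
#include <ATen/mps/MPSAllocatorInterface.h>
#include <cstdio>

namespace {
// Hypothetical callback: the MPS allocator executes every registered
// callback when a block of memory is freed.
struct LoggingCallback : public at::mps::IMpsAllocatorCallback {
  void executeMPSAllocatorCallback(void* ptr, EventType event) override {
    if (event == EventType::FREED) {
      std::fprintf(stderr, "MPS buffer %p moved to the free list\n", ptr);
    }
  }
};
} // anonymous namespace

// registers LoggingCallback under the key "logger" in the registry
REGISTER_MPS_ALLOCATOR_CALLBACK(logger, LoggingCallback);
```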
33 | 19 | namespace HeapAllocator { |
34 | 20 |
35 | 21 | #define MB(x) round_page(x * 1048576UL) |
@@ -263,27 +249,44 @@ class MPSHeapAllocatorImpl |
263 | 249 |
264 | 250 | // interface exposed to at::Allocator |
265 | 251 | id<MTLBuffer> malloc(size_t size, uint32_t usage); |
| 252 | + // frees a buffer and returns it to the buffer pool
266 | 253 | void free(void* ptr); |
| 254 | + // releases all the cached buffers and their associated heaps |
267 | 255 | void emptyCache(); |
268 | | - // interface exposed to internal MPS operations |
| 256 | + // returns true if buffer was allocated from the shared pool |
269 | 257 | bool isSharedBuffer(void* ptr); |
270 | | - ssize_t getRequestedBufferSize(void* ptr); |
| 258 | + // get the requested unaligned size of an MTLBuffer |
| 259 | + ssize_t getUnalignedBufferSize(void* ptr); |
| 260 | + // set the shape of a base tensor from a view tensor |
271 | 261 | void setBufferShape(void* ptr, const IntArrayRef& shape); |
| 262 | + // retrieve the shape of a base tensor from a view tensor |
272 | 263 | IntArrayRef getBufferShape(void* ptr); |
| 264 | + // allocate a buffer from a specialized pool to import CPU scalars into the GPU
273 | 265 | id<MTLBuffer> allocScalarBufferWithValue(void* value, size_t size); |
274 | 266 | // indicates how far (in megabytes) the current total allocations are from the
275 | 267 | // low watermark limit, which is used to detect whether we're under memory pressure.
276 | 268 | // This returns zero once we've reached the low watermark limit.
277 | 269 | ssize_t getLowWatermarkValue(); |
278 | | - |
279 | | - bool getDebugVerbosity() const { return m_debug_verbosity; } |
280 | | - size_t getMaxTotalAllowedSize() const { return m_max_total_allowed_size; } |
| 270 | + // (see m_low_watermark_ratio for description) |
| 271 | + void setLowWatermarkRatio(double ratio); |
| 272 | + // (see m_high_watermark_ratio for description) |
| 273 | + void setHighWatermarkRatio(double ratio); |
| 274 | + // (see m_low_watermark_limit for description) |
281 | 275 | size_t getLowWatermarkLimit() const { return m_low_watermark_limit; } |
| 276 | + // (see m_max_total_allowed_size for description) |
| 277 | + size_t getHighWatermarkLimit() const { return m_max_total_allowed_size; } |
| 278 | + // (see m_total_allocated_memory for description) |
| 279 | + size_t getTotalAllocatedMemory() const { return m_total_allocated_memory; }
| 280 | + // (see enum DebugVerbosity for description) |
| 281 | + uint32_t getDebugVerbosity() const { return m_debug_verbosity; } |
| 282 | + // returns the device that we allocate from |
282 | 283 | inline id<MTLDevice> Device() const { return m_device; } |
283 | 284 |
284 | 285 | private: |
285 | 286 | // (see m_high_watermark_ratio for description) |
286 | 287 | constexpr static double default_high_watermark_ratio = 1.7; |
| 288 | + // we set the allowed upper bound to twice the size of recommendedMaxWorkingSetSize. |
| 289 | + constexpr static double default_high_watermark_upper_bound = 2.0; |
287 | 290 | // (see m_low_watermark_ratio for description) |
288 | 291 | // on unified memory, we could allocate beyond the recommendedMaxWorkingSetSize |
289 | 292 | constexpr static double default_low_watermark_ratio_unified = 1.4; |
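For context, a minimal sketch of how these ratios could translate into byte limits (an assumption about the implementation, not the verbatim code; `recommendedMaxWorkingSetSize` is the real MTLDevice property the comments reference):

```objc
#import <Metal/Metal.h>

// Hypothetical derivation of the watermark limits from the device's
// recommended working set size, scaled by the configured ratios.
static void deriveWatermarkLimits(id<MTLDevice> device,
                                  double high_ratio,  // e.g. 1.7
                                  double low_ratio,   // e.g. 1.4 on unified memory
                                  size_t& max_total_allowed_size,
                                  size_t& low_watermark_limit) {
  const double max_ws = (double)[device recommendedMaxWorkingSetSize];
  // high watermark: upper bound on total allocations
  max_total_allowed_size = (size_t)(high_ratio * max_ws);
  // low watermark: crossing it signals memory pressure to the allocator
  low_watermark_limit = (size_t)(low_ratio * max_ws);
}
```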
@@ -375,17 +378,5 @@ class MPSHeapAllocatorImpl |
375 | 378 | }; |
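A minimal usage sketch of the `at::Allocator`-facing interface declared above (the usage-flag value and the direct MTLBuffer-to-`void*` mapping are placeholders for illustration, not the real contract):

```objc
#include <ATen/mps/MPSAllocator.h>

// Hypothetical round trip through MPSHeapAllocatorImpl's public interface.
void allocatorRoundTrip(at::mps::HeapAllocator::MPSHeapAllocatorImpl& allocator) {
  id<MTLBuffer> buffer = allocator.malloc(1024, /*usage=*/0); // request 1 KiB
  void* ptr = (void*)buffer; // illustrative mapping only

  if (allocator.isSharedBuffer(ptr)) {
    // the original, unaligned request size (here: 1024)
    ssize_t requested = allocator.getUnalignedBufferSize(ptr);
    (void)requested;
  }
  allocator.free(ptr);     // buffer returns to its pool for recycling
  allocator.emptyCache();  // release cached buffers and their heaps
}
```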
376 | 379 |
377 | 380 | } // namespace HeapAllocator |
378 | | - |
379 | | -// interface exposed to internal MPS operations |
380 | | - |
381 | | -// get the requested non-aligned size of an MTL buffer |
382 | | -ssize_t get_requested_buffer_size(void* ptr); |
383 | | -// retrieve the shape of a base tensor from a view tensor |
384 | | -IntArrayRef get_buffer_shape(void* ptr); |
385 | | -// set the shape of a base tensor from a view tensor |
386 | | -void set_buffer_shape(void* ptr, const IntArrayRef& shape); |
387 | | -// allocate a buffer from a specialized pool to import CPU scalars into GPU |
388 | | -DataPtr allocate_scalar_buffer(void* value, size_t size); |
389 | | - |
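The free functions removed here map onto the member functions added earlier in this diff; a hypothetical migration of a call site (the allocator reference and the function name are illustrative):

```objc
#include <ATen/mps/MPSAllocator.h>

// Hypothetical call site, updated from the removed free functions to the
// member functions on MPSHeapAllocatorImpl.
void queryBufferMetadata(at::mps::HeapAllocator::MPSHeapAllocatorImpl& allocator,
                         void* ptr) {
  ssize_t size = allocator.getUnalignedBufferSize(ptr);   // was get_requested_buffer_size(ptr)
  at::IntArrayRef shape = allocator.getBufferShape(ptr);  // was get_buffer_shape(ptr)
  allocator.setBufferShape(ptr, shape);                   // was set_buffer_shape(ptr, shape)
  (void)size;
}
```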
390 | 381 | } // namespace mps |
391 | 382 | } // namespace at |