Skip to content

Commit 431d64d

Browse files
author
Jonah Williams
authored
[Impeller] reland: write directly to device buffer. (flutter#49691)
Reland of flutter/engine#49505. Part of flutter#140804. We can't use the existing host buffer abstraction, as that requires us to collect all allocations up front. By itself, this isn't sufficient for flutter#140804, because we'll need a way to mark ranges as dirty and/or flush if we don't have host-coherent memory. Even so, this change should be beneficial on its own, as we'll create fewer device buffers and should do less allocation in general. The size of each device buffer is 1024 kB (1,024,000 bytes), somewhat arbitrarily chosen.
1 parent fda0c29 commit 431d64d

File tree

102 files changed

+685
-481
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+685
-481
lines changed

ci/licenses_golden/excluded_files

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
../../../flutter/impeller/entity/contents/filters/directional_gaussian_blur_filter_contents_unittests.cc
147147
../../../flutter/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc
148148
../../../flutter/impeller/entity/contents/filters/inputs/filter_input_unittests.cc
149+
../../../flutter/impeller/entity/contents/host_buffer_unittests.cc
149150
../../../flutter/impeller/entity/contents/test
150151
../../../flutter/impeller/entity/contents/tiled_texture_contents_unittests.cc
151152
../../../flutter/impeller/entity/contents/vertices_contents_unittests.cc
@@ -187,7 +188,6 @@
187188
../../../flutter/impeller/renderer/compute_subgroup_unittests.cc
188189
../../../flutter/impeller/renderer/compute_unittests.cc
189190
../../../flutter/impeller/renderer/device_buffer_unittests.cc
190-
../../../flutter/impeller/renderer/host_buffer_unittests.cc
191191
../../../flutter/impeller/renderer/pipeline_descriptor_unittests.cc
192192
../../../flutter/impeller/renderer/pool_unittests.cc
193193
../../../flutter/impeller/renderer/renderer_dart_unittests.cc

impeller/aiks/aiks_context.cc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include "impeller/aiks/aiks_context.h"
66

7+
#include "fml/closure.h"
78
#include "impeller/aiks/picture.h"
89
#include "impeller/typographer/typographer_context.h"
910

@@ -40,11 +41,18 @@ ContentContext& AiksContext::GetContentContext() const {
4041
return *content_context_;
4142
}
4243

43-
bool AiksContext::Render(const Picture& picture, RenderTarget& render_target) {
44+
bool AiksContext::Render(const Picture& picture,
45+
RenderTarget& render_target,
46+
bool reset_host_buffer) {
4447
if (!IsValid()) {
4548
return false;
4649
}
4750

51+
fml::ScopedCleanupClosure closure([&]() {
52+
if (reset_host_buffer) {
53+
content_context_->GetTransientsBuffer().Reset();
54+
}
55+
});
4856
if (picture.pass) {
4957
return picture.pass->Render(*content_context_, render_target);
5058
}

impeller/aiks/aiks_context.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ class AiksContext {
3939

4040
ContentContext& GetContentContext() const;
4141

42-
bool Render(const Picture& picture, RenderTarget& render_target);
42+
bool Render(const Picture& picture,
43+
RenderTarget& render_target,
44+
bool reset_host_buffer);
4345

4446
private:
4547
std::shared_ptr<Context> context_;

impeller/aiks/aiks_playground.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ bool AiksPlayground::OpenPlaygroundHere(AiksPlaygroundCallback callback) {
5353
if (!picture.has_value()) {
5454
return false;
5555
}
56-
return renderer.Render(*picture, render_target);
56+
return renderer.Render(*picture, render_target, true);
5757
});
5858
}
5959

impeller/aiks/picture.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ std::shared_ptr<Texture> Picture::RenderToTexture(
8484
return nullptr;
8585
}
8686

87-
if (!context.Render(*this, target)) {
87+
if (!context.Render(*this, target, false)) {
8888
VALIDATION_LOG << "Could not render Picture to Texture.";
8989
return nullptr;
9090
}

impeller/base/allocation.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#define FLUTTER_IMPELLER_BASE_ALLOCATION_H_
77

88
#include <cstdint>
9-
#include <limits>
109
#include <memory>
1110

1211
#include "flutter/fml/mapping.h"

impeller/core/buffer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ class Buffer {
1616
public:
1717
virtual ~Buffer();
1818

19-
virtual std::shared_ptr<const DeviceBuffer> GetDeviceBuffer(
20-
Allocator& allocator) const = 0;
19+
virtual std::shared_ptr<const DeviceBuffer> GetDeviceBuffer() const = 0;
2120
};
2221

2322
} // namespace impeller

impeller/core/device_buffer.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ DeviceBuffer::DeviceBuffer(DeviceBufferDescriptor desc) : desc_(desc) {}
1111
DeviceBuffer::~DeviceBuffer() = default;
1212

1313
// |Buffer|
14-
std::shared_ptr<const DeviceBuffer> DeviceBuffer::GetDeviceBuffer(
15-
Allocator& allocator) const {
14+
std::shared_ptr<const DeviceBuffer> DeviceBuffer::GetDeviceBuffer() const {
1615
return shared_from_this();
1716
}
1817

18+
void DeviceBuffer::Flush(std::optional<Range> range) const {}
19+
1920
BufferView DeviceBuffer::AsBufferView() const {
2021
BufferView view;
2122
view.buffer = shared_from_this();

impeller/core/device_buffer.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,21 @@ class DeviceBuffer : public Buffer,
3838
uint16_t row_bytes) const;
3939

4040
// |Buffer|
41-
std::shared_ptr<const DeviceBuffer> GetDeviceBuffer(
42-
Allocator& allocator) const;
41+
std::shared_ptr<const DeviceBuffer> GetDeviceBuffer() const;
4342

4443
const DeviceBufferDescriptor& GetDeviceBufferDescriptor() const;
4544

4645
virtual uint8_t* OnGetContents() const = 0;
4746

47+
/// Make any pending writes visible to the GPU.
48+
///
49+
/// This method must be called if the device pointer provided by
50+
/// [OnGetContents] is written to without using [CopyHostBuffer]. On devices
51+
/// with coherent host memory, this method will not perform extra work.
52+
///
53+
/// If the range is not provided, the entire buffer is flushed.
54+
virtual void Flush(std::optional<Range> range = std::nullopt) const;
55+
4856
protected:
4957
const DeviceBufferDescriptor desc_;
5058

impeller/core/host_buffer.cc

Lines changed: 122 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -4,143 +4,196 @@
44

55
#include "impeller/core/host_buffer.h"
66

7-
#include <algorithm>
87
#include <cstring>
9-
10-
#include "flutter/fml/logging.h"
8+
#include <tuple>
119

1210
#include "impeller/core/allocator.h"
1311
#include "impeller/core/buffer_view.h"
1412
#include "impeller/core/device_buffer.h"
13+
#include "impeller/core/device_buffer_descriptor.h"
14+
#include "impeller/core/formats.h"
1515

1616
namespace impeller {
1717

18-
std::shared_ptr<HostBuffer> HostBuffer::Create() {
19-
return std::shared_ptr<HostBuffer>(new HostBuffer());
18+
constexpr size_t kAllocatorBlockSize = 1024000; // 1024 Kb.
19+
20+
std::shared_ptr<HostBuffer> HostBuffer::Create(
21+
const std::shared_ptr<Allocator>& allocator) {
22+
return std::shared_ptr<HostBuffer>(new HostBuffer(allocator));
2023
}
2124

22-
HostBuffer::HostBuffer() = default;
25+
HostBuffer::HostBuffer(const std::shared_ptr<Allocator>& allocator)
26+
: allocator_(allocator) {
27+
DeviceBufferDescriptor desc;
28+
desc.size = kAllocatorBlockSize;
29+
desc.storage_mode = StorageMode::kHostVisible;
30+
for (auto i = 0u; i < kHostBufferArenaSize; i++) {
31+
device_buffers_[i].push_back(allocator->CreateBuffer(desc));
32+
}
33+
}
2334

2435
HostBuffer::~HostBuffer() = default;
2536

2637
void HostBuffer::SetLabel(std::string label) {
27-
state_->label = std::move(label);
38+
label_ = std::move(label);
2839
}
2940

3041
BufferView HostBuffer::Emplace(const void* buffer,
3142
size_t length,
3243
size_t align) {
33-
auto [device_buffer, range] = state_->Emplace(buffer, length, align);
44+
auto [data, range, device_buffer] = EmplaceInternal(buffer, length, align);
3445
if (!device_buffer) {
3546
return {};
3647
}
37-
return BufferView{state_, device_buffer, range};
48+
return BufferView{std::move(device_buffer), data, range};
3849
}
3950

4051
BufferView HostBuffer::Emplace(const void* buffer, size_t length) {
41-
auto [device_buffer, range] = state_->Emplace(buffer, length);
52+
auto [data, range, device_buffer] = EmplaceInternal(buffer, length);
4253
if (!device_buffer) {
4354
return {};
4455
}
45-
return BufferView{state_, device_buffer, range};
56+
return BufferView{std::move(device_buffer), data, range};
4657
}
4758

4859
BufferView HostBuffer::Emplace(size_t length,
4960
size_t align,
5061
const EmplaceProc& cb) {
51-
auto [buffer, range] = state_->Emplace(length, align, cb);
52-
if (!buffer) {
62+
auto [data, range, device_buffer] = EmplaceInternal(length, align, cb);
63+
if (!device_buffer) {
5364
return {};
5465
}
55-
return BufferView{state_, buffer, range};
56-
}
57-
58-
std::shared_ptr<const DeviceBuffer> HostBuffer::GetDeviceBuffer(
59-
Allocator& allocator) const {
60-
return state_->GetDeviceBuffer(allocator);
66+
return BufferView{std::move(device_buffer), data, range};
6167
}
6268

63-
void HostBuffer::Reset() {
64-
state_->Reset();
69+
HostBuffer::TestStateQuery HostBuffer::GetStateForTest() {
70+
return HostBuffer::TestStateQuery{
71+
.current_frame = frame_index_,
72+
.current_buffer = current_buffer_,
73+
.total_buffer_count = device_buffers_[frame_index_].size(),
74+
};
6575
}
6676

67-
size_t HostBuffer::GetSize() const {
68-
return state_->GetReservedLength();
69-
}
70-
71-
size_t HostBuffer::GetLength() const {
72-
return state_->GetLength();
77+
void HostBuffer::MaybeCreateNewBuffer(size_t required_size) {
78+
current_buffer_++;
79+
if (current_buffer_ >= device_buffers_[frame_index_].size()) {
80+
FML_DCHECK(required_size <= kAllocatorBlockSize);
81+
DeviceBufferDescriptor desc;
82+
desc.size = kAllocatorBlockSize;
83+
desc.storage_mode = StorageMode::kHostVisible;
84+
device_buffers_[frame_index_].push_back(allocator_->CreateBuffer(desc));
85+
}
86+
offset_ = 0;
7387
}
7488

75-
std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
76-
size_t length,
77-
size_t align,
78-
const EmplaceProc& cb) {
89+
std::tuple<uint8_t*, Range, std::shared_ptr<DeviceBuffer>>
90+
HostBuffer::EmplaceInternal(size_t length,
91+
size_t align,
92+
const EmplaceProc& cb) {
7993
if (!cb) {
8094
return {};
8195
}
96+
97+
// If the requested allocation is bigger than the block size, create a one-off
98+
// device buffer and write to that.
99+
if (length > kAllocatorBlockSize) {
100+
DeviceBufferDescriptor desc;
101+
desc.size = length;
102+
desc.storage_mode = StorageMode::kHostVisible;
103+
auto device_buffer = allocator_->CreateBuffer(desc);
104+
if (!device_buffer) {
105+
return {};
106+
}
107+
if (cb) {
108+
cb(device_buffer->OnGetContents());
109+
device_buffer->Flush(Range{0, length});
110+
}
111+
return std::make_tuple(device_buffer->OnGetContents(), Range{0, length},
112+
device_buffer);
113+
}
114+
82115
auto old_length = GetLength();
83-
if (!Truncate(old_length + length)) {
84-
return {};
116+
if (old_length + length > kAllocatorBlockSize) {
117+
MaybeCreateNewBuffer(length);
85118
}
86-
generation++;
87-
cb(GetBuffer() + old_length);
119+
old_length = GetLength();
120+
121+
auto current_buffer = GetCurrentBuffer();
122+
cb(current_buffer->OnGetContents() + old_length);
123+
current_buffer->Flush(Range{old_length, length});
88124

89-
return std::make_pair(GetBuffer(), Range{old_length, length});
125+
offset_ += length;
126+
auto contents = current_buffer->OnGetContents();
127+
return std::make_tuple(contents, Range{old_length, length},
128+
std::move(current_buffer));
90129
}
91130

92-
std::shared_ptr<const DeviceBuffer>
93-
HostBuffer::HostBufferState::GetDeviceBuffer(Allocator& allocator) const {
94-
if (generation == device_buffer_generation) {
95-
return device_buffer;
96-
}
97-
auto new_buffer = allocator.CreateBufferWithCopy(GetBuffer(), GetLength());
98-
if (!new_buffer) {
99-
return nullptr;
131+
std::tuple<uint8_t*, Range, std::shared_ptr<DeviceBuffer>>
132+
HostBuffer::EmplaceInternal(const void* buffer, size_t length) {
133+
// If the requested allocation is bigger than the block size, create a one-off
134+
// device buffer and write to that.
135+
if (length > kAllocatorBlockSize) {
136+
DeviceBufferDescriptor desc;
137+
desc.size = length;
138+
desc.storage_mode = StorageMode::kHostVisible;
139+
auto device_buffer = allocator_->CreateBuffer(desc);
140+
if (!device_buffer) {
141+
return {};
142+
}
143+
if (buffer) {
144+
if (!device_buffer->CopyHostBuffer(static_cast<const uint8_t*>(buffer),
145+
Range{0, length})) {
146+
return {};
147+
}
148+
}
149+
return std::make_tuple(device_buffer->OnGetContents(), Range{0, length},
150+
device_buffer);
100151
}
101-
new_buffer->SetLabel(label);
102-
device_buffer_generation = generation;
103-
device_buffer = std::move(new_buffer);
104-
return device_buffer;
105-
}
106152

107-
std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
108-
const void* buffer,
109-
size_t length) {
110153
auto old_length = GetLength();
111-
if (!Truncate(old_length + length)) {
112-
return {};
154+
if (old_length + length > kAllocatorBlockSize) {
155+
MaybeCreateNewBuffer(length);
113156
}
114-
generation++;
157+
old_length = GetLength();
158+
159+
auto current_buffer = GetCurrentBuffer();
115160
if (buffer) {
116-
::memmove(GetBuffer() + old_length, buffer, length);
161+
::memmove(current_buffer->OnGetContents() + old_length, buffer, length);
162+
current_buffer->Flush(Range{old_length, length});
117163
}
118-
return std::make_pair(GetBuffer(), Range{old_length, length});
164+
offset_ += length;
165+
auto contents = current_buffer->OnGetContents();
166+
return std::make_tuple(contents, Range{old_length, length},
167+
std::move(current_buffer));
119168
}
120169

121-
std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
122-
const void* buffer,
123-
size_t length,
124-
size_t align) {
170+
std::tuple<uint8_t*, Range, std::shared_ptr<DeviceBuffer>>
171+
HostBuffer::EmplaceInternal(const void* buffer, size_t length, size_t align) {
125172
if (align == 0 || (GetLength() % align) == 0) {
126-
return Emplace(buffer, length);
173+
return EmplaceInternal(buffer, length);
127174
}
128175

129176
{
130-
auto [buffer, range] = Emplace(nullptr, align - (GetLength() % align));
177+
auto [buffer, range, device_buffer] =
178+
EmplaceInternal(nullptr, align - (GetLength() % align));
131179
if (!buffer) {
132180
return {};
133181
}
134182
}
135183

136-
return Emplace(buffer, length);
184+
return EmplaceInternal(buffer, length);
137185
}
138186

139-
void HostBuffer::HostBufferState::Reset() {
140-
generation += 1;
141-
device_buffer = nullptr;
142-
bool did_truncate = Truncate(0);
143-
FML_CHECK(did_truncate);
187+
void HostBuffer::Reset() {
188+
// When resetting the host buffer state at the end of the frame, check if
189+
// there are any unused buffers and remove them.
190+
while (device_buffers_[frame_index_].size() > current_buffer_ + 1) {
191+
device_buffers_[frame_index_].pop_back();
192+
}
193+
194+
offset_ = 0u;
195+
current_buffer_ = 0u;
196+
frame_index_ = (frame_index_ + 1) % kHostBufferArenaSize;
144197
}
145198

146199
} // namespace impeller

0 commit comments

Comments
 (0)