|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +#include "benchmark/benchmark.h" |
| 19 | + |
| 20 | +#include <algorithm> |
| 21 | +#include <cstdint> |
| 22 | +#include <cstring> |
| 23 | +#include <memory> |
| 24 | +#include <random> |
| 25 | +#include <string> |
| 26 | +#include <vector> |
| 27 | + |
| 28 | +#include "arrow/buffer.h" |
| 29 | +#include "arrow/io/compressed.h" |
| 30 | +#include "arrow/io/memory.h" |
| 31 | +#include "arrow/result.h" |
| 32 | +#include "arrow/testing/gtest_util.h" |
| 33 | +#include "arrow/util/compression.h" |
| 34 | +#include "arrow/util/config.h" |
| 35 | +#include "arrow/util/logging.h" |
| 36 | +#include "arrow/util/macros.h" |
| 37 | + |
| 38 | +namespace arrow::io { |
| 39 | + |
| 40 | +using ::arrow::Compression; |
| 41 | + |
| 42 | +std::vector<uint8_t> MakeCompressibleData(int data_size) { |
| 43 | + // XXX This isn't a real-world corpus so doesn't really represent the |
| 44 | + // comparative qualities of the algorithms |
| 45 | + |
| 46 | + // First make highly compressible data |
| 47 | + std::string base_data = |
| 48 | + "Apache Arrow is a cross-language development platform for in-memory data"; |
| 49 | + int nrepeats = static_cast<int>(1 + data_size / base_data.size()); |
| 50 | + |
| 51 | + std::vector<uint8_t> data(base_data.size() * nrepeats); |
| 52 | + for (int i = 0; i < nrepeats; ++i) { |
| 53 | + std::memcpy(data.data() + i * base_data.size(), base_data.data(), base_data.size()); |
| 54 | + } |
| 55 | + data.resize(data_size); |
| 56 | + |
| 57 | + // Then randomly mutate some bytes so as to make things harder |
| 58 | + std::mt19937 engine(42); |
| 59 | + std::exponential_distribution<> offsets(0.05); |
| 60 | + std::uniform_int_distribution<> values(0, 255); |
| 61 | + |
| 62 | + int64_t pos = 0; |
| 63 | + while (pos < data_size) { |
| 64 | + data[pos] = static_cast<uint8_t>(values(engine)); |
| 65 | + pos += static_cast<int64_t>(offsets(engine)); |
| 66 | + } |
| 67 | + |
| 68 | + return data; |
| 69 | +} |
| 70 | + |
| 71 | +// Using a non-zero copy buffer reader to benchmark the non-zero copy path. |
| 72 | +class NonZeroCopyBufferReader final : public InputStream { |
| 73 | + public: |
| 74 | + NonZeroCopyBufferReader(std::shared_ptr<Buffer> buffer) : reader_(std::move(buffer)) {} |
| 75 | + |
| 76 | + bool supports_zero_copy() const override { return false; } |
| 77 | + |
| 78 | + Result<int64_t> Read(int64_t nbytes, void* out) override { |
| 79 | + return reader_.Read(nbytes, out); |
| 80 | + } |
| 81 | + |
| 82 | + Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override { |
| 83 | + // Testing the non-zero copy path like reading from local file or Object store, |
| 84 | + // so we need to allocate a buffer and copy the data. |
| 85 | + ARROW_ASSIGN_OR_RAISE(auto buf, ::arrow::AllocateResizableBuffer(nbytes)); |
| 86 | + ARROW_ASSIGN_OR_RAISE(int64_t size, Read(nbytes, buf->mutable_data())); |
| 87 | + ARROW_RETURN_NOT_OK(buf->Resize(size)); |
| 88 | + return buf; |
| 89 | + } |
| 90 | + Status Close() override { return reader_.Close(); } |
| 91 | + Result<int64_t> Tell() const override { return reader_.Tell(); } |
| 92 | + bool closed() const override { return reader_.closed(); } |
| 93 | + |
| 94 | + private: |
| 95 | + ::arrow::io::BufferReader reader_; |
| 96 | +}; |
| 97 | + |
| 98 | +enum class BufferReadMode { ProvidedByCaller, ReturnedByCallee }; |
| 99 | + |
| 100 | +template <typename BufReader, BufferReadMode Mode> |
| 101 | +static void CompressedInputStreamBenchmark(::benchmark::State& state, |
| 102 | + Compression::type compression) { |
| 103 | + const int64_t input_size = state.range(0); |
| 104 | + const int64_t batch_size = state.range(1); |
| 105 | + |
| 106 | + const std::vector<uint8_t> data = MakeCompressibleData(static_cast<int>(input_size)); |
| 107 | + auto codec = ::arrow::util::Codec::Create(compression).ValueOrDie(); |
| 108 | + int64_t max_compress_len = |
| 109 | + codec->MaxCompressedLen(static_cast<int64_t>(data.size()), data.data()); |
| 110 | + std::shared_ptr<::arrow::ResizableBuffer> buf = |
| 111 | + ::arrow::AllocateResizableBuffer(max_compress_len).ValueOrDie(); |
| 112 | + const int64_t compressed_length = |
| 113 | + codec |
| 114 | + ->Compress(static_cast<int64_t>(data.size()), data.data(), max_compress_len, |
| 115 | + buf->mutable_data()) |
| 116 | + .ValueOrDie(); |
| 117 | + ABORT_NOT_OK(buf->Resize(compressed_length)); |
| 118 | + for (auto _ : state) { |
| 119 | + state.PauseTiming(); |
| 120 | + auto reader = std::make_shared<BufReader>(buf); |
| 121 | + [[maybe_unused]] std::unique_ptr<Buffer> read_buffer; |
| 122 | + if constexpr (Mode == BufferReadMode::ProvidedByCaller) { |
| 123 | + read_buffer = ::arrow::AllocateBuffer(batch_size).ValueOrDie(); |
| 124 | + } |
| 125 | + state.ResumeTiming(); |
| 126 | + // Put `CompressedInputStream::Make` in timing. |
| 127 | + auto input_stream = |
| 128 | + ::arrow::io::CompressedInputStream::Make(codec.get(), reader).ValueOrDie(); |
| 129 | + auto remaining_size = input_size; |
| 130 | + while (remaining_size > 0) { |
| 131 | + if constexpr (Mode == BufferReadMode::ProvidedByCaller) { |
| 132 | + auto value = input_stream->Read(batch_size, read_buffer->mutable_data()); |
| 133 | + ABORT_NOT_OK(value); |
| 134 | + remaining_size -= value.ValueOrDie(); |
| 135 | + } else { |
| 136 | + auto value = input_stream->Read(batch_size); |
| 137 | + ABORT_NOT_OK(value); |
| 138 | + remaining_size -= value.ValueOrDie()->size(); |
| 139 | + } |
| 140 | + } |
| 141 | + } |
| 142 | + state.SetBytesProcessed(input_size * state.iterations()); |
| 143 | +} |
| 144 | + |
| 145 | +template <Compression::type kCompression> |
| 146 | +static void CompressedInputStreamZeroCopyBufferProvidedByCaller( |
| 147 | + ::benchmark::State& state) { |
| 148 | + CompressedInputStreamBenchmark<::arrow::io::BufferReader, |
| 149 | + BufferReadMode::ProvidedByCaller>(state, kCompression); |
| 150 | +} |
| 151 | + |
| 152 | +template <Compression::type kCompression> |
| 153 | +static void CompressedInputStreamNonZeroCopyBufferProvidedByCaller( |
| 154 | + ::benchmark::State& state) { |
| 155 | + CompressedInputStreamBenchmark<NonZeroCopyBufferReader, |
| 156 | + BufferReadMode::ProvidedByCaller>(state, kCompression); |
| 157 | +} |
| 158 | + |
| 159 | +template <Compression::type kCompression> |
| 160 | +static void CompressedInputStreamZeroCopyBufferReturnedByCallee( |
| 161 | + ::benchmark::State& state) { |
| 162 | + CompressedInputStreamBenchmark<::arrow::io::BufferReader, |
| 163 | + BufferReadMode::ReturnedByCallee>(state, kCompression); |
| 164 | +} |
| 165 | + |
| 166 | +template <Compression::type kCompression> |
| 167 | +static void CompressedInputStreamNonZeroCopyBufferReturnedByCallee( |
| 168 | + ::benchmark::State& state) { |
| 169 | + CompressedInputStreamBenchmark<NonZeroCopyBufferReader, |
| 170 | + BufferReadMode::ReturnedByCallee>(state, kCompression); |
| 171 | +} |
| 172 | + |
| 173 | +static void CompressedInputArguments(::benchmark::internal::Benchmark* b) { |
| 174 | + b->ArgNames({"num_bytes", "batch_size"}) |
| 175 | + ->Args({8 * 1024, 8 * 1024}) |
| 176 | + ->Args({64 * 1024, 8 * 1024}) |
| 177 | + ->Args({64 * 1024, 64 * 1024}) |
| 178 | + ->Args({1024 * 1024, 8 * 1024}) |
| 179 | + ->Args({1024 * 1024, 64 * 1024}) |
| 180 | + ->Args({1024 * 1024, 1024 * 1024}); |
| 181 | +} |
| 182 | + |
| 183 | +#ifdef ARROW_WITH_LZ4 |
| 184 | +// Benchmark LZ4 because it's lightweight, which makes benchmarking focused on the |
| 185 | +// overhead of the compression input stream. |
| 186 | +BENCHMARK_TEMPLATE(CompressedInputStreamZeroCopyBufferProvidedByCaller, |
| 187 | + Compression::LZ4_FRAME) |
| 188 | + ->Apply(CompressedInputArguments); |
| 189 | +BENCHMARK_TEMPLATE(CompressedInputStreamNonZeroCopyBufferProvidedByCaller, |
| 190 | + Compression::LZ4_FRAME) |
| 191 | + ->Apply(CompressedInputArguments); |
| 192 | +BENCHMARK_TEMPLATE(CompressedInputStreamZeroCopyBufferReturnedByCallee, |
| 193 | + Compression::LZ4_FRAME) |
| 194 | + ->Apply(CompressedInputArguments); |
| 195 | +BENCHMARK_TEMPLATE(CompressedInputStreamNonZeroCopyBufferReturnedByCallee, |
| 196 | + Compression::LZ4_FRAME) |
| 197 | + ->Apply(CompressedInputArguments); |
| 198 | +#endif |
| 199 | + |
| 200 | +} // namespace arrow::io |
0 commit comments