Skip to content

Commit 1c1fa74

Browse files
authored
GH-39303: [Archery][Benchmarking] Allow setting C++ repetition min time (#39324)
### Rationale for this change

We want to be able to increase the number of repetitions for each C++ micro-benchmark without increasing the total runtime.

### What changes are included in this PR?

* Add a `--repetition-min-time` argument to set the minimum repetition duration in seconds
* Add a `--cpp-benchmark-extras` argument to pass arbitrary arguments to Google Benchmark executables
* Add a couple of tests with multiple benchmark repetitions

### Are these changes tested?

Not entirely. Command-line argument passing is not unit-tested.

### Are there any user-facing changes?

No.

* Closes: #39303

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent faa9d80 commit 1c1fa74

6 files changed

Lines changed: 218 additions & 18 deletions

File tree

.github/workflows/archery.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ jobs:
6363
- name: Install pygit2 binary wheel
6464
run: pip install pygit2 --only-binary pygit2
6565
- name: Install Archery, Crossbow- and Test Dependencies
66-
run: pip install pytest responses -e dev/archery[all]
66+
run: |
67+
pip install -e dev/archery[all]
68+
pip install -r dev/archery/requirements-test.txt
6769
- name: Archery Unittests
6870
working-directory: dev/archery
6971
run: pytest -v archery

dev/archery/archery/benchmark/google.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ class GoogleBenchmarkCommand(Command):
3737
notably `--benchmark_filter`, `--benchmark_format`, etc...
3838
"""
3939

40-
def __init__(self, benchmark_bin, benchmark_filter=None):
40+
def __init__(self, benchmark_bin, benchmark_filter=None, benchmark_extras=None):
4141
self.bin = benchmark_bin
4242
self.benchmark_filter = benchmark_filter
43+
self.benchmark_extras = benchmark_extras or []
4344

4445
def list_benchmarks(self):
4546
argv = ["--benchmark_list_tests"]
@@ -49,16 +50,19 @@ def list_benchmarks(self):
4950
stderr=subprocess.PIPE)
5051
return str.splitlines(result.stdout.decode("utf-8"))
5152

52-
def results(self, repetitions=1):
53+
def results(self, repetitions=1, repetition_min_time=None):
5354
with NamedTemporaryFile() as out:
54-
argv = ["--benchmark_repetitions={}".format(repetitions),
55-
"--benchmark_out={}".format(out.name),
55+
argv = [f"--benchmark_repetitions={repetitions}",
56+
f"--benchmark_out={out.name}",
5657
"--benchmark_out_format=json"]
5758

59+
if repetition_min_time is not None:
60+
argv.append(f"--benchmark_min_time={repetition_min_time:.6f}")
61+
5862
if self.benchmark_filter:
59-
argv.append(
60-
"--benchmark_filter={}".format(self.benchmark_filter)
61-
)
63+
argv.append(f"--benchmark_filter={self.benchmark_filter}")
64+
65+
argv += self.benchmark_extras
6266

6367
self.run(*argv, check=True)
6468
return json.load(out)

dev/archery/archery/benchmark/runner.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,11 @@ def regex_filter(re_expr):
4242

4343
class BenchmarkRunner:
4444
def __init__(self, suite_filter=None, benchmark_filter=None,
45-
repetitions=DEFAULT_REPETITIONS):
45+
repetitions=DEFAULT_REPETITIONS, repetition_min_time=None):
4646
self.suite_filter = suite_filter
4747
self.benchmark_filter = benchmark_filter
4848
self.repetitions = repetitions
49+
self.repetition_min_time = repetition_min_time
4950

5051
@property
5152
def suites(self):
@@ -107,9 +108,10 @@ def __repr__(self):
107108
class CppBenchmarkRunner(BenchmarkRunner):
108109
""" Run suites from a CMakeBuild. """
109110

110-
def __init__(self, build, **kwargs):
111+
def __init__(self, build, benchmark_extras, **kwargs):
111112
""" Initialize a CppBenchmarkRunner. """
112113
self.build = build
114+
self.benchmark_extras = benchmark_extras
113115
super().__init__(**kwargs)
114116

115117
@staticmethod
@@ -142,14 +144,17 @@ def suites_binaries(self):
142144

143145
def suite(self, name, suite_bin):
144146
""" Returns the resulting benchmarks for a given suite. """
145-
suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter)
147+
suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter,
148+
self.benchmark_extras)
146149

147150
# Ensure there will be data
148151
benchmark_names = suite_cmd.list_benchmarks()
149152
if not benchmark_names:
150153
return None
151154

152-
results = suite_cmd.results(repetitions=self.repetitions)
155+
results = suite_cmd.results(
156+
repetitions=self.repetitions,
157+
repetition_min_time=self.repetition_min_time)
153158
benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
154159
return BenchmarkSuite(name, benchmarks)
155160

@@ -252,6 +257,7 @@ def suite(self, name):
252257
if not benchmark_names:
253258
return None
254259

260+
# TODO: support `repetition_min_time`
255261
results = suite_cmd.results(repetitions=self.repetitions)
256262
benchmarks = JavaMicrobenchmarkHarness.from_json(results)
257263
return BenchmarkSuite(name, benchmarks)

dev/archery/archery/cli.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,10 @@ def check_language(ctx, param, value):
377377
"Can be stacked. For language=java"),
378378
click.option("--cmake-extras", type=str, multiple=True,
379379
help="Extra flags/options to pass to cmake invocation. "
380-
"Can be stacked. For language=cpp")
380+
"Can be stacked. For language=cpp"),
381+
click.option("--cpp-benchmark-extras", type=str, multiple=True,
382+
help="Extra flags/options to pass to C++ benchmark executables. "
383+
"Can be stacked. For language=cpp"),
381384
]
382385

383386
cmd = java_toolchain_options(cmd)
@@ -440,12 +443,16 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
440443
@click.option("--repetitions", type=int, default=-1,
441444
help=("Number of repetitions of each benchmark. Increasing "
442445
"may improve result precision. "
443-
"[default: 1 for cpp, 5 for java"))
446+
"[default: 1 for cpp, 5 for java]"))
447+
@click.option("--repetition-min-time", type=float, default=None,
448+
help=("Minimum duration of each repetition in seconds. "
449+
"Currently only supported for language=cpp. "
450+
"[default: use runner-specific defaults]"))
444451
@click.pass_context
445452
def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
446453
java_home, java_options, build_extras, benchmark_extras,
447454
language, suite_filter, benchmark_filter, repetitions,
448-
**kwargs):
455+
repetition_min_time, cpp_benchmark_extras, **kwargs):
449456
""" Run benchmark suite.
450457
451458
This command will run the benchmark suite for a single build. This is
@@ -468,13 +475,18 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
468475
\b
469476
archery benchmark run
470477
478+
\b
479+
# Run the benchmarks on an existing build directory
480+
\b
481+
archery benchmark run /build/cpp
482+
471483
\b
472484
# Run the benchmarks on current previous commit
473485
\b
474486
archery benchmark run HEAD~1
475487
476488
\b
477-
# Run the benchmarks on current previous commit
489+
# Run the benchmarks on current git workspace and output results as a JSON file.
478490
\b
479491
archery benchmark run --output=run.json
480492
"""
@@ -488,8 +500,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
488500
repetitions = repetitions if repetitions != -1 else 1
489501
runner_base = CppBenchmarkRunner.from_rev_or_path(
490502
src, root, rev_or_path, conf,
491-
repetitions=repetitions,
492-
suite_filter=suite_filter, benchmark_filter=benchmark_filter)
503+
repetitions=repetitions, repetition_min_time=repetition_min_time,
504+
suite_filter=suite_filter, benchmark_filter=benchmark_filter,
505+
benchmark_extras=cpp_benchmark_extras)
493506

494507
elif language == "java":
495508
for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:

dev/archery/archery/tests/test_benchmarks.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,53 @@ def test_static_runner_from_json_not_a_regression():
8181
assert not comparison.regression
8282

8383

84+
def test_static_runner_from_json_multiple_values_not_a_regression():
85+
# Same as above, but with multiple repetitions
86+
archery_result = {
87+
"suites": [
88+
{
89+
"name": "arrow-value-parsing-benchmark",
90+
"benchmarks": [
91+
{
92+
"name": "FloatParsing<DoubleType>",
93+
"unit": "items_per_second",
94+
"less_is_better": False,
95+
"values": [
96+
93588476.22327498,
97+
94873831.3818328,
98+
95593675.20810866,
99+
95797325.6543961,
100+
96134728.05794072
101+
],
102+
"time_unit": "ns",
103+
"times": [
104+
10537.724568456104,
105+
10575.162068480413,
106+
10599.271208720838,
107+
10679.028059166194,
108+
10827.995119861762
109+
],
110+
"counters": {
111+
"family_index": 0,
112+
"per_family_instance_index": 0,
113+
"run_name": "FloatParsing<DoubleType>",
114+
"repetitions": 5,
115+
"repetition_index": 0,
116+
"threads": 1,
117+
"iterations": 10656
118+
}
119+
}
120+
]
121+
}
122+
]
123+
}
124+
125+
contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
126+
baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
127+
[comparison] = RunnerComparator(contender, baseline).comparisons
128+
assert not comparison.regression
129+
130+
84131
def test_static_runner_from_json_regression():
85132
archery_result = {
86133
"suites": [
@@ -114,6 +161,58 @@ def test_static_runner_from_json_regression():
114161
assert comparison.regression
115162

116163

164+
def test_static_runner_from_json_multiple_values_regression():
165+
# Same as above, but with multiple repetitions
166+
archery_result = {
167+
"suites": [
168+
{
169+
"name": "arrow-value-parsing-benchmark",
170+
"benchmarks": [
171+
{
172+
"name": "FloatParsing<DoubleType>",
173+
"unit": "items_per_second",
174+
"less_is_better": False,
175+
"values": [
176+
93588476.22327498,
177+
94873831.3818328,
178+
95593675.20810866,
179+
95797325.6543961,
180+
96134728.05794072
181+
],
182+
"time_unit": "ns",
183+
"times": [
184+
10537.724568456104,
185+
10575.162068480413,
186+
10599.271208720838,
187+
10679.028059166194,
188+
10827.995119861762
189+
],
190+
"counters": {
191+
"family_index": 0,
192+
"per_family_instance_index": 0,
193+
"run_name": "FloatParsing<DoubleType>",
194+
"repetitions": 5,
195+
"repetition_index": 0,
196+
"threads": 1,
197+
"iterations": 10656
198+
}
199+
}
200+
]
201+
}
202+
]
203+
}
204+
205+
contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
206+
207+
# introduce artificial regression
208+
values = archery_result['suites'][0]['benchmarks'][0]['values']
209+
values[:] = [v * 2 for v in values]
210+
baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
211+
212+
[comparison] = RunnerComparator(contender, baseline).comparisons
213+
assert comparison.regression
214+
215+
117216
def test_benchmark_median():
118217
assert median([10]) == 10
119218
assert median([1, 2, 3]) == 2
@@ -381,3 +480,77 @@ def test_omits_aggregates():
381480
benchmark = GoogleBenchmark(name, [observation1, observation2])
382481
result = json.dumps(benchmark, cls=JsonEncoder)
383482
assert json.loads(result) == archery_result
483+
484+
485+
def test_multiple_observations():
486+
name = "FloatParsing<DoubleType>"
487+
google_results = [
488+
{
489+
'cpu_time': 10627.38199641615,
490+
'family_index': 0,
491+
'items_per_second': 94096551.75067839,
492+
'iterations': 9487,
493+
'name': 'FloatParsing<DoubleType>',
494+
'per_family_instance_index': 0,
495+
'real_time': 10628.84905663701,
496+
'repetition_index': 0,
497+
'repetitions': 3,
498+
'run_name': 'FloatParsing<DoubleType>',
499+
'run_type': 'iteration',
500+
'threads': 1,
501+
'time_unit': 'ns'
502+
},
503+
{
504+
'cpu_time': 10633.318014124594,
505+
'family_index': 0,
506+
'items_per_second': 94044022.63448404,
507+
'iterations': 9487,
508+
'name': 'FloatParsing<DoubleType>',
509+
'per_family_instance_index': 0,
510+
'real_time': 10634.858754122948,
511+
'repetition_index': 1,
512+
'repetitions': 3,
513+
'run_name': 'FloatParsing<DoubleType>',
514+
'run_type': 'iteration',
515+
'threads': 1,
516+
'time_unit': 'ns'
517+
},
518+
{
519+
'cpu_time': 10664.315484347,
520+
'family_index': 0,
521+
'items_per_second': 93770669.24434038,
522+
'iterations': 9487,
523+
'name': 'FloatParsing<DoubleType>',
524+
'per_family_instance_index': 0,
525+
'real_time': 10665.584589337563,
526+
'repetition_index': 2,
527+
'repetitions': 3,
528+
'run_name': 'FloatParsing<DoubleType>',
529+
'run_type': 'iteration',
530+
'threads': 1,
531+
'time_unit': 'ns'
532+
}
533+
]
534+
535+
archery_result = {
536+
'counters': {
537+
'family_index': 0,
538+
'iterations': 9487,
539+
'per_family_instance_index': 0,
540+
'repetition_index': 2,
541+
'repetitions': 3,
542+
'run_name': 'FloatParsing<DoubleType>',
543+
'threads': 1
544+
},
545+
'less_is_better': False,
546+
'name': 'FloatParsing<DoubleType>',
547+
'time_unit': 'ns',
548+
'times': [10628.84905663701, 10634.858754122948, 10665.584589337563],
549+
'unit': 'items_per_second',
550+
'values': [93770669.24434038, 94044022.63448404, 94096551.75067839]
551+
}
552+
553+
observations = [GoogleBenchmarkObservation(**g) for g in google_results]
554+
benchmark = GoogleBenchmark(name, observations)
555+
result = json.dumps(benchmark, cls=JsonEncoder)
556+
assert json.loads(result) == archery_result

dev/archery/requirements-test.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pytest
2+
responses

0 commit comments

Comments
 (0)