Skip to content

Commit 899422e

Browse files
authored
GH-39301: [Archery][CI][Integration] Add nanoarrow to archery + integration setup (#39302)
### Rationale for this change The ability to add integration testing was added in nanoarrow; however, the infrastructure for running these tests currently lives in the arrow monorepo. ### What changes are included in this PR? - Added the relevant code to Archery such that these tests can be run - Added the relevant scripts/environment variables to CI such that these tests run in the integration CI job ### Are these changes tested? Yes, via the "Integration" CI job. ### Are there any user-facing changes? No. This PR still needs #41264 for the integration tests to pass. * Closes: #39301 * GitHub Issue: #39301 Lead-authored-by: Dewey Dunnington <dewey@fishandwhistle.net> Co-authored-by: Dewey Dunnington <dewey@voltrondata.com> Signed-off-by: Dewey Dunnington <dewey@fishandwhistle.net>
1 parent 7aea8bf commit 899422e

8 files changed

Lines changed: 223 additions & 3 deletions

File tree

.github/workflows/integration.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ jobs:
7575
with:
7676
repository: apache/arrow-rs
7777
path: rust
78+
- name: Checkout Arrow nanoarrow
79+
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
80+
with:
81+
repository: apache/arrow-nanoarrow
82+
path: nanoarrow
7883
- name: Free up disk space
7984
run: |
8085
ci/scripts/util_free_space.sh
@@ -97,6 +102,7 @@ jobs:
97102
run: >
98103
archery docker run \
99104
-e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \
105+
-e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \
100106
-e ARCHERY_INTEGRATION_WITH_RUST=1 \
101107
conda-integration
102108
- name: Docker Push

ci/scripts/integration_arrow_build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ build_dir=${2}
3030

3131
${arrow_dir}/ci/scripts/rust_build.sh ${arrow_dir} ${build_dir}
3232

33+
${arrow_dir}/ci/scripts/nanoarrow_build.sh ${arrow_dir} ${build_dir}
34+
3335
if [ "${ARROW_INTEGRATION_CPP}" == "ON" ]; then
3436
${arrow_dir}/ci/scripts/cpp_build.sh ${arrow_dir} ${build_dir}
3537
fi

ci/scripts/nanoarrow_build.sh

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
set -e

# Positional arguments:
#   $1 - root of the arrow checkout (nanoarrow is expected at $1/nanoarrow)
#   $2 - root of the build tree (nanoarrow is built in $2/nanoarrow)
arrow_dir=${1}
source_dir=${1}/nanoarrow
build_dir=${2}/nanoarrow

# This file is used to build the nanoarrow binaries needed for the archery
# integration tests. Testing of the nanoarrow implementation in normal CI is handled
# by github workflows in the arrow-nanoarrow repository.

# Compare as a string with an empty default: a numeric `-eq` test makes `[`
# fail with "integer expression expected" when the variable is unset or not a
# number. Only an explicit "0" disables the build; any other value (including
# unset) falls through to the source-directory check, as before.
if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW:-}" = "0" ]; then
  echo "====================================================================="
  echo "Not building nanoarrow"
  echo "====================================================================="
  exit 0;
elif [ ! -d "${source_dir}" ]; then
  echo "====================================================================="
  echo "The nanoarrow source is missing. Please clone the arrow-nanoarrow repository"
  echo "to arrow/nanoarrow before running the integration tests:"
  echo " git clone https://github.com/apache/arrow-nanoarrow.git path/to/arrow/nanoarrow"
  echo "====================================================================="
  exit 1;
fi

set -x

# Quote the paths so that directories containing spaces or glob characters
# are passed to mkdir/pushd/cmake as a single argument.
mkdir -p "${build_dir}"
pushd "${build_dir}"

cmake "${source_dir}" -DNANOARROW_BUILD_INTEGRATION_TESTS=ON
cmake --build .

popd

dev/archery/archery/cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,9 @@ def _set_default(opt, default):
738738
help='Include JavaScript in integration tests')
739739
@click.option('--with-go', type=bool, default=False,
740740
help='Include Go in integration tests')
741+
@click.option('--with-nanoarrow', type=bool, default=False,
742+
help='Include nanoarrow in integration tests',
743+
envvar="ARCHERY_INTEGRATION_WITH_NANOARROW")
741744
@click.option('--with-rust', type=bool, default=False,
742745
help='Include Rust in integration tests',
743746
envvar="ARCHERY_INTEGRATION_WITH_RUST")
@@ -776,7 +779,7 @@ def integration(with_all=False, random_seed=12345, **args):
776779

777780
gen_path = args['write_generated_json']
778781

779-
languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust']
782+
languages = ['cpp', 'csharp', 'java', 'js', 'go', 'nanoarrow', 'rust']
780783
formats = ['ipc', 'flight', 'c_data']
781784

782785
enabled_languages = 0

dev/archery/archery/integration/datagen.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1928,17 +1928,20 @@ def _temp_path():
19281928
.skip_tester('C#')
19291929
.skip_tester('Java')
19301930
.skip_tester('JS')
1931+
.skip_tester('nanoarrow')
19311932
.skip_tester('Rust'),
19321933

19331934
generate_binary_view_case()
19341935
.skip_tester('Java')
19351936
.skip_tester('JS')
1937+
.skip_tester('nanoarrow')
19361938
.skip_tester('Rust'),
19371939

19381940
generate_list_view_case()
19391941
.skip_tester('C#') # Doesn't support large list views
19401942
.skip_tester('Java')
19411943
.skip_tester('JS')
1944+
.skip_tester('nanoarrow')
19421945
.skip_tester('Rust'),
19431946

19441947
generate_extension_case()

dev/archery/archery/integration/runner.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from .tester_java import JavaTester
3737
from .tester_js import JSTester
3838
from .tester_csharp import CSharpTester
39+
from .tester_nanoarrow import NanoarrowTester
3940
from .util import guid, printer
4041
from .util import SKIP_C_ARRAY, SKIP_C_SCHEMA, SKIP_FLIGHT, SKIP_IPC
4142
from ..utils.source import ARROW_ROOT_DEFAULT
@@ -541,8 +542,8 @@ def get_static_json_files():
541542

542543
def run_all_tests(with_cpp=True, with_java=True, with_js=True,
543544
with_csharp=True, with_go=True, with_rust=False,
544-
run_ipc=False, run_flight=False, run_c_data=False,
545-
tempdir=None, **kwargs):
545+
with_nanoarrow=False, run_ipc=False, run_flight=False,
546+
run_c_data=False, tempdir=None, **kwargs):
546547
tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-')
547548

548549
testers: List[Tester] = []
@@ -562,6 +563,9 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True,
562563
if with_go:
563564
testers.append(GoTester(**kwargs))
564565

566+
if with_nanoarrow:
567+
testers.append(NanoarrowTester(**kwargs))
568+
565569
if with_rust:
566570
testers.append(RustTester(**kwargs))
567571

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import functools
19+
import os
20+
21+
from . import cdata
22+
from .tester import Tester, CDataExporter, CDataImporter
23+
from ..utils.source import ARROW_ROOT_DEFAULT
24+
25+
26+
# Directory searched for the nanoarrow C Data integration library. The
# ARROW_NANOARROW_PATH environment variable takes precedence; otherwise the
# "nanoarrow/cdata" directory under ARROW_ROOT_DEFAULT is used.
_NANOARROW_PATH = os.environ.get(
    "ARROW_NANOARROW_PATH",
    os.path.join(
        ARROW_ROOT_DEFAULT,
        "nanoarrow/cdata",
    ),
)

# Path of the shared library handed to ffi.dlopen(): the library base name
# with cdata.dll_suffix appended (presumably the platform's shared-library
# extension -- confirm in the cdata module).
_INTEGRATION_DLL = os.path.join(
    _NANOARROW_PATH,
    "libnanoarrow_c_data_integration" + cdata.dll_suffix,
)
35+
36+
class NanoarrowTester(Tester):
    """Archery integration tester for nanoarrow.

    Only the four C Data capabilities (schema/array export and import) are
    switched on. The producer/consumer and Flight capabilities are off, and
    the file- and stream-based entry points raise ``NotImplementedError``.
    """

    # Capabilities that are switched off for nanoarrow.
    PRODUCER = False
    CONSUMER = False
    FLIGHT_SERVER = False
    FLIGHT_CLIENT = False

    # C Data capabilities, all of which are switched on.
    C_DATA_SCHEMA_EXPORTER = True
    C_DATA_ARRAY_EXPORTER = True
    C_DATA_SCHEMA_IMPORTER = True
    C_DATA_ARRAY_IMPORTER = True

    name = "nanoarrow"

    def validate(self, json_path, arrow_path, quirks=None):
        """Not implemented for nanoarrow; always raises."""
        raise NotImplementedError()

    def json_to_file(self, json_path, arrow_path):
        """Not implemented for nanoarrow; always raises."""
        raise NotImplementedError()

    def stream_to_file(self, stream_path, file_path):
        """Not implemented for nanoarrow; always raises."""
        raise NotImplementedError()

    def file_to_stream(self, file_path, stream_path):
        """Not implemented for nanoarrow; always raises."""
        raise NotImplementedError()

    def make_c_data_exporter(self):
        """Return a new exporter built from this tester's debug flag and args."""
        exporter = NanoarrowCDataExporter(self.debug, self.args)
        return exporter

    def make_c_data_importer(self):
        """Return a new importer built from this tester's debug flag and args."""
        importer = NanoarrowCDataImporter(self.debug, self.args)
        return importer
65+
66+
67+
_nanoarrow_c_data_entrypoints = """
68+
const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(
69+
const char* json_path, struct ArrowSchema* out);
70+
71+
const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
72+
const char* json_path, struct ArrowSchema* schema);
73+
74+
const char* nanoarrow_CDataIntegration_ExportBatchFromJson(
75+
const char* json_path, int num_batch, struct ArrowArray* out);
76+
77+
const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
78+
const char* json_path, int num_batch, struct ArrowArray* batch);
79+
80+
int64_t nanoarrow_BytesAllocated(void);
81+
"""
82+
83+
84+
@functools.lru_cache
def _load_ffi(ffi, lib_path=_INTEGRATION_DLL):
    """Declare the nanoarrow entrypoints on ``ffi`` and open the library.

    The result is memoized by ``functools.lru_cache`` on the
    ``(ffi, lib_path)`` arguments, so repeated calls with the same arguments
    return the cached handle instead of calling ``cdef``/``dlopen`` again.

    Returns the object produced by ``ffi.dlopen(lib_path)``.
    """
    ffi.cdef(_nanoarrow_c_data_entrypoints)
    return ffi.dlopen(lib_path)
89+
90+
91+
class _CDataBase:
    """Shared state and error handling for the nanoarrow C Data classes."""

    def __init__(self, debug, args):
        """Store ``debug``/``args`` and load the nanoarrow library handle."""
        ffi = cdata.ffi()
        self.debug = debug
        self.args = args
        self.ffi = ffi
        self.dll = _load_ffi(ffi)

    def _check_nanoarrow_error(self, na_error):
        """
        Raise if an integration entrypoint returned an error message.

        ``na_error`` is the `const char*` returned by the entrypoint: a null
        pointer means success, a non-empty string is an error message.
        The string is statically allocated on the nanoarrow side and does not
        need to be released.

        Raises RuntimeError carrying the decoded message when ``na_error`` is
        not null.
        """
        assert self.ffi.typeof(na_error) is self.ffi.typeof("const char*")
        if na_error != self.ffi.NULL:
            # Undecodable bytes are replaced rather than raising, so a
            # malformed message still surfaces as a RuntimeError.
            message = self.ffi.string(na_error).decode("utf8", errors="replace")
            raise RuntimeError(
                f"nanoarrow C Data Integration call failed: {message}"
            )
110+
111+
112+
class NanoarrowCDataExporter(CDataExporter, _CDataBase):
    """C Data exporter that forwards to the nanoarrow integration library."""

    def export_schema_from_json(self, json_path, c_schema_ptr):
        """Call the ExportSchemaFromJson entrypoint; raise on a reported error."""
        func = self.dll.nanoarrow_CDataIntegration_ExportSchemaFromJson
        status = func(str(json_path).encode(), c_schema_ptr)
        self._check_nanoarrow_error(status)

    def export_batch_from_json(self, json_path, num_batch, c_array_ptr):
        """Call the ExportBatchFromJson entrypoint; raise on a reported error."""
        func = self.dll.nanoarrow_CDataIntegration_ExportBatchFromJson
        status = func(str(json_path).encode(), num_batch, c_array_ptr)
        self._check_nanoarrow_error(status)

    @property
    def supports_releasing_memory(self):
        """Always True for the nanoarrow exporter."""
        return True

    def record_allocation_state(self):
        """Return the value reported by ``nanoarrow_BytesAllocated()``."""
        bytes_allocated = self.dll.nanoarrow_BytesAllocated()
        return bytes_allocated
131+
132+
133+
class NanoarrowCDataImporter(CDataImporter, _CDataBase):
    """C Data importer that forwards to the nanoarrow integration library."""

    def import_schema_and_compare_to_json(self, json_path, c_schema_ptr):
        """Call ImportSchemaAndCompareToJson; raise on a reported error."""
        func = self.dll.nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson
        status = func(str(json_path).encode(), c_schema_ptr)
        self._check_nanoarrow_error(status)

    def import_batch_and_compare_to_json(self, json_path, num_batch, c_array_ptr):
        """Call ImportBatchAndCompareToJson; raise on a reported error."""
        func = self.dll.nanoarrow_CDataIntegration_ImportBatchAndCompareToJson
        status = func(str(json_path).encode(), num_batch, c_array_ptr)
        self._check_nanoarrow_error(status)

    @property
    def supports_releasing_memory(self):
        """Always True for the nanoarrow importer."""
        return True

docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1751,9 +1751,11 @@ services:
17511751
volumes: *conda-volumes
17521752
environment:
17531753
<<: [*common, *ccache]
1754+
ARCHERY_INTEGRATION_WITH_NANOARROW: 0
17541755
ARCHERY_INTEGRATION_WITH_RUST: 0
17551756
# Tell Archery where Arrow binaries are located
17561757
ARROW_CPP_EXE_PATH: /build/cpp/debug
1758+
ARROW_NANOARROW_PATH: /build/nanoarrow
17571759
ARROW_RUST_EXE_PATH: /build/rust/debug
17581760
command:
17591761
["/arrow/ci/scripts/integration_arrow_build.sh /arrow /build &&

0 commit comments

Comments
 (0)