1- // Copyright 2020 Google LLC
1+ // Copyright 2023 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -33,6 +33,11 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1";
3333// BigQuery storage API.
3434//
3535// The BigQuery storage API can be used to read data stored in BigQuery.
36+ //
37+ // The v1beta1 API is not yet officially deprecated, and will go through a full
38+ // deprecation cycle (https://cloud.google.com/products#product-launch-stages)
39+ // before the service is turned down. However, new code should use the v1 API
40+ // going forward.
3641service BigQueryStorage {
3742 option (google.api.default_host ) = "bigquerystorage.googleapis.com" ;
3843 option (google.api.oauth_scopes ) =
@@ -49,7 +54,7 @@ service BigQueryStorage {
4954 // reached the end of each stream in the session, then all the data in the
5055 // table has been read.
5156 //
52- // Read sessions automatically expire 24 hours after they are created and do
57+ // Read sessions automatically expire 6 hours after they are created and do
5358 // not require manual clean-up by the caller.
5459 rpc CreateReadSession (CreateReadSessionRequest ) returns (ReadSession ) {
5560 option (google.api.http ) = {
@@ -60,7 +65,8 @@ service BigQueryStorage {
6065 body : "*"
6166 }
6267 };
63- option (google.api.method_signature ) = "table_reference,parent,requested_streams" ;
68+ option (google.api.method_signature ) =
69+ "table_reference,parent,requested_streams" ;
6470 }
6571
6672 // Reads rows from the table in the format prescribed by the read session.
@@ -82,15 +88,16 @@ service BigQueryStorage {
8288 // Creates additional streams for a ReadSession. This API can be used to
8389 // dynamically adjust the parallelism of a batch processing task upwards by
8490 // adding additional workers.
85- rpc BatchCreateReadSessionStreams (BatchCreateReadSessionStreamsRequest ) returns (BatchCreateReadSessionStreamsResponse ) {
91+ rpc BatchCreateReadSessionStreams (BatchCreateReadSessionStreamsRequest )
92+ returns (BatchCreateReadSessionStreamsResponse ) {
8693 option (google.api.http ) = {
8794 post : "/v1beta1/{session.name=projects/*/sessions/*}"
8895 body : "*"
8996 };
9097 option (google.api.method_signature ) = "session,requested_streams" ;
9198 }
9299
93- // Triggers the graceful termination of a single stream in a ReadSession. This
100+ // Causes a single stream in a ReadSession to gracefully stop . This
94101 // API can be used to dynamically adjust the parallelism of a batch processing
95102 // task downwards without losing data.
96103 //
@@ -125,7 +132,8 @@ service BigQueryStorage {
125132 // completion.
126133 //
127134 // This method is guaranteed to be idempotent.
128- rpc SplitReadStream (SplitReadStreamRequest ) returns (SplitReadStreamResponse ) {
135+ rpc SplitReadStream (SplitReadStreamRequest )
136+ returns (SplitReadStreamResponse ) {
129137 option (google.api.http ) = {
130138 get : "/v1beta1/{original_stream.name=projects/*/streams/*}"
131139 };
@@ -193,6 +201,40 @@ message ReadSession {
193201 ShardingStrategy sharding_strategy = 9 ;
194202}
195203
204+ // Data format for input or output data.
205+ enum DataFormat {
206+ // Data format is unspecified.
207+ DATA_FORMAT_UNSPECIFIED = 0 ;
208+
209+ // Avro is a standard open source row based file format.
210+ // See https://avro.apache.org/ for more details.
211+ AVRO = 1 ;
212+
213+ // Arrow is a standard open source column-based message format.
214+ // See https://arrow.apache.org/ for more details.
215+ ARROW = 3 ;
216+ }
217+
218+ // Strategy for distributing data among multiple streams in a read session.
219+ enum ShardingStrategy {
220+ // Same as LIQUID.
221+ SHARDING_STRATEGY_UNSPECIFIED = 0 ;
222+
223+ // Assigns data to each stream based on the client's read rate. The faster the
224+ // client reads from a stream, the more data is assigned to the stream. In
225+ // this strategy, it's possible to read all data from a single stream even if
226+ // there are other streams present.
227+ LIQUID = 1 ;
228+
229+ // Assigns data to each stream such that roughly the same number of rows can
230+ // be read from each stream. Because the server-side unit for assigning data
231+ // is collections of rows, the API does not guarantee that each stream will
232+ // return the same number of rows. Additionally, the limits are enforced based
233+ // on the number of pre-filtering rows, so some filters can lead to lopsided
234+ // assignments.
235+ BALANCED = 2 ;
236+ }
237+
196238// Creates a new read session, which may include additional options such as
197239// requested parallelism, projection filters and constraints.
198240message CreateReadSessionRequest {
@@ -225,45 +267,14 @@ message CreateReadSessionRequest {
225267 TableReadOptions read_options = 4 ;
226268
227269 // Data output format. Currently default to Avro.
270+ // DATA_FORMAT_UNSPECIFIED not supported.
228271 DataFormat format = 5 ;
229272
230273 // The strategy to use for distributing data among multiple streams. Currently
231274 // defaults to liquid sharding.
232275 ShardingStrategy sharding_strategy = 7 ;
233276}
234277
235- // Data format for input or output data.
236- enum DataFormat {
237- // Data format is unspecified.
238- DATA_FORMAT_UNSPECIFIED = 0 ;
239-
240- // Avro is a standard open source row based file format.
241- // See https://avro.apache.org/ for more details.
242- AVRO = 1 ;
243-
244- ARROW = 3 ;
245- }
246-
247- // Strategy for distributing data among multiple streams in a read session.
248- enum ShardingStrategy {
249- // Same as LIQUID.
250- SHARDING_STRATEGY_UNSPECIFIED = 0 ;
251-
252- // Assigns data to each stream based on the client's read rate. The faster the
253- // client reads from a stream, the more data is assigned to the stream. In
254- // this strategy, it's possible to read all data from a single stream even if
255- // there are other streams present.
256- LIQUID = 1 ;
257-
258- // Assigns data to each stream such that roughly the same number of rows can
259- // be read from each stream. Because the server-side unit for assigning data
260- // is collections of rows, the API does not guarantee that each stream will
261- // return the same number or rows. Additionally, the limits are enforced based
262- // on the number of pre-filtering rows, so some filters can lead to lopsided
263- // assignments.
264- BALANCED = 2 ;
265- }
266-
267278// Requesting row data via `ReadRows` must provide Stream position information.
268279message ReadRowsRequest {
269280 // Required. Identifier of the position in the stream to start reading from.
@@ -349,6 +360,19 @@ message ReadRowsResponse {
349360 // Throttling status. If unset, the latest response still describes
350361 // the current throttling status.
351362 ThrottleStatus throttle_status = 5 ;
363+
364+ // The schema for the read. If read_options.selected_fields is set, the
365+ // schema may be different from the table schema as it will only contain
366+ // the selected fields. This schema is equivalent to the one returned by
367+ // CreateSession. This field is only populated in the first ReadRowsResponse
368+ // RPC.
369+ oneof schema {
370+ // Output only. Avro schema.
371+ AvroSchema avro_schema = 7 [(google.api.field_behavior ) = OUTPUT_ONLY ];
372+
373+ // Output only. Arrow schema.
374+ ArrowSchema arrow_schema = 8 [(google.api.field_behavior ) = OUTPUT_ONLY ];
375+ }
352376}
353377
354378// Information needed to request additional streams for an established read
0 commit comments