Skip to content

Commit 3d497fa

Browse files
Google APIs (copybara-github)
authored and committed
docs: update documentation for bigquerystorage v1beta1
PiperOrigin-RevId: 531289380
1 parent ea7678c commit 3d497fa

6 files changed

Lines changed: 116 additions & 48 deletions

File tree

google/cloud/bigquery/storage/v1beta1/arrow.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

google/cloud/bigquery/storage/v1beta1/avro.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

google/cloud/bigquery/storage/v1beta1/bigquerystorage_v1beta1.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,4 @@ authentication:
1919
oauth:
2020
canonical_scopes: |-
2121
https://www.googleapis.com/auth/bigquery,
22-
https://www.googleapis.com/auth/bigquery.readonly,
2322
https://www.googleapis.com/auth/cloud-platform

google/cloud/bigquery/storage/v1beta1/read_options.proto

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -21,19 +21,64 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1";
2121

2222
// Options dictating how we read a table.
2323
message TableReadOptions {
24-
// Optional. Names of the fields in the table that should be read. If empty,
25-
// all fields will be read. If the specified field is a nested field, all the
26-
// sub-fields in the field will be selected. The output field order is
27-
// unrelated to the order of fields in selected_fields.
24+
// Optional. The names of the fields in the table to be returned. If no
25+
// field names are specified, then all fields in the table are returned.
26+
//
27+
// Nested fields -- the child elements of a STRUCT field -- can be selected
28+
// individually using their fully-qualified names, and will be returned as
29+
// record fields containing only the selected nested fields. If a STRUCT
30+
// field is specified in the selected fields list, all of the child elements
31+
// will be returned.
32+
//
33+
// As an example, consider a table with the following schema:
34+
//
35+
// {
36+
// "name": "struct_field",
37+
// "type": "RECORD",
38+
// "mode": "NULLABLE",
39+
// "fields": [
40+
// {
41+
// "name": "string_field1",
42+
// "type": "STRING",
43+
// . "mode": "NULLABLE"
44+
// },
45+
// {
46+
// "name": "string_field2",
47+
// "type": "STRING",
48+
// "mode": "NULLABLE"
49+
// }
50+
// ]
51+
// }
52+
//
53+
// Specifying "struct_field" in the selected fields list will result in a
54+
// read session schema with the following logical structure:
55+
//
56+
// struct_field {
57+
// string_field1
58+
// string_field2
59+
// }
60+
//
61+
// Specifying "struct_field.string_field1" in the selected fields list will
62+
// result in a read session schema with the following logical structure:
63+
//
64+
// struct_field {
65+
// string_field1
66+
// }
67+
//
68+
// The order of the fields in the read session schema is derived from the
69+
// table schema and does not correspond to the order in which the fields are
70+
// specified in this list.
2871
repeated string selected_fields = 1;
2972

3073
// Optional. SQL text filtering statement, similar to a WHERE clause in
31-
// a query. Aggregates are not supported.
74+
// a SQL query. Aggregates are not supported.
3275
//
3376
// Examples: "int_field > 5"
3477
// "date_field = CAST('2014-9-27' as DATE)"
3578
// "nullable_field is not NULL"
3679
// "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
3780
// "numeric_field BETWEEN 1.0 AND 5.0"
81+
//
82+
// Restricted to a maximum length of 1 MB.
3883
string row_restriction = 2;
3984
}

google/cloud/bigquery/storage/v1beta1/storage.proto

Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -33,6 +33,11 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1";
3333
// BigQuery storage API.
3434
//
3535
// The BigQuery storage API can be used to read data stored in BigQuery.
36+
//
37+
// The v1beta1 API is not yet officially deprecated, and will go through a full
38+
// deprecation cycle (https://cloud.google.com/products#product-launch-stages)
39+
// before the service is turned down. However, new code should use the v1 API
40+
// going forward.
3641
service BigQueryStorage {
3742
option (google.api.default_host) = "bigquerystorage.googleapis.com";
3843
option (google.api.oauth_scopes) =
@@ -49,7 +54,7 @@ service BigQueryStorage {
4954
// reached the end of each stream in the session, then all the data in the
5055
// table has been read.
5156
//
52-
// Read sessions automatically expire 24 hours after they are created and do
57+
// Read sessions automatically expire 6 hours after they are created and do
5358
// not require manual clean-up by the caller.
5459
rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
5560
option (google.api.http) = {
@@ -60,7 +65,8 @@ service BigQueryStorage {
6065
body: "*"
6166
}
6267
};
63-
option (google.api.method_signature) = "table_reference,parent,requested_streams";
68+
option (google.api.method_signature) =
69+
"table_reference,parent,requested_streams";
6470
}
6571

6672
// Reads rows from the table in the format prescribed by the read session.
@@ -82,15 +88,16 @@ service BigQueryStorage {
8288
// Creates additional streams for a ReadSession. This API can be used to
8389
// dynamically adjust the parallelism of a batch processing task upwards by
8490
// adding additional workers.
85-
rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest) returns (BatchCreateReadSessionStreamsResponse) {
91+
rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest)
92+
returns (BatchCreateReadSessionStreamsResponse) {
8693
option (google.api.http) = {
8794
post: "/v1beta1/{session.name=projects/*/sessions/*}"
8895
body: "*"
8996
};
9097
option (google.api.method_signature) = "session,requested_streams";
9198
}
9299

93-
// Triggers the graceful termination of a single stream in a ReadSession. This
100+
// Causes a single stream in a ReadSession to gracefully stop. This
94101
// API can be used to dynamically adjust the parallelism of a batch processing
95102
// task downwards without losing data.
96103
//
@@ -125,7 +132,8 @@ service BigQueryStorage {
125132
// completion.
126133
//
127134
// This method is guaranteed to be idempotent.
128-
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
135+
rpc SplitReadStream(SplitReadStreamRequest)
136+
returns (SplitReadStreamResponse) {
129137
option (google.api.http) = {
130138
get: "/v1beta1/{original_stream.name=projects/*/streams/*}"
131139
};
@@ -193,6 +201,40 @@ message ReadSession {
193201
ShardingStrategy sharding_strategy = 9;
194202
}
195203

204+
// Data format for input or output data.
205+
enum DataFormat {
206+
// Data format is unspecified.
207+
DATA_FORMAT_UNSPECIFIED = 0;
208+
209+
// Avro is a standard open source row based file format.
210+
// See https://avro.apache.org/ for more details.
211+
AVRO = 1;
212+
213+
// Arrow is a standard open source column-based message format.
214+
// See https://arrow.apache.org/ for more details.
215+
ARROW = 3;
216+
}
217+
218+
// Strategy for distributing data among multiple streams in a read session.
219+
enum ShardingStrategy {
220+
// Same as LIQUID.
221+
SHARDING_STRATEGY_UNSPECIFIED = 0;
222+
223+
// Assigns data to each stream based on the client's read rate. The faster the
224+
// client reads from a stream, the more data is assigned to the stream. In
225+
// this strategy, it's possible to read all data from a single stream even if
226+
// there are other streams present.
227+
LIQUID = 1;
228+
229+
// Assigns data to each stream such that roughly the same number of rows can
230+
// be read from each stream. Because the server-side unit for assigning data
231+
// is collections of rows, the API does not guarantee that each stream will
232+
// return the same number of rows. Additionally, the limits are enforced based
233+
// on the number of pre-filtering rows, so some filters can lead to lopsided
234+
// assignments.
235+
BALANCED = 2;
236+
}
237+
196238
// Creates a new read session, which may include additional options such as
197239
// requested parallelism, projection filters and constraints.
198240
message CreateReadSessionRequest {
@@ -225,45 +267,14 @@ message CreateReadSessionRequest {
225267
TableReadOptions read_options = 4;
226268

227269
// Data output format. Currently default to Avro.
270+
// DATA_FORMAT_UNSPECIFIED not supported.
228271
DataFormat format = 5;
229272

230273
// The strategy to use for distributing data among multiple streams. Currently
231274
// defaults to liquid sharding.
232275
ShardingStrategy sharding_strategy = 7;
233276
}
234277

235-
// Data format for input or output data.
236-
enum DataFormat {
237-
// Data format is unspecified.
238-
DATA_FORMAT_UNSPECIFIED = 0;
239-
240-
// Avro is a standard open source row based file format.
241-
// See https://avro.apache.org/ for more details.
242-
AVRO = 1;
243-
244-
ARROW = 3;
245-
}
246-
247-
// Strategy for distributing data among multiple streams in a read session.
248-
enum ShardingStrategy {
249-
// Same as LIQUID.
250-
SHARDING_STRATEGY_UNSPECIFIED = 0;
251-
252-
// Assigns data to each stream based on the client's read rate. The faster the
253-
// client reads from a stream, the more data is assigned to the stream. In
254-
// this strategy, it's possible to read all data from a single stream even if
255-
// there are other streams present.
256-
LIQUID = 1;
257-
258-
// Assigns data to each stream such that roughly the same number of rows can
259-
// be read from each stream. Because the server-side unit for assigning data
260-
// is collections of rows, the API does not guarantee that each stream will
261-
// return the same number of rows. Additionally, the limits are enforced based
262-
// on the number of pre-filtering rows, so some filters can lead to lopsided
263-
// assignments.
264-
BALANCED = 2;
265-
}
266-
267278
// Requesting row data via `ReadRows` must provide Stream position information.
268279
message ReadRowsRequest {
269280
// Required. Identifier of the position in the stream to start reading from.
@@ -349,6 +360,19 @@ message ReadRowsResponse {
349360
// Throttling status. If unset, the latest response still describes
350361
// the current throttling status.
351362
ThrottleStatus throttle_status = 5;
363+
364+
// The schema for the read. If read_options.selected_fields is set, the
365+
// schema may be different from the table schema as it will only contain
366+
// the selected fields. This schema is equivalent to the one returned by
367+
// CreateSession. This field is only populated in the first ReadRowsResponse
368+
// RPC.
369+
oneof schema {
370+
// Output only. Avro schema.
371+
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
372+
373+
// Output only. Arrow schema.
374+
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
375+
}
352376
}
353377

354378
// Information needed to request additional streams for an established read

google/cloud/bigquery/storage/v1beta1/table_reference.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)