1- // Copyright 2020 Google LLC
1+ // Copyright 2023 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -33,6 +33,11 @@ option java_package = "com.google.cloud.bigquery.storage.v1beta1";
3333// BigQuery storage API.
3434//
3535// The BigQuery storage API can be used to read data stored in BigQuery.
36+ //
37+ // The v1beta1 API is not yet officially deprecated, and will go through a full
38+ // deprecation cycle (https://cloud.google.com/products#product-launch-stages)
39+ // before the service is turned down. However, new code should use the v1 API
40+ // going forward.
3641service BigQueryStorage {
3742 option (google.api.default_host ) = "bigquerystorage.googleapis.com" ;
3843 option (google.api.oauth_scopes ) =
@@ -49,7 +54,7 @@ service BigQueryStorage {
4954 // reached the end of each stream in the session, then all the data in the
5055 // table has been read.
5156 //
52- // Read sessions automatically expire 24 hours after they are created and do
57+ // Read sessions automatically expire 6 hours after they are created and do
5358 // not require manual clean-up by the caller.
5459 rpc CreateReadSession (CreateReadSessionRequest ) returns (ReadSession ) {
5560 option (google.api.http ) = {
@@ -60,7 +65,8 @@ service BigQueryStorage {
6065 body : "*"
6166 }
6267 };
63- option (google.api.method_signature ) = "table_reference,parent,requested_streams" ;
68+ option (google.api.method_signature ) =
69+ "table_reference,parent,requested_streams" ;
6470 }
6571
6672 // Reads rows from the table in the format prescribed by the read session.
@@ -82,15 +88,16 @@ service BigQueryStorage {
8288 // Creates additional streams for a ReadSession. This API can be used to
8389 // dynamically adjust the parallelism of a batch processing task upwards by
8490 // adding additional workers.
85- rpc BatchCreateReadSessionStreams (BatchCreateReadSessionStreamsRequest ) returns (BatchCreateReadSessionStreamsResponse ) {
91+ rpc BatchCreateReadSessionStreams (BatchCreateReadSessionStreamsRequest )
92+ returns (BatchCreateReadSessionStreamsResponse ) {
8693 option (google.api.http ) = {
8794 post : "/v1beta1/{session.name=projects/*/sessions/*}"
8895 body : "*"
8996 };
9097 option (google.api.method_signature ) = "session,requested_streams" ;
9198 }
9299
93- // Triggers the graceful termination of a single stream in a ReadSession. This
100+ // Causes a single stream in a ReadSession to gracefully stop . This
94101 // API can be used to dynamically adjust the parallelism of a batch processing
95102 // task downwards without losing data.
96103 //
@@ -125,7 +132,8 @@ service BigQueryStorage {
125132 // completion.
126133 //
127134 // This method is guaranteed to be idempotent.
128- rpc SplitReadStream (SplitReadStreamRequest ) returns (SplitReadStreamResponse ) {
135+ rpc SplitReadStream (SplitReadStreamRequest )
136+ returns (SplitReadStreamResponse ) {
129137 option (google.api.http ) = {
130138 get : "/v1beta1/{original_stream.name=projects/*/streams/*}"
131139 };
@@ -193,6 +201,40 @@ message ReadSession {
193201 ShardingStrategy sharding_strategy = 9 ;
194202}
195203
204+ // Data format for input or output data.
205+ enum DataFormat {
206+ // Data format is unspecified.
207+ DATA_FORMAT_UNSPECIFIED = 0 ;
208+
209+ // Avro is a standard open source row based file format.
210+ // See https://avro.apache.org/ for more details.
211+ AVRO = 1 ;
212+
213+ // Arrow is a standard open source column-based message format.
214+ // See https://arrow.apache.org/ for more details.
215+ ARROW = 3 ;
216+ }
217+
218+ // Strategy for distributing data among multiple streams in a read session.
219+ enum ShardingStrategy {
220+ // Same as LIQUID.
221+ SHARDING_STRATEGY_UNSPECIFIED = 0 ;
222+
223+ // Assigns data to each stream based on the client's read rate. The faster the
224+ // client reads from a stream, the more data is assigned to the stream. In
225+ // this strategy, it's possible to read all data from a single stream even if
226+ // there are other streams present.
227+ LIQUID = 1 ;
228+
229+ // Assigns data to each stream such that roughly the same number of rows can
230+ // be read from each stream. Because the server-side unit for assigning data
231+ // is collections of rows, the API does not guarantee that each stream will
232+ // return the same number of rows. Additionally, the limits are enforced based
233+ // on the number of pre-filtering rows, so some filters can lead to lopsided
234+ // assignments.
235+ BALANCED = 2 ;
236+ }
237+
196238// Creates a new read session, which may include additional options such as
197239// requested parallelism, projection filters and constraints.
198240message CreateReadSessionRequest {
@@ -225,45 +267,14 @@ message CreateReadSessionRequest {
225267 TableReadOptions read_options = 4 ;
226268
227269 // Data output format. Currently default to Avro.
270+ // DATA_FORMAT_UNSPECIFIED not supported.
228271 DataFormat format = 5 ;
229272
230273 // The strategy to use for distributing data among multiple streams. Currently
231274 // defaults to liquid sharding.
232275 ShardingStrategy sharding_strategy = 7 ;
233276}
234277
235- // Data format for input or output data.
236- enum DataFormat {
237- // Data format is unspecified.
238- DATA_FORMAT_UNSPECIFIED = 0 ;
239-
240- // Avro is a standard open source row based file format.
241- // See https://avro.apache.org/ for more details.
242- AVRO = 1 ;
243-
244- ARROW = 3 ;
245- }
246-
247- // Strategy for distributing data among multiple streams in a read session.
248- enum ShardingStrategy {
249- // Same as LIQUID.
250- SHARDING_STRATEGY_UNSPECIFIED = 0 ;
251-
252- // Assigns data to each stream based on the client's read rate. The faster the
253- // client reads from a stream, the more data is assigned to the stream. In
254- // this strategy, it's possible to read all data from a single stream even if
255- // there are other streams present.
256- LIQUID = 1 ;
257-
258- // Assigns data to each stream such that roughly the same number of rows can
259- // be read from each stream. Because the server-side unit for assigning data
260- // is collections of rows, the API does not guarantee that each stream will
261- // return the same number or rows. Additionally, the limits are enforced based
262- // on the number of pre-filtering rows, so some filters can lead to lopsided
263- // assignments.
264- BALANCED = 2 ;
265- }
266-
267278// Requesting row data via `ReadRows` must provide Stream position information.
268279message ReadRowsRequest {
269280 // Required. Identifier of the position in the stream to start reading from.
@@ -349,6 +360,19 @@ message ReadRowsResponse {
349360 // Throttling status. If unset, the latest response still describes
350361 // the current throttling status.
351362 ThrottleStatus throttle_status = 5 ;
363+
364+ // The schema for the read. If read_options.selected_fields is set, the
365+ // schema may be different from the table schema as it will only contain
366+ // the selected fields. This schema is equivalent to the one returned by
367+ // CreateSession. This field is only populated in the first ReadRowsResponse
368+ // RPC.
369+ oneof schema {
370+ // Output only. Avro schema.
371+ AvroSchema avro_schema = 7 [(google.api.field_behavior ) = OUTPUT_ONLY ];
372+
373+ // Output only. Arrow schema.
374+ ArrowSchema arrow_schema = 8 [(google.api.field_behavior ) = OUTPUT_ONLY ];
375+ }
352376}
353377
354378// Information needed to request additional streams for an established read
0 commit comments