feat: add expiration (`expire_time` and `ttl`) for Memory and Sessions

Google APIs · copybara-github · commit c5334a83d696 · 2025-08-07T16:30:32.000-07:00
feat: add direct_memories_source
docs: Update comment for allowed values for config models

PiperOrigin-RevId: 792354273
diff --git a/google/cloud/aiplatform/v1beta1/memory_bank.proto b/google/cloud/aiplatform/v1beta1/memory_bank.proto
@@ -18,6 +18,7 @@ package google.cloud.aiplatform.v1beta1;
 
 import "google/api/field_behavior.proto";
 import "google/api/resource.proto";
+import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
 
 option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
@@ -37,6 +38,23 @@ message Memory {
     singular: "memory"
   };
 
+  // The expiration of the Memory. If not set, the Memory will not be
+  // automatically deleted.
+  oneof expiration {
+    // Optional. Timestamp of when this resource is considered expired.
+    // This is *always* provided on output, regardless of what `expiration` was
+    // sent on input.
+    google.protobuf.Timestamp expire_time = 13
+        [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Input only. The TTL for this resource. The expiration time is
+    // computed: now + TTL.
+    google.protobuf.Duration ttl = 14 [
+      (google.api.field_behavior) = OPTIONAL,
+      (google.api.field_behavior) = INPUT_ONLY
+    ];
+  }
+
   // Identifier. The resource name of the Memory.
   // Format:
   // `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}/memories/{memory}`
diff --git a/google/cloud/aiplatform/v1beta1/memory_bank_service.proto b/google/cloud/aiplatform/v1beta1/memory_bank_service.proto
@@ -312,6 +312,21 @@ message GenerateMemoriesRequest {
     repeated Event events = 1 [(google.api.field_behavior) = REQUIRED];
   }
 
+  // Defines memories supplied directly in the request that should be uploaded
+  // to Memory Bank and consolidated with existing memories.
+  message DirectMemoriesSource {
+    // A direct memory to upload to Memory Bank.
+    message DirectMemory {
+      // Required. The fact to consolidate with existing memories.
+      string fact = 1 [(google.api.field_behavior) = REQUIRED];
+    }
+
+    // Required. The direct memories to upload to Memory Bank. At most 5 direct
+    // memories are allowed per request.
+    repeated DirectMemory direct_memories = 1
+        [(google.api.field_behavior) = REQUIRED];
+  }
+
   // Source content used to generate memories.
   oneof source {
     // Defines a Vertex Session as the source content from which to generate
@@ -321,6 +336,11 @@ message GenerateMemoriesRequest {
     // Defines a direct source of content as the source content from which to
     // generate memories.
     DirectContentsSource direct_contents_source = 3;
+
+    // Defines a direct source of memories that should be uploaded to Memory
+    // Bank. This is similar to `CreateMemory`, but it allows for consolidation
+    // between these new memories and existing memories for the same scope.
+    DirectMemoriesSource direct_memories_source = 9;
   }
 
   // Required. The resource name of the ReasoningEngine to generate memories
diff --git a/google/cloud/aiplatform/v1beta1/reasoning_engine.proto b/google/cloud/aiplatform/v1beta1/reasoning_engine.proto
@@ -20,6 +20,8 @@ import "google/api/field_behavior.proto";
 import "google/api/resource.proto";
 import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
 import "google/cloud/aiplatform/v1beta1/env_var.proto";
+import "google/cloud/aiplatform/v1beta1/service_networking.proto";
+import "google/protobuf/duration.proto";
 import "google/protobuf/struct.proto";
 import "google/protobuf/timestamp.proto";
 
@@ -64,6 +66,34 @@ message ReasoningEngineSpec {
     // Service Agent.
     repeated SecretEnvVar secret_env = 2
         [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Configuration for Private Service Connect interface (PSC-I).
+    PscInterfaceConfig psc_interface_config = 4
+        [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. The minimum number of application instances that will be kept
+    // running at all times. Defaults to 1.
+    optional int32 min_instances = 5 [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. The maximum number of application instances that can be
+    // launched to handle increased traffic. Defaults to 100.
+    optional int32 max_instances = 6 [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Resource limits for each container. Only 'cpu' and 'memory'
+    // keys are supported. Defaults to {"cpu": "4", "memory": "4Gi"}.
+    //
+    //   * The only supported values for 'cpu' are '1', '2', '4', and '8'. For
+    //   more information, go to
+    //   https://cloud.google.com/run/docs/configuring/cpu.
+    //   * For supported 'memory' values and syntax, go to
+    //   https://cloud.google.com/run/docs/configuring/memory-limits.
+    map<string, string> resource_limits = 7
+        [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Concurrency for each container and agent server. Recommended
+    // value: 2 * cpu + 1. Defaults to 9.
+    optional int32 container_concurrency = 8
+        [(google.api.field_behavior) = OPTIONAL];
   }
 
   // Optional. The service account that the Reasoning Engine artifact runs as.
@@ -144,12 +174,50 @@ message ReasoningEngine {
 message ReasoningEngineContextSpec {
   // Specification for a Memory Bank.
   message MemoryBankConfig {
+    // Configuration for automatically setting the TTL ("time-to-live") of the
+    // memories in the Memory Bank.
+    message TtlConfig {
+      // Configuration for TTL of the memories in the Memory Bank based on the
+      // action that created or updated the memory.
+      message GranularTtlConfig {
+        // Optional. The TTL duration for memories uploaded via CreateMemory.
+        google.protobuf.Duration create_ttl = 1
+            [(google.api.field_behavior) = OPTIONAL];
+
+        // Optional. The TTL duration for memories newly generated via
+        // GenerateMemories
+        // ([GenerateMemoriesResponse.GeneratedMemory.Action.CREATED][google.cloud.aiplatform.v1beta1.GenerateMemoriesResponse.GeneratedMemory.Action.CREATED]).
+        google.protobuf.Duration generate_created_ttl = 2
+            [(google.api.field_behavior) = OPTIONAL];
+
+        // Optional. The TTL duration for memories updated via GenerateMemories
+        // ([GenerateMemoriesResponse.GeneratedMemory.Action.UPDATED][google.cloud.aiplatform.v1beta1.GenerateMemoriesResponse.GeneratedMemory.Action.UPDATED]).
+        // In the case of an UPDATED action, the `expire_time` of the existing
+        // memory will be updated to the new value (now + TTL).
+        google.protobuf.Duration generate_updated_ttl = 3
+            [(google.api.field_behavior) = OPTIONAL];
+      }
+
+      // Configuration for automatically setting the TTL of the memories in the
+      // Memory Bank.
+      oneof ttl {
+        // Optional. The default TTL duration of the memories in the Memory
+        // Bank. This applies to all operations that create or update a memory.
+        google.protobuf.Duration default_ttl = 1
+            [(google.api.field_behavior) = OPTIONAL];
+
+        // Optional. The granular TTL configuration of the memories in the
+        // Memory Bank.
+        GranularTtlConfig granular_ttl_config = 2
+            [(google.api.field_behavior) = OPTIONAL];
+      }
+    }
+
     // Configuration for how to generate memories.
     message GenerationConfig {
       // Required. The model used to generate memories.
       // Format:
-      // `projects/{project}/locations/{location}/publishers/google/models/{model}`
-      // or `projects/{project}/locations/{location}/endpoints/{endpoint}`.
+      // `projects/{project}/locations/{location}/publishers/google/models/{model}`.
       string model = 1 [
         (google.api.field_behavior) = REQUIRED,
         (google.api.resource_reference) = {
@@ -162,8 +230,7 @@ message ReasoningEngineContextSpec {
     message SimilaritySearchConfig {
       // Required. The model used to generate embeddings to lookup similar
       // memories. Format:
-      // `projects/{project}/locations/{location}/publishers/google/models/{model}`
-      // or `projects/{project}/locations/{location}/endpoints/{endpoint}`.
+      // `projects/{project}/locations/{location}/publishers/google/models/{model}`.
       string embedding_model = 1 [
         (google.api.field_behavior) = REQUIRED,
         (google.api.resource_reference) = {
@@ -181,6 +248,12 @@ message ReasoningEngineContextSpec {
     // `text-embedding-005`.
     SimilaritySearchConfig similarity_search_config = 2
         [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Configuration for automatic TTL ("time-to-live") of the
+    // memories in the Memory Bank. If not set, TTL will not be applied
+    // automatically. The TTL can be explicitly set by modifying the
+    // `expire_time` of each Memory resource.
+    TtlConfig ttl_config = 5 [(google.api.field_behavior) = OPTIONAL];
   }
 
   // Optional. Specification for a Memory Bank, which manages memories for the
diff --git a/google/cloud/aiplatform/v1beta1/session.proto b/google/cloud/aiplatform/v1beta1/session.proto
@@ -19,6 +19,7 @@ package google.cloud.aiplatform.v1beta1;
 import "google/api/field_behavior.proto";
 import "google/api/resource.proto";
 import "google/cloud/aiplatform/v1beta1/content.proto";
+import "google/protobuf/duration.proto";
 import "google/protobuf/struct.proto";
 import "google/protobuf/timestamp.proto";
 
@@ -39,6 +40,21 @@ message Session {
     singular: "session"
   };
 
+  // The expiration of the session. At most one of the fields below can be set.
+  oneof expiration {
+    // Optional. Timestamp of when this session is considered expired.
+    // This is *always* provided on output, regardless of what was sent
+    // on input.
+    google.protobuf.Timestamp expire_time = 13
+        [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Input only. The TTL for this session.
+    google.protobuf.Duration ttl = 14 [
+      (google.api.field_behavior) = OPTIONAL,
+      (google.api.field_behavior) = INPUT_ONLY
+    ];
+  }
+
   // Identifier. The resource name of the session.
   // Format:
   // 'projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}/sessions/{session}'.
@@ -136,6 +152,10 @@ message EventMetadata {
   // Branch is used when multiple child agents shouldn't see their siblings'
   // conversation history.
   string branch = 6 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The custom metadata of the LlmResponse.
+  google.protobuf.Struct custom_metadata = 7
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Actions are parts of events that are executed by the agent.
diff --git a/google/cloud/aiplatform/v1beta1/session_service.proto b/google/cloud/aiplatform/v1beta1/session_service.proto
@@ -282,6 +282,14 @@ message ListEventsRequest {
   // [SessionService.ListEvents][google.cloud.aiplatform.v1beta1.SessionService.ListEvents]
   // call.
   string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The standard list filter.
+  // Supported fields:
+  //    * `timestamp` range (e.g. `timestamp>="2025-01-31T11:30:00-04:00"` where
+  //    the timestamp is in RFC 3339 format)
+  //
+  // More detail in [AIP-160](https://google.aip.dev/160).
+  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Response message for