Commit 437d4e4

VladaZakharova and Ulada Zakharava authored
Deprecate AutoMLTrainModelOperator for Vision and Video (#36473)
Co-authored-by: Ulada Zakharava <Vlada_Zakharava@epam.com>
1 parent 7a814f1 commit 437d4e4

11 files changed: 562 additions, 525 deletions

airflow/providers/google/cloud/operators/automl.py

Lines changed: 9 additions & 11 deletions
@@ -118,19 +118,17 @@ def __init__(
         self.impersonation_chain = impersonation_chain
 
     def execute(self, context: Context):
-        # Output warning if running AutoML Natural Language prediction job
-        automl_nl_model_keys = [
-            "text_classification_model_metadata",
-            "text_extraction_model_metadata",
-            "text_sentiment_dataset_metadata",
-        ]
-        if any(key in automl_nl_model_keys for key in self.model):
+        # Output warning if running not AutoML Translation prediction job
+        if "translation_model_metadata" not in self.model:
             warnings.warn(
-                "AutoMLTrainModelOperator for text prediction is deprecated. All the functionality of legacy "
-                "AutoML Natural Language and new features are available on the Vertex AI platform. "
-                "Please use `CreateAutoMLTextTrainingJobOperator`",
+                "AutoMLTrainModelOperator for text, image and video prediction is deprecated. "
+                "All the functionality of legacy "
+                "AutoML Natural Language, Vision and Video Intelligence and new features are available "
+                "on the Vertex AI platform. "
+                "Please use `CreateAutoMLTextTrainingJobOperator`, `CreateAutoMLImageTrainingJobOperator` or"
+                " `CreateAutoMLVideoTrainingJobOperator` from VertexAI.",
                 AirflowProviderDeprecationWarning,
-                stacklevel=2,
+                stacklevel=3,
             )
         hook = CloudAutoMLHook(
             gcp_conn_id=self.gcp_conn_id,
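For users who now hit the broadened warning, the message points at the Vertex AI operators as replacements. Below is a minimal, hedged sketch of swapping a deprecated image-model use of AutoMLTrainModelOperator for CreateAutoMLImageTrainingJobOperator; the dataset ID, display name, region and project ID are placeholder values and assume an image dataset that already exists in Vertex AI.

```python
# Hedged sketch: the Vertex AI replacement named in the new deprecation message.
# All identifiers below are placeholders, not values from this commit.
from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
    CreateAutoMLImageTrainingJobOperator,
)

create_image_training_job = CreateAutoMLImageTrainingJobOperator(
    task_id="create_image_training_job",
    display_name="my-image-model",      # placeholder display name
    dataset_id="1234567890",            # placeholder Vertex AI image dataset ID
    prediction_type="classification",
    model_type="CLOUD",
    region="us-central1",
    project_id="my-project",            # placeholder project
)
```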

docs/apache-airflow-providers-google/operators/cloud/automl.rst

Lines changed: 32 additions & 5 deletions
@@ -102,11 +102,38 @@ To create a Google AutoML model you can use
 The operator will wait for the operation to complete. Additionally the operator
 returns the id of model in :ref:`XCom <concepts:xcom>` under ``model_id`` key.
 
-This Operator is deprecated when running for text prediction and will be removed soon.
-All the functionality of legacy AutoML Natural Language and new features are available on the
-Vertex AI platform. Please use
-:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLTextTrainingJobOperator`.
-When running Vertex AI Operator for training dat, please ensure that your data is correctly stored in Vertex AI
+This Operator is deprecated when running for text, video and vision prediction and will be removed soon.
+All the functionality of legacy AutoML Natural Language, Vision, Video Intelligence and new features are
+available on the Vertex AI platform. Please use
+:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLTextTrainingJobOperator`,
+:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLImageTrainingJobOperator` or
+:class:`~airflow.providers.google.cloud.operators.vertex_ai.auto_ml.CreateAutoMLVideoTrainingJobOperator`.
+
+You can find example on how to use VertexAI operators for AutoML Natural Language classification here:
+
+.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_cloud_create_text_classification_training_job_operator]
+    :end-before: [END howto_cloud_create_text_classification_training_job_operator]
+
+Additionally, you can find example on how to use VertexAI operators for AutoML Vision classification here:
+
+.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_vision_classification.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_cloud_create_image_classification_training_job_operator]
+    :end-before: [END howto_cloud_create_image_classification_training_job_operator]
+
+Example on how to use VertexAI operators for AutoML Video Intelligence classification you can find here:
+
+.. exampleinclude:: /../../tests/system/providers/google/cloud/automl/example_automl_video_classification.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_cloud_create_video_classification_training_job_operator]
+    :end-before: [END howto_cloud_create_video_classification_training_job_operator]
+
+When running Vertex AI Operator for training data, please ensure that your data is correctly stored in Vertex AI
 datasets. To create and import data to the dataset please use
 :class:`~airflow.providers.google.cloud.operators.vertex_ai.dataset.CreateDatasetOperator`
 and
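The updated guidance stresses that the Vertex AI training operators expect data already stored in Vertex AI datasets, created with CreateDatasetOperator and populated with ImportDataOperator. A hedged sketch of that flow for an image dataset is shown below, modeled on the video example added by this commit; the bucket path, project ID and schema choice are illustrative assumptions, not values from the commit.

```python
# Hedged sketch: create a Vertex AI dataset, import data into it, and wire its ID
# downstream via XCom. Bucket, project and display names are placeholders.
from google.cloud.aiplatform import schema
from google.protobuf.struct_pb2 import Value

from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
    CreateDatasetOperator,
    ImportDataOperator,
)

create_image_dataset = CreateDatasetOperator(
    task_id="create_image_dataset",
    dataset={
        "display_name": "image-dataset",
        "metadata_schema_uri": schema.dataset.metadata.image,
        "metadata": Value(string_value="image-dataset"),
    },
    region="us-central1",
    project_id="my-project",  # placeholder
)

import_image_data = ImportDataOperator(
    task_id="import_image_data",
    dataset_id=create_image_dataset.output["dataset_id"],  # dataset ID via XCom
    region="us-central1",
    project_id="my-project",  # placeholder
    import_configs=[
        {
            "import_schema_uri": schema.dataset.ioformat.image.single_label_classification,
            "gcs_source": {"uris": ["gs://my-bucket/automl/image-classification.csv"]},  # placeholder
        }
    ],
)
```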

tests/system/providers/google/cloud/automl/example_automl_nl_text_classification.py

Lines changed: 2 additions & 5 deletions
@@ -22,13 +22,11 @@
 
 import os
 from datetime import datetime
-from typing import cast
 
 from google.cloud.aiplatform import schema
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.models.xcom_arg import XComArg
 from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
 from airflow.providers.google.cloud.operators.gcs import (
     GCSCreateBucketOperator,
@@ -114,7 +112,7 @@
         import_configs=DATA_CONFIG,
     )
 
-    # [START howto_operator_automl_create_model]
+    # [START howto_cloud_create_text_classification_training_job_operator]
     create_clss_training_job = CreateAutoMLTextTrainingJobOperator(
         task_id="create_clss_training_job",
         display_name=TEXT_CLSS_DISPLAY_NAME,
@@ -129,8 +127,7 @@
         region=GCP_AUTOML_LOCATION,
         project_id=GCP_PROJECT_ID,
     )
-    # [END howto_operator_automl_create_model]
-    model_id = cast(str, XComArg(create_clss_training_job, key="model_id"))
+    # [END howto_cloud_create_text_classification_training_job_operator]
 
     delete_clss_training_job = DeleteAutoMLTrainingJobOperator(
         task_id="delete_clss_training_job",

tests/system/providers/google/cloud/automl/example_automl_nl_text_extraction.py

Lines changed: 2 additions & 5 deletions
@@ -22,13 +22,11 @@
 
 import os
 from datetime import datetime
-from typing import cast
 
 from google.cloud.aiplatform import schema
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.models.xcom_arg import XComArg
 from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
 from airflow.providers.google.cloud.operators.gcs import (
     GCSCreateBucketOperator,
@@ -114,7 +112,7 @@
         import_configs=DATA_CONFIG,
     )
 
-    # [START howto_operator_automl_create_model]
+    # [START howto_cloud_create_text_extraction_training_job_operator]
     create_extr_training_job = CreateAutoMLTextTrainingJobOperator(
         task_id="create_extr_training_job",
         display_name=TEXT_EXTR_DISPLAY_NAME,
@@ -129,8 +127,7 @@
         region=GCP_AUTOML_LOCATION,
         project_id=GCP_PROJECT_ID,
     )
-    # [END howto_operator_automl_create_model]
-    model_id = cast(str, XComArg(create_extr_training_job, key="model_id"))
+    # [END howto_cloud_create_text_extraction_training_job_operator]
 
     delete_extr_training_job = DeleteAutoMLTrainingJobOperator(
         task_id="delete_extr_training_job",

tests/system/providers/google/cloud/automl/example_automl_nl_text_sentiment.py

Lines changed: 2 additions & 5 deletions
@@ -22,13 +22,11 @@
 
 import os
 from datetime import datetime
-from typing import cast
 
 from google.cloud.aiplatform import schema
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.models.xcom_arg import XComArg
 from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
 from airflow.providers.google.cloud.operators.gcs import (
     GCSCreateBucketOperator,
@@ -115,7 +113,7 @@
         import_configs=DATA_CONFIG,
     )
 
-    # [START howto_operator_automl_create_model]
+    # [START howto_cloud_create_text_sentiment_training_job_operator]
     create_sent_training_job = CreateAutoMLTextTrainingJobOperator(
         task_id="create_sent_training_job",
         display_name=TEXT_SENT_DISPLAY_NAME,
@@ -131,8 +129,7 @@
         region=GCP_AUTOML_LOCATION,
         project_id=GCP_PROJECT_ID,
    )
-    # [END howto_operator_automl_create_model]
-    model_id = cast(str, XComArg(create_sent_training_job, key="model_id"))
+    # [END howto_cloud_create_text_sentiment_training_job_operator]
 
     delete_sent_training_job = DeleteAutoMLTrainingJobOperator(
         task_id="delete_sent_training_job",
tests/system/providers/google/cloud/automl/example_automl_video_classification.py

Lines changed: 169 additions & 0 deletions (new file)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example Airflow DAG that uses Google AutoML services.
"""
from __future__ import annotations

import os
from datetime import datetime

from google.cloud.aiplatform import schema
from google.protobuf.struct_pb2 import Value

from airflow.models.dag import DAG
from airflow.providers.google.cloud.operators.gcs import (
    GCSCreateBucketOperator,
    GCSDeleteBucketOperator,
    GCSSynchronizeBucketsOperator,
)
from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
    CreateAutoMLVideoTrainingJobOperator,
    DeleteAutoMLTrainingJobOperator,
)
from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
    CreateDatasetOperator,
    DeleteDatasetOperator,
    ImportDataOperator,
)
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
DAG_ID = "example_automl_video_clss"
REGION = "us-central1"
VIDEO_DISPLAY_NAME = f"auto-ml-video-clss-{ENV_ID}"
MODEL_DISPLAY_NAME = f"auto-ml-video-clss-model-{ENV_ID}"

RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
VIDEO_GCS_BUCKET_NAME = f"bucket_video_clss_{ENV_ID}".replace("_", "-")

VIDEO_DATASET = {
    "display_name": f"video-dataset-{ENV_ID}",
    "metadata_schema_uri": schema.dataset.metadata.video,
    "metadata": Value(string_value="video-dataset"),
}
VIDEO_DATA_CONFIG = [
    {
        "import_schema_uri": schema.dataset.ioformat.video.classification,
        "gcs_source": {"uris": [f"gs://{VIDEO_GCS_BUCKET_NAME}/automl/classification.csv"]},
    },
]


# Example DAG for AutoML Video Intelligence Classification
with DAG(
    DAG_ID,
    schedule="@once",
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["example", "automl", "video", "classification"],
) as dag:
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=VIDEO_GCS_BUCKET_NAME,
        storage_class="REGIONAL",
        location=REGION,
    )

    move_dataset_file = GCSSynchronizeBucketsOperator(
        task_id="move_dataset_to_bucket",
        source_bucket=RESOURCE_DATA_BUCKET,
        source_object="automl/datasets/video",
        destination_bucket=VIDEO_GCS_BUCKET_NAME,
        destination_object="automl",
        recursive=True,
    )

    create_video_dataset = CreateDatasetOperator(
        task_id="video_dataset",
        dataset=VIDEO_DATASET,
        region=REGION,
        project_id=PROJECT_ID,
    )
    video_dataset_id = create_video_dataset.output["dataset_id"]

    import_video_dataset = ImportDataOperator(
        task_id="import_video_data",
        dataset_id=video_dataset_id,
        region=REGION,
        project_id=PROJECT_ID,
        import_configs=VIDEO_DATA_CONFIG,
    )

    # [START howto_cloud_create_video_classification_training_job_operator]
    create_auto_ml_video_training_job = CreateAutoMLVideoTrainingJobOperator(
        task_id="auto_ml_video_task",
        display_name=VIDEO_DISPLAY_NAME,
        prediction_type="classification",
        model_type="CLOUD",
        dataset_id=video_dataset_id,
        model_display_name=MODEL_DISPLAY_NAME,
        region=REGION,
        project_id=PROJECT_ID,
    )
    # [END howto_cloud_create_video_classification_training_job_operator]

    delete_auto_ml_video_training_job = DeleteAutoMLTrainingJobOperator(
        task_id="delete_auto_ml_video_training_job",
        training_pipeline_id="{{ task_instance.xcom_pull(task_ids='auto_ml_video_task', "
        "key='training_id') }}",
        region=REGION,
        project_id=PROJECT_ID,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    delete_video_dataset = DeleteDatasetOperator(
        task_id="delete_video_dataset",
        dataset_id=video_dataset_id,
        region=REGION,
        project_id=PROJECT_ID,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name=VIDEO_GCS_BUCKET_NAME,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    (
        # TEST SETUP
        [
            create_bucket >> move_dataset_file,
            create_video_dataset,
        ]
        >> import_video_dataset
        # TEST BODY
        >> create_auto_ml_video_training_job
        # TEST TEARDOWN
        >> delete_auto_ml_video_training_job
        >> delete_video_dataset
        >> delete_bucket
    )

    from tests.system.utils.watcher import watcher

    # This test needs watcher in order to properly mark success/failure
    # when "tearDown" task with trigger rule is part of the DAG
    list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
test_run = get_test_run(dag)
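In the DAG above, the teardown task pulls the pipeline ID with a Jinja `xcom_pull` template keyed on `training_id`. As a hedged stylistic aside, the same value can be wired through the training task's XComArg output, mirroring how `dataset_id` is passed earlier in the file; this is not what the commit does, only an equivalent way to express the dependency.

```python
# Hedged alternative (not in the commit): reference the training_id XCom through
# the task's output instead of a Jinja template string.
delete_auto_ml_video_training_job = DeleteAutoMLTrainingJobOperator(
    task_id="delete_auto_ml_video_training_job",
    training_pipeline_id=create_auto_ml_video_training_job.output["training_id"],
    region=REGION,
    project_id=PROJECT_ID,
    trigger_rule=TriggerRule.ALL_DONE,
)
```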
