|
23 | 23 | """ |
24 | 24 | from __future__ import annotations |
25 | 25 |
|
| 26 | +import json |
| 27 | +import logging |
26 | 28 | import os |
27 | 29 | from datetime import datetime |
28 | 30 | from pathlib import Path |
29 | 31 |
|
| 32 | +from airflow.decorators import task |
| 33 | +from airflow.models import Connection |
30 | 34 | from airflow.models.dag import DAG |
| 35 | +from airflow.operators.bash import BashOperator |
31 | 36 | from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator |
32 | 37 | from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator |
| 38 | +from airflow.providers.google.suite.hooks.drive import GoogleDriveHook |
33 | 39 | from airflow.providers.google.suite.transfers.gcs_to_gdrive import GCSToGoogleDriveOperator |
| 40 | +from airflow.settings import Session |
34 | 41 | from airflow.utils.trigger_rule import TriggerRule |
35 | 42 |
|
# Identifiers for the system-test environment; both come from the CI env.
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
# Optional target Drive folder id for the "copy into folder" task.
# os.environ.get already defaults to None — no explicit default needed.
FOLDER_ID = os.environ.get("GCP_GDRIVE_FOLDER_ID")

DAG_ID = "example_gcs_to_gdrive"

# Per-run resource names, suffixed with the environment id so that
# concurrent test environments do not collide.
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
CONNECTION_ID = f"connection_{DAG_ID}_{ENV_ID}"

TMP_PATH = "tmp"
# Drive working-folder name; "-" is replaced so the name is safe to embed
# in the Drive API query string used during teardown.
WORK_DIR = f"folder_{DAG_ID}_{ENV_ID}".replace("-", "_")
CURRENT_FOLDER = Path(__file__).parent
# CURRENT_FOLDER is already a Path — join directly instead of re-wrapping.
LOCAL_PATH = str(CURRENT_FOLDER / "resources")
# LOCAL_PATH is already a normalized path string; str(Path(LOCAL_PATH))
# was a no-op round-trip.
FILE_LOCAL_PATH = LOCAL_PATH
FILE_NAME = "example_upload.txt"

log = logging.getLogger(__name__)
|
with DAG(
    DAG_ID,
    schedule="@once",
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["example", "gcs", "gdrive"],
) as dag:

    @task
    def create_temp_gcp_connection():
        """Create a temporary GCP connection with Drive + cloud-platform scopes.

        If a connection with ``CONNECTION_ID`` already exists, a warning is
        logged and the existing connection is left untouched.
        """
        conn = Connection(
            conn_id=CONNECTION_ID,
            conn_type="google_cloud_platform",
        )
        conn_extra_json = json.dumps(
            {
                "scope": "https://www.googleapis.com/auth/drive,"
                "https://www.googleapis.com/auth/cloud-platform"
            }
        )
        conn.set_extra(conn_extra_json)

        session = Session()
        try:
            if session.query(Connection).filter(Connection.conn_id == CONNECTION_ID).first():
                log.warning("Connection %s already exists", CONNECTION_ID)
                return None
            session.add(conn)
            session.commit()
        finally:
            # Fix: always release the DB session (it previously leaked on
            # both the early-return and the success path).
            session.close()

    create_temp_gcp_connection_task = create_temp_gcp_connection()

    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
    )

    upload_file_1 = LocalFilesystemToGCSOperator(
        task_id="upload_file_1",
        src=f"{FILE_LOCAL_PATH}/{FILE_NAME}",
        dst=f"{TMP_PATH}/{FILE_NAME}",
        bucket=BUCKET_NAME,
    )

    upload_file_2 = LocalFilesystemToGCSOperator(
        task_id="upload_file_2",
        src=f"{FILE_LOCAL_PATH}/{FILE_NAME}",
        dst=f"{TMP_PATH}/2_{FILE_NAME}",
        bucket=BUCKET_NAME,
    )
    # [START howto_operator_gcs_to_gdrive_copy_single_file]
    copy_single_file = GCSToGoogleDriveOperator(
        task_id="copy_single_file",
        gcp_conn_id=CONNECTION_ID,
        source_bucket=BUCKET_NAME,
        source_object=f"{TMP_PATH}/{FILE_NAME}",
        destination_object=f"{WORK_DIR}/copied_{FILE_NAME}",
    )
    # [END howto_operator_gcs_to_gdrive_copy_single_file]

    # [START howto_operator_gcs_to_gdrive_copy_single_file_into_folder]
    copy_single_file_into_folder = GCSToGoogleDriveOperator(
        task_id="copy_single_file_into_folder",
        gcp_conn_id=CONNECTION_ID,
        source_bucket=BUCKET_NAME,
        source_object=f"{TMP_PATH}/{FILE_NAME}",
        destination_object=f"{WORK_DIR}/copied_{FILE_NAME}",
        destination_folder_id=FOLDER_ID,
    )
    # [END howto_operator_gcs_to_gdrive_copy_single_file_into_folder]

    # [START howto_operator_gcs_to_gdrive_copy_files]
    copy_files = GCSToGoogleDriveOperator(
        task_id="copy_files",
        gcp_conn_id=CONNECTION_ID,
        source_bucket=BUCKET_NAME,
        source_object=f"{TMP_PATH}/*",
        destination_object=f"{WORK_DIR}/",
    )
    # [END howto_operator_gcs_to_gdrive_copy_files]

    # [START howto_operator_gcs_to_gdrive_move_files]
    move_files = GCSToGoogleDriveOperator(
        task_id="move_files",
        gcp_conn_id=CONNECTION_ID,
        source_bucket=BUCKET_NAME,
        source_object=f"{TMP_PATH}/*.txt",
        destination_object=f"{WORK_DIR}/",
        move_object=True,
    )
    # [END howto_operator_gcs_to_gdrive_move_files]

    @task(trigger_rule=TriggerRule.ALL_DONE)
    def remove_files_from_drive():
        """Teardown: batch-delete the Drive folder(s) named WORK_DIR.

        Runs regardless of upstream state (ALL_DONE) so test artifacts are
        cleaned up even on failure.
        """
        service = GoogleDriveHook(gcp_conn_id=CONNECTION_ID).get_conn()
        # NOTE(review): this query does not exclude trashed items — confirm
        # that is acceptable for teardown.
        root_path = (
            service.files()
            .list(q=f"name = '{WORK_DIR}' and mimeType = 'application/vnd.google-apps.folder'")
            .execute()
        )
        if files := root_path["files"]:
            batch = service.new_batch_http_request()
            for file in files:
                # Fix: logging uses %-style lazy formatting; the previous
                # "{}" placeholder was emitted literally, never interpolated.
                log.info("Preparing to remove file: %s", file)
                batch.add(service.files().delete(fileId=file["id"]))
            batch.execute()
            log.info("Selected files removed.")

    remove_files_from_drive_task = remove_files_from_drive()

    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket", bucket_name=BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE
    )

    delete_temp_gcp_connection_task = BashOperator(
        task_id="delete_temp_gcp_connection",
        bash_command=f"airflow connections delete {CONNECTION_ID}",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # TEST SETUP
    create_bucket >> [upload_file_1, upload_file_2]
    (
        [upload_file_1, upload_file_2, create_temp_gcp_connection_task]
        # TEST BODY
        >> copy_single_file
        >> copy_single_file_into_folder
        >> copy_files
        >> move_files
        # TEST TEARDOWN
        >> remove_files_from_drive_task
        >> [delete_bucket, delete_temp_gcp_connection_task]
    )
130 | 193 |
|
131 | 194 | from tests.system.utils.watcher import watcher |
|
0 commit comments