Skip to content

Commit 23e03db

Browse files
authored
Add method to get metadata from GCS blob in GCSHook (#38398)
* Adding get metadata to gcs hook * unit test * Spelling and rm fstrings * test for blob not found * fix pytest raises, add match regex
1 parent a12a4a5 commit 23e03db

File tree

2 files changed

+47
-0
lines changed
  • airflow/providers/google/cloud/hooks
  • tests/providers/google/cloud/hooks

2 files changed

+47
-0
lines changed

airflow/providers/google/cloud/hooks/gcs.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,27 @@ def get_md5hash(self, bucket_name: str, object_name: str) -> str:
10101010
self.log.info("The md5Hash of %s is %s", object_name, blob_md5hash)
10111011
return blob_md5hash
10121012

1013+
def get_metadata(self, bucket_name: str, object_name: str) -> dict | None:
    """
    Get the metadata of an object in Google Cloud Storage.

    :param bucket_name: Name of the Google Cloud Storage bucket where the object is.
    :param object_name: The name of the object containing the desired metadata
    :return: The metadata associated with the object; may be ``None`` or empty
        when the blob carries no metadata
    :raises ValueError: If no object named ``object_name`` is found in ``bucket_name``
    """
    self.log.info("Retrieving the metadata dict of object (%s) in bucket (%s)", object_name, bucket_name)
    client = self.get_conn()
    bucket = client.bucket(bucket_name)
    blob = bucket.get_blob(blob_name=object_name)
    if blob is None:
        # Exceptions do not perform logging-style lazy %-interpolation: passing the
        # template and its arguments separately would leave the placeholders unfilled
        # and turn the message into a tuple. Format the text before raising.
        raise ValueError(f"Object ({object_name}) not found in bucket ({bucket_name})")
    blob_metadata = blob.metadata
    if blob_metadata:
        self.log.info("Retrieved metadata of object (%s) with %s fields", object_name, len(blob_metadata))
    else:
        self.log.info("Metadata of object (%s) is empty or it does not exist", object_name)
    return blob_metadata
1033+
10131034
@GoogleBaseHook.fallback_to_default_project_id
10141035
def create_bucket(
10151036
self,

tests/providers/google/cloud/hooks/test_gcs.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,32 @@ def test_object_get_md5hash(self, mock_service):
565565

566566
assert response == returned_file_md5hash
567567

568+
@mock.patch(GCS_STRING.format("GCSHook.get_conn"))
def test_object_get_metadata(self, mock_service):
    """get_metadata hands back the metadata dict of the blob returned by the client."""
    bucket_name, object_name = "test_bucket", "test_object"
    expected_metadata = {"test_metadata_key": "test_metadata_val"}

    # Wire the mocked chain: get_conn() -> bucket() -> get_blob() -> blob.metadata
    mocked_blob = mock_service.return_value.bucket.return_value.get_blob.return_value
    mocked_blob.metadata = expected_metadata

    actual_metadata = self.gcs_hook.get_metadata(bucket_name=bucket_name, object_name=object_name)

    assert actual_metadata == expected_metadata
581+
582+
@mock.patch(GCS_STRING.format("GCSHook.get_conn"))
def test_nonexisting_object_get_metadata(self, mock_service):
    """get_metadata raises ValueError when the bucket lookup yields no blob."""
    lookup_kwargs = {"bucket_name": "test_bucket", "object_name": "test_object"}

    # Simulate a missing object: the mocked bucket's get_blob() returns None.
    mock_service.return_value.bucket.return_value.get_blob.return_value = None

    with pytest.raises(ValueError, match=r"Object \((.*?)\) not found in bucket \((.*?)\)"):
        self.gcs_hook.get_metadata(**lookup_kwargs)
593+
568594
@mock.patch("google.cloud.storage.Bucket")
569595
@mock.patch(GCS_STRING.format("GCSHook.get_conn"))
570596
def test_create_bucket(self, mock_service, mock_bucket):

0 commit comments

Comments
 (0)