Skip to content

Commit b4b9cf5

Browse files
authored
Check for missing references to operator guides (#13059)
1 parent fa9c6b4 commit b4b9cf5

File tree

5 files changed

+72
-52
lines changed

5 files changed

+72
-52
lines changed

airflow/providers/google/cloud/operators/dataprep.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ class DataprepRunJobGroupOperator(BaseOperator):
106106
To get recipe_id please follow the Dataprep API documentation
107107
https://clouddataprep.com/documentation/api#operation/runJobGroup
108108
109+
.. seealso::
110+
For more information on how to use this operator, take a look at the guide:
111+
:ref:`howto/operator:DataprepRunJobGroupOperator`
112+
109113
:param recipe_id: The identifier for the recipe you would like to run.
110114
:type recipe_id: int
111115
"""

airflow/providers/google/cloud/transfers/mysql_to_gcs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
class MySQLToGCSOperator(BaseSQLToGCSOperator):
3434
"""Copy data from MySQL to Google Cloud Storage in JSON or CSV format.
3535
36+
.. seealso::
37+
For more information on how to use this operator, take a look at the guide:
38+
:ref:`howto/operator:MySQLToGCSOperator`
39+
3640
:param mysql_conn_id: Reference to a specific MySQL hook.
3741
:type mysql_conn_id: str
3842
:param ensure_utc: Ensure TIMESTAMP columns exported as UTC. If set to

airflow/providers/google/cloud/transfers/s3_to_gcs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ class S3ToGCSOperator(S3ListOperator):
3131
Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage
3232
destination path.
3333
34+
.. seealso::
35+
For more information on how to use this operator, take a look at the guide:
36+
:ref:`howto/operator:S3ToGCSOperator`
37+
3438
:param bucket: The S3 bucket where to find the objects. (templated)
3539
:type bucket: str
3640
:param prefix: Prefix string which filters objects whose name begin with

docs/apache-airflow-providers-google/operators/transfer/s3_to_gcs.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
under the License.
1717
1818
19+
.. _howto/operator:S3ToGCSOperator:
1920

2021
Transfer Data from Amazon S3 to Google Cloud Storage
2122
====================================================
@@ -32,8 +33,6 @@ Prerequisite Tasks
3233

3334
.. include::/howto/operator/google/_partials/prerequisite_tasks.rst
3435
35-
.. _howto/operator:S3ToGCSOperator:
36-
3736
Use the :class:`~airflow.providers.google.cloud.transfers.s3_to_gcs.S3ToGCSOperator`
3837
to transfer data from Amazon S3 to Google Cloud Storage.
3938

docs/exts/docs_build/lint_checks.py

Lines changed: 59 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from itertools import chain
2323
from typing import Iterable, List, Optional, Set
2424

25+
from docs.exts.docs_build.docs_builder import ALL_PROVIDER_YAMLS
2526
from docs.exts.docs_build.errors import DocBuildError # pylint: disable=no-name-in-module
2627

2728
ROOT_PROJECT_DIR = os.path.abspath(
@@ -31,14 +32,14 @@
3132
DOCS_DIR = os.path.join(ROOT_PROJECT_DIR, "docs")
3233

3334

34-
def find_existing_guide_operator_names(src_dir: str) -> Set[str]:
35+
def find_existing_guide_operator_names(src_dir_pattern: str) -> Set[str]:
3536
"""
3637
Find names of existing operators.
3738
:return: names of existing operators.
3839
"""
3940
operator_names = set()
4041

41-
paths = glob(f"{src_dir}/**/*.rst", recursive=True)
42+
paths = glob(src_dir_pattern, recursive=True)
4243
for path in paths:
4344
with open(path) as f:
4445
operator_names |= set(re.findall(".. _howto/operator:(.+?):", f.read()))
@@ -49,72 +50,80 @@ def find_existing_guide_operator_names(src_dir: str) -> Set[str]:
4950
def extract_ast_class_def_by_name(ast_tree, class_name):
5051
"""
5152
Extracts class definition by name
53+
5254
:param ast_tree: AST tree
5355
:param class_name: name of the class.
5456
:return: class node found
5557
"""
58+
for node in ast.walk(ast_tree):
59+
if isinstance(node, ast.ClassDef) and node.name == class_name:
60+
return node
5661

57-
class ClassVisitor(ast.NodeVisitor):
58-
"""Visitor."""
59-
60-
def __init__(self):
61-
self.found_class_node = None
62-
63-
def visit_ClassDef(self, node): # pylint: disable=invalid-name
64-
"""
65-
Visit class definition.
66-
:param node: node.
67-
:return:
68-
"""
69-
if node.name == class_name:
70-
self.found_class_node = node
62+
return None
7163

72-
visitor = ClassVisitor()
73-
visitor.visit(ast_tree)
7464

75-
return visitor.found_class_node
65+
def _generate_missing_guide_error(path, line_no, operator_name):
66+
return DocBuildError(
67+
file_path=path,
68+
line_no=line_no,
69+
message=(
70+
f"Link to the guide is missing in operator's description: {operator_name}.\n"
71+
f"Please add link to the guide to the description in the following form:\n"
72+
f"\n"
73+
f".. seealso::\n"
74+
f" For more information on how to use this operator, take a look at the guide:\n"
75+
f" :ref:`howto/operator:{operator_name}`\n"
76+
),
77+
)
7678

7779

7880
def check_guide_links_in_operator_descriptions() -> List[DocBuildError]:
7981
"""Check if there are links to guides in operator's descriptions."""
80-
# TODO: We should also check the guides in the provider documentations.
81-
# For now, we are only checking the core documentation.
82-
# This is easiest to do after the content has been fully migrated.
8382
build_errors = []
8483

85-
def generate_build_error(path, line_no, operator_name):
86-
return DocBuildError(
87-
file_path=path,
88-
line_no=line_no,
89-
message=(
90-
f"Link to the guide is missing in operator's description: {operator_name}.\n"
91-
f"Please add link to the guide to the description in the following form:\n"
92-
f"\n"
93-
f".. seealso::\n"
94-
f" For more information on how to use this operator, take a look at the guide:\n"
95-
f" :ref:`apache-airflow:howto/operator:{operator_name}`\n"
84+
build_errors.extend(
85+
_check_missing_guide_references(
86+
operator_names=find_existing_guide_operator_names(
87+
f"{DOCS_DIR}/apache-airflow/howto/operator/**/*.rst"
88+
),
89+
python_module_paths=chain(
90+
glob(f"{ROOT_PACKAGE_DIR}/operators/*.py"),
91+
glob(f"{ROOT_PACKAGE_DIR}/sensors/*.py"),
9692
),
9793
)
94+
)
9895

99-
# Extract operators for which there are existing .rst guides
100-
operator_names = find_existing_guide_operator_names(f"{DOCS_DIR}/howto/operator")
96+
for provider in ALL_PROVIDER_YAMLS:
97+
operator_names = {
98+
*find_existing_guide_operator_names(f"{DOCS_DIR}/{provider['package-name']}/operators/**/*.rst"),
99+
*find_existing_guide_operator_names(f"{DOCS_DIR}/{provider['package-name']}/operators.rst"),
100+
}
101+
102+
# Extract all potential python modules that can contain operators
103+
python_module_paths = chain(
104+
glob(f"{provider['package-dir']}/**/operators/*.py", recursive=True),
105+
glob(f"{provider['package-dir']}/**/sensors/*.py", recursive=True),
106+
glob(f"{provider['package-dir']}/**/transfers/*.py", recursive=True),
107+
)
101108

102-
# Extract all potential python modules that can contain operators
103-
python_module_paths = chain(
104-
glob(f"{ROOT_PACKAGE_DIR}/operators/*.py"),
105-
glob(f"{ROOT_PACKAGE_DIR}/sensors/*.py"),
106-
glob(f"{ROOT_PACKAGE_DIR}/providers/**/operators/*.py", recursive=True),
107-
glob(f"{ROOT_PACKAGE_DIR}/providers/**/sensors/*.py", recursive=True),
108-
glob(f"{ROOT_PACKAGE_DIR}/providers/**/transfers/*.py", recursive=True),
109-
)
109+
build_errors.extend(
110+
_check_missing_guide_references(
111+
operator_names=operator_names, python_module_paths=python_module_paths
112+
)
113+
)
114+
115+
return build_errors
116+
117+
118+
def _check_missing_guide_references(operator_names, python_module_paths) -> List[DocBuildError]:
119+
build_errors = []
110120

111121
for py_module_path in python_module_paths:
112122
with open(py_module_path) as f:
113123
py_content = f.read()
114124

115125
if "This module is deprecated" in py_content:
116126
continue
117-
118127
for existing_operator in operator_names:
119128
if f"class {existing_operator}" not in py_content:
120129
continue
@@ -130,12 +139,12 @@ def generate_build_error(path, line_no, operator_name):
130139
if "This class is deprecated." in docstring:
131140
continue
132141

133-
if f":ref:`apache-airflow:howto/operator:{existing_operator}`" in ast.get_docstring(
134-
class_def
135-
) or f":ref:`howto/operator:{existing_operator}`" in ast.get_docstring(class_def):
142+
if f":ref:`howto/operator:{existing_operator}`" in ast.get_docstring(class_def):
136143
continue
137144

138-
build_errors.append(generate_build_error(py_module_path, class_def.lineno, existing_operator))
145+
build_errors.append(
146+
_generate_missing_guide_error(py_module_path, class_def.lineno, existing_operator)
147+
)
139148
return build_errors
140149

141150

@@ -193,7 +202,7 @@ def find_modules(deprecated_only: bool = False) -> Set[str]:
193202

194203
def check_exampleinclude_for_example_dags() -> List[DocBuildError]:
195204
"""Checks all exampleincludes for example dags."""
196-
all_docs_files = glob(f"${DOCS_DIR}/**/*rst", recursive=True)
205+
all_docs_files = glob(f"${DOCS_DIR}/**/*.rst", recursive=True)
197206
build_errors = []
198207
for doc_file in all_docs_files:
199208
build_error = assert_file_not_contains(
@@ -211,7 +220,7 @@ def check_exampleinclude_for_example_dags() -> List[DocBuildError]:
211220

212221
def check_enforce_code_block() -> List[DocBuildError]:
213222
"""Checks all code:: blocks."""
214-
all_docs_files = glob(f"{DOCS_DIR}/**/*rst", recursive=True)
223+
all_docs_files = glob(f"{DOCS_DIR}/**/*.rst", recursive=True)
215224
build_errors = []
216225
for doc_file in all_docs_files:
217226
build_error = assert_file_not_contains(

0 commit comments

Comments
 (0)