Skip to content

Commit cc87ef4

Browse files
add a --query option to unify target selection
use the ast module to parse query expressions!! [ci skip-rust-tests] answer some review comments. per-file targets are breaking it now [ci skip-rust-tests] add do_not_generate_subtargets workaround for subtargets [ci skip-rust-tests] review feedback [ci skip-rust-tests] attempt to use @_uncacheable_rule [ci skip-rust-tests] fix @_uncacheable_rule fix error about git as RootRule add a lengthy TODO to fix query_rules() initialization [ci skip-rust-tests]
1 parent 22cc05f commit cc87ef4

7 files changed

Lines changed: 374 additions & 28 deletions

File tree

src/python/pants/engine/internals/graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ class OwnersRequest:
182182
"""A request for the owners of a set of file paths."""
183183

184184
sources: Tuple[str, ...]
185+
do_not_generate_subtargets: bool = False
185186

186187

187188
class Owners(Collection[Address]):
@@ -221,7 +222,7 @@ async def find_owners(owners_request: OwnersRequest) -> Owners:
221222
if bfa.rel_path not in sources_set and not matching_files:
222223
continue
223224
deleted_files_matched = bool(set(matching_files) - all_source_files)
224-
if deleted_files_matched:
225+
if deleted_files_matched or owners_request.do_not_generate_subtargets:
225226
original_addresses_due_to_deleted_files.add(candidate_tgt.address)
226227
continue
227228
# Else, we generate subtargets for greater precision. We use those subtargets, unless

src/python/pants/engine/query.py

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2+
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3+
4+
import ast
5+
from abc import ABC, abstractmethod
6+
from dataclasses import dataclass
7+
from typing import Any, Dict, Tuple, Type, TypeVar
8+
9+
from pants.build_graph.address import Address
10+
from pants.engine.collection import Collection
11+
from pants.engine.internals.graph import Owners, OwnersRequest
12+
from pants.engine.rules import RootRule, rule
13+
from pants.engine.selectors import Get
14+
from pants.engine.unions import UnionMembership, UnionRule, union
15+
from pants.scm.git import Git
16+
from pants.scm.subsystems.changed import (
17+
ChangedFiles,
18+
ChangedFilesRequest,
19+
ChangedOptions,
20+
ChangedAddresses,
21+
ChangedRequest,
22+
DependeesOption,
23+
)
24+
from pants.util.meta import classproperty
25+
from pants.util.strutil import safe_shlex_split
26+
27+
28+
@union
class QueryComponent(ABC):
    """Base class for the functions which may be invoked from a `--query` expression.

    Concrete subclasses are registered as members of this @union (see `rules()`), identify
    themselves via `function_name`, and build an instance of themselves from the arguments of a
    parsed query expression via `parse_from_args()`.
    """

    @classproperty
    @abstractmethod
    def function_name(cls):
        """The name of the function called in a query expression.

        If the user provides --query='<name>(<args...>)' on the command line, and `<name>` matches
        this property, the .parse_from_args() method is invoked with the literal values of
        `<args...>`.
        """

    @classmethod
    @abstractmethod
    def parse_from_args(cls, *args, **kwargs):
        """Create an instance of this class from the arguments of a query expression.

        The positional and keyword arguments are the python literals written in the expression,
        forwarded as `parse_from_args(*args, **kwargs)` by the `parse_query_expr` rule.

        This method should raise an error if the args are incorrect or invalid.
        """
48+
49+
50+
class QueryAddresses(Collection[Address]):
    """The addresses produced by resolving a single query component."""
52+
53+
54+
@dataclass(frozen=True)
class OwnerOf(QueryComponent):
    """Query component for `owner_of('<file>', ...)`.

    It is resolved by requesting the `Owners` of the given files.
    """

    # Any number of file paths may be given, hence a variable-length tuple.
    files: Tuple[str, ...]

    function_name = 'owner_of'

    @classmethod
    def parse_from_args(cls, *args):
        """Create an instance from file paths given as positional arguments.

        Every argument is coerced to `str`.
        """
        return cls(files=tuple(str(f) for f in args))
63+
64+
65+
@rule
async def owner_of_request(owner_of: OwnerOf) -> QueryAddresses:
    """Resolve an `OwnerOf` query component into the addresses owning its files."""
    # Subtarget generation is explicitly switched off for this owners lookup.
    owners = await Get(
        Owners,
        OwnersRequest,
        OwnersRequest(sources=owner_of.files, do_not_generate_subtargets=True),
    )
    return QueryAddresses(owners)
70+
71+
72+
@dataclass(frozen=True)
class ChangesSince(QueryComponent):
    """Query component for `since('<ref>', dependees=...)`."""

    since: str
    dependees: DependeesOption

    function_name = 'since'

    @classmethod
    def parse_from_args(cls, since, dependees=DependeesOption.NONE):
        """Create an instance, coercing `since` to `str` and `dependees` to `DependeesOption`."""
        since_value = str(since)
        dependees_option = DependeesOption(dependees)
        return cls(since=since_value, dependees=dependees_option)
83+
84+
85+
@rule
async def since_request(
    git: Git,
    since: ChangesSince,
) -> QueryAddresses:
    """Resolve a `ChangesSince` query component into the changed addresses."""
    options = ChangedOptions(since=since.since, diffspec=None, dependees=since.dependees)

    # First find the changed files, then the addresses corresponding to those files.
    files_request = ChangedFilesRequest(options, git)
    changed_files = await Get(ChangedFiles, ChangedFilesRequest, files_request)

    addresses_request = ChangedRequest(
        sources=tuple(changed_files),
        dependees=options.dependees,
        do_not_generate_subtargets=True,
    )
    changed = await Get(ChangedAddresses, ChangedRequest, addresses_request)
    return QueryAddresses(changed)
102+
103+
104+
@dataclass(frozen=True)
class ChangesForDiffspec(QueryComponent):
    """Query component for `changes_for_diffspec('<diffspec>', dependees=...)`."""

    diffspec: str
    dependees: DependeesOption

    function_name = 'changes_for_diffspec'

    @classmethod
    def parse_from_args(cls, diffspec, dependees=DependeesOption.NONE):
        """Create an instance, coercing `diffspec` to `str` and `dependees` to `DependeesOption`."""
        diffspec_value = str(diffspec)
        dependees_option = DependeesOption(dependees)
        return cls(diffspec=diffspec_value, dependees=dependees_option)
115+
116+
117+
@rule
async def changes_for_diffspec_request(
    git: Git,
    changes_for_diffspec: ChangesForDiffspec,
) -> QueryAddresses:
    """Resolve a `ChangesForDiffspec` query component into the changed addresses."""
    options = ChangedOptions(
        since=None,
        diffspec=changes_for_diffspec.diffspec,
        dependees=changes_for_diffspec.dependees,
    )

    # First find the changed files, then the addresses corresponding to those files.
    files_request = ChangedFilesRequest(options, git)
    changed_files = await Get(ChangedFiles, ChangedFilesRequest, files_request)

    addresses_request = ChangedRequest(
        sources=tuple(changed_files),
        dependees=options.dependees,
        do_not_generate_subtargets=True,
    )
    changed = await Get(ChangedAddresses, ChangedRequest, addresses_request)
    return QueryAddresses(changed)
134+
135+
136+
_T = TypeVar('_T', bound=QueryComponent)


@dataclass(frozen=True)
class KnownQueryExpressions:
    """The available query functions, keyed by their `function_name`."""

    # Annotated with the concrete base class: a TypeVar is unbound in a non-generic class.
    # NOTE(review): the dict field makes instances unhashable -- confirm the engine never needs to
    # hash this product.
    components: Dict[str, Type[QueryComponent]]
142+
143+
144+
@rule
def known_query_expressions(union_membership: UnionMembership) -> KnownQueryExpressions:
    """Index every registered `QueryComponent` union member by its `function_name`."""
    components = {}
    for member in union_membership[QueryComponent]:
        components[member.function_name] = member
    return KnownQueryExpressions(components)
150+
151+
152+
@dataclass(frozen=True)
class QueryParseInput:
    """A raw query expression string, as the input to the `parse_query_expr` rule."""

    expr: str
155+
156+
157+
class QueryParseError(Exception):
    """Raised when a query expression cannot be parsed or names an unknown query function."""
158+
159+
160+
@dataclass(frozen=True)
class QueryComponentWrapper:
    """A type-erasing wrapper around a parsed `QueryComponent`.

    This gives the `parse_query_expr` rule a single concrete return type.
    """

    # Annotated with the concrete base class: a TypeVar is unbound in a non-generic class.
    underlying: QueryComponent
163+
164+
165+
@dataclass(frozen=True)
166+
class ParsedPythonesqueFunctionCall:
167+
"""Representation of a limited form of python named function calls."""
168+
function_name: str
169+
positional_args: Tuple[Any, ...]
170+
keyword_args: Dict[str, Any]
171+
172+
173+
def _parse_python_arg(arg_value: ast.AST) -> Any:
174+
"""Convert an AST node for the argument of a function call into its literal value."""
175+
return ast.literal_eval(arg_value)
176+
177+
178+
def _parse_python_esque_function_call(expr: str) -> ParsedPythonesqueFunctionCall:
179+
"""Parse a string into a description of a python function call expression."""
180+
try:
181+
query_expression = ast.parse(expr).body[0].value
182+
except Exception as e:
183+
raise QueryParseError(f'Error parsing query expression: {e}') from e
184+
185+
if not isinstance(query_expression, ast.Call):
186+
type_name = type(query_expression).__name__
187+
raise QueryParseError(
188+
f'Query expression must be a single function call, but received {type_name}: '
189+
f'{ast.dump(query_expression)}.')
190+
191+
func_expr = query_expression.func
192+
if not isinstance(func_expr, ast.Name):
193+
raise QueryParseError('Function call in query expression should just be a name, but '
194+
f'received {type(func_expr).__name__}: {ast.dump(func_expr)}.')
195+
function_name = func_expr.id
196+
197+
positional_args = [_parse_python_arg(x) for x in query_expression.args]
198+
keyword_args = {
199+
k.arg: _parse_python_arg(k.value)
200+
for k in query_expression.keywords
201+
}
202+
203+
return ParsedPythonesqueFunctionCall(
204+
function_name=function_name,
205+
positional_args=positional_args,
206+
keyword_args=keyword_args,
207+
)
208+
209+
210+
# TODO: allow returning an @union to avoid having to use this QueryComponentWrapper for type
# erasure.
@rule
def parse_query_expr(s: QueryParseInput, known: KnownQueryExpressions) -> QueryComponentWrapper:
    """Parse the input string and attempt to find a query function matching the function call.

    :param s: The raw query expression to parse.
    :param known: The available query functions, keyed by function name.
    :return: The matching query component (wrapped for type erasure), which can be resolved into
             `QueryAddresses` in the v2 engine.
    :raises QueryParseError: if the expression cannot be parsed, names an unknown query function,
                             or passes arguments which that function rejects.
    """
    try:
        parsed_function_call = _parse_python_esque_function_call(s.expr)
    except Exception as e:
        # Re-wrap so that the message always names the offending expression.
        raise QueryParseError(f'Error parsing expression {s}: {e}.') from e

    name = parsed_function_call.function_name
    selected_function = known.components.get(name)
    if selected_function is None:
        raise QueryParseError(
            f'Query function with name {name} not found (in expr {s})! '
            f'The known functions are: {known}.')

    try:
        component = selected_function.parse_from_args(
            *parsed_function_call.positional_args,
            **parsed_function_call.keyword_args,
        )
    except (TypeError, ValueError) as e:
        # A wrong argument count, an unknown keyword, or an invalid argument value is an error in
        # the user's expression, so it is reported in the same way as any other parse failure.
        raise QueryParseError(
            f'Invalid arguments for query function {name} (in expr {s}): {e}.') from e
    return QueryComponentWrapper(component)
234+
235+
236+
def rules():
    """Return the root rules, union rules and @rules which implement `--query`."""
    root_rules = [
        RootRule(root_type)
        for root_type in (OwnerOf, ChangesSince, QueryParseInput, ChangesForDiffspec)
    ]
    union_rules = [
        UnionRule(QueryComponent, member)
        for member in (OwnerOf, ChangesSince, ChangesForDiffspec)
    ]
    return [
        *root_rules,
        known_query_expressions,
        *union_rules,
        owner_of_request,
        since_request,
        changes_for_diffspec_request,
        parse_query_expr,
    ]

src/python/pants/init/extension_loader.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from pants.base.exceptions import BackendConfigurationError
1111
from pants.build_graph.build_configuration import BuildConfiguration
12+
from pants.engine.query import rules as query_rules
1213
from pants.util.ordered_set import FrozenOrderedSet
1314

1415

@@ -113,6 +114,13 @@ def load_build_configuration_from_source(
113114
for backend_package in backend_packages:
114115
load_backend(build_configuration, backend_package)
115116

117+
# TODO: query_rules() needs to be registered here instead of in engine_initializer.py because it
118+
# declares @union members, which are only loaded into UnionMembership when the
119+
# BuildConfiguration is first created (it's now frozen after that). Since --query requires
120+
# @union members to be registered in UnionMembership to work, it has to be declared here for now
121+
# until we can add union rules after the fact.
122+
build_configuration.register_rules(query_rules())
123+
116124

117125
def load_backend(build_configuration: BuildConfiguration.Builder, backend_package: str) -> None:
118126
"""Installs the given backend package into the build configuration.

src/python/pants/init/specs_calculator.py

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,18 @@
1717
Specs,
1818
)
1919
from pants.engine.internals.scheduler import SchedulerSession
20+
from pants.engine.query import QueryAddresses, QueryComponentWrapper, QueryParseInput
2021
from pants.engine.selectors import Params
2122
from pants.option.options import Options
2223
from pants.option.options_bootstrapper import OptionsBootstrapper
23-
from pants.scm.subsystems.changed import ChangedAddresses, ChangedOptions, ChangedRequest
24+
from pants.scm.git import Git
25+
from pants.scm.subsystems.changed import (
26+
ChangedAddresses,
27+
ChangedFiles,
28+
ChangedFilesRequest,
29+
ChangedOptions,
30+
ChangedRequest,
31+
)
2432
from pants.util.ordered_set import OrderedSet
2533

2634
logger = logging.getLogger(__name__)
@@ -74,17 +82,25 @@ def create(
7482
exclude_patterns: Optional[Iterable[str]] = None,
7583
tags: Optional[Iterable[str]] = None,
7684
) -> Specs:
85+
# Determine the literal specs.
7786
specs = cls.parse_specs(
7887
raw_specs=options.specs,
7988
build_root=build_root,
8089
exclude_patterns=exclude_patterns,
8190
tags=tags,
8291
)
8392

93+
# Determine `Changed` arguments directly from options to support pre-`Subsystem`
94+
# initialization paths.
8495
changed_options = ChangedOptions.from_options(options.for_scope("changed"))
8596

97+
# Parse --query expressions into objects which can be resolved into BuildFileAddresses via
98+
# v2 rules.
99+
query_expr_strings = options.for_global_scope().query
100+
86101
logger.debug("specs are: %s", specs)
87102
logger.debug("changed_options are: %s", changed_options)
103+
logger.debug("query exprs are: %s", query_expr_strings)
88104

89105
if specs.provided and changed_options.provided:
90106
changed_name = "--changed-since" if changed_options.since else "--changed-diffspec"
@@ -99,17 +115,19 @@ def create(
99115
"use only one."
100116
)
101117

102-
if not changed_options.provided:
118+
if not (changed_options.provided or query_expr_strings):
103119
return specs
104120

105-
scm = get_scm()
106-
if not scm:
121+
git = get_scm()
122+
if not git:
107123
raise InvalidSpecConstraint(
108-
"The `--changed-*` options are not available without a recognized SCM (usually "
109-
"Git)."
124+
"{} are not available without a recognized SCM (currently just git)."
110125
)
126+
assert isinstance(git, Git)
127+
(changed_files,) = session.product_request(ChangedFiles, [
128+
Params(ChangedFilesRequest(changed_options, git=git))])
111129
changed_request = ChangedRequest(
112-
sources=tuple(changed_options.changed_files(scm=scm)),
130+
sources=tuple(changed_files),
113131
dependees=changed_options.dependees,
114132
)
115133
(changed_addresses,) = session.product_request(
@@ -125,6 +143,36 @@ def create(
125143
filesystem_specs.append(FilesystemLiteralSpec(file_name))
126144
else:
127145
address_specs.append(SingleAddress(address.spec_path, address.target_name))
146+
147+
148+
if query_expr_strings:
149+
# TODO(#7346): deprecate --owner-of and --changed-* in favor of --query versions, allow
150+
# pipelining of successive query expressions with the command-line target specs as the
151+
# initial input!
152+
if len(query_expr_strings) > 1:
153+
raise ValueError("Only one --query argument is currently supported! "
154+
f"Received: {query_expr_strings}.")
155+
156+
# TODO: allow returning @union types to avoid this double synchronous engine invocation!
157+
exprs = session.product_request(
158+
QueryComponentWrapper, [QueryParseInput(s) for s in query_expr_strings]
159+
)
160+
exprs = [ex.underlying for ex in exprs]
161+
162+
(expr_addresses,) = session.product_request(
163+
QueryAddresses, [Params(git, exprs[0], options_bootstrapper)]
164+
)
165+
logger.debug("expr addresses: %s", expr_addresses)
166+
dependencies = tuple(
167+
SingleAddress(a.spec_path, a.target_name) for a in expr_addresses
168+
)
169+
return Specs(
170+
address_specs=AddressSpecs(
171+
dependencies=dependencies, exclude_patterns=exclude_patterns, tags=tags
172+
),
173+
filesystem_specs=FilesystemSpecs(filesystem_specs),
174+
)
175+
128176
return Specs(
129177
address_specs=AddressSpecs(
130178
address_specs, exclude_patterns=exclude_patterns, tags=tags,

0 commit comments

Comments
 (0)