Skip to content

Commit 84f4e79

Browse files
feat: allow marking input files of rules as ancient via the API or command-line interface (and thereby also via workflow-specific profiles). Putting this into a workflow-specific profile (or specifying it as an argument) allows overruling rerun triggers caused by file modification dates in cases where the user knows better.
1 parent c98b2e7 commit 84f4e79

3 files changed

Lines changed: 63 additions & 12 deletions

File tree

snakemake/cli.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
__email__ = "johannes.koester@uni-due.de"
44
__license__ = "MIT"
55

6+
from collections import defaultdict
67
import os
78
import re
89
import sys
@@ -102,6 +103,32 @@ def fallback(orig_value):
102103
)
103104

104105

106+
def parse_consider_ancient(args):
    """Parse --consider-ancient entries into a mapping of rule -> input items.

    Arguments
    args -- iterable of RULE=INPUTITEMS strings or None. INPUTITEMS is a
            comma-separated list of input item names or 0-based indices.

    Returns a defaultdict mapping each rule name to the set of its input items
    (names as str, indices as int). The mapping is empty if args is None or
    empty. Entries that name the same rule more than once are merged.

    Raises ValueError if an input item is neither an integer nor a valid
    identifier. The same error message is handed to parse_key_value_arg for
    entries that cannot be split into a RULE=INPUTITEMS pair.
    """
    errmsg = (
        "Invalid --consider-ancient definition: entries have to be defined as "
        "RULE=INPUTITEMS pairs, with INPUTITEMS being a list of input items of the "
        "rule (given as name or index (0-based)), separated by commas."
    )

    def parse_item(item):
        # Indices are tried first; anything else has to be a valid input name.
        try:
            return int(item)
        except ValueError:
            if item.isidentifier():
                return item
            # Suppress the int() failure, it is not helpful to the user.
            raise ValueError(f"{errmsg} (Unparsable value: {item!r})") from None

    consider_ancient = defaultdict(set)

    if args is not None:
        for entry in args:
            rule, items = parse_key_value_arg(entry, errmsg=errmsg, strip_quotes=True)
            # Merge instead of assign, so that repeating a rule does not
            # silently drop the items given earlier. Whitespace around items is
            # stripped, because int() tolerates it while isidentifier() does not.
            consider_ancient[rule] |= {
                parse_item(item.strip()) for item in items.split(",")
            }
    return consider_ancient
130+
131+
105132
def parse_set_resources(args):
106133
errmsg = (
107134
"Invalid resource definition: entries have to be defined as RULE:RESOURCE=VALUE, with "
@@ -730,6 +757,19 @@ def get_argument_parser(profiles=None):
730757
"output in your workflow updated."
731758
),
732759
)
760+
group_exec.add_argument(
761+
"--consider-ancient",
762+
metavar="RULE=INPUTITEMS",
763+
nargs="+",
764+
default=dict(),
765+
parse_func=parse_consider_ancient,
766+
help="Consider given input items of given rules as ancient, i.e. not triggering "
767+
"re-runs if they are newer than the output files. "
768+
"Putting this into a workflow specific profile (or specifying as argument) "
769+
"allows to overrule rerun triggers caused by file modification dates where the "
770+
"user knows better.",
771+
)
772+
733773
group_exec.add_argument(
734774
"--prioritize",
735775
"-P",

snakemake/rules.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,15 @@ def set_input(self, *input, **kwinput):
272272
Arguments
273273
input -- the list of input files
274274
"""
275-
for item in input:
276-
self._set_inoutput_item(item)
275+
276+
# Input items of this rule that shall be considered as ancient (given by name
# or 0-based position). Fall back to an empty set, so that the membership tests
# below also work for rules without an entry in the settings.
consider_ancient = self.workflow.dag_settings.consider_ancient.get(
    self.name, frozenset()
)

for i, item in enumerate(input):
    # Pass a boolean, not the index itself: the raw index would be falsy for
    # the first item and truthy for all others, regardless of the settings.
    self._set_inoutput_item(item, mark_ancient=i in consider_ancient)
for name, item in kwinput.items():
    self._set_inoutput_item(
        item, name=name, mark_ancient=name in consider_ancient
    )
279284

280285
@property
281286
def output(self):
@@ -400,7 +405,7 @@ def _update_item_wildcard_constraints(self, item):
400405
except ValueError as e:
401406
raise WorkflowError(e, snakefile=self.snakefile, lineno=self.lineno)
402407

403-
def _set_inoutput_item(self, item, output=False, name=None):
408+
def _set_inoutput_item(self, item, output=False, name=None, mark_ancient=False):
404409
"""
405410
Set an item to be input or output.
406411
@@ -462,12 +467,17 @@ def _set_inoutput_item(self, item, output=False, name=None):
462467
)
463468
)
464469

465-
# add the rule to the dependencies
466470
if rule_dependency is not None:
471+
# add the rule to the dependencies
467472
self.dependencies[item] = rule_dependency
473+
468474
if output:
469475
item = self._update_item_wildcard_constraints(item)
476+
if self.workflow.storage_settings.all_temp:
477+
# mark as temp if all output files shall be marked as temp
478+
item = flag(item, "temp")
470479
else:
480+
# input
471481
if (
472482
contains_wildcard_constraints(item)
473483
and self.workflow.exec_mode != ExecMode.SUBPROCESS
@@ -477,10 +487,8 @@ def _set_inoutput_item(self, item, output=False, name=None):
477487
self
478488
)
479489
)
480-
481-
if self.workflow.storage_settings.all_temp and output:
482-
# mark as temp if all output files shall be marked as temp
483-
item = flag(item, "temp")
490+
if mark_ancient:
491+
item = flag(item, "ancient")
484492

485493
# record rule if this is an output file output
486494
_item = IOFile(item, rule=self)
@@ -520,8 +528,8 @@ def _set_inoutput_item(self, item, output=False, name=None):
520528
else:
521529
try:
522530
start = len(inoutput)
523-
for i in item:
524-
self._set_inoutput_item(i, output=output)
531+
for subitem in item:
    # Propagate the ancient marking, so that a list-valued input item that
    # was selected via consider_ancient gets each of its files flagged.
    self._set_inoutput_item(
        subitem, output=output, mark_ancient=mark_ancient
    )
525533
if name:
526534
# if the list was named, make it accessible
527535
inoutput._set_name(name, start, end=len(inoutput))

snakemake/settings/types.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
from pathlib import Path
55
import re
6-
from typing import Any, Optional
6+
from typing import Any, List, Optional, Union
77
from typing import Mapping, Sequence, Set
88

99
import immutables
@@ -199,6 +199,9 @@ class DAGSettings(SettingsBase):
199199
allowed_rules: AnySet[str] = frozenset()
200200
rerun_triggers: AnySet[RerunTrigger] = RerunTrigger.all()
201201
max_inventory_wait_time: int = 20
202+
consider_ancient: Mapping[str, AnySet[Union[str, int]]] = field(
203+
default_factory=dict
204+
)
202205

203206
def _check(self):
204207
if self.batch is not None and self.forceall:

0 commit comments

Comments
 (0)