Skip to content

CI fails frequently after introducing pytest-xdist #6206

@nabenabe0928

Description

@nabenabe0928

Expected behavior

I will update the issue description whenever I find another failure.

In principle, the CI sometimes fails because of the parallel test execution introduced by the following PR:

Environment

CI environment

Error messages, stack traces, or logs

test_get_timeline_plot_with_killed_running_trials
____ test_get_timeline_plot_with_killed_running_trials[0.0-plot_timeline0] _____
[gw7] linux -- Python 3.8.18 /opt/hostedtoolcache/Python/3.8.18/x64/bin/python

plot_timeline = <function plot_timeline at 0x7ff526abbb80>, waiting_time = 0.0

    @parametrize_plot_timeline
    @pytest.mark.parametrize("waiting_time", [0.0, 1.5])
    def test_get_timeline_plot_with_killed_running_trials(
        plot_timeline: Callable[..., Any], waiting_time: float
    ) -> None:
        def _objective_with_sleep(trial: optuna.Trial) -> float:
            sleep_start_datetime = datetime.datetime.now()
            # Spin waiting is used here because high accuracy is necessary even in weak VM.
            # Please check the motivation of the bugfix in https://github.com/optuna/optuna/pull/5549/
            while datetime.datetime.now() - sleep_start_datetime < datetime.timedelta(seconds=0.1):
                # `sleep(0.1)` is only guaranteed to rest for more than 0.1 second; the actual time
                # depends on the OS. spin waiting is used here to rest for 0.1 second as precisely as
                # possible without voluntarily releasing the context.
                pass
            assert datetime.datetime.now() - sleep_start_datetime < datetime.timedelta(seconds=0.19)
            trial.suggest_float("x", -1.0, 1.0)
            return 1.0
    
        study = optuna.create_study()
        trial = optuna.trial.create_trial(
            params={"x": 0.0},
            distributions={"x": optuna.distributions.FloatDistribution(-1.0, 1.0)},
            value=None,
            state=TrialState.RUNNING,
        )
        study.add_trial(trial)
>       study.optimize(_objective_with_sleep, n_trials=2)

tests/visualization_tests/test_timeline.py:221: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/optuna/study/study.py:490: in optimize
    _optimize(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/optuna/study/_optimize.py:63: in _optimize
    _optimize_sequential(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/optuna/study/_optimize.py:160: in _optimize_sequential
    frozen_trial_id = _run_trial(study, func, catch)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/optuna/study/_optimize.py:258: in _run_trial
    raise func_err
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/optuna/study/_optimize.py:201: in _run_trial
    value_or_values = func(trial)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

trial = <optuna.trial._trial.Trial object at 0x7ff523f32d00>

    def _objective_with_sleep(trial: optuna.Trial) -> float:
        sleep_start_datetime = datetime.datetime.now()
        # Spin waiting is used here because high accuracy is necessary even in weak VM.
        # Please check the motivation of the bugfix in https://github.com/optuna/optuna/pull/5549/
        while datetime.datetime.now() - sleep_start_datetime < datetime.timedelta(seconds=0.1):
            # `sleep(0.1)` is only guaranteed to rest for more than 0.1 second; the actual time
            # depends on the OS. spin waiting is used here to rest for 0.1 second as precisely as
            # possible without voluntarily releasing the context.
            pass
>       assert datetime.datetime.now() - sleep_start_datetime < datetime.timedelta(seconds=0.19)
E       AssertionError: assert (datetime.datetime(2025, 7, 16, 18, 15, 55, 98863) - datetime.datetime(2025, 7, 16, 18, 15, 54, 907173)) < datetime.timedelta(microseconds=190000)
E        +  where datetime.datetime(2025, 7, 16, 18, 15, 55, 98863) = <built-in method now of type object at 0x7ff52ea36d40>()
E        +    where <built-in method now of type object at 0x7ff52ea36d40> = <class 'datetime.datetime'>.now
E        +      where <class 'datetime.datetime'> = datetime.datetime
E        +  and   datetime.timedelta(microseconds=190000) = <class 'datetime.timedelta'>(seconds=0.19)
E        +    where <class 'datetime.timedelta'> = datetime.timedelta

tests/visualization_tests/test_timeline.py:209: AssertionError
tests/samplers_tests/test_brute_force.py

[!NOTE]
This failure is already reported at:

______________________ test_parallel_optimize_with_sleep _______________________
[gw3] linux -- Python 3.8.18 /opt/hostedtoolcache/Python/3.8.18/x64/bin/python

    def test_parallel_optimize_with_sleep() -> None:
        def objective(trial: Trial) -> float:
            x = trial.suggest_int("x", 0, 1)
            time.sleep(x)
            y = trial.suggest_int("y", 0, 1)
            return x + y
    
        # Seed is fixed to reproduce the same result.
        # See: https://github.com/optuna/optuna/issues/5780
        study = optuna.create_study(sampler=samplers.BruteForceSampler(seed=42))
        study.optimize(objective, n_jobs=2)
        expected_suggested_values = [
            {"x": 0, "y": 0},
            {"x": 0, "y": 1},
            {"x": 1, "y": 0},
        ]
        all_suggested_values = [t.params for t in study.trials]
>       assert len(all_suggested_values) == len(expected_suggested_values)
E       AssertionError: assert 4 == 3
E        +  where 4 = len([{'x': 0, 'y': 1}, {'x': 1, 'y': 0}, {'x': 1, 'y': 1}, {'x': 0, 'y': 0}])
E        +  and   3 = len([{'x': 0, 'y': 0}, {'x': 0, 'y': 1}, {'x': 1, 'y': 0}])

tests/samplers_tests/test_brute_force.py:324: AssertionError
tests/test_experimental.py
_______________________ test_experimental_func_decorator _______________________
[gw0] linux -- Python 3.13.5 /opt/hostedtoolcache/Python/3.13.5/x64/bin/python

    def test_experimental_func_decorator() -> None:
        version = "1.1.0"
        decorator_experimental = _experimental.experimental_func(version)
        assert callable(decorator_experimental)
    
        decorated_func = decorator_experimental(_sample_func)
        assert decorated_func.__name__ == _sample_func.__name__
        assert decorated_func.__doc__ == _experimental._EXPERIMENTAL_NOTE_TEMPLATE.format(ver=version)
    
        with pytest.warns(ExperimentalWarning) as warnings:
            decorated_func()
    
>       (warning,) = warnings
        ^^^^^^^^^^
E       ValueError: too many values to unpack (expected 1)

tests/test_experimental.py:61: ValueError
(Resolved) test_log_completed_trial_skip_storage_access
_________________ test_log_completed_trial_skip_storage_access _________________
[gw4] linux -- Python 3.9.23 /opt/hostedtoolcache/Python/3.9.23/x64/bin/python

    def test_log_completed_trial_skip_storage_access() -> None:
        study = create_study()
    
        # Create a trial to retrieve it as the `study.best_trial`.
        study.optimize(lambda _: 0.0, n_trials=1)
        frozen_trial = study.best_trial
    
        storage = study._storage
    
        with patch.object(storage, "get_best_trial", wraps=storage.get_best_trial) as mock_object:
            study._log_completed_trial(frozen_trial.values, frozen_trial.number, frozen_trial.params)
>           assert mock_object.call_count == 1
E           AssertionError: assert 0 == 1
E            +  where 0 = <MagicMock name='get_best_trial' id='140017043141632'>.call_count

tests/study_tests/test_study.py:1154: AssertionError

Steps to reproduce

Run the CI many times.
Some tests fail stochastically.

Additional context (optional)

No response

Metadata

Metadata

Assignees

Labels

bug — Issue/PR about behavior that is broken. Not for typos/examples/CI/tests but for Optuna itself.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions