Skip to content

Replace np.isnan with math.isnan#6080

Merged
y0z merged 2 commits intooptuna:masterfrom
kAIto47802:replace-numpy-isnan-with-math-isnan
May 13, 2025
Merged

Replace np.isnan with math.isnan#6080
y0z merged 2 commits intooptuna:masterfrom
kAIto47802:replace-numpy-isnan-with-math-isnan

Conversation

@kAIto47802
Copy link
Copy Markdown
Collaborator

@kAIto47802 kAIto47802 commented May 9, 2025

Motivation

Currently, np.isnan is used to detect NaN in distributions.py, but since every value passed through is a pure Python float, math.isnan is semantically identical and much faster.
This is especially critical for samplers such as TPESampler and BruteForceSampler, which invoke to_internal_repr $N$ times per trial, resulting in $O(N^2)$ total calls, where $N$ is the total number of trials.

Description of the changes

  • Replace np.isnan in distributions.py with math.isnan.

Benchmarking results

I benchmarked TPESampler and BruteForceSampler and confirmed a significant speedup with this change. :)

The result is as follows:

The solid lines denote the mean and the translucent areas denote the standard error, both computed over five independent runs with different random seeds.

The objective function I used here is as follows:

def objective(trial: optuna.Trial) -> float:
    """Constant toy objective that only exercises the suggest API.

    The suggested values are deliberately discarded; the benchmark measures
    the sampler's suggestion overhead, not the optimization result.
    """
    trial.suggest_float("x", 0.0, 10.0, step=0.1)
    trial.suggest_int("y", 0, 10)
    return 0.0

The benchmarking code and the visualization code I used here are as follows:

Benchmarking code
from argparse import ArgumentParser, Namespace
from datetime import datetime
from typing import cast

import numpy as np
import optuna


def objective(trial: optuna.Trial) -> float:
    """Benchmark objective: draw one float and one int, always score 0.0."""
    # Return value is irrelevant — only the cost of `suggest_*` matters.
    trial.suggest_float("x", 0.0, 10.0, step=0.1)
    trial.suggest_int("y", 0, 10)
    return 0.0


def _extract_elapsed_time(study: optuna.study.Study) -> list[float]:
    """Return seconds from the first trial's start to each trial's completion.

    The `cast` calls narrow `Optional[datetime]` for the type checker; all
    trials are complete by the time this is called.
    """
    # The reference point is identical for every trial, so compute it once.
    start = cast(datetime, study.trials[0].datetime_start)
    elapsed: list[float] = []
    for trial in study.trials:
        finished = cast(datetime, trial.datetime_complete)
        elapsed.append((finished - start).total_seconds())
    return elapsed


def _measure_time(
    sampler: optuna.samplers.BaseSampler,
    n_trials: int = 100,
) -> list[float]:
    """Run one study with *sampler* and return per-trial elapsed times.

    Args:
        sampler: The sampler instance to benchmark.
        n_trials: How many trials to run.

    Returns:
        Elapsed seconds (from the first trial's start) for every trial.
    """
    bench_study = optuna.create_study(sampler=sampler)
    bench_study.optimize(objective, n_trials=n_trials)
    return _extract_elapsed_time(bench_study)


def main(args: Namespace) -> None:
    """Benchmark each sampler over several seeds and save summary statistics.

    Writes three .npz files (mean, std, experimental settings), each suffixed
    with ``args.suffix``.
    """
    samplers = {
        "tpe": optuna.samplers.TPESampler,
        "brute-force": optuna.samplers.BruteForceSampler,
    }

    # One (n_seeds, n_trials) array of elapsed times per sampler.
    data: dict = {}
    for key, sampler_cls in samplers.items():
        runs = []
        for seed_offset in range(args.n_seeds):
            runs.append(
                _measure_time(
                    sampler=sampler_cls(seed=42 + seed_offset),
                    n_trials=args.n_trials,
                )
            )
        data[key] = np.array(runs)

    # Aggregate across seeds (axis 0) so each entry is length n_trials.
    mean = {}
    std = {}
    for key, values in data.items():
        mean[key] = np.mean(values, axis=0)
        std[key] = np.std(values, axis=0)

    np.savez(f"mean{args.suffix}.npz", **mean)
    np.savez(f"std{args.suffix}.npz", **std)
    np.savez(
        f"experimental_settings{args.suffix}.npz",
        n_trials=args.n_trials,
        n_seeds=args.n_seeds,
    )


if __name__ == "__main__":
    # Command-line entry point: collect the benchmark settings, then run.
    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "--n_trials",
        type=int,
        default=100,
        help="Number of trials to run for each sampler.",
    )
    arg_parser.add_argument(
        "--n_seeds",
        type=int,
        default=5,
        help="Number of seeds to use for each sampler.",
    )
    arg_parser.add_argument(
        "--suffix",
        type=str,
        default="",
        help="Suffix to append to the output file names.",
    )
    main(arg_parser.parse_args())
Visualization code
import numpy as np
import plotly.graph_objects as go


def plot_data(
    mean1: dict[str, np.ndarray],
    std1: dict[str, np.ndarray],
    mean2: dict[str, np.ndarray],
    std2: dict[str, np.ndarray],
    n_trials: int,
    n_seeds: int,
    colors: list[str],
    linestyles: dict[str, str],
    names: dict[str, str],
    labels: list[str],
    xlabel: str,
    ylabel: str,
) -> go.Figure:
    """Plot mean elapsed time per trial with a +/- standard-error band.

    Args:
        mean1: Per-sampler mean curves for the first dataset.
        std1: Per-sampler std-dev curves for the first dataset.
        mean2: Per-sampler mean curves for the second dataset.
        std2: Per-sampler std-dev curves for the second dataset.
        n_trials: Length of each curve; defines the x-axis (1..n_trials).
        n_seeds: Seed count; std is divided by sqrt(n_seeds) to get the
            standard error.
        colors: One color per dataset (parallel to ``labels``).
        linestyles: Plotly dash style keyed by sampler name.
        names: Display name keyed by sampler name.
        labels: Legend suffix per dataset (parallel to ``colors``).
        xlabel: X-axis title.
        ylabel: Y-axis title.

    Returns:
        The assembled Plotly figure.
    """
    fig = go.Figure()
    # 1-based trial numbers on the x-axis.
    dx = np.arange(n_trials) + 1
    for color, label, mean, std in zip(colors, labels, [mean1, mean2], [std1, std2]):
        for name in mean.keys():
            # Two passes per curve: sign=+1 adds the upper SE bound, sign=-1
            # the lower. The mean line itself is added in both passes (two
            # identical traces); only the second carries the legend entry.
            # NOTE(review): fill="tonexty" fills back to the immediately
            # preceding trace (the mean line), so trace-addition order here
            # is load-bearing — do not reorder the add_trace calls.
            for show_legend, sign in zip([False, True], [1, -1]):
                fig.add_trace(
                    go.Scatter(
                        x=dx,
                        y=mean[name],
                        mode="lines",
                        name=names[name] + " " + label,
                        line=dict(color=color, dash=linestyles[name], width=2),
                        showlegend=show_legend,
                    )
                )
                fig.add_trace(
                    go.Scatter(
                        x=dx,
                        # std / sqrt(n_seeds) == standard error of the mean.
                        y=mean[name] + sign * std[name] / np.sqrt(n_seeds),
                        mode="lines",
                        fill="tonexty",
                        line=dict(color=color, width=0),
                        showlegend=False,
                    )
                )
    fig.update_layout(
        xaxis_title=xlabel,
        yaxis_title=ylabel,
        width=800,
        height=500,
        template="simple_white",
        font=dict(family="Computer Modern", size=16),
        legend=dict(
            x=0.01,
            y=0.95,
            xanchor="left",
            yanchor="top",
            bordercolor="black",
            borderwidth=1,
            tracegroupgap=0,
        ),
    )
    fig.update_xaxes(
        showgrid=True,
        mirror=True,
        gridcolor="gray",
        griddash="solid",
        range=[0, None],
    )
    fig.update_yaxes(
        showgrid=True,
        mirror=True,
        gridcolor="gray",
        griddash="solid",
        range=[0, None],
    )

    return fig


if __name__ == "__main__":
    # Load the per-sampler summary curves produced by the benchmarking script
    # (run once on this PR's branch and once on master).
    mean1 = np.load("mean1.npz")
    std1 = np.load("std1.npz")
    mean2 = np.load("mean2.npz")
    std2 = np.load("std2.npz")

    experimental_settings1 = np.load("experimental_settings1.npz")
    experimental_settings2 = np.load("experimental_settings2.npz")
    # BUG FIX: `np.load` on an .npz returns an NpzFile, and `==` between two
    # NpzFile objects is not an element-wise comparison — it falls back to
    # object identity in older NumPy (so the assert always failed) and to a
    # fragile Mapping comparison of ndarrays otherwise. Compare the key sets
    # and the array contents explicitly instead.
    assert set(experimental_settings1.files) == set(experimental_settings2.files)
    for key in experimental_settings1.files:
        assert np.array_equal(
            experimental_settings1[key], experimental_settings2[key]
        ), f"Experimental setting {key!r} differs between the two runs."

    colors = ["#0072B2", "#CC79A7"]
    linestyles = {"tpe": "solid", "brute-force": "dash"}
    names = {"tpe": "TPE", "brute-force": "Brute Force"}
    labels = ["(With this PR)", "(Original)"]
    xlabel = "Number of Trials"
    ylabel = "Elapsed Time / s"

    fig = plot_data(
        mean1=mean1,
        std1=std1,
        mean2=mean2,
        std2=std2,
        # np.savez stores scalars as 0-d arrays; cast back to plain ints.
        n_trials=int(experimental_settings1["n_trials"]),
        n_seeds=int(experimental_settings1["n_seeds"]),
        colors=colors,
        linestyles=linestyles,
        names=names,
        labels=labels,
        xlabel=xlabel,
        ylabel=ylabel,
    )
    fig.write_image("benchmark_time.png")

@kAIto47802 kAIto47802 changed the title Replace np.isnan with math.isnan Replace np.isnan with math.isnan May 9, 2025
@c-bata c-bata assigned not522 and y0z May 13, 2025
@c-bata
Copy link
Copy Markdown
Member

c-bata commented May 13, 2025

@fusawa-yugo @y0z @not522 Could you review this PR?

@fusawa-yugo
Copy link
Copy Markdown
Contributor

fusawa-yugo commented May 13, 2025

Thank you for the PR!!
LGTM!!

I could replicate the result using your test code (n_trials=100, n_seeds=5).
benchmark_time


And, I checked that math.isnan was significantly faster than np.isnan.

isnan_speed_comparison

Code
import numpy as np
import math
import timeit

import matplotlib.pyplot as plt

def compare_isnan_speed():
    """Time np.isnan against math.isnan on a scalar NaN and save a figure.

    Each implementation is timed over 10 repetitions of one million calls;
    the result is written to 'isnan_speed_comparison.png'.
    """
    nan = float('nan')
    n_repeats = 1000000

    # Keep the two timing loops structurally identical so the measured
    # quantity (including the module-attribute lookup per call) matches.
    np_times = []
    for _ in range(10):
        begin = timeit.default_timer()
        for _ in range(n_repeats):
            np.isnan(nan)
        np_times.append(timeit.default_timer() - begin)

    math_times = []
    for _ in range(10):
        begin = timeit.default_timer()
        for _ in range(n_repeats):
            math.isnan(nan)
        math_times.append(timeit.default_timer() - begin)

    labels = ['np.isnan', 'math.isnan']
    avg_times = [np.mean(np_times), np.mean(math_times)]

    plt.figure(figsize=(12, 6))

    # Left panel: mean runtime of each implementation.
    plt.subplot(1, 2, 1)
    plt.bar(labels, avg_times, color=['blue', 'orange'])
    plt.ylabel('Time (seconds)')
    plt.title('Average Time Comparison')

    # Right panel: runtime of every individual repetition.
    plt.subplot(1, 2, 2)
    trial_ids = range(1, 11)
    plt.plot(trial_ids, np_times, label='np.isnan', marker='o', color='blue')
    plt.plot(trial_ids, math_times, label='math.isnan', marker='o', color='orange')
    plt.xlabel('Trial')
    plt.ylabel('Time (seconds)')
    plt.title('Time per Trial')
    plt.legend()

    plt.tight_layout()
    plt.savefig('isnan_speed_comparison.png')

if __name__ == "__main__":
    # Run the benchmark (and write the PNG) when executed as a script.
    compare_isnan_speed()

@c-bata c-bata added the code-fix Change that does not change the behavior, such as code refactoring. label May 13, 2025
Copy link
Copy Markdown
Member

@not522 not522 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM! I confirmed that it works with Real values such as np.float32.

import numpy as np
import optuna

def objective(trial):
    """Suggest float/int/categorical values using np.float32 bounds.

    Exercises the distributions with non-builtin Real values to confirm
    they are accepted after the math.isnan change.
    """
    # Same np.float32 bounds reused by all three suggest calls.
    low, high = np.float32(-10.0), np.float32(10.0)
    x = trial.suggest_float("x", low, high)
    y = trial.suggest_int("y", low, high)
    z = trial.suggest_categorical("z", choices=(low, high))
    return x + y + z

# Smoke test: 20 trials should complete without raising.
study = optuna.create_study()
study.optimize(objective, n_trials=20)

@not522 not522 removed their assignment May 13, 2025
Copy link
Copy Markdown
Member

@y0z y0z left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

I confirmed that this PR's implementation is faster than master when n_trials=1000.

yozaki% git checkout master
Switched to branch 'master'
Your branch is up to date with 'origin/master'.
yozaki% time PYTHONPATH=. python tpe_test.py
PYTHONPATH=. python tpe_test.py  8.60s user 0.41s system 107% cpu 8.410 total
yozaki% gh pr checkout 6080
Switched to branch 'replace-numpy-isnan-with-math-isnan'
yozaki% time PYTHONPATH=. python tpe_test.py
PYTHONPATH=. python tpe_test.py  7.53s user 0.43s system 118% cpu 6.717 total

@y0z y0z added this to the v4.4.0 milestone May 13, 2025
@y0z y0z merged commit 2a7f084 into optuna:master May 13, 2025
14 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

code-fix Change that does not change the behavior, such as code refactoring.

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants