import torch
from torch import nn
import numpy as np
import onnxruntime as ort
# Random input batch (16 samples, 10 features) cast to float32 to match the
# ONNX model's expected input dtype. The entire first row is set to NaN to
# exercise the model's NaN handling during inference.
arr = np.random.randn(16, 10).astype(np.float32)
arr[0, :] = np.nan
def run_onnx_inference(sessions, inputs_by_session) -> "np.ndarray":
ort_outputs = []
for sess, inputs in zip(sessions, inputs_by_session, strict=True):
ort_inputs = {k.name: arr for k, arr in zip(sess.get_inputs(), inputs, strict=True)}
ort_outputs.append(np.hstack(sess.run(None, ort_inputs)))
return np.hstack(ort_outputs)
class ONNXModel(nn.Module):
def forward(self, x):
lower = torch.tensor([-10.0] * x.shape[1])
upper = torch.tensor([10.0] * x.shape[1])
x = x.clip(lower, upper)
x[torch.isnan(x)] = 0.0
return x
# Export the model to ONNX with a dynamic batch dimension (axis 0 of both
# input and output), then run the same NaN-containing batch through the
# exported graph on the CUDA and CPU execution providers.
export_model = ONNXModel()
filename = "test.onnx"
torch.onnx.export(
    export_model,
    torch.randn(16, 10),  # example input used for tracing the graph
    filename,
    verbose=False,
    export_params=True,
    opset_version=17,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": [0], "output": [0]},  # batch size is dynamic
)
# GPU inference (requires a CUDA-capable onnxruntime build).
onnx_gpu_model = ort.InferenceSession(filename, providers=["CUDAExecutionProvider"])
onnx_gpu = run_onnx_inference([onnx_gpu_model], [[arr]])
# CPU inference on the identical input.
onnx_cpu_model = ort.InferenceSession(filename, providers=["CPUExecutionProvider"])
onnx_cpu = run_onnx_inference([onnx_cpu_model], [[arr]])
# Element-wise difference; per the bug report below this is nonzero on the
# NaN row, exposing provider-dependent Clip/NaN behaviour.
diff = onnx_cpu - onnx_gpu
array([[10., 10., 10., 10., 10., 10., 10., 10., 10., 10.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
Bug Report
Describe the bug
Exporting a torch model with a `clip` operation results in different behaviour for NaN values when inferencing with ONNX on CPU vs. GPU.
System information
Reproduction instructions
This outputs
It looks like in the CPU case, `clip(NaN, -value, value)` returns `NaN`, but in the GPU case it returns `-value`.