Skip to content

[libtorch] Incorrect sigmoid result on ARM chip (rk3326) #65673

@liuyx-baller

Description

@liuyx-baller

🐛 Bug

To Reproduce

Steps to reproduce the behavior:

  • code:
        // Draw a 2x3x4 tensor with torch::randn. g_cpu_f32_opt is defined outside this
        // snippet (presumably CPU/float32 tensor options, matching the CPUFloatType
        // printed in the results below -- confirm).
        torch::Tensor x = torch::randn({ 2,3,4 }, g_cpu_f32_opt);
        // Print the input, then its sigmoid via the free function and via the
        // tensor method, so the printed outputs can be compared against the input.
        std::cout << x << std::endl;
        std::cout << torch::sigmoid(x) << std::endl;
        std::cout << x.sigmoid() << std::endl;
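  • result (incorrect values were marked in bold in the original post; e.g. the input 0.5759 yields 0.2246, whereas sigmoid(0.5759) ≈ 0.6401)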

(1,.,.) =
0.5277 -0.2145 -0.7829 1.0721
-0.4872 1.9324 1.1444 -0.6179
-1.7279 0.5759 1.0575 0.5680

(2,.,.) =
0.1572 -0.0927 -0.0646 -0.0828
-0.0152 -0.1934 -1.1129 0.4097
-1.9093 1.7625 0.9707 -0.3617
[ CPUFloatType{2,3,4} ]
(1,.,.) =
0.6289 0.4466 0.3137 0.7450
0.3806 0.8735 0.7585 0.3503
0.1509 0.2246 0.1008 0.4153

(2,.,.) =
0.1641 0.4639 0.4211 0.1353
0.4962 0.4518 0.2473 0.6010
0.1291 0.8535 0.7253 0.4105
[ CPUFloatType{2,3,4} ]
(1,.,.) =
0.6289 0.4466 0.3137 0.7450
0.3806 0.8735 0.7585 0.3503
0.1509 0.2246 0.1008 0.4153

(2,.,.) =
0.1641 0.4639 0.4211 0.1353
0.4962 0.4518 0.2473 0.6010
0.1291 0.8535 0.7253 0.4105
[ CPUFloatType{2,3,4} ]

Expected behavior

Environment

Please copy and paste the output from our
environment collection script
(or fill out the checklist below manually).

You can get the script and run it with:

wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
  • PyTorch Version : 1.8.1
  • OS : Linux
  • How you installed PyTorch: source
  • Build command you used : ./scripts/build_mobile.sh
  • Python version: 3.7.10
  • cross-compile toolchain: gcc-arm-8.2-2018.08-x86_64-aarch64-linux-gnu
  • chip: rk3326
  • scripts/build_mobile.sh content
#!/bin/bash
##############################################################################
# Example command to build the mobile target.
##############################################################################
#
# This script shows how one can build a libtorch library optimized for mobile
# devices using host toolchain.
#
# Optional inputs:
#   SELECTED_OP_LIST  path to an op-list file, forwarded to CMake as
#                     -DSELECTED_OP_LIST for a custom build
#   BUILD_ROOT        build directory (default: $CAFFE2_ROOT/build_mobile)
#   MAX_JOBS          number of parallel build jobs (default: detected CPU count)
#   "$@"              extra CMake arguments, appended after the defaults

# Abort on the first failing command.
set -e

export BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN=1
# Repository root: the parent of the directory containing this script.
CAFFE2_ROOT="$( cd "$(dirname "$0")"/.. ; pwd -P)"

echo "Bash: $(/bin/bash --version | head -1)"
echo "Caffe2 path: $CAFFE2_ROOT"

CMAKE_ARGS=()
CMAKE_ARGS+=("-DCMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')")
CMAKE_ARGS+=("-DPYTHON_EXECUTABLE=$(python -c 'import sys; print(sys.executable)')")
CMAKE_ARGS+=("-DBUILD_CUSTOM_PROTOBUF=OFF")
CMAKE_ARGS+=("-DBUILD_SHARED_LIBS=ON")
# custom build with selected ops
if [ -n "${SELECTED_OP_LIST}" ]; then
  # Resolve to an absolute path; quoted so paths containing spaces survive.
  SELECTED_OP_LIST="$(cd "$(dirname "$SELECTED_OP_LIST")"; pwd -P)/$(basename "$SELECTED_OP_LIST")"
  echo "Choose SELECTED_OP_LIST file: $SELECTED_OP_LIST"
  if [ ! -r "${SELECTED_OP_LIST}" ]; then
    echo "Error: SELECTED_OP_LIST file ${SELECTED_OP_LIST} not found."
    exit 1
  fi
  CMAKE_ARGS+=("-DSELECTED_OP_LIST=${SELECTED_OP_LIST}")
fi

# If Ninja is installed, prefer it to Make
if [ -x "$(command -v ninja)" ]; then
  CMAKE_ARGS+=("-GNinja")
fi

# Disable unused dependencies
CMAKE_ARGS+=("-DUSE_ROCM=OFF")
CMAKE_ARGS+=("-DUSE_CUDA=OFF")
CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
CMAKE_ARGS+=("-DUSE_LMDB=OFF")
CMAKE_ARGS+=("-DUSE_LEVELDB=OFF")
CMAKE_ARGS+=("-DUSE_MPI=OFF")
CMAKE_ARGS+=("-DUSE_OPENMP=OFF")
# Cross-compilation toolchain file (hard-coded absolute path on the build host).
CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=/opt/pytorch/rk3326.cmake")
CMAKE_ARGS+=("-DUSE_VULKAN=ON")
CMAKE_ARGS+=("-DUSE_MKLDNN=OFF")
CMAKE_ARGS+=("-DUSE_DISTRIBUTED=OFF")
CMAKE_ARGS+=("-DBUILD_PYTHON=OFF")
CMAKE_ARGS+=("-DUSE_VALGRIND=OFF")
CMAKE_ARGS+=("-DUSE_STATIC_NCCL=OFF")
CMAKE_ARGS+=("-DUSE_SYSTEM_NCCL=OFF")
CMAKE_ARGS+=("-DUSE_NCCL=OFF")
# NOTE(review): USE_RCCL=ON sits among the "disable" flags while USE_ROCM is
# OFF -- confirm this is intended.
CMAKE_ARGS+=("-DUSE_RCCL=ON")
CMAKE_ARGS+=("-DCAFFE2_USE_EXCEPTION_PTR=OFF")


CMAKE_ARGS+=("-DCMAKE_VERBOSE_MAKEFILE=1")

# User-specified CMake arguments go last to allow overriding defaults
CMAKE_ARGS+=("$@")

# Now, actually configure the mobile build.
BUILD_ROOT=${BUILD_ROOT:-"$CAFFE2_ROOT/build_mobile"}
INSTALL_PREFIX="${BUILD_ROOT}/install"
mkdir -p "$BUILD_ROOT"
cd "$BUILD_ROOT"
cmake "$CAFFE2_ROOT" \
    -DCMAKE_INSTALL_PREFIX="$INSTALL_PREFIX" \
    -DCMAKE_BUILD_TYPE=Release \
    "${CMAKE_ARGS[@]}"

# Cross-platform parallel build
if [ -z "$MAX_JOBS" ]; then
  if [ "$(uname)" = 'Darwin' ]; then
    MAX_JOBS=$(sysctl -n hw.ncpu)
  else
    MAX_JOBS=$(nproc)
  fi
fi

echo "Will install headers and libs to $INSTALL_PREFIX for further project usage."
cmake --build . --target install -- "-j${MAX_JOBS}"
echo "Installation completed, now you can copy the headers/libs from $INSTALL_PREFIX to your project directory."

  • rk3326.cmake content
# Toolchain description for cross-compiling to an aarch64 Linux target.
set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_SYSTEM_NAME Linux)          # target OS name, as "uname -s" reports it
set(CMAKE_SYSTEM_PROCESSOR aarch64)   # target CPU name, as "uname -p" reports it

# C and C++ cross compilers, both taken from the same toolchain install tree.
set(TOOLCHAIN_DIR "/home/compiler/gcc-arm-8.2-2018.08-x86_64-aarch64-linux-gnu")
set(CMAKE_C_COMPILER "${TOOLCHAIN_DIR}/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "${TOOLCHAIN_DIR}/bin/aarch64-linux-gnu-c++")

# Per-kind policy for whether CMAKE_FIND_ROOT_PATH / CMAKE_SYSROOT are consulted
# by the find_*() commands: programs never use them, while libraries, headers
# and packages use them exclusively.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# Extra flags for Release C++ compiles only: optimization level, CPU tuning
# and the language standard switch.
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O3 -mcpu=cortex-a35 --std=c++11")

# Force the NEON feature macros on for every compile in the build.
# NOTE(review): these names look like compiler-provided feature macros; confirm
# that defining them by hand is needed with this aarch64 toolchain.
add_definitions(-D__ARM_NEON__ -D__NEON__)

Additional context

cc @malfet

Metadata

Metadata

Assignees

No one assigned

    Labels

    module: arm — Related to ARM architecture builds of PyTorch. Includes Apple M1. triaged — This issue has been looked at by a team member, and triaged and prioritized into an appropriate module

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions