🐛 Bug
To Reproduce
Steps to reproduce the behavior:
// Reproduction: draw a random 2x3x4 tensor and print it, followed by its
// sigmoid computed through the free function and through the Tensor method.
// NOTE(review): g_cpu_f32_opt is not defined in this snippet; presumably it is
// a torch::TensorOptions selecting CPU / float32 (the output below prints
// CPUFloatType) -- confirm with the reporter.
torch::Tensor x = torch::randn({ 2,3,4 }, g_cpu_f32_opt);
// Input tensor.
std::cout << x << std::endl;
// Sigmoid via the free function.
std::cout << torch::sigmoid(x) << std::endl;
// Sigmoid via the method form.
std::cout << x.sigmoid() << std::endl;
- Result (erroneous values are marked in bold; e.g. sigmoid(0.5759) should be ≈ 0.6401, but 0.2246 is printed):
(1,.,.) =
0.5277 -0.2145 -0.7829 1.0721
-0.4872 1.9324 1.1444 -0.6179
-1.7279 0.5759 1.0575 0.5680
(2,.,.) =
0.1572 -0.0927 -0.0646 -0.0828
-0.0152 -0.1934 -1.1129 0.4097
-1.9093 1.7625 0.9707 -0.3617
[ CPUFloatType{2,3,4} ]
(1,.,.) =
0.6289 0.4466 0.3137 0.7450
0.3806 0.8735 0.7585 0.3503
0.1509 0.2246 0.1008 0.4153
(2,.,.) =
0.1641 0.4639 0.4211 0.1353
0.4962 0.4518 0.2473 0.6010
0.1291 0.8535 0.7253 0.4105
[ CPUFloatType{2,3,4} ]
(1,.,.) =
0.6289 0.4466 0.3137 0.7450
0.3806 0.8735 0.7585 0.3503
0.1509 0.2246 0.1008 0.4153
(2,.,.) =
0.1641 0.4639 0.4211 0.1353
0.4962 0.4518 0.2473 0.6010
0.1291 0.8535 0.7253 0.4105
[ CPUFloatType{2,3,4} ]
Expected behavior
Environment
Please copy and paste the output from our
environment collection script
(or fill out the checklist below manually).
You can get the script and run it with:
wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
- PyTorch Version : 1.8.1
- OS : Linux
- How you installed PyTorch: source
- Build command you used : ./scripts/build_mobile.sh
- Python version: 3.7.10
- cross-compile toolchain:gcc-arm-8.2-2018.08-x86_64-aarch64-linux-gnu
- chip: rk3326
- scripts/build_mobile.sh content
#!/bin/bash
##############################################################################
# Example command to build the mobile target.
##############################################################################
#
# This script shows how one can build a libtorch library optimized for mobile
# devices using host toolchain.
#
# Environment variables read by this script:
#   SELECTED_OP_LIST  optional path to an op list file for a custom build
#   BUILD_ROOT        build directory (default: $CAFFE2_ROOT/build_mobile)
#   MAX_JOBS          number of parallel build jobs (default: CPU count)
# Command-line arguments are forwarded to cmake after the defaults below.

set -e

export BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN=1
CAFFE2_ROOT="$( cd "$(dirname "$0")"/.. ; pwd -P)"

echo "Bash: $(/bin/bash --version | head -1)"
echo "Caffe2 path: $CAFFE2_ROOT"

CMAKE_ARGS=()
CMAKE_ARGS+=("-DCMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')")
CMAKE_ARGS+=("-DPYTHON_EXECUTABLE=$(python -c 'import sys; print(sys.executable)')")
CMAKE_ARGS+=("-DBUILD_CUSTOM_PROTOBUF=OFF")
CMAKE_ARGS+=("-DBUILD_SHARED_LIBS=ON")

# custom build with selected ops
if [ -n "${SELECTED_OP_LIST}" ]; then
  # Resolve to an absolute path. Quoting keeps paths containing whitespace
  # intact; '&&' prevents pwd from reporting the current directory when the
  # cd fails (the readability check below then reports the error).
  SELECTED_OP_LIST="$(cd "$(dirname "${SELECTED_OP_LIST}")" && pwd -P)/$(basename "${SELECTED_OP_LIST}")"
  echo "Choose SELECTED_OP_LIST file: $SELECTED_OP_LIST"
  if [ ! -r "${SELECTED_OP_LIST}" ]; then
    echo "Error: SELECTED_OP_LIST file ${SELECTED_OP_LIST} not found."
    exit 1
  fi
  CMAKE_ARGS+=("-DSELECTED_OP_LIST=${SELECTED_OP_LIST}")
fi

# If Ninja is installed, prefer it to Make
if [ -x "$(command -v ninja)" ]; then
  CMAKE_ARGS+=("-GNinja")
fi

# Disable unused dependencies
CMAKE_ARGS+=("-DUSE_ROCM=OFF")
CMAKE_ARGS+=("-DUSE_CUDA=OFF")
CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
CMAKE_ARGS+=("-DUSE_LMDB=OFF")
CMAKE_ARGS+=("-DUSE_LEVELDB=OFF")
CMAKE_ARGS+=("-DUSE_MPI=OFF")
CMAKE_ARGS+=("-DUSE_OPENMP=OFF")
CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=/opt/pytorch/rk3326.cmake")
CMAKE_ARGS+=("-DUSE_VULKAN=ON")
CMAKE_ARGS+=("-DUSE_MKLDNN=OFF")
CMAKE_ARGS+=("-DUSE_DISTRIBUTED=OFF")
CMAKE_ARGS+=("-DBUILD_PYTHON=OFF")
CMAKE_ARGS+=("-DUSE_VALGRIND=OFF")
CMAKE_ARGS+=("-DUSE_STATIC_NCCL=OFF")
CMAKE_ARGS+=("-DUSE_SYSTEM_NCCL=OFF")
CMAKE_ARGS+=("-DUSE_NCCL=OFF")
CMAKE_ARGS+=("-DUSE_RCCL=ON")
CMAKE_ARGS+=("-DCAFFE2_USE_EXCEPTION_PTR=OFF")
CMAKE_ARGS+=("-DCMAKE_VERBOSE_MAKEFILE=1")

# User-specified CMake arguments go last to allow overriding defaults
CMAKE_ARGS+=("$@")

# Now, actually build the mobile target.
BUILD_ROOT=${BUILD_ROOT:-"$CAFFE2_ROOT/build_mobile"}
INSTALL_PREFIX="${BUILD_ROOT}/install"
mkdir -p "$BUILD_ROOT"
cd "$BUILD_ROOT"
cmake "$CAFFE2_ROOT" \
  "-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX" \
  -DCMAKE_BUILD_TYPE=Release \
  "${CMAKE_ARGS[@]}"

# Cross-platform parallel build
if [ -z "$MAX_JOBS" ]; then
  if [ "$(uname)" = 'Darwin' ]; then
    MAX_JOBS=$(sysctl -n hw.ncpu)
  else
    MAX_JOBS=$(nproc)
  fi
fi

echo "Will install headers and libs to $INSTALL_PREFIX for further project usage."
cmake --build . --target install -- "-j${MAX_JOBS}"
echo "Installation completed, now you can copy the headers/libs from $INSTALL_PREFIX to your project directory."
# Cross-compilation settings for an aarch64 Linux target built with the
# gcc-arm 8.2 toolchain.

# Target platform: CMAKE_SYSTEM_NAME is the OS name ("uname -s") and
# CMAKE_SYSTEM_PROCESSOR the CPU name ("uname -p") CMake is building for.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_CROSSCOMPILING TRUE)

# Cross compilers, taken from the toolchain installation directory.
set(TOOLCHAIN_DIR "/home/compiler/gcc-arm-8.2-2018.08-x86_64-aarch64-linux-gnu")
set(CMAKE_C_COMPILER "${TOOLCHAIN_DIR}/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "${TOOLCHAIN_DIR}/bin/aarch64-linux-gnu-c++")

# Search policy for find_*(): these modes control whether CMAKE_FIND_ROOT_PATH
# and CMAKE_SYSROOT are used. Programs are never looked up there; libraries,
# headers and packages are looked up only there.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# Extra C++ flags for Release builds: optimization level, target CPU tuning
# and the language standard switch.
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O3 -mcpu=cortex-a35 --std=c++11")

# Preprocessor definitions added for every target in this directory tree.
add_definitions(-D__ARM_NEON__ -D__NEON__)
Additional context
cc @malfet
🐛 Bug
To Reproduce
Steps to reproduce the behavior:
(1,.,.) =
0.5277 -0.2145 -0.7829 1.0721
-0.4872 1.9324 1.1444 -0.6179
-1.7279 0.5759 1.0575 0.5680
(2,.,.) =
0.1572 -0.0927 -0.0646 -0.0828
-0.0152 -0.1934 -1.1129 0.4097
-1.9093 1.7625 0.9707 -0.3617
[ CPUFloatType{2,3,4} ]
(1,.,.) =
0.6289 0.4466 0.3137 0.7450
0.3806 0.8735 0.7585 0.3503
0.1509 0.2246 0.1008 0.4153
(2,.,.) =
0.1641 0.4639 0.4211 0.1353
0.4962 0.4518 0.2473 0.6010
0.1291 0.8535 0.7253 0.4105
[ CPUFloatType{2,3,4} ]
(1,.,.) =
0.6289 0.4466 0.3137 0.7450
0.3806 0.8735 0.7585 0.3503
0.1509 0.2246 0.1008 0.4153
(2,.,.) =
0.1641 0.4639 0.4211 0.1353
0.4962 0.4518 0.2473 0.6010
0.1291 0.8535 0.7253 0.4105
[ CPUFloatType{2,3,4} ]
Expected behavior
Environment
Please copy and paste the output from our
environment collection script
(or fill out the checklist below manually).
You can get the script and run it with:
Additional context
cc @malfet