-
-
Notifications
You must be signed in to change notification settings - Fork 56.5k
Invalid read at Depthwise convolution with stride 2 and Winograd #23151
Copy link
Copy link
Closed
Description
System Information
Ubuntu 20.04
GCC 9.4.0
Detailed description
The problem reproduces with stride=2 when Winograd is enabled (which appears to be the default):
| Winograd | stride=1 | stride=2 |
|---|---|---|
| true | OK | Invalid read |
| false | OK | OK |
valgrind --leak-check=full output:
==19564== Memcheck, a memory error detector
==19564== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==19564== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==19564== Command: ./opencv_build/bin/opencv_test_dnn --gtest_filter=Layer_Test_DWconv_Stride2.Accuracy
==19564==
CTEST_FULL_OUTPUT
OpenCV version: 4.7.0-dev
OpenCV VCS version: 4.6.0-743-gc63d79c5b1-dirty
Build type: Debug
Compiler: /usr/bin/c++ (ver 9.4.0)
[ INFO:0@4.038] global registry_parallel.impl.hpp:96 ParallelBackendRegistry core(parallel): Enabled backends(3, sorted by priority): ONETBB(1000); TBB(990); OPENMP(980)
Parallel framework: pthreads (nthreads=8)
CPU features: SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?
Intel(R) IPP version: ippIP AVX2 (l9) 2020.0.0 Gold (-) Oct 19 2019
Intel(R) IPP features code: 0x8000
TEST: Skip tests with tags: 'mem_6gb', 'verylong', 'debug_verylong', 'dnn_skip_opencv_backend', 'dnn_skip_cpu', 'dnn_skip_ocl', 'dnn_skip_ocl_fp16', 'dnn_skip_onnx_conformance', 'dnn_skip_parser'
Note: Google Test filter = Layer_Test_DWconv_Stride2.Accuracy
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from Layer_Test_DWconv_Stride2
[ RUN ] Layer_Test_DWconv_Stride2.Accuracy
==19564== Invalid read of size 16
==19564== at 0x4C05D14: _mm_loadu_ps (xmmintrin.h:934)
==19564== by 0x4C05D14: cv::hal_baseline::v_load_deinterleave(float const*, cv::hal_baseline::v_float32x4&, cv::hal_baseline::v_float32x4&) (intrin_sse.hpp:2321)
==19564== by 0x4C06B93: cv::dnn::depthWiseBlockConv2D(float const*, int, int, int, int, int, int, int, int, float const*, float const*, float const*, int, int, float*, int, int, int, bool) (depthwise_convolution.cpp:138)
==19564== by 0x4C0860D: cv::dnn::runDepthwise(cv::_InputArray const&, cv::_OutputArray const&, cv::Ptr<cv::dnn::FastConv> const&, cv::dnn::dnn4_v20221220::ActivationLayer*, std::vector<float, std::allocator<float> > const&, bool)::{lambda(cv::Range const&)#1}::operator()(cv::Range const&) const (depthwise_convolution.cpp:427)
==19564== by 0x4C096E5: std::_Function_handler<void (cv::Range const&), cv::dnn::runDepthwise(cv::_InputArray const&, cv::_OutputArray const&, cv::Ptr<cv::dnn::FastConv> const&, cv::dnn::dnn4_v20221220::ActivationLayer*, std::vector<float, std::allocator<float> > const&, bool)::{lambda(cv::Range const&)#1}>::_M_invoke(std::_Any_data const&, cv::Range const&) (std_function.h:300)
==19564== by 0x4C09556: std::function<void (cv::Range const&)>::operator()(cv::Range const&) const (std_function.h:688)
==19564== by 0x4C05A10: cv::ParallelLoopBodyLambdaWrapper::operator()(cv::Range const&) const (utility.hpp:604)
==19564== by 0x7925F05: cv::(anonymous namespace)::ParallelLoopBodyWrapper::operator()(cv::Range const&) const (parallel.cpp:352)
==19564== by 0x79383DE: cv::ParallelJob::execute(bool) (parallel_impl.cpp:332)
==19564== by 0x793947B: cv::ThreadPool::run(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel_impl.cpp:647)
==19564== by 0x7939926: cv::parallel_for_pthreads(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel_impl.cpp:750)
==19564== by 0x792650B: cv::parallel_for_impl(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel.cpp:609)
==19564== by 0x7926191: cv::parallel_for_(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel.cpp:520)
==19564== Address 0x9fd227c is 18,492 bytes inside a block of size 18,504 alloc'd
==19564== at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==19564== by 0x770C8C1: cv::fastMalloc(unsigned long) (alloc.cpp:160)
==19564== by 0x7872E2F: cv::StdMatAllocator::allocate(int, int const*, int, void*, unsigned long*, cv::AccessFlag, cv::UMatUsageFlags) const (matrix.cpp:147)
==19564== by 0x7875FF1: cv::Mat::create(int, int const*, int) (matrix.cpp:703)
==19564== by 0x78D17F9: cv::_OutputArray::create(int, int const*, int, int, bool, cv::_OutputArray::DepthMask) const (matrix_wrap.cpp:1308)
==19564== by 0x77B276B: cv::Mat::copyTo(cv::_OutputArray const&) const (copy.cpp:373)
==19564== by 0x4D1183C: cv::dnn::dnn4_v20221220::Net::Impl::setInput(cv::_InputArray const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, double, cv::Scalar_<double> const&) (net_impl.cpp:1438)
==19564== by 0x4CFD328: cv::dnn::dnn4_v20221220::Net::setInput(cv::_InputArray const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, double, cv::Scalar_<double> const&) (net.cpp:179)
==19564== by 0x305F5B: opencv_test::(anonymous namespace)::Layer_Test_DWconv_Stride2_Accuracy_Test::Body() (test_layers.cpp:1309)
==19564== by 0x3057C8: opencv_test::(anonymous namespace)::Layer_Test_DWconv_Stride2_Accuracy_Test::TestBody() (test_layers.cpp:1254)
==19564== by 0x4ADA8F: void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (ts_gtest.cpp:3919)
==19564== by 0x4A7E23: void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (ts_gtest.cpp:3955)
==19564==
==19564== Invalid read of size 16
==19564== at 0x4C05D14: _mm_loadu_ps (xmmintrin.h:934)
==19564== by 0x4C05D14: cv::hal_baseline::v_load_deinterleave(float const*, cv::hal_baseline::v_float32x4&, cv::hal_baseline::v_float32x4&) (intrin_sse.hpp:2321)
==19564== by 0x4C06BCF: cv::dnn::depthWiseBlockConv2D(float const*, int, int, int, int, int, int, int, int, float const*, float const*, float const*, int, int, float*, int, int, int, bool) (depthwise_convolution.cpp:139)
==19564== by 0x4C0860D: cv::dnn::runDepthwise(cv::_InputArray const&, cv::_OutputArray const&, cv::Ptr<cv::dnn::FastConv> const&, cv::dnn::dnn4_v20221220::ActivationLayer*, std::vector<float, std::allocator<float> > const&, bool)::{lambda(cv::Range const&)#1}::operator()(cv::Range const&) const (depthwise_convolution.cpp:427)
==19564== by 0x4C096E5: std::_Function_handler<void (cv::Range const&), cv::dnn::runDepthwise(cv::_InputArray const&, cv::_OutputArray const&, cv::Ptr<cv::dnn::FastConv> const&, cv::dnn::dnn4_v20221220::ActivationLayer*, std::vector<float, std::allocator<float> > const&, bool)::{lambda(cv::Range const&)#1}>::_M_invoke(std::_Any_data const&, cv::Range const&) (std_function.h:300)
==19564== by 0x4C09556: std::function<void (cv::Range const&)>::operator()(cv::Range const&) const (std_function.h:688)
==19564== by 0x4C05A10: cv::ParallelLoopBodyLambdaWrapper::operator()(cv::Range const&) const (utility.hpp:604)
==19564== by 0x7925F05: cv::(anonymous namespace)::ParallelLoopBodyWrapper::operator()(cv::Range const&) const (parallel.cpp:352)
==19564== by 0x79383DE: cv::ParallelJob::execute(bool) (parallel_impl.cpp:332)
==19564== by 0x793947B: cv::ThreadPool::run(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel_impl.cpp:647)
==19564== by 0x7939926: cv::parallel_for_pthreads(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel_impl.cpp:750)
==19564== by 0x792650B: cv::parallel_for_impl(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel.cpp:609)
==19564== by 0x7926191: cv::parallel_for_(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel.cpp:520)
==19564== Address 0x9fd2284 is 18,500 bytes inside a block of size 18,504 alloc'd
==19564== at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==19564== by 0x770C8C1: cv::fastMalloc(unsigned long) (alloc.cpp:160)
==19564== by 0x7872E2F: cv::StdMatAllocator::allocate(int, int const*, int, void*, unsigned long*, cv::AccessFlag, cv::UMatUsageFlags) const (matrix.cpp:147)
==19564== by 0x7875FF1: cv::Mat::create(int, int const*, int) (matrix.cpp:703)
==19564== by 0x78D17F9: cv::_OutputArray::create(int, int const*, int, int, bool, cv::_OutputArray::DepthMask) const (matrix_wrap.cpp:1308)
==19564== by 0x77B276B: cv::Mat::copyTo(cv::_OutputArray const&) const (copy.cpp:373)
==19564== by 0x4D1183C: cv::dnn::dnn4_v20221220::Net::Impl::setInput(cv::_InputArray const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, double, cv::Scalar_<double> const&) (net_impl.cpp:1438)
==19564== by 0x4CFD328: cv::dnn::dnn4_v20221220::Net::setInput(cv::_InputArray const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, double, cv::Scalar_<double> const&) (net.cpp:179)
==19564== by 0x305F5B: opencv_test::(anonymous namespace)::Layer_Test_DWconv_Stride2_Accuracy_Test::Body() (test_layers.cpp:1309)
==19564== by 0x3057C8: opencv_test::(anonymous namespace)::Layer_Test_DWconv_Stride2_Accuracy_Test::TestBody() (test_layers.cpp:1254)
==19564== by 0x4ADA8F: void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (ts_gtest.cpp:3919)
==19564== by 0x4A7E23: void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (ts_gtest.cpp:3955)
==19564==
[ OK ] Layer_Test_DWconv_Stride2.Accuracy (662 ms)
[----------] 1 test from Layer_Test_DWconv_Stride2 (669 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (695 ms total)
[ PASSED ] 1 test.
==19564==
==19564== HEAP SUMMARY:
==19564== in use at exit: 6,848 bytes in 61 blocks
==19564== total heap usage: 133,380 allocs, 133,319 frees, 10,463,779 bytes allocated
==19564==
==19564== 2,128 bytes in 7 blocks are possibly lost in loss record 40 of 40
==19564== at 0x483DD99: calloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==19564== by 0x40149DA: allocate_dtv (dl-tls.c:286)
==19564== by 0x40149DA: _dl_allocate_tls (dl-tls.c:532)
==19564== by 0x5198322: allocate_stack (allocatestack.c:622)
==19564== by 0x5198322: pthread_create@@GLIBC_2.2.5 (pthread_create.c:660)
==19564== by 0x7937D05: cv::WorkerThread::WorkerThread(cv::ThreadPool&, unsigned int) (parallel_impl.cpp:241)
==19564== by 0x7938FF7: cv::ThreadPool::reconfigure_(unsigned int) (parallel_impl.cpp:537)
==19564== by 0x79391F3: cv::ThreadPool::run(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel_impl.cpp:575)
==19564== by 0x7939926: cv::parallel_for_pthreads(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel_impl.cpp:750)
==19564== by 0x792650B: cv::parallel_for_impl(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel.cpp:609)
==19564== by 0x7926191: cv::parallel_for_(cv::Range const&, cv::ParallelLoopBody const&, double) (parallel.cpp:520)
==19564== by 0x4C09D40: cv::parallel_for_(cv::Range const&, std::function<void (cv::Range const&)>, double) (utility.hpp:612)
==19564== by 0x4C0DDD1: cv::dnn::initFastConv(cv::_InputArray const&, float*, int, int, int, std::vector<unsigned long, std::allocator<unsigned long> > const&, std::vector<unsigned long, std::allocator<unsigned long> > const&, std::vector<unsigned long, std::allocator<unsigned long> > const&, std::vector<unsigned long, std::allocator<unsigned long> > const&, std::vector<unsigned long, std::allocator<unsigned long> > const&, int, bool) (fast_convolution.cpp:134)
==19564== by 0x4BA86F0: cv::dnn::ConvolutionLayerImpl::forward(cv::_InputArray const&, cv::_OutputArray const&, cv::_OutputArray const&) (convolution_layer.cpp:1387)
==19564==
==19564== LEAK SUMMARY:
==19564== definitely lost: 0 bytes in 0 blocks
==19564== indirectly lost: 0 bytes in 0 blocks
==19564== possibly lost: 2,128 bytes in 7 blocks
==19564== still reachable: 4,720 bytes in 54 blocks
==19564== suppressed: 0 bytes in 0 blocks
==19564== Reachable blocks (those to which a pointer was found) are not shown.
==19564== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==19564==
==19564== For lists of detected and suppressed errors, rerun with: -s
==19564== ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
Steps to reproduce
const int channels = 128;
Net net;
LayerParams lp;
lp.name = "dwconv";
lp.type = "Convolution";
lp.set("kernel_size", 3);
lp.set("num_output", channels);
lp.set("pad", 1);
lp.set("group", channels);
lp.set("stride", 2);
lp.set("bias_term", "true");
std::vector<int> weightsShape(4);
weightsShape[0] = channels; // #outChannels
weightsShape[1] = 1; // #inpChannels / group
weightsShape[2] = 3; // height
weightsShape[3] = 3; // width
Mat weights(weightsShape, CV_32F, Scalar(1));
//assign weights
for (int i = 0; i < weightsShape[0]; ++i)
{
for (int j = 0; j < weightsShape[1]; ++j)
{
for (int k = 0; k < weightsShape[2]; ++k)
{
for (int l = 0; l < weightsShape[3]; ++l)
{
weights.ptr<float>(i, j, k)[l]=-1*(i+1);
}
}
}
}
lp.blobs.push_back(weights);
//assign bias
Mat bias(1, channels, CV_32F, Scalar(1));
for (int i = 0; i < 1; ++i)
{
for (int j = 0; j < channels; ++j)
{
bias.ptr<float>(i)[j]=j+1;
}
}
lp.blobs.push_back(bias);
net.addLayerToPrev(lp.name, lp.type, lp);
int shape[] = {1, channels, 6, 6};
Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1));
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setInput(in_blob);
Mat out = net.forward();
Issue submission checklist
- I report the issue, it's not a question
- I checked the problem with documentation, FAQ, open issues, forum.opencv.org, Stack Overflow, etc and have not found any solution
- I updated to the latest OpenCV version and the issue is still there
- There is reproducer code and related data files (videos, images, onnx, etc)
Reactions are currently unavailable