Skip to content

Cannot load the onnx model converted from pytorch #16971

@stereomatchingkiss

Description

@stereomatchingkiss
System information (version)
  • OpenCV => 4.3.0 (e6f32c6)
  • Operating System / Platform => Windows 64 Bit
  • Compiler => Visual Studio 2017 64 bits
Detailed description

The decoder model converted from this project does not work.

Steps to reproduce
  1. install pytorch 1.4.0(cpu version)
  2. clone the project
  3. Navigate to the folder of the project, create a file called to_onnx.py
from __future__ import absolute_import, division, print_function

import os
import sys
import glob
import numpy as np
import PIL.Image as pil
import matplotlib as mpl
import matplotlib.cm as cm

import torch
from torchvision import transforms, datasets

import networks
from layers import disp_to_depth
from utils import download_model_if_doesnt_exist

# Run everything on CPU so the exported ONNX graph is device-independent.
device = torch.device("cpu")

# Pretrained monodepth2 checkpoint: one file for the encoder, one for the decoder.
model_name = "mono+stereo_640x192"
model_path = os.path.join("models", model_name)  # reuse model_name instead of repeating the literal
print("-> Loading model from ", model_path)
encoder_path = os.path.join(model_path, "encoder.pth")
depth_decoder_path = os.path.join(model_path, "depth.pth")

# LOADING PRETRAINED MODEL
print("   Loading pretrained encoder")
encoder = networks.ResnetEncoder(18, False)
loaded_dict_enc = torch.load(encoder_path, map_location=device)

# extract the height and width of image that this model was trained with
# (the checkpoint stores them alongside the weights)
feed_height = loaded_dict_enc['height']
feed_width = loaded_dict_enc['width']

# The checkpoint carries non-parameter entries ('height', 'width', ...);
# keep only the keys present in the encoder's state_dict before loading.
filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
encoder.load_state_dict(filtered_dict_enc)
encoder.to(device)
encoder.eval()

import onnx
import onnxruntime

def test_model_accuracy(export_model_name, raw_output, input):
    """Run the exported ONNX model through onnxruntime and verify its
    output matches the PyTorch output computed on the same input."""
    session = onnxruntime.InferenceSession(export_model_name)

    def to_numpy(tensor):
        detached = tensor.detach() if tensor.requires_grad else tensor
        return detached.cpu().numpy()

    # feed the identical tensor to the ONNX runtime session
    feed = {session.get_inputs()[0].name: to_numpy(input)}
    onnx_outputs = session.run(None, feed)

    # the two backends must agree within a small numeric tolerance
    np.testing.assert_allclose(to_numpy(raw_output), onnx_outputs[0], rtol=1e-03, atol=1e-05)

    print("Exported model has been tested with ONNXRuntime, and the result looks good!")

def export_model(model, input, export_model_name):
    """Export *model* to ONNX, run the structural checker on the result,
    and dump a human-readable graph listing to graph_output.txt."""
    torch.onnx.export(model, input, export_model_name,
                      verbose=False, export_params=True, opset_version=11)
    # reload the file we just wrote and validate it
    reloaded = onnx.load(export_model_name)
    onnx.checker.check_model(reloaded)
    # keep a textual dump of the graph around for debugging
    readable_graph = onnx.helper.printable_graph(reloaded.graph)
    with open("graph_output.txt", mode="w") as fout:
        fout.write(readable_graph)

# Export the encoder first: load a sample image, resize it to the
# resolution the checkpoint was trained with, and convert it to an
# NCHW float tensor with a batch dimension of 1.
export_model_name = "encoder_" + model_name + '.onnx'
input_image = pil.open("assets/runner.jpg").convert('RGB')
input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
input_image = transforms.ToTensor()(input_image).unsqueeze(0)

# Compute the PyTorch reference output, then export and cross-check
# the ONNX model against it with onnxruntime.
features = encoder(input_image)
export_model(encoder, input_image, export_model_name)
test_model_accuracy(export_model_name, features, input_image)

#print("   Loading pretrained decoder")
depth_decoder = networks.DepthDecoder(
    num_ch_enc = encoder.num_ch_enc, scales=range(4))

# Load decoder weights and switch to inference mode.
loaded_dict = torch.load(depth_decoder_path, map_location=device)
depth_decoder.load_state_dict(loaded_dict)

depth_decoder.to(device)
depth_decoder.eval()

# The decoder consumes the encoder's output — a single flattened tensor
# produced by the modified ResnetEncoder.forward; the modified
# DepthDecoder.forward slices the deepest feature map back out of it.
decoder_output = depth_decoder(features)

export_model_name = "decoder_" + model_name + '.onnx'

# Export the decoder and cross-check it the same way as the encoder.
export_model(depth_decoder, features, export_model_name)
test_model_accuracy(export_model_name, decoder_output, features)
  4. Change the class ResnetEncoder in networks/resnet_encoder.py to
class ResnetEncoder(nn.Module):
    """Pytorch module for a resnet encoder.

    Wraps a torchvision ResNet and returns the five intermediate feature
    maps flattened and concatenated into one 1-D tensor (a single output
    simplifies the ONNX export in this repro).
    """

    # torchvision constructors keyed by layer count
    RESNETS = {18: models.resnet18, 34: models.resnet34, 50: models.resnet50,
               101: models.resnet101, 152: models.resnet152}

    def __init__(self, num_layers, pretrained, num_input_images=1):
        """
        num_layers: depth of the ResNet backbone (18, 34, 50, 101 or 152).
        pretrained: load ImageNet weights when True.
        num_input_images: kept for interface compatibility; this simplified
            encoder only handles a single input image.
        """
        super(ResnetEncoder, self).__init__()

        self.num_ch_enc = np.array([64, 64, 128, 256, 512])

        # bug fix: the original hard-coded models.resnet18 and silently
        # ignored num_layers; select the requested backbone instead.
        if num_layers not in self.RESNETS:
            raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
        self.encoder = self.RESNETS[num_layers](pretrained)
        if num_layers > 34:
            # bottleneck variants widen everything after the stem by 4x
            self.num_ch_enc[1:] *= 4

    def forward(self, input_image):
        # normalize with scalar ImageNet-style mean/std baked in
        x = (input_image - 0.45) / 0.225
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        output1 = self.encoder.relu(x)
        output2 = self.encoder.layer1(self.encoder.maxpool(output1))
        output3 = self.encoder.layer2(output2)
        output4 = self.encoder.layer3(output3)
        output5 = self.encoder.layer4(output4)

        # flatten all five scales into one 1-D tensor so the exported
        # ONNX graph has a single output
        return torch.cat((output1.flatten(), output2.flatten(),
                          output3.flatten(), output4.flatten(),
                          output5.flatten()))
  5. Change the class DepthDecoder in networks/depth_decoder.py to
class DepthDecoder(nn.Module):
    """Depth decoder: a stack of up-convolutions with optional encoder skip
    connections, producing disparity maps at the requested scales."""

    def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True):
        super(DepthDecoder, self).__init__()

        self.num_output_channels = num_output_channels
        self.use_skips = use_skips
        self.upsample_mode = 'nearest'
        self.scales = scales

        self.num_ch_enc = num_ch_enc
        self.num_ch_dec = np.array([16, 32, 64, 128, 256])

        # Build the decoder from the deepest level (4) up to level 0.
        self.convs = OrderedDict()
        for level in reversed(range(5)):
            # first up-convolution of this level
            if level == 4:
                in_ch = self.num_ch_enc[-1]
            else:
                in_ch = self.num_ch_dec[level + 1]
            self.convs[("upconv", level, 0)] = ConvBlock(in_ch, self.num_ch_dec[level])

            # second up-convolution, optionally widened by the encoder skip
            in_ch = self.num_ch_dec[level]
            if self.use_skips and level > 0:
                in_ch = in_ch + self.num_ch_enc[level - 1]
            self.convs[("upconv", level, 1)] = ConvBlock(in_ch, self.num_ch_dec[level])

        # one disparity head per requested output scale
        for scale in self.scales:
            self.convs[("dispconv", scale)] = Conv3x3(self.num_ch_dec[scale], self.num_output_channels)

        self.decoder = nn.ModuleList(list(self.convs.values()))
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_features):
        # Simplified for the bug report: only the first up-convolution runs.
        # The encoder output arrives as one flat tensor, so slice the deepest
        # feature map out and restore its (1, 512, 6, 20) shape first —
        # this convolution is what triggers the OpenCV import error.
        deepest = input_features[2826240:2887680].reshape(1, 512, 6, 20)
        return self.convs[("upconv", 4, 0)](deepest)
  6. Load the model with OpenCV
// Folder holding the ONNX models exported by to_onnx.py above.
std::string const model_folder("C:/Users/yyyy/programs/Qt/app_to_simplify_ml/"
                                   "cv_projects/clients/mobile_depth_estimation/monodepth2");
// Loading the converted decoder model triggers the assertion failure below.
cv::dnn::Net net = cv::dnn::readNet(model_folder + "/decoder_mono+stereo_640x192.onnx");

Both of the decoder and encoder models passed the test.

To save you trouble, you can get the graph, model, and code (to_onnx.py, resnet_encoder.py, depth_decoder.py) from Google Drive.

Issue submission checklist

Cannot load the encoder model after conversion; I got these error messages:

OpenCV: terminate handler is called! The last OpenCV error is:OpenCV(4.3.0-pre) Error: Assertion failed (shapeIt != outShapes.end()) in cv::dnn::dnn4_v20200310::ONNXImporter::populateNet, file C:\Users\yyyy\programs\Qt\3rdLibs\opencv\dev\opencv\modules\dnn\src\onnx\onnx_importer.cpp, line 1116

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions