Skip to content

Cannot load the onnx model converted from pytorch #16971

@stereomatchingkiss

Description

@stereomatchingkiss
System information (version)
  • OpenCV => 4.3.0 (e6f32c6)
  • Operating System / Platform => Windows 64 Bit
  • Compiler => Visual Studio 2017 64 bits
Detailed description

The decoder model converted from this project does not work.

Steps to reproduce
  1. install pytorch 1.4.0(cpu version)
  2. clone the project
  3. Navigate to the folder of the project, create a file called to_onnx.py
from __future__ import absolute_import, division, print_function

import os
import sys
import glob
import numpy as np
import PIL.Image as pil
import matplotlib as mpl
import matplotlib.cm as cm

import torch
from torchvision import transforms, datasets

import networks
from layers import disp_to_depth
from utils import download_model_if_doesnt_exist

# Run everything on CPU so the exported ONNX graph is device-independent.
device = torch.device("cpu")

# Pretrained monodepth2 checkpoint: one file for the encoder, one for the decoder.
model_name = "mono+stereo_640x192"
model_path = os.path.join("models", model_name)  # reuse model_name instead of repeating the literal
print("-> Loading model from ", model_path)
encoder_path = os.path.join(model_path, "encoder.pth")
depth_decoder_path = os.path.join(model_path, "depth.pth")

# LOADING PRETRAINED MODEL
print("   Loading pretrained encoder")
encoder = networks.ResnetEncoder(18, False)
loaded_dict_enc = torch.load(encoder_path, map_location=device)

# extract the height and width of image that this model was trained with
# (the checkpoint stores them alongside the weights)
feed_height = loaded_dict_enc['height']
feed_width = loaded_dict_enc['width']

# The checkpoint carries non-parameter entries ('height', 'width', ...);
# keep only the keys present in the encoder's state_dict before loading.
filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
encoder.load_state_dict(filtered_dict_enc)
encoder.to(device)
encoder.eval()

import onnx
import onnxruntime

def test_model_accuracy(export_model_name, raw_output, input):
    """Run the exported ONNX model through onnxruntime and verify its
    output matches the PyTorch output computed on the same input."""
    session = onnxruntime.InferenceSession(export_model_name)

    def to_numpy(tensor):
        detached = tensor.detach() if tensor.requires_grad else tensor
        return detached.cpu().numpy()

    # feed the identical tensor to the ONNX runtime session
    feed = {session.get_inputs()[0].name: to_numpy(input)}
    onnx_outputs = session.run(None, feed)

    # the two backends must agree within a small numeric tolerance
    np.testing.assert_allclose(to_numpy(raw_output), onnx_outputs[0], rtol=1e-03, atol=1e-05)

    print("Exported model has been tested with ONNXRuntime, and the result looks good!")

def export_model(model, input, export_model_name):
    """Export *model* to ONNX, run the structural checker on the result,
    and dump a human-readable graph listing to graph_output.txt."""
    torch.onnx.export(model, input, export_model_name,
                      verbose=False, export_params=True, opset_version=11)
    # reload the file we just wrote and validate it
    reloaded = onnx.load(export_model_name)
    onnx.checker.check_model(reloaded)
    # keep a textual dump of the graph around for debugging
    readable_graph = onnx.helper.printable_graph(reloaded.graph)
    with open("graph_output.txt", mode="w") as fout:
        fout.write(readable_graph)

# Export the encoder first: load a sample image, resize it to the
# resolution the checkpoint was trained with, and convert it to an
# NCHW float tensor with a batch dimension of 1.
export_model_name = "encoder_" + model_name + '.onnx'
input_image = pil.open("assets/runner.jpg").convert('RGB')
input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
input_image = transforms.ToTensor()(input_image).unsqueeze(0)

# Compute the PyTorch reference output, then export and cross-check
# the ONNX model against it with onnxruntime.
features = encoder(input_image)
export_model(encoder, input_image, export_model_name)
test_model_accuracy(export_model_name, features, input_image)

#print("   Loading pretrained decoder")
depth_decoder = networks.DepthDecoder(
    num_ch_enc = encoder.num_ch_enc, scales=range(4))

# Load decoder weights and switch to inference mode.
loaded_dict = torch.load(depth_decoder_path, map_location=device)
depth_decoder.load_state_dict(loaded_dict)

depth_decoder.to(device)
depth_decoder.eval()

# The decoder consumes the encoder's output — a single flattened tensor
# produced by the modified ResnetEncoder.forward; the modified
# DepthDecoder.forward slices the deepest feature map back out of it.
decoder_output = depth_decoder(features)

export_model_name = "decoder_" + model_name + '.onnx'

# Export the decoder and cross-check it the same way as the encoder.
export_model(depth_decoder, features, export_model_name)
test_model_accuracy(export_model_name, decoder_output, features)
  4. Change the class ResnetEncoder in networks/resnet_encoder.py to
class ResnetEncoder(nn.Module):
    """Pytorch module for a resnet encoder.

    Wraps a torchvision ResNet and returns the five intermediate feature
    maps flattened and concatenated into one 1-D tensor (a single output
    simplifies the ONNX export in this repro).
    """

    # torchvision constructors keyed by layer count
    RESNETS = {18: models.resnet18, 34: models.resnet34, 50: models.resnet50,
               101: models.resnet101, 152: models.resnet152}

    def __init__(self, num_layers, pretrained, num_input_images=1):
        """
        num_layers: depth of the ResNet backbone (18, 34, 50, 101 or 152).
        pretrained: load ImageNet weights when True.
        num_input_images: kept for interface compatibility; this simplified
            encoder only handles a single input image.
        """
        super(ResnetEncoder, self).__init__()

        self.num_ch_enc = np.array([64, 64, 128, 256, 512])

        # bug fix: the original hard-coded models.resnet18 and silently
        # ignored num_layers; select the requested backbone instead.
        if num_layers not in self.RESNETS:
            raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
        self.encoder = self.RESNETS[num_layers](pretrained)
        if num_layers > 34:
            # bottleneck variants widen everything after the stem by 4x
            self.num_ch_enc[1:] *= 4

    def forward(self, input_image):
        # normalize with scalar ImageNet-style mean/std baked in
        x = (input_image - 0.45) / 0.225
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        output1 = self.encoder.relu(x)
        output2 = self.encoder.layer1(self.encoder.maxpool(output1))
        output3 = self.encoder.layer2(output2)
        output4 = self.encoder.layer3(output3)
        output5 = self.encoder.layer4(output4)

        # flatten all five scales into one 1-D tensor so the exported
        # ONNX graph has a single output
        return torch.cat((output1.flatten(), output2.flatten(),
                          output3.flatten(), output4.flatten(),
                          output5.flatten()))
  5. Change the class DepthDecoder in networks/depth_decoder.py to
class DepthDecoder(nn.Module):
    """Depth decoder: a stack of up-convolutions with optional encoder skip
    connections, producing disparity maps at the requested scales."""

    def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True):
        super(DepthDecoder, self).__init__()

        self.num_output_channels = num_output_channels
        self.use_skips = use_skips
        self.upsample_mode = 'nearest'
        self.scales = scales

        self.num_ch_enc = num_ch_enc
        self.num_ch_dec = np.array([16, 32, 64, 128, 256])

        # Build the decoder from the deepest level (4) up to level 0.
        self.convs = OrderedDict()
        for level in reversed(range(5)):
            # first up-convolution of this level
            if level == 4:
                in_ch = self.num_ch_enc[-1]
            else:
                in_ch = self.num_ch_dec[level + 1]
            self.convs[("upconv", level, 0)] = ConvBlock(in_ch, self.num_ch_dec[level])

            # second up-convolution, optionally widened by the encoder skip
            in_ch = self.num_ch_dec[level]
            if self.use_skips and level > 0:
                in_ch = in_ch + self.num_ch_enc[level - 1]
            self.convs[("upconv", level, 1)] = ConvBlock(in_ch, self.num_ch_dec[level])

        # one disparity head per requested output scale
        for scale in self.scales:
            self.convs[("dispconv", scale)] = Conv3x3(self.num_ch_dec[scale], self.num_output_channels)

        self.decoder = nn.ModuleList(list(self.convs.values()))
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_features):
        # Simplified for the bug report: only the first up-convolution runs.
        # The encoder output arrives as one flat tensor, so slice the deepest
        # feature map out and restore its (1, 512, 6, 20) shape first —
        # this convolution is what triggers the OpenCV import error.
        deepest = input_features[2826240:2887680].reshape(1, 512, 6, 20)
        return self.convs[("upconv", 4, 0)](deepest)
  6. Load the model with OpenCV
// Folder holding the ONNX models exported by to_onnx.py above.
std::string const model_folder("C:/Users/yyyy/programs/Qt/app_to_simplify_ml/"
                                   "cv_projects/clients/mobile_depth_estimation/monodepth2");
// Loading the converted decoder model triggers the assertion failure below.
cv::dnn::Net net = cv::dnn::readNet(model_folder + "/decoder_mono+stereo_640x192.onnx");

Both of the decoder and encoder models passed the test.

To save you trouble, you can get the graph, model, and code (to_onnx.py, resnet_encoder.py, depth_decoder.py) from Google Drive.

Issue submission checklist

Cannot load the encoder model after conversion; I got these error messages:

OpenCV: terminate handler is called! The last OpenCV error is:OpenCV(4.3.0-pre) Error: Assertion failed (shapeIt != outShapes.end()) in cv::dnn::dnn4_v20200310::ONNXImporter::populateNet, file C:\Users\yyyy\programs\Qt\3rdLibs\opencv\dev\opencv\modules\dnn\src\onnx\onnx_importer.cpp, line 1116

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions