#include <qnnpack/operator.h>

#include <array>
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <cstddef>
#include <cstdint>
1111namespace qnnpack {
13+ namespace {
14+ static inline size_t compute_output_dimension (
15+ size_t input_dim, // Input dimension
16+ size_t pad_dim, // Input padding
17+ size_t adjustment_dim, // Adjustment to the output dimension
18+ size_t kernel_dim, // Kernel dimension
19+ size_t dilation_dim, // Dilation dimension
20+ size_t stride_dim, // Stride or subsampling dimension
21+ bool transpose) { // Transposed convolution
22+ kernel_dim = (kernel_dim - 1 ) * dilation_dim + 1 ; // Effective kernel dim
23+ if (transpose) {
24+ return stride_dim * (input_dim - 1 ) + adjustment_dim + kernel_dim - pad_dim;
25+ } else {
26+ return (input_dim + pad_dim - kernel_dim) / stride_dim + 1 ;
27+ }
28+ }
29+ } // namespace
1231struct conv_param_t {
13- const std::array<uint32_t , 2 > kernel_dims; // kernel width, kernel height
14- const std::array<uint32_t , 2 > subsampling_dims ; // subsampling width, height
32+ const std::array<uint32_t , 2 > kernel_dims; // kernel width, height
33+ const std::array<uint32_t , 2 > stride_dims ; // subsampling width, height
1534 const std::array<uint32_t , 2 > dilation; // dilation width, height
16- const std::array<uint32_t , 4 > pad; // input padding top, left, bottom, right
35+ const std::array<uint32_t , 4 > padding; // input padding top, left, bottom, right
36+ const std::array<uint32_t , 2 > adjustment_dims; // output adjustment
37+
1738 const uint32_t groups;
1839 const size_t input_channels;
1940 const size_t output_channels;
2041 const uint8_t kernel_zero_point;
2142 const float kernel_scale;
2243 const uint8_t output_min;
2344 const uint8_t output_max;
45+ const bool transpose;
2446
2547 // The following are derived parameters
2648 enum pytorch_qnnp_ukernel_type ukernel_type; // kernel type based on input params
2749 size_t group_input_channels;
2850 size_t group_output_channels;
2951
3052 /* *
31- * @brief Constructor for initializing the convolution parameters.
53+ * @brief Constructor for initializing the convolution/deconvolution
54+ * parameters.
3255 */
33- conv_param_t (
34- const std::array<uint32_t , 2 > kernel,
35- const std::array<uint32_t , 2 > subsampling,
36- const std::array<uint32_t , 2 > dil,
37- const std::array<uint32_t , 4 > pd,
38- const uint32_t grp,
39- const size_t in_ch,
40- const size_t out_ch,
41- const uint8_t kernel_zp,
42- const float kernel_s,
43- const uint8_t out_min,
44- const uint8_t out_max)
45- : kernel_dims(kernel),
46- subsampling_dims (subsampling),
47- dilation(dil),
48- pad(pd),
49- groups(grp),
50- input_channels(in_ch),
51- output_channels(out_ch),
52- kernel_zero_point(kernel_zp),
53- kernel_scale(kernel_s),
54- output_min(out_min),
55- output_max(out_max) {
56+ conv_param_t (const std::array<uint32_t , 2 > kernel_,
57+ const std::array<uint32_t , 2 > stride_,
58+ const std::array<uint32_t , 2 > dilation_,
59+ const std::array<uint32_t , 4 > padding_,
60+ const std::array<uint32_t , 2 > adjustment_,
61+ const uint32_t groups_,
62+ const size_t input_channels_,
63+ const size_t output_channels_,
64+ const uint8_t kernel_zp_,
65+ const float kernel_stride_,
66+ const uint8_t out_min_,
67+ const uint8_t out_max_,
68+ const bool transpose_)
69+ : kernel_dims(kernel_),
70+ stride_dims (stride_),
71+ dilation(dilation_),
72+ padding(padding_),
73+ adjustment_dims(adjustment_),
74+ groups(groups_),
75+ input_channels(input_channels_),
76+ output_channels(output_channels_),
77+ kernel_zero_point(kernel_zp_),
78+ kernel_scale(kernel_stride_),
79+ output_min(out_min_),
80+ output_max(out_max_),
81+ transpose(transpose_) {
5682 const uint32_t kernel_width = kernel_dims[0 ];
5783 const uint32_t kernel_height = kernel_dims[1 ];
5884
59- const uint32_t input_padding_top = pad[0 ];
60- const uint32_t input_padding_left = pad[1 ];
61- const uint32_t input_padding_bottom = pad[2 ];
62- const uint32_t input_padding_right = pad[3 ];
85+ const uint32_t input_padding_top = padding[0 ];
86+ const uint32_t input_padding_left = padding[1 ];
87+ const uint32_t input_padding_bottom = padding[2 ];
88+ const uint32_t input_padding_right = padding[3 ];
89+
90+ const char * _name;
91+ if (transpose) {
92+ _name = " deconvolution\0 " ;
93+ } else {
94+ _name = " convolution\0 " ;
95+ }
96+
97+ if (groups == 0 ) {
98+ pytorch_qnnp_log_error (
99+ " failed to create %s with groups equal to zero." , _name);
100+ assert (" Failed to initialize QNNPACK conv_param_t struct." );
101+ }
102+
103+ if (input_channels % groups != 0 || output_channels % groups != 0 ) {
104+ pytorch_qnnp_log_error (
105+ " failed to create %s: input and output channels must be divisible by"
106+ " groups." , _name);
107+ assert (" Failed to initialize QNNPACK conv_param_t struct." );
108+ }
63109
64110 group_input_channels = input_channels / groups;
65111 group_output_channels = output_channels / groups;
66112
67113 if (kernel_width == 0 || kernel_height == 0 ) {
68114 pytorch_qnnp_log_error (
69- " failed to create convolution with %" PRIu32 " x%" PRIu32
115+ " failed to create %s with %" PRIu32 " x%" PRIu32
70116 " kernel: kernel dimensions must be non-zero" ,
117+ _name,
71118 kernel_width,
72119 kernel_height);
73120 assert (" Failed to initialize QNNPACK conv_param_t struct." );
74121 }
75122
76- if (subsampling_dims [0 ] == 0 || subsampling_dims [1 ] == 0 ) {
123+ if (stride_dims [0 ] == 0 || stride_dims [1 ] == 0 ) {
77124 pytorch_qnnp_log_error (
78- " failed to create convolution with %" PRIu32 " x%" PRIu32
125+ " failed to create %s with %" PRIu32 " x%" PRIu32
79126 " subsampling: "
80127 " subsampling dimensions must be non-zero" ,
81- subsampling_dims[0 ],
82- subsampling_dims[1 ]);
128+ _name,
129+ stride_dims[0 ],
130+ stride_dims[1 ]);
83131 assert (" Failed to initialize QNNPACK conv_param_t struct." );
84132 }
85133
86134 if (dilation[0 ] == 0 || dilation[1 ] == 0 ) {
87135 pytorch_qnnp_log_error (
88- " failed to create convolution with %" PRIu32 " x%" PRIu32
136+ " failed to create %s with %" PRIu32 " x%" PRIu32
89137 " dilation: "
90138 " dilation dimensions must be non-zero" ,
139+ _name,
91140 dilation[0 ],
92141 dilation[1 ]);
93142 assert (" Failed to initialize QNNPACK conv_param_t struct." );
94143 }
95144
96145 if (kernel_scale <= 0 .0f || !std::isnormal (kernel_scale)) {
97146 pytorch_qnnp_log_error (
98- " failed to create convolution with %.7g kernel scale: scale must be"
147+ " failed to create %s with %.7g kernel scale: scale must be"
99148 " finite and positive" ,
149+ _name,
100150 kernel_scale);
101151 assert (" Failed to initialize QNNPACK conv_param_t struct." );
102152 }
103153
104- if (subsampling_dims [1 ] > kernel_height) {
154+ if (stride_dims [1 ] > kernel_height) {
105155 pytorch_qnnp_log_info (
106- " inefficiency in convolution with %" PRIu32 " x%" PRIu32
107- " kernel and %" PRIu32 " x%" PRIu32
108- " subsampling: "
156+ " inefficiency in %s with %" PRIu32 " x%" PRIu32 " kernel and %"
157+ PRIu32 " x%" PRIu32 " subsampling: "
109158 " height subsampling is greater than kernel height; subsampling should"
110- " be performed before the convolution" ,
159+ " be performed before the %s" ,
160+ _name,
111161 kernel_width,
112162 kernel_height,
113- subsampling_dims[0 ],
114- subsampling_dims[1 ]);
163+ stride_dims[0 ],
164+ stride_dims[1 ],
165+ _name);
115166 }
116167
117- if (subsampling_dims [0 ] > kernel_width) {
168+ if (stride_dims [0 ] > kernel_width) {
118169 pytorch_qnnp_log_info (
119- " inefficiency in convolution with %" PRIu32 " x%" PRIu32
120- " kernel and %" PRIu32 " x%" PRIu32
121- " subsampling: "
170+ " inefficiency in %s with %" PRIu32 " x%" PRIu32 " kernel and %"
171+ PRIu32 " x%" PRIu32 " subsampling: "
122172 " width subsampling is greater than kernel width; subsampling should"
123- " be performed before the convolution" ,
173+ " be performed before the %s" ,
174+ _name,
124175 kernel_width,
125176 kernel_height,
126- subsampling_dims[0 ],
127- subsampling_dims[1 ]);
177+ stride_dims[0 ],
178+ stride_dims[1 ],
179+ _name);
128180 }
129181
130182 if (input_padding_top >= kernel_height) {
131183 pytorch_qnnp_log_info (
132- " inefficiency in convolution with %" PRIu32 " x%" PRIu32
133- " kernel and %" PRIu32 " +%" PRIu32
134- " height padding: "
184+ " inefficiency in %s with %" PRIu32 " x%" PRIu32 " kernel and %"
185+ PRIu32 " +%" PRIu32 " height padding: "
135186 " input top padding is greater or equal to kernel height" ,
187+ _name,
136188 kernel_width,
137189 kernel_height,
138190 input_padding_top,
@@ -141,10 +193,10 @@ struct conv_param_t {
141193
142194 if (input_padding_bottom >= kernel_height) {
143195 pytorch_qnnp_log_info (
144- " inefficiency in convolution with %" PRIu32 " x%" PRIu32
145- " kernel and %" PRIu32 " +%" PRIu32
146- " height padding: "
196+ " inefficiency in %s with %" PRIu32 " x%" PRIu32 " kernel and %"
197+ PRIu32 " +%" PRIu32 " height padding: "
147198 " input bottom padding is greater or equal to kernel height" ,
199+ _name,
148200 kernel_width,
149201 kernel_height,
150202 input_padding_top,
@@ -153,10 +205,10 @@ struct conv_param_t {
153205
154206 if (input_padding_right >= kernel_width) {
155207 pytorch_qnnp_log_info (
156- " inefficiency in convolution with %" PRIu32 " x%" PRIu32
157- " kernel and %" PRIu32 " +%" PRIu32
158- " width padding: "
208+ " inefficiency in %s with %" PRIu32 " x%" PRIu32 " kernel and %"
209+ PRIu32 " +%" PRIu32 " width padding: "
159210 " input right padding is greater or equal to kernel width" ,
211+ _name,
160212 kernel_width,
161213 kernel_height,
162214 input_padding_left,
@@ -165,30 +217,55 @@ struct conv_param_t {
165217
166218 if (input_padding_left >= kernel_width) {
167219 pytorch_qnnp_log_info (
168- " inefficiency in convolution with %" PRIu32 " x%" PRIu32
169- " kernel and %" PRIu32 " +%" PRIu32
170- " width padding: "
220+ " inefficiency in %s with %" PRIu32 " x%" PRIu32 " kernel and %"
221+ PRIu32 " +%" PRIu32 " width padding: "
171222 " input left padding is greater or equal to kernel width" ,
223+ _name,
172224 kernel_width,
173225 kernel_height,
174226 input_padding_left,
175227 input_padding_right);
176228 }
177229
178230 const size_t kernel_size = kernel_height * kernel_width;
179-
180- ukernel_type = pytorch_qnnp_ukernel_type_none;
181- const bool any_padding = (input_padding_left | input_padding_top
182- | input_padding_right | input_padding_bottom) != 0 ;
183-
184- if ((kernel_size == 9 || kernel_size == 25 ) &&
185- group_input_channels == 1 && group_output_channels == 1 && groups > 1 ) {
186- ukernel_type = pytorch_qnnp_ukernel_type_dwconv;
187- } else if (kernel_size == 1 && subsampling_dims[1 ] == 1 && subsampling_dims[0 ] == 1 && !any_padding) {
188- ukernel_type = group_input_channels >= SIZE_MAX ? pytorch_qnnp_ukernel_type_xzp_gemm : pytorch_qnnp_ukernel_type_gemm;
189- } else {
231+ if (transpose) {
190232 ukernel_type = pytorch_qnnp_ukernel_type_conv;
233+ } else {
234+ ukernel_type = pytorch_qnnp_ukernel_type_none;
235+ const bool any_padding = (input_padding_left | input_padding_top
236+ | input_padding_right | input_padding_bottom) != 0 ;
237+
238+ if ((kernel_size == 9 || kernel_size == 25 ) &&
239+ group_input_channels == 1 && group_output_channels == 1 && groups > 1 ) {
240+ ukernel_type = pytorch_qnnp_ukernel_type_dwconv;
241+ } else if (kernel_size == 1 && stride_dims[1 ] == 1 && stride_dims[0 ] == 1 && !any_padding) {
242+ ukernel_type = group_input_channels >= SIZE_MAX ? pytorch_qnnp_ukernel_type_xzp_gemm : pytorch_qnnp_ukernel_type_gemm;
243+ } else {
244+ ukernel_type = pytorch_qnnp_ukernel_type_conv;
245+ }
191246 }
192247 }
248+
249+ /* *
250+ * @brief Computes the output dimensions given a 2D input.
251+ */
252+ std::array<size_t , 2 > compute_output_dims (std::array<size_t , 2 > input_dims) const {
253+ std::array<size_t , 2 > output_dims;
254+ output_dims[0 ] = compute_output_dimension (input_dims[0 ], // width
255+ padding[1 ] + padding[3 ],
256+ adjustment_dims[0 ],
257+ kernel_dims[0 ],
258+ dilation[0 ],
259+ stride_dims[0 ],
260+ transpose);
261+ output_dims[1 ] = compute_output_dimension (input_dims[1 ], // height
262+ padding[0 ] + padding[2 ],
263+ adjustment_dims[1 ],
264+ kernel_dims[1 ],
265+ dilation[1 ],
266+ stride_dims[1 ],
267+ transpose);
268+ return output_dims;
269+ }
193270};
194271} // namespace qnnpack