Skip to content

Commit e841bea

Browse files
Zafar authored and facebook-github-bot committed
[quant] QNNPACK Add deconvolution parameters (#36716)
Summary: Pull Request resolved: #36716 Test Plan: Imported from OSS Differential Revision: D21110112 Pulled By: z-a-f fbshipit-source-id: 4b62e1bb3c3b6a3276bc5f8ee5ead0f513ec0137
1 parent 5efd105 commit e841bea

5 files changed

Lines changed: 173 additions & 106 deletions

File tree

aten/src/ATen/native/quantized/cpu/qconv.cpp

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -555,13 +555,15 @@ class QConvInt8 final {
555555
{stride_w, stride_h},
556556
{dilation_w, dilation_h},
557557
{pad_h, pad_w, pad_h, pad_w},
558+
/*adjustment=*/{0, 0},
558559
groups,
559560
C,
560561
M,
561562
kernel_zp,
562563
kernel_scale,
563564
output_min,
564-
output_max);
565+
output_max,
566+
/*transpose=*/false);
565567

566568
auto input_scale = act_nhwc.q_scale();
567569

aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -272,13 +272,15 @@ class QConvPackWeightInt8 final {
272272
{stride_w, stride_h},
273273
{dilation_w, dilation_h},
274274
{pad_t, pad_l, pad_t, pad_l},
275+
/*adjustment=*/{0, 0},
275276
groups,
276277
in_ch,
277278
out_ch,
278279
weight.q_zero_point(),
279280
weight.q_scale(),
280281
std::numeric_limits<uint8_t>::min(),
281-
std::numeric_limits<uint8_t>::max());
282+
std::numeric_limits<uint8_t>::max(),
283+
/*transpose=*/false);
282284

283285
auto weight_contig = weight.contiguous(MemoryFormat::ChannelsLast);
284286
auto weight_zp = weight.q_zero_point();

aten/src/ATen/native/quantized/cpu/qnnpack/include/conv_utils.h

Lines changed: 152 additions & 75 deletions
Original file line number · Diff line number · Diff line change
@@ -9,130 +9,182 @@
99
#include <qnnpack/operator.h>
1010

1111
namespace qnnpack {
12+
13+
namespace {
14+
static inline size_t compute_output_dimension(
15+
size_t input_dim, // Input dimension
16+
size_t pad_dim, // Input padding
17+
size_t adjustment_dim, // Adjustment to the output dimension
18+
size_t kernel_dim, // Kernel dimension
19+
size_t dilation_dim, // Dilation dimension
20+
size_t stride_dim, // Stride or subsampling dimension
21+
bool transpose) { // Transposed convolution
22+
kernel_dim = (kernel_dim - 1) * dilation_dim + 1; // Effective kernel dim
23+
if (transpose) {
24+
return stride_dim * (input_dim - 1) + adjustment_dim + kernel_dim - pad_dim;
25+
} else {
26+
return (input_dim + pad_dim - kernel_dim) / stride_dim + 1;
27+
}
28+
}
29+
} // namespace
30+
1231
struct conv_param_t {
13-
const std::array<uint32_t, 2> kernel_dims; // kernel width, kernel height
14-
const std::array<uint32_t, 2> subsampling_dims; // subsampling width, height
32+
const std::array<uint32_t, 2> kernel_dims; // kernel width, height
33+
const std::array<uint32_t, 2> stride_dims; // subsampling width, height
1534
const std::array<uint32_t, 2> dilation; // dilation width, height
16-
const std::array<uint32_t, 4> pad; // input padding top, left, bottom, right
35+
const std::array<uint32_t, 4> padding; // input padding top, left, bottom, right
36+
const std::array<uint32_t, 2> adjustment_dims; // output adjustment
37+
1738
const uint32_t groups;
1839
const size_t input_channels;
1940
const size_t output_channels;
2041
const uint8_t kernel_zero_point;
2142
const float kernel_scale;
2243
const uint8_t output_min;
2344
const uint8_t output_max;
45+
const bool transpose;
2446

2547
// The following are derived parameters
2648
enum pytorch_qnnp_ukernel_type ukernel_type; // kernel type based on input params
2749
size_t group_input_channels;
2850
size_t group_output_channels;
2951

3052
/**
31-
* @brief Constructor for initializing the convolution parameters.
53+
* @brief Constructor for initializing the convolution/deconvolution
54+
* parameters.
3255
*/
33-
conv_param_t(
34-
const std::array<uint32_t, 2> kernel,
35-
const std::array<uint32_t, 2> subsampling,
36-
const std::array<uint32_t, 2> dil,
37-
const std::array<uint32_t, 4> pd,
38-
const uint32_t grp,
39-
const size_t in_ch,
40-
const size_t out_ch,
41-
const uint8_t kernel_zp,
42-
const float kernel_s,
43-
const uint8_t out_min,
44-
const uint8_t out_max)
45-
: kernel_dims(kernel),
46-
subsampling_dims(subsampling),
47-
dilation(dil),
48-
pad(pd),
49-
groups(grp),
50-
input_channels(in_ch),
51-
output_channels(out_ch),
52-
kernel_zero_point(kernel_zp),
53-
kernel_scale(kernel_s),
54-
output_min(out_min),
55-
output_max(out_max) {
56+
conv_param_t(const std::array<uint32_t, 2> kernel_,
57+
const std::array<uint32_t, 2> stride_,
58+
const std::array<uint32_t, 2> dilation_,
59+
const std::array<uint32_t, 4> padding_,
60+
const std::array<uint32_t, 2> adjustment_,
61+
const uint32_t groups_,
62+
const size_t input_channels_,
63+
const size_t output_channels_,
64+
const uint8_t kernel_zp_,
65+
const float kernel_stride_,
66+
const uint8_t out_min_,
67+
const uint8_t out_max_,
68+
const bool transpose_)
69+
: kernel_dims(kernel_),
70+
stride_dims(stride_),
71+
dilation(dilation_),
72+
padding(padding_),
73+
adjustment_dims(adjustment_),
74+
groups(groups_),
75+
input_channels(input_channels_),
76+
output_channels(output_channels_),
77+
kernel_zero_point(kernel_zp_),
78+
kernel_scale(kernel_stride_),
79+
output_min(out_min_),
80+
output_max(out_max_),
81+
transpose(transpose_) {
5682
const uint32_t kernel_width = kernel_dims[0];
5783
const uint32_t kernel_height = kernel_dims[1];
5884

59-
const uint32_t input_padding_top = pad[0];
60-
const uint32_t input_padding_left = pad[1];
61-
const uint32_t input_padding_bottom = pad[2];
62-
const uint32_t input_padding_right = pad[3];
85+
const uint32_t input_padding_top = padding[0];
86+
const uint32_t input_padding_left = padding[1];
87+
const uint32_t input_padding_bottom = padding[2];
88+
const uint32_t input_padding_right = padding[3];
89+
90+
const char* _name;
91+
if (transpose) {
92+
_name = "deconvolution\0";
93+
} else {
94+
_name = "convolution\0";
95+
}
96+
97+
if (groups == 0) {
98+
pytorch_qnnp_log_error(
99+
"failed to create %s with groups equal to zero.", _name);
100+
assert("Failed to initialize QNNPACK conv_param_t struct.");
101+
}
102+
103+
if (input_channels % groups != 0 || output_channels % groups != 0) {
104+
pytorch_qnnp_log_error(
105+
"failed to create %s: input and output channels must be divisible by"
106+
" groups.", _name);
107+
assert("Failed to initialize QNNPACK conv_param_t struct.");
108+
}
63109

64110
group_input_channels = input_channels / groups;
65111
group_output_channels = output_channels / groups;
66112

67113
if (kernel_width == 0 || kernel_height == 0) {
68114
pytorch_qnnp_log_error(
69-
"failed to create convolution with %" PRIu32 "x%" PRIu32
115+
"failed to create %s with %" PRIu32 "x%" PRIu32
70116
" kernel: kernel dimensions must be non-zero",
117+
_name,
71118
kernel_width,
72119
kernel_height);
73120
assert("Failed to initialize QNNPACK conv_param_t struct.");
74121
}
75122

76-
if (subsampling_dims[0] == 0 || subsampling_dims[1] == 0) {
123+
if (stride_dims[0] == 0 || stride_dims[1] == 0) {
77124
pytorch_qnnp_log_error(
78-
"failed to create convolution with %" PRIu32 "x%" PRIu32
125+
"failed to create %s with %" PRIu32 "x%" PRIu32
79126
" subsampling: "
80127
"subsampling dimensions must be non-zero",
81-
subsampling_dims[0],
82-
subsampling_dims[1]);
128+
_name,
129+
stride_dims[0],
130+
stride_dims[1]);
83131
assert("Failed to initialize QNNPACK conv_param_t struct.");
84132
}
85133

86134
if (dilation[0] == 0 || dilation[1] == 0) {
87135
pytorch_qnnp_log_error(
88-
"failed to create convolution with %" PRIu32 "x%" PRIu32
136+
"failed to create %s with %" PRIu32 "x%" PRIu32
89137
" dilation: "
90138
"dilation dimensions must be non-zero",
139+
_name,
91140
dilation[0],
92141
dilation[1]);
93142
assert("Failed to initialize QNNPACK conv_param_t struct.");
94143
}
95144

96145
if (kernel_scale <= 0.0f || !std::isnormal(kernel_scale)) {
97146
pytorch_qnnp_log_error(
98-
"failed to create convolution with %.7g kernel scale: scale must be"
147+
"failed to create %s with %.7g kernel scale: scale must be"
99148
"finite and positive",
149+
_name,
100150
kernel_scale);
101151
assert("Failed to initialize QNNPACK conv_param_t struct.");
102152
}
103153

104-
if (subsampling_dims[1] > kernel_height) {
154+
if (stride_dims[1] > kernel_height) {
105155
pytorch_qnnp_log_info(
106-
"inefficiency in convolution with %" PRIu32 "x%" PRIu32
107-
" kernel and %" PRIu32 "x%" PRIu32
108-
" subsampling: "
156+
"inefficiency in %s with %" PRIu32 "x%" PRIu32 " kernel and %"
157+
PRIu32 "x%" PRIu32 " subsampling: "
109158
"height subsampling is greater than kernel height; subsampling should"
110-
" be performed before the convolution",
159+
" be performed before the %s",
160+
_name,
111161
kernel_width,
112162
kernel_height,
113-
subsampling_dims[0],
114-
subsampling_dims[1]);
163+
stride_dims[0],
164+
stride_dims[1],
165+
_name);
115166
}
116167

117-
if (subsampling_dims[0] > kernel_width) {
168+
if (stride_dims[0] > kernel_width) {
118169
pytorch_qnnp_log_info(
119-
"inefficiency in convolution with %" PRIu32 "x%" PRIu32
120-
" kernel and %" PRIu32 "x%" PRIu32
121-
" subsampling: "
170+
"inefficiency in %s with %" PRIu32 "x%" PRIu32 " kernel and %"
171+
PRIu32 "x%" PRIu32 " subsampling: "
122172
"width subsampling is greater than kernel width; subsampling should"
123-
" be performed before the convolution",
173+
" be performed before the %s",
174+
_name,
124175
kernel_width,
125176
kernel_height,
126-
subsampling_dims[0],
127-
subsampling_dims[1]);
177+
stride_dims[0],
178+
stride_dims[1],
179+
_name);
128180
}
129181

130182
if (input_padding_top >= kernel_height) {
131183
pytorch_qnnp_log_info(
132-
"inefficiency in convolution with %" PRIu32 "x%" PRIu32
133-
" kernel and %" PRIu32 "+%" PRIu32
134-
" height padding: "
184+
"inefficiency in %s with %" PRIu32 "x%" PRIu32 " kernel and %"
185+
PRIu32 "+%" PRIu32 " height padding: "
135186
"input top padding is greater or equal to kernel height",
187+
_name,
136188
kernel_width,
137189
kernel_height,
138190
input_padding_top,
@@ -141,10 +193,10 @@ struct conv_param_t {
141193

142194
if (input_padding_bottom >= kernel_height) {
143195
pytorch_qnnp_log_info(
144-
"inefficiency in convolution with %" PRIu32 "x%" PRIu32
145-
" kernel and %" PRIu32 "+%" PRIu32
146-
" height padding: "
196+
"inefficiency in %s with %" PRIu32 "x%" PRIu32 " kernel and %"
197+
PRIu32 "+%" PRIu32 " height padding: "
147198
"input bottom padding is greater or equal to kernel height",
199+
_name,
148200
kernel_width,
149201
kernel_height,
150202
input_padding_top,
@@ -153,10 +205,10 @@ struct conv_param_t {
153205

154206
if (input_padding_right >= kernel_width) {
155207
pytorch_qnnp_log_info(
156-
"inefficiency in convolution with %" PRIu32 "x%" PRIu32
157-
" kernel and %" PRIu32 "+%" PRIu32
158-
" width padding: "
208+
"inefficiency in %s with %" PRIu32 "x%" PRIu32 " kernel and %"
209+
PRIu32 "+%" PRIu32 " width padding: "
159210
"input right padding is greater or equal to kernel width",
211+
_name,
160212
kernel_width,
161213
kernel_height,
162214
input_padding_left,
@@ -165,30 +217,55 @@ struct conv_param_t {
165217

166218
if (input_padding_left >= kernel_width) {
167219
pytorch_qnnp_log_info(
168-
"inefficiency in convolution with %" PRIu32 "x%" PRIu32
169-
" kernel and %" PRIu32 "+%" PRIu32
170-
" width padding: "
220+
"inefficiency in %s with %" PRIu32 "x%" PRIu32 " kernel and %"
221+
PRIu32 "+%" PRIu32 " width padding: "
171222
"input left padding is greater or equal to kernel width",
223+
_name,
172224
kernel_width,
173225
kernel_height,
174226
input_padding_left,
175227
input_padding_right);
176228
}
177229

178230
const size_t kernel_size = kernel_height * kernel_width;
179-
180-
ukernel_type = pytorch_qnnp_ukernel_type_none;
181-
const bool any_padding = (input_padding_left | input_padding_top
182-
| input_padding_right | input_padding_bottom) != 0;
183-
184-
if ((kernel_size == 9 || kernel_size == 25) &&
185-
group_input_channels == 1 && group_output_channels == 1 && groups > 1) {
186-
ukernel_type = pytorch_qnnp_ukernel_type_dwconv;
187-
} else if (kernel_size == 1 && subsampling_dims[1] == 1 && subsampling_dims[0] == 1 && !any_padding) {
188-
ukernel_type = group_input_channels >= SIZE_MAX ? pytorch_qnnp_ukernel_type_xzp_gemm : pytorch_qnnp_ukernel_type_gemm;
189-
} else {
231+
if (transpose) {
190232
ukernel_type = pytorch_qnnp_ukernel_type_conv;
233+
} else {
234+
ukernel_type = pytorch_qnnp_ukernel_type_none;
235+
const bool any_padding = (input_padding_left | input_padding_top
236+
| input_padding_right | input_padding_bottom) != 0;
237+
238+
if ((kernel_size == 9 || kernel_size == 25) &&
239+
group_input_channels == 1 && group_output_channels == 1 && groups > 1) {
240+
ukernel_type = pytorch_qnnp_ukernel_type_dwconv;
241+
} else if (kernel_size == 1 && stride_dims[1] == 1 && stride_dims[0] == 1 && !any_padding) {
242+
ukernel_type = group_input_channels >= SIZE_MAX ? pytorch_qnnp_ukernel_type_xzp_gemm : pytorch_qnnp_ukernel_type_gemm;
243+
} else {
244+
ukernel_type = pytorch_qnnp_ukernel_type_conv;
245+
}
191246
}
192247
}
248+
249+
/**
250+
* @brief Computes the output dimensions given a 2D input.
251+
*/
252+
std::array<size_t, 2> compute_output_dims(std::array<size_t, 2> input_dims) const {
253+
std::array<size_t, 2> output_dims;
254+
output_dims[0] = compute_output_dimension(input_dims[0], // width
255+
padding[1] + padding[3],
256+
adjustment_dims[0],
257+
kernel_dims[0],
258+
dilation[0],
259+
stride_dims[0],
260+
transpose);
261+
output_dims[1] = compute_output_dimension(input_dims[1], // height
262+
padding[0] + padding[2],
263+
adjustment_dims[1],
264+
kernel_dims[1],
265+
dilation[1],
266+
stride_dims[1],
267+
transpose);
268+
return output_dims;
269+
}
193270
};
194271
} // namespace qnnpack

0 commit comments

Comments (0)