[cpp] elena cuda
Viewer
*** This page was generated with the meta tag "noindex, nofollow". This happened because you selected this option before saving or the system detected it as spam. This means that this page will never get into the search engines and the search bot will not crawl it. There is nothing to worry about, you can still share it with anyone.
- #include <iostream>
- #include <cstdio>
- #include <cstdlib>
- #include <cstring>
// True when the two C strings compare equal with strcmp().
#define EQUAL(a,b) (strcmp((a),(b))==0)
// Prints a highlighted "[Fatal]" record (file, function, line, message) to
// std::cerr and terminates the process with std::abort().
// `Msg` is spliced into the stream expression as-is, so callers may pass a
// chain such as  "text " << value ; for that reason it must NOT be wrapped in
// parentheses here.
// The brace-block form is kept (rather than do/while(0)) so that call sites
// written without a trailing semicolon keep compiling.
#define ABORT(Msg) \
{ \
std::cerr << ": \033[1;91m" \
<< "[Fatal]" \
<< "\033[m " << __FILE__ << ": " << __FUNCTION__ << ": Line " \
<< __LINE__ << ": " << Msg << std::endl; \
std::abort(); \
}
- #include "elena_int.h"
- #include <cuda_runtime.h>
// Aborts through ABORT() with the CUDA error string when `res` is not
// cudaSuccess.
// `res` is evaluated exactly once: passing a CUDA API call directly, e.g.
// cuErrCheck(cudaMalloc(...)), must not issue the call a second time while
// the error message is being built.
// The brace-block form is kept so that existing call sites written without a
// trailing semicolon keep compiling.
#define cuErrCheck(res) \
{ \
const cudaError_t cu_err_check_status_ = (res); \
if (cu_err_check_status_ != cudaSuccess) \
ABORT("cuda assert: " << cudaGetErrorString(cu_err_check_status_)); \
}
- #define BLOCK_SIZE 128
- #include "elena_registry.h"
- namespace cuda_fa993066c1ec32a14d8d06ecfb8d5c54e7a04ec31ba833e1abf1f16c05e027c7 {
- __global__ void BGR_Bilinear_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, int16_t* __restrict__ cubfh, int16_t* __restrict__ cubfw, int32_t* __restrict__ inth, int32_t* __restrict__ intw, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float BGR_mean[3];
- for (uint64_t iter3 = 0; iter3 < 0 + 3; ++iter3) {
- if ((iter3 < ((0) + (3)))) {
- BGR_std[((iter3) - (0))] = ((iter3 == 0) ? norm_std_0 : ((iter3 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter2 = 0; iter2 < 0 + 3; ++iter2) {
- if ((iter2 < ((0) + (3)))) {
- BGR_mean[((iter2) - (0))] = ((iter2 == 0) ? norm_mean_0 : ((iter2 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter19_iter20_fused_iter21_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter21 = (((iter19_iter20_fused_iter21_fused % 3)) + (0));
- const uint64_t iter19_iter20_fused = ((((iter19_iter20_fused_iter21_fused) / (3))) + (0));
- const uint64_t iter20 = (((iter19_iter20_fused % crop_w)) + (0));
- const uint64_t iter19 = ((((iter19_iter20_fused) / (crop_w))) + (0));
- if ((iter19 < ((0) + (crop_h)))) {
- if ((iter20 < ((0) + (crop_w)))) {
- if ((iter21 < ((0) + (3)))) {
- HWC2CHW[((((((iter20) - (0))) + (((crop_w) * (((iter19) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter21) - (0))))))] = (((((float)(((((((((((((((cubfh[((((iter19) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter20) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((((2) - (iter21))) + (((3) * (intw[((((iter20) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((3) * (w))) * (inth[((((iter19) + (crop_top))) + (((resize_h) * (0))))]))))]))) + (((((cubfh[((((iter19) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter20) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((((2) - (iter21))) + (((3) * (intw[((((iter20) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((3) * (w))) * (inth[((((iter19) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (((((cubfh[((((iter19) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter20) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((((2) - (iter21))) + (((3) * (intw[((((iter20) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((3) * (w))) * (inth[((((iter19) + (crop_top))) + (((resize_h) * (0))))]))))]))))) + (((((cubfh[((((iter19) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter20) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((((2) - (iter21))) + (((3) * (intw[((((iter20) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((3) * (w))) * (inth[((((iter19) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (2097152))) / (4194304)))) - (BGR_mean[((iter21) - (0))]))) / (BGR_std[((iter21) - (0))]));
- }
- }
- }
- }
- __global__ void BGR_Nearest_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float scale[2];
- float BGR_mean[3];
- for (uint64_t iter28 = 0; iter28 < 0 + 3; ++iter28) {
- if ((iter28 < ((0) + (3)))) {
- BGR_std[((iter28) - (0))] = ((iter28 == 0) ? norm_std_0 : ((iter28 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter32 = 0; iter32 < 0 + 2; ++iter32) {
- if ((iter32 < ((0) + (2)))) {
- scale[((iter32) - (0))] = ((iter32 == 0) ? (((float)(h)) / ((float)(resize_h))) : (((float)(w)) / ((float)(resize_w))));
- }
- }
- for (uint64_t iter27 = 0; iter27 < 0 + 3; ++iter27) {
- if ((iter27 < ((0) + (3)))) {
- BGR_mean[((iter27) - (0))] = ((iter27 == 0) ? norm_mean_0 : ((iter27 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter45_iter46_fused_iter47_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter47 = (((iter45_iter46_fused_iter47_fused % 3)) + (0));
- const uint64_t iter45_iter46_fused = ((((iter45_iter46_fused_iter47_fused) / (3))) + (0));
- const uint64_t iter46 = (((iter45_iter46_fused % crop_w)) + (0));
- const uint64_t iter45 = ((((iter45_iter46_fused) / (crop_w))) + (0));
- if ((iter45 < ((0) + (crop_h)))) {
- if ((iter46 < ((0) + (crop_w)))) {
- if ((iter47 < ((0) + (3)))) {
- HWC2CHW[((((((iter46) - (0))) + (((crop_w) * (((iter45) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter47) - (0))))))] = (((((float)(input[((((((2) - (iter47))) + (((3) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter46) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))))))) + (((((3) * (w))) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter45) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))))))])) - (BGR_mean[((iter47) - (0))]))) / (BGR_std[((iter47) - (0))]));
- }
- }
- }
- }
- __global__ void RGB_Bilinear_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, int16_t* __restrict__ cubfh, int16_t* __restrict__ cubfw, int32_t* __restrict__ inth, int32_t* __restrict__ intw, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float BGR_mean[3];
- for (uint64_t iter54 = 0; iter54 < 0 + 3; ++iter54) {
- if ((iter54 < ((0) + (3)))) {
- BGR_std[((iter54) - (0))] = ((iter54 == 0) ? norm_std_0 : ((iter54 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter53 = 0; iter53 < 0 + 3; ++iter53) {
- if ((iter53 < ((0) + (3)))) {
- BGR_mean[((iter53) - (0))] = ((iter53 == 0) ? norm_mean_0 : ((iter53 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter73_iter74_fused_iter75_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter75 = (((iter73_iter74_fused_iter75_fused % 3)) + (0));
- const uint64_t iter73_iter74_fused = ((((iter73_iter74_fused_iter75_fused) / (3))) + (0));
- const uint64_t iter74 = (((iter73_iter74_fused % crop_w)) + (0));
- const uint64_t iter73 = ((((iter73_iter74_fused) / (crop_w))) + (0));
- if ((iter73 < ((0) + (crop_h)))) {
- if ((iter74 < ((0) + (crop_w)))) {
- if ((iter75 < ((0) + (3)))) {
- HWC2CHW[((((((iter74) - (0))) + (((crop_w) * (((iter73) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter75) - (0))))))] = (((((float)(((((((((((((((cubfh[((((iter73) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter74) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((((2) - (((2) - (iter75))))) + (((3) * (intw[((((iter74) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((3) * (w))) * (inth[((((iter73) + (crop_top))) + (((resize_h) * (0))))]))))]))) + (((((cubfh[((((iter73) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter74) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((((2) - (((2) - (iter75))))) + (((3) * (intw[((((iter74) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((3) * (w))) * (inth[((((iter73) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (((((cubfh[((((iter73) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter74) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((((2) - (((2) - (iter75))))) + (((3) * (intw[((((iter74) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((3) * (w))) * (inth[((((iter73) + (crop_top))) + (((resize_h) * (0))))]))))]))))) + (((((cubfh[((((iter73) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter74) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((((2) - (((2) - (iter75))))) + (((3) * (intw[((((iter74) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((3) * (w))) * (inth[((((iter73) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (2097152))) / (4194304)))) - (BGR_mean[((iter75) - (0))]))) / (BGR_std[((iter75) - (0))]));
- }
- }
- }
- }
- __global__ void RGB_Nearest_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float scale[2];
- float BGR_mean[3];
- for (uint64_t iter82 = 0; iter82 < 0 + 3; ++iter82) {
- if ((iter82 < ((0) + (3)))) {
- BGR_std[((iter82) - (0))] = ((iter82 == 0) ? norm_std_0 : ((iter82 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter89 = 0; iter89 < 0 + 2; ++iter89) {
- if ((iter89 < ((0) + (2)))) {
- scale[((iter89) - (0))] = ((iter89 == 0) ? (((float)(h)) / ((float)(resize_h))) : (((float)(w)) / ((float)(resize_w))));
- }
- }
- for (uint64_t iter81 = 0; iter81 < 0 + 3; ++iter81) {
- if ((iter81 < ((0) + (3)))) {
- BGR_mean[((iter81) - (0))] = ((iter81 == 0) ? norm_mean_0 : ((iter81 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter102_iter103_fused_iter104_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter104 = (((iter102_iter103_fused_iter104_fused % 3)) + (0));
- const uint64_t iter102_iter103_fused = ((((iter102_iter103_fused_iter104_fused) / (3))) + (0));
- const uint64_t iter103 = (((iter102_iter103_fused % crop_w)) + (0));
- const uint64_t iter102 = ((((iter102_iter103_fused) / (crop_w))) + (0));
- if ((iter102 < ((0) + (crop_h)))) {
- if ((iter103 < ((0) + (crop_w)))) {
- if ((iter104 < ((0) + (3)))) {
- HWC2CHW[((((((iter103) - (0))) + (((crop_w) * (((iter102) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter104) - (0))))))] = (((((float)(input[((((((2) - (((2) - (iter104))))) + (((3) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter103) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))))))) + (((((3) * (w))) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter102) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))))))])) - (BGR_mean[((iter104) - (0))]))) / (BGR_std[((iter104) - (0))]));
- }
- }
- }
- }
- __global__ void GRAY_Bilinear_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, int16_t* __restrict__ cubfh, int16_t* __restrict__ cubfw, int32_t* __restrict__ inth, int32_t* __restrict__ intw, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float BGR_mean[3];
- for (uint64_t iter111 = 0; iter111 < 0 + 3; ++iter111) {
- if ((iter111 < ((0) + (3)))) {
- BGR_std[((iter111) - (0))] = ((iter111 == 0) ? norm_std_0 : ((iter111 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter110 = 0; iter110 < 0 + 3; ++iter110) {
- if ((iter110 < ((0) + (3)))) {
- BGR_mean[((iter110) - (0))] = ((iter110 == 0) ? norm_mean_0 : ((iter110 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter130_iter131_fused_iter132_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter132 = (((iter130_iter131_fused_iter132_fused % 3)) + (0));
- const uint64_t iter130_iter131_fused = ((((iter130_iter131_fused_iter132_fused) / (3))) + (0));
- const uint64_t iter131 = (((iter130_iter131_fused % crop_w)) + (0));
- const uint64_t iter130 = ((((iter130_iter131_fused) / (crop_w))) + (0));
- if ((iter130 < ((0) + (crop_h)))) {
- if ((iter131 < ((0) + (crop_w)))) {
- if ((iter132 < ((0) + (3)))) {
- HWC2CHW[((((((iter131) - (0))) + (((crop_w) * (((iter130) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter132) - (0))))))] = (((((float)(((((((((((((((cubfh[((((iter130) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter131) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((0) + (((1) * (intw[((((iter131) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((1) * (w))) * (inth[((((iter130) + (crop_top))) + (((resize_h) * (0))))]))))]))) + (((((cubfh[((((iter130) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter131) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((0) + (((1) * (intw[((((iter131) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((1) * (w))) * (inth[((((iter130) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (((((cubfh[((((iter130) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter131) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((0) + (((1) * (intw[((((iter131) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((1) * (w))) * (inth[((((iter130) + (crop_top))) + (((resize_h) * (0))))]))))]))))) + (((((cubfh[((((iter130) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter131) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((0) + (((1) * (intw[((((iter131) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((1) * (w))) * (inth[((((iter130) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (2097152))) / (4194304)))) - (BGR_mean[((iter132) - (0))]))) / (BGR_std[((iter132) - (0))]));
- }
- }
- }
- }
- __global__ void GRAY_Nearest_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float scale[2];
- float BGR_mean[3];
- for (uint64_t iter139 = 0; iter139 < 0 + 3; ++iter139) {
- if ((iter139 < ((0) + (3)))) {
- BGR_std[((iter139) - (0))] = ((iter139 == 0) ? norm_std_0 : ((iter139 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter146 = 0; iter146 < 0 + 2; ++iter146) {
- if ((iter146 < ((0) + (2)))) {
- scale[((iter146) - (0))] = ((iter146 == 0) ? (((float)(h)) / ((float)(resize_h))) : (((float)(w)) / ((float)(resize_w))));
- }
- }
- for (uint64_t iter138 = 0; iter138 < 0 + 3; ++iter138) {
- if ((iter138 < ((0) + (3)))) {
- BGR_mean[((iter138) - (0))] = ((iter138 == 0) ? norm_mean_0 : ((iter138 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter159_iter160_fused_iter161_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter161 = (((iter159_iter160_fused_iter161_fused % 3)) + (0));
- const uint64_t iter159_iter160_fused = ((((iter159_iter160_fused_iter161_fused) / (3))) + (0));
- const uint64_t iter160 = (((iter159_iter160_fused % crop_w)) + (0));
- const uint64_t iter159 = ((((iter159_iter160_fused) / (crop_w))) + (0));
- if ((iter159 < ((0) + (crop_h)))) {
- if ((iter160 < ((0) + (crop_w)))) {
- if ((iter161 < ((0) + (3)))) {
- HWC2CHW[((((((iter160) - (0))) + (((crop_w) * (((iter159) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter161) - (0))))))] = (((((float)(input[((((0) + (((1) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter160) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))))))) + (((((1) * (w))) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter159) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))))))])) - (BGR_mean[((iter161) - (0))]))) / (BGR_std[((iter161) - (0))]));
- }
- }
- }
- }
- __global__ void BGRA_Bilinear_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, int16_t* __restrict__ cubfh, int16_t* __restrict__ cubfw, int32_t* __restrict__ inth, int32_t* __restrict__ intw, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float BGR_mean[3];
- for (uint64_t iter168 = 0; iter168 < 0 + 3; ++iter168) {
- if ((iter168 < ((0) + (3)))) {
- BGR_std[((iter168) - (0))] = ((iter168 == 0) ? norm_std_0 : ((iter168 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter167 = 0; iter167 < 0 + 3; ++iter167) {
- if ((iter167 < ((0) + (3)))) {
- BGR_mean[((iter167) - (0))] = ((iter167 == 0) ? norm_mean_0 : ((iter167 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter187_iter188_fused_iter189_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter189 = (((iter187_iter188_fused_iter189_fused % 3)) + (0));
- const uint64_t iter187_iter188_fused = ((((iter187_iter188_fused_iter189_fused) / (3))) + (0));
- const uint64_t iter188 = (((iter187_iter188_fused % crop_w)) + (0));
- const uint64_t iter187 = ((((iter187_iter188_fused) / (crop_w))) + (0));
- if ((iter187 < ((0) + (crop_h)))) {
- if ((iter188 < ((0) + (crop_w)))) {
- if ((iter189 < ((0) + (3)))) {
- HWC2CHW[((((((iter188) - (0))) + (((crop_w) * (((iter187) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter189) - (0))))))] = (((((float)(((((((((((((((cubfh[((((iter187) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter188) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((((2) - (iter189))) + (((4) * (intw[((((iter188) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((4) * (w))) * (inth[((((iter187) + (crop_top))) + (((resize_h) * (0))))]))))]))) + (((((cubfh[((((iter187) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter188) + (crop_left))) + (((resize_w) * (0))))]))) * (input[((((((2) - (iter189))) + (((4) * (intw[((((iter188) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((4) * (w))) * (inth[((((iter187) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (((((cubfh[((((iter187) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter188) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((((2) - (iter189))) + (((4) * (intw[((((iter188) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((4) * (w))) * (inth[((((iter187) + (crop_top))) + (((resize_h) * (0))))]))))]))))) + (((((cubfh[((((iter187) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter188) + (crop_left))) + (((resize_w) * (1))))]))) * (input[((((((2) - (iter189))) + (((4) * (intw[((((iter188) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((4) * (w))) * (inth[((((iter187) + (crop_top))) + (((resize_h) * (1))))]))))]))))) + (2097152))) / (4194304)))) - (BGR_mean[((iter189) - (0))]))) / (BGR_std[((iter189) - (0))]));
- }
- }
- }
- }
- __global__ void BGRA_Nearest_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float scale[2];
- float BGR_mean[3];
- for (uint64_t iter196 = 0; iter196 < 0 + 3; ++iter196) {
- if ((iter196 < ((0) + (3)))) {
- BGR_std[((iter196) - (0))] = ((iter196 == 0) ? norm_std_0 : ((iter196 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter203 = 0; iter203 < 0 + 2; ++iter203) {
- if ((iter203 < ((0) + (2)))) {
- scale[((iter203) - (0))] = ((iter203 == 0) ? (((float)(h)) / ((float)(resize_h))) : (((float)(w)) / ((float)(resize_w))));
- }
- }
- for (uint64_t iter195 = 0; iter195 < 0 + 3; ++iter195) {
- if ((iter195 < ((0) + (3)))) {
- BGR_mean[((iter195) - (0))] = ((iter195 == 0) ? norm_mean_0 : ((iter195 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter216_iter217_fused_iter218_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter218 = (((iter216_iter217_fused_iter218_fused % 3)) + (0));
- const uint64_t iter216_iter217_fused = ((((iter216_iter217_fused_iter218_fused) / (3))) + (0));
- const uint64_t iter217 = (((iter216_iter217_fused % crop_w)) + (0));
- const uint64_t iter216 = ((((iter216_iter217_fused) / (crop_w))) + (0));
- if ((iter216 < ((0) + (crop_h)))) {
- if ((iter217 < ((0) + (crop_w)))) {
- if ((iter218 < ((0) + (3)))) {
- HWC2CHW[((((((iter217) - (0))) + (((crop_w) * (((iter216) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter218) - (0))))))] = (((((float)(input[((((((2) - (iter218))) + (((4) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter217) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))))))) + (((((4) * (w))) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter216) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))))))])) - (BGR_mean[((iter218) - (0))]))) / (BGR_std[((iter218) - (0))]));
- }
- }
- }
- }
- __global__ void NV12_Bilinear_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, int16_t* __restrict__ cubfh, int16_t* __restrict__ cubfw, int32_t* __restrict__ inth, int32_t* __restrict__ intw, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3];
- float NV2BGR_params[((3) * (3))];
- float BGR_mean[3];
- for (uint64_t iter225 = 0; iter225 < 0 + 3; ++iter225) {
- if ((iter225 < ((0) + (3)))) {
- BGR_std[((iter225) - (0))] = ((iter225 == 0) ? norm_std_0 : ((iter225 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter222 = 0; iter222 < 0 + 3; ++iter222) {
- for (uint64_t iter223 = 0; iter223 < 0 + 3; ++iter223) {
- if ((iter222 < ((0) + (3)))) {
- if ((iter223 < ((0) + (3)))) {
- NV2BGR_params[((((iter223) - (0))) + (((3) * (((iter222) - (0))))))] = (((((true) & ((iter222 == 0)))) & ((iter223 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter222 == 0)))) & ((iter223 == 1))) ? (static_cast<float>(2.018)) : (((((true) & ((iter222 == 0)))) & ((iter223 == 2))) ? (static_cast<float>(0)) : (((((true) & ((iter222 == 1)))) & ((iter223 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter222 == 1)))) & ((iter223 == 1))) ? (static_cast<float>(-0.813)) : (((((true) & ((iter222 == 1)))) & ((iter223 == 2))) ? (static_cast<float>(-0.391)) : (((((true) & ((iter222 == 2)))) & ((iter223 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter222 == 2)))) & ((iter223 == 1))) ? (static_cast<float>(0)) : (static_cast<float>(1.596))))))))));
- }
- }
- }
- }
- for (uint64_t iter224 = 0; iter224 < 0 + 3; ++iter224) {
- if ((iter224 < ((0) + (3)))) {
- BGR_mean[((iter224) - (0))] = ((iter224 == 0) ? norm_mean_0 : ((iter224 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter244_iter245_fused_iter246_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0));
- const uint64_t iter246 = (((iter244_iter245_fused_iter246_fused % 3)) + (0));
- const uint64_t iter244_iter245_fused = ((((iter244_iter245_fused_iter246_fused) / (3))) + (0));
- const uint64_t iter245 = (((iter244_iter245_fused % crop_w)) + (0));
- const uint64_t iter244 = ((((iter244_iter245_fused) / (crop_w))) + (0));
- if ((iter244 < ((0) + (crop_h)))) {
- if ((iter245 < ((0) + (crop_w)))) {
- if ((iter246 < ((0) + (3)))) {
- HWC2CHW[((((((iter245) - (0))) + (((crop_w) * (((iter244) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter246) - (0))))))] = (((((float)(((((((((((((((cubfh[((((iter244) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((1) * (w))) * (inth[((((iter244) + (crop_top))) + (((resize_h) * (0))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))) + (((((cubfh[((((iter244) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((1) * (w))) * (inth[((((iter244) + (crop_top))) + (((resize_h) * (1))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - 
((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter245) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))))) + (((((cubfh[((((iter244) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((1) * (w))) * (inth[((((iter244) + (crop_top))) + (((resize_h) * (0))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))))) + (((((cubfh[((((iter244) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((1) * (w))) * (inth[((((iter244) + (crop_top))) + (((resize_h) * (1))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * 
(((input[((((0) + (((1) * (((((intw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter246))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter245) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter244) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))))) + (2097152))) / (4194304)))) - (BGR_mean[((iter246) - (0))]))) / (BGR_std[((iter246) - (0))]));
- }
- }
- }
- }
// NV12_Nearest_Kernel: each thread produces at most one element of HWC2CHW.
// The flat thread index (blockIdx.x * 128 + threadIdx.x; the 128 is hard-coded,
// matching the BLOCK_SIZE used at the launch site in FuseKernelCU) is split into
// channel (fastest varying), output column and output row. Threads whose
// (row, column, channel) fall outside crop_h x crop_w x 3 write nothing.
//
// For its element the thread:
//   1. maps (column + crop_left, row + crop_top) to a source position by
//      multiplying with w / resize_w and h / resize_h, taking floorf, and
//      clamping to w - 1 / h - 1;
//   2. reads one byte from the first h rows of `input` at that position and a
//      byte pair from the rows starting at row h (row index y / 2 + h, column
//      (x / 2) * 2 and (x / 2) * 2 + 1);
//   3. combines the three bytes with one row of the 3x3 NV2BGR_params table;
//   4. subtracts BGR_mean[channel], divides by BGR_std[channel], and stores the
//      result at column + crop_w * row + crop_w * crop_h * channel.
//
// The pad_h, pad_w, pad_top, pad_left, pad_bottom, pad_right and pad_value
// parameters are not referenced anywhere in this body.
// NOTE(review): the (x / 2) * 2 pair addressing presumably assumes an even w --
// confirm against the caller.
- __global__ void NV12_Nearest_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3]; // norm_std_0..2, indexed by output channel
- float scale[2]; // [0] = h / resize_h, [1] = w / resize_w
- float NV2BGR_params[((3) * (3))]; // 3x3 coefficient table, stored as column + 3 * row
- float BGR_mean[3]; // norm_mean_0..2, indexed by output channel
- for (uint64_t iter253 = 0; iter253 < 0 + 3; ++iter253) {
- if ((iter253 < ((0) + (3)))) {
- BGR_std[((iter253) - (0))] = ((iter253 == 0) ? norm_std_0 : ((iter253 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter260 = 0; iter260 < 0 + 2; ++iter260) {
- if ((iter260 < ((0) + (2)))) {
- scale[((iter260) - (0))] = ((iter260 == 0) ? (((float)(h)) / ((float)(resize_h))) : (((float)(w)) / ((float)(resize_w))));
- }
- }
// Fills the table row by row: row 0 = {1.164, 2.018, 0}, row 1 = {1.164, -0.813, -0.391}, row 2 = {1.164, 0, 1.596}.
- for (uint64_t iter250 = 0; iter250 < 0 + 3; ++iter250) {
- for (uint64_t iter251 = 0; iter251 < 0 + 3; ++iter251) {
- if ((iter250 < ((0) + (3)))) {
- if ((iter251 < ((0) + (3)))) {
- NV2BGR_params[((((iter251) - (0))) + (((3) * (((iter250) - (0))))))] = (((((true) & ((iter250 == 0)))) & ((iter251 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter250 == 0)))) & ((iter251 == 1))) ? (static_cast<float>(2.018)) : (((((true) & ((iter250 == 0)))) & ((iter251 == 2))) ? (static_cast<float>(0)) : (((((true) & ((iter250 == 1)))) & ((iter251 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter250 == 1)))) & ((iter251 == 1))) ? (static_cast<float>(-0.813)) : (((((true) & ((iter250 == 1)))) & ((iter251 == 2))) ? (static_cast<float>(-0.391)) : (((((true) & ((iter250 == 2)))) & ((iter251 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter250 == 2)))) & ((iter251 == 1))) ? (static_cast<float>(0)) : (static_cast<float>(1.596))))))))));
- }
- }
- }
- }
- for (uint64_t iter252 = 0; iter252 < 0 + 3; ++iter252) {
- if ((iter252 < ((0) + (3)))) {
- BGR_mean[((iter252) - (0))] = ((iter252 == 0) ? norm_mean_0 : ((iter252 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter273_iter274_fused_iter275_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0)); // flat index of this thread's output element
- const uint64_t iter275 = (((iter273_iter274_fused_iter275_fused % 3)) + (0)); // output channel
- const uint64_t iter273_iter274_fused = ((((iter273_iter274_fused_iter275_fused) / (3))) + (0));
- const uint64_t iter274 = (((iter273_iter274_fused % crop_w)) + (0)); // output column
- const uint64_t iter273 = ((((iter273_iter274_fused) / (crop_w))) + (0)); // output row
- if ((iter273 < ((0) + (crop_h)))) {
- if ((iter274 < ((0) + (crop_w)))) {
- if ((iter275 < ((0) + (3)))) {
// Table row (2 - channel) is used, so channel 0 takes row 2 and channel 2 takes row 0.
// Column 0 weights (first-plane byte - 16), column 1 weights (even byte of the pair - 128),
// column 2 weights (odd byte of the pair - 128).
- HWC2CHW[((((((iter274) - (0))) + (((crop_w) * (((iter273) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter275) - (0))))))] = (((((float)(((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter275))) - (0))))))]) * (((input[((((0) + (((1) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter274) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))))))) + (((((1) * (w))) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter273) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter275))) - (0))))))]) * (((input[((((0) + (((1) * (((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter274) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))) / (2))) * (2))))))) + (((((1) * (w))) * (((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter273) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter275))) - (0))))))]) * (((input[((((0) + (((1) * (((((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter274) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter273) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))) / (2))) + (h))))))]) - ((static_cast<float>(128)))))))))) - (BGR_mean[((iter275) - (0))]))) / (BGR_std[((iter275) - (0))]));
- }
- }
- }
- }
// NV21_Bilinear_Kernel: each thread produces at most one element of HWC2CHW.
// The flat thread index (blockIdx.x * 128 + threadIdx.x; the 128 is hard-coded,
// matching the BLOCK_SIZE used at the launch site in FuseKernelCU) is split into
// channel (fastest varying), output column and output row. Threads whose
// (row, column, channel) fall outside crop_h x crop_w x 3 write nothing.
//
// Lookup tables (in FuseKernelCU they are filled by bilinear_resize_preprocess_h/w
// before this kernel is launched):
//   inth / cubfh  - indexed [row + crop_top + resize_h * k], k = 0 or 1
//   intw / cubfw  - indexed [column + crop_left + resize_w * k], k = 0 or 1
// int* entries are used as source row / column indices into `input`;
// cubf* entries are used as integer blend weights.
//
// The output value is the sum over the four (k_h, k_w) combinations of
// cubfh[k_h] * cubfw[k_w] * converted(inth[k_h], intw[k_w]), plus 2097152,
// divided by 4194304, then (value - BGR_mean[channel]) / BGR_std[channel].
// converted(y, x) combines the byte at input[x + w * y] with the byte pair at
// row y / 2 + h, columns (x / 2) * 2 and (x / 2) * 2 + 1, using one row of the
// 3x3 NV2BGR_params table.
//
// The pad_h, pad_w, pad_top, pad_left, pad_bottom, pad_right and pad_value
// parameters are not referenced anywhere in this body.
- __global__ void NV21_Bilinear_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, int16_t* __restrict__ cubfh, int16_t* __restrict__ cubfw, int32_t* __restrict__ inth, int32_t* __restrict__ intw, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3]; // norm_std_0..2, indexed by output channel
- float NV2BGR_params[((3) * (3))]; // 3x3 coefficient table, stored as column + 3 * row
- float BGR_mean[3]; // norm_mean_0..2, indexed by output channel
- for (uint64_t iter282 = 0; iter282 < 0 + 3; ++iter282) {
- if ((iter282 < ((0) + (3)))) {
- BGR_std[((iter282) - (0))] = ((iter282 == 0) ? norm_std_0 : ((iter282 == 1) ? norm_std_1 : norm_std_2));
- }
- }
// Fills the table row by row: row 0 = {1.164, 2.018, 0}, row 1 = {1.164, -0.813, -0.391}, row 2 = {1.164, 0, 1.596}.
- for (uint64_t iter279 = 0; iter279 < 0 + 3; ++iter279) {
- for (uint64_t iter280 = 0; iter280 < 0 + 3; ++iter280) {
- if ((iter279 < ((0) + (3)))) {
- if ((iter280 < ((0) + (3)))) {
- NV2BGR_params[((((iter280) - (0))) + (((3) * (((iter279) - (0))))))] = (((((true) & ((iter279 == 0)))) & ((iter280 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter279 == 0)))) & ((iter280 == 1))) ? (static_cast<float>(2.018)) : (((((true) & ((iter279 == 0)))) & ((iter280 == 2))) ? (static_cast<float>(0)) : (((((true) & ((iter279 == 1)))) & ((iter280 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter279 == 1)))) & ((iter280 == 1))) ? (static_cast<float>(-0.813)) : (((((true) & ((iter279 == 1)))) & ((iter280 == 2))) ? (static_cast<float>(-0.391)) : (((((true) & ((iter279 == 2)))) & ((iter280 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter279 == 2)))) & ((iter280 == 1))) ? (static_cast<float>(0)) : (static_cast<float>(1.596))))))))));
- }
- }
- }
- }
- for (uint64_t iter281 = 0; iter281 < 0 + 3; ++iter281) {
- if ((iter281 < ((0) + (3)))) {
- BGR_mean[((iter281) - (0))] = ((iter281 == 0) ? norm_mean_0 : ((iter281 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter301_iter302_fused_iter303_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0)); // flat index of this thread's output element
- const uint64_t iter303 = (((iter301_iter302_fused_iter303_fused % 3)) + (0)); // output channel
- const uint64_t iter301_iter302_fused = ((((iter301_iter302_fused_iter303_fused) / (3))) + (0));
- const uint64_t iter302 = (((iter301_iter302_fused % crop_w)) + (0)); // output column
- const uint64_t iter301 = ((((iter301_iter302_fused) / (crop_w))) + (0)); // output row
- if ((iter301 < ((0) + (crop_h)))) {
- if ((iter302 < ((0) + (crop_w)))) {
- if ((iter303 < ((0) + (3)))) {
// Table row (2 - channel) is used, so channel 0 takes row 2 and channel 2 takes row 0.
// Column 0 weights (first-plane byte - 16), column 1 weights (odd byte of the pair - 128),
// column 2 weights (even byte of the pair - 128).
// The statement below is a single expression that the source wraps across three physical lines.
- HWC2CHW[((((((iter302) - (0))) + (((crop_w) * (((iter301) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter303) - (0))))))] = (((((float)(((((((((((((((cubfh[((((iter301) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((1) * (w))) * (inth[((((iter301) + (crop_top))) + (((resize_h) * (0))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))) + (((((cubfh[((((iter301) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]))))) + (((((1) * (w))) * (inth[((((iter301) + (crop_top))) + (((resize_h) * (1))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - 
((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter302) + (crop_left))) + (((resize_w) * (0))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))))) + (((((cubfh[((((iter301) + (crop_top))) + (((resize_h) * (0))))]) * (cubfw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((1) * (w))) * (inth[((((iter301) + (crop_top))) + (((resize_h) * (0))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((((intw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (0))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))))) + (((((cubfh[((((iter301) + (crop_top))) + (((resize_h) * (1))))]) * (cubfw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]))) * (((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (intw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]))))) + (((((1) * (w))) * (inth[((((iter301) + (crop_top))) + (((resize_h) * (1))))]))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * 
(((input[((((0) + (((1) * (((((((intw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter303))) - (0))))))]) * (((input[((((0) + (((1) * (((((intw[((((iter302) + (crop_left))) + (((resize_w) * (1))))]) / (2))) * (2))))))) + (((((1) * (w))) * (((((inth[((((iter301) + (crop_top))) + (((resize_h) * (1))))]) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))))))) + (2097152))) / (4194304)))) - (BGR_mean[((iter303) - (0))]))) / (BGR_std[((iter303) - (0))]));
- }
- }
- }
- }
// NV21_Nearest_Kernel: each thread produces at most one element of HWC2CHW.
// The flat thread index (blockIdx.x * 128 + threadIdx.x; the 128 is hard-coded,
// matching the BLOCK_SIZE used at the launch site in FuseKernelCU) is split into
// channel (fastest varying), output column and output row. Threads whose
// (row, column, channel) fall outside crop_h x crop_w x 3 write nothing.
//
// For its element the thread:
//   1. maps (column + crop_left, row + crop_top) to a source position by
//      multiplying with w / resize_w and h / resize_h, taking floorf, and
//      clamping to w - 1 / h - 1;
//   2. reads one byte from the first h rows of `input` at that position and a
//      byte pair from the rows starting at row h (row index y / 2 + h, column
//      (x / 2) * 2 and (x / 2) * 2 + 1);
//   3. combines the three bytes with one row of the 3x3 NV2BGR_params table;
//   4. subtracts BGR_mean[channel], divides by BGR_std[channel], and stores the
//      result at column + crop_w * row + crop_w * crop_h * channel.
//
// Compared with NV12_Nearest_Kernel, the only difference in the final
// expression is which byte of the pair is weighted by table column 1 and which
// by column 2 (they are swapped).
//
// The pad_h, pad_w, pad_top, pad_left, pad_bottom, pad_right and pad_value
// parameters are not referenced anywhere in this body.
- __global__ void NV21_Nearest_Kernel(uint64_t resize_h, uint64_t resize_w, uint64_t crop_h, uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0, float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1, float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top, int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value, uint8_t* __restrict__ input, float* __restrict__ HWC2CHW, uint64_t h, uint64_t w) {
- float BGR_std[3]; // norm_std_0..2, indexed by output channel
- float scale[2]; // [0] = h / resize_h, [1] = w / resize_w
- float NV2BGR_params[((3) * (3))]; // 3x3 coefficient table, stored as column + 3 * row
- float BGR_mean[3]; // norm_mean_0..2, indexed by output channel
- for (uint64_t iter310 = 0; iter310 < 0 + 3; ++iter310) {
- if ((iter310 < ((0) + (3)))) {
- BGR_std[((iter310) - (0))] = ((iter310 == 0) ? norm_std_0 : ((iter310 == 1) ? norm_std_1 : norm_std_2));
- }
- }
- for (uint64_t iter317 = 0; iter317 < 0 + 2; ++iter317) {
- if ((iter317 < ((0) + (2)))) {
- scale[((iter317) - (0))] = ((iter317 == 0) ? (((float)(h)) / ((float)(resize_h))) : (((float)(w)) / ((float)(resize_w))));
- }
- }
// Fills the table row by row: row 0 = {1.164, 2.018, 0}, row 1 = {1.164, -0.813, -0.391}, row 2 = {1.164, 0, 1.596}.
- for (uint64_t iter307 = 0; iter307 < 0 + 3; ++iter307) {
- for (uint64_t iter308 = 0; iter308 < 0 + 3; ++iter308) {
- if ((iter307 < ((0) + (3)))) {
- if ((iter308 < ((0) + (3)))) {
- NV2BGR_params[((((iter308) - (0))) + (((3) * (((iter307) - (0))))))] = (((((true) & ((iter307 == 0)))) & ((iter308 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter307 == 0)))) & ((iter308 == 1))) ? (static_cast<float>(2.018)) : (((((true) & ((iter307 == 0)))) & ((iter308 == 2))) ? (static_cast<float>(0)) : (((((true) & ((iter307 == 1)))) & ((iter308 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter307 == 1)))) & ((iter308 == 1))) ? (static_cast<float>(-0.813)) : (((((true) & ((iter307 == 1)))) & ((iter308 == 2))) ? (static_cast<float>(-0.391)) : (((((true) & ((iter307 == 2)))) & ((iter308 == 0))) ? (static_cast<float>(1.164)) : (((((true) & ((iter307 == 2)))) & ((iter308 == 1))) ? (static_cast<float>(0)) : (static_cast<float>(1.596))))))))));
- }
- }
- }
- }
- for (uint64_t iter309 = 0; iter309 < 0 + 3; ++iter309) {
- if ((iter309 < ((0) + (3)))) {
- BGR_mean[((iter309) - (0))] = ((iter309 == 0) ? norm_mean_0 : ((iter309 == 1) ? norm_mean_1 : norm_mean_2));
- }
- }
- const uint64_t iter330_iter331_fused_iter332_fused = ((((((blockIdx.x) * (128))) + (threadIdx.x))) + (0)); // flat index of this thread's output element
- const uint64_t iter332 = (((iter330_iter331_fused_iter332_fused % 3)) + (0)); // output channel
- const uint64_t iter330_iter331_fused = ((((iter330_iter331_fused_iter332_fused) / (3))) + (0));
- const uint64_t iter331 = (((iter330_iter331_fused % crop_w)) + (0)); // output column
- const uint64_t iter330 = ((((iter330_iter331_fused) / (crop_w))) + (0)); // output row
- if ((iter330 < ((0) + (crop_h)))) {
- if ((iter331 < ((0) + (crop_w)))) {
- if ((iter332 < ((0) + (3)))) {
// Table row (2 - channel) is used, so channel 0 takes row 2 and channel 2 takes row 0.
// Column 0 weights (first-plane byte - 16), column 1 weights (odd byte of the pair - 128),
// column 2 weights (even byte of the pair - 128).
- HWC2CHW[((((((iter331) - (0))) + (((crop_w) * (((iter330) - (0))))))) + (((((crop_w) * (crop_h))) * (((iter332) - (0))))))] = (((((float)(((((((NV2BGR_params[((((0) - (0))) + (((3) * (((((2) - (iter332))) - (0))))))]) * (((input[((((0) + (((1) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter331) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))))))) + (((((1) * (w))) * (min((uint64_t)(static_cast<uint64_t>(floorf(((((iter330) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))))))]) - ((static_cast<float>(16))))))) + (((NV2BGR_params[((((1) - (0))) + (((3) * (((((2) - (iter332))) - (0))))))]) * (((input[((((0) + (((1) * (((((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter331) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))) / (2))) * (2))) + (1))))))) + (((((1) * (w))) * (((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter330) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))) / (2))) + (h))))))]) - ((static_cast<float>(128))))))))) + (((NV2BGR_params[((((2) - (0))) + (((3) * (((((2) - (iter332))) - (0))))))]) * (((input[((((0) + (((1) * (((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter331) + (crop_left))) * (scale[((1) - (0))]))))), (uint64_t)((w) - (1)))) / (2))) * (2))))))) + (((((1) * (w))) * (((((min((uint64_t)(static_cast<uint64_t>(floorf(((((iter330) + (crop_top))) * (scale[((0) - (0))]))))), (uint64_t)((h) - (1)))) / (2))) + (h))))))]) - ((static_cast<float>(128)))))))))) - (BGR_mean[((iter332) - (0))]))) / (BGR_std[((iter332) - (0))]));
- }
- }
- }
- }
// INCREASE(x, l): the index after x, saturating at the last valid index l - 1
// (yields x itself when x + 1 would reach l). Every use of x is parenthesized
// so that arguments containing low-precedence operators (e.g. `a & 3`) are
// evaluated as a unit. Note that x is still evaluated more than once, so it
// must not have side effects.
#define INCREASE(x, l) (((x) + 1) >= (l) ? (x) : ((x) + 1))
// Builds the vertical lookup tables used by the *_Bilinear_Kernel functions.
// One thread handles one destination row `row` in [0, dst_h); surplus threads
// do nothing. For that row it writes:
//   inth[row]          - the upper source row (clamped to [0, src_h - 1])
//   inth[dst_h + row]  - the next source row (saturating at src_h - 1)
//   cubfh[row]         - rint(2048 - f * 2048), the weight of the upper row
//   cubfh[dst_h + row] - rint(f * 2048), the weight of the next row
// where f is the fractional part of (row + 0.5) * src_h / dst_h - 0.5, forced
// to 0 whenever the source row had to be clamped. Both tables therefore need
// room for 2 * dst_h entries.
__global__ void bilinear_resize_preprocess_h(uint64_t src_h, uint64_t dst_h,
                                             int16_t* __restrict__ cubfh,
                                             int32_t* __restrict__ inth) {
  const int row = blockIdx.x * blockDim.x + threadIdx.x;
  if (row < dst_h) {
    const float ratio = double(src_h) / dst_h;

    // Source coordinate of this destination row, split into integer and
    // fractional parts.
    float frac = (float)((row + 0.5) * ratio - 0.5f);
    int upper = floor(frac);
    frac -= upper;

    // Clamp to the valid source range; a clamped row gets no blending.
    if (upper < 0) {
      upper = 0;
      frac = 0;
    }
    if (upper >= src_h) {
      upper = src_h - 1;
      frac = 0;
    }
    const int lower = INCREASE(upper, src_h);

    // Weights are stored in fixed point with a scale of 2048.
    frac = frac * 2048;
    cubfh[row] = rint(2048 - frac);
    cubfh[dst_h + row] = rint(frac);
    inth[row] = upper;
    inth[dst_h + row] = lower;
  }
}
// Builds the horizontal lookup tables used by the *_Bilinear_Kernel functions.
// One thread handles one destination column `col` in [0, dst_w); surplus
// threads do nothing. For that column it writes:
//   intw[col]          - the left source column (clamped to [0, src_w - 1])
//   intw[dst_w + col]  - the next source column (saturating at src_w - 1)
//   cubfw[col]         - rint(2048 - f * 2048), the weight of the left column
//   cubfw[dst_w + col] - rint(f * 2048), the weight of the next column
// where f is the fractional part of (col + 0.5) * src_w / dst_w - 0.5, forced
// to 0 whenever the source column had to be clamped. Both tables therefore
// need room for 2 * dst_w entries.
__global__ void bilinear_resize_preprocess_w(uint64_t src_w, uint64_t dst_w,
                                             int16_t* __restrict__ cubfw,
                                             int32_t* __restrict__ intw) {
  const int col = blockIdx.x * blockDim.x + threadIdx.x;
  if (col < dst_w) {
    const float ratio = double(src_w) / dst_w;

    // Source coordinate of this destination column, split into integer and
    // fractional parts.
    float frac = (float)((col + 0.5) * ratio - 0.5f);
    int left = floor(frac);
    frac -= left;

    // Clamp to the valid source range; a clamped column gets no blending.
    if (left < 0) {
      left = 0;
      frac = 0;
    }
    if (left >= src_w) {
      left = src_w - 1;
      frac = 0;
    }
    const int right = INCREASE(left, src_w);

    // Weights are stored in fixed point with a scale of 2048.
    frac = frac * 2048;
    cubfw[col] = rint(2048 - frac);
    cubfw[dst_w + col] = rint(frac);
    intw[col] = left;
    intw[dst_w + col] = right;
  }
}
// Host-side dispatcher: picks the fused preprocessing kernel that matches
// `format` ("BGR", "RGB", "GRAY", "BGRA", "NV12", "NV21") and `interpolation`
// ("nearest" or "bilinear") and launches it on `stream` with one thread per
// output element (dst_element_num elements, BLOCK_SIZE threads per block).
//
// Parameters are forwarded to the kernels unchanged:
//   resize_h/resize_w        - resized image size; both must be non-zero
//   crop_*                   - crop window inside the resized image
//   norm_mean_*/norm_std_*   - per-channel normalization constants
//   pad_*                    - padding description (forwarded as-is)
//   src_raw_data             - device pointer to the source image
//   dst_raw_data             - device pointer to the float output
//   src_h/src_w              - source image size
//
// Behavior:
//   * "nearest": the kernel is only enqueued; the function returns without
//     synchronizing.
//   * "bilinear": four temporary device tables are allocated, filled by
//     bilinear_resize_preprocess_h/w, consumed by the kernel, and freed again
//     after the stream has been synchronized, so this path blocks the caller.
//     NOTE(review): allocating and freeing on every call is costly; consider
//     caching these tables if this shows up in profiles.
//   * An unknown format, an unknown interpolation, a zero resize_h/resize_w,
//     or any CUDA error terminates the process through ABORT.
//   * When dst_element_num is 0 no fused kernel is launched (a zero-block grid
//     is not a valid launch configuration).
void FuseKernelCU(cudaStream_t stream, uint64_t resize_h, uint64_t resize_w, uint64_t crop_h,
                  uint64_t crop_w, int32_t crop_top, int32_t crop_left, float norm_mean_0,
                  float norm_mean_1, float norm_mean_2, float norm_std_0, float norm_std_1,
                  float norm_std_2, uint64_t pad_h, uint64_t pad_w, int32_t pad_top,
                  int32_t pad_left, int32_t pad_bottom, int32_t pad_right, float pad_value,
                  uint8_t* __restrict__ src_raw_data, float* __restrict__ dst_raw_data,
                  uint64_t dst_element_num, uint64_t src_h, uint64_t src_w, const char *format,
                  const char *interpolation = "nearest") {
// cuErrCheck expands its argument twice (once to test it, once to format the
// message), so handing it a call expression would run that call again on
// failure. Evaluate the call exactly once and check the stored status instead.
#define ELENA_CHECK_ONCE(call)          \
  {                                     \
    const cudaError_t status_ = (call); \
    cuErrCheck(status_);                \
  }

// Argument lists shared by every kernel of one interpolation family.
#define ELENA_COMMON_ARGS                                                                     \
  resize_h, resize_w, crop_h, crop_w, crop_top, crop_left, norm_mean_0, norm_mean_1,          \
      norm_mean_2, norm_std_0, norm_std_1, norm_std_2, pad_h, pad_w, pad_top, pad_left,       \
      pad_bottom, pad_right, pad_value
#define ELENA_NEAREST_ARGS ELENA_COMMON_ARGS, src_raw_data, dst_raw_data, src_h, src_w
#define ELENA_BILINEAR_ARGS \
  ELENA_COMMON_ARGS, cubfh, cubfw, inth, intw, src_raw_data, dst_raw_data, src_h, src_w

// Enqueue one fused kernel and report launch-configuration errors right away.
#define ELENA_LAUNCH(kernel, args)                        \
  if (grid_blocks != 0) {                                 \
    kernel<<<grid_blocks, BLOCK_SIZE, 0, stream>>>(args); \
    ELENA_CHECK_ONCE(cudaGetLastError());                 \
  }

  // Ceil-division: the last block may be partially filled; the kernels
  // bounds-check their own index.
  const uint64_t grid_blocks = (dst_element_num + BLOCK_SIZE - 1) / BLOCK_SIZE;
  const bool has_resize = resize_h && resize_w;

  if (has_resize && EQUAL(interpolation, "nearest")) {
    if (EQUAL(format, "BGR")) {
      ELENA_LAUNCH(BGR_Nearest_Kernel, ELENA_NEAREST_ARGS);
    } else if (EQUAL(format, "RGB")) {
      ELENA_LAUNCH(RGB_Nearest_Kernel, ELENA_NEAREST_ARGS);
    } else if (EQUAL(format, "GRAY")) {
      ELENA_LAUNCH(GRAY_Nearest_Kernel, ELENA_NEAREST_ARGS);
    } else if (EQUAL(format, "BGRA")) {
      ELENA_LAUNCH(BGRA_Nearest_Kernel, ELENA_NEAREST_ARGS);
    } else if (EQUAL(format, "NV12")) {
      ELENA_LAUNCH(NV12_Nearest_Kernel, ELENA_NEAREST_ARGS);
    } else if (EQUAL(format, "NV21")) {
      ELENA_LAUNCH(NV21_Nearest_Kernel, ELENA_NEAREST_ARGS);
    } else {
      ABORT("This format is not supported");
    }
  } else if (has_resize && EQUAL(interpolation, "bilinear")) {
    // Per-row / per-column tables: two entries (both neighbours) per output
    // row or column, hence the factor 2.
    short* cubfh = NULL;
    short* cubfw = NULL;
    int* inth = NULL;
    int* intw = NULL;
    ELENA_CHECK_ONCE(cudaMalloc(&cubfh, resize_h * 2 * sizeof(short)));
    ELENA_CHECK_ONCE(cudaMalloc(&cubfw, resize_w * 2 * sizeof(short)));
    ELENA_CHECK_ONCE(cudaMalloc(&inth, resize_h * 2 * sizeof(int)));
    ELENA_CHECK_ONCE(cudaMalloc(&intw, resize_w * 2 * sizeof(int)));

    const int block = 512;
    bilinear_resize_preprocess_h<<<(resize_h + block - 1) / block, block, 0, stream>>>(
        src_h, resize_h, cubfh, inth);
    ELENA_CHECK_ONCE(cudaGetLastError());
    bilinear_resize_preprocess_w<<<(resize_w + block - 1) / block, block, 0, stream>>>(
        src_w, resize_w, cubfw, intw);
    ELENA_CHECK_ONCE(cudaGetLastError());

    if (EQUAL(format, "BGR")) {
      ELENA_LAUNCH(BGR_Bilinear_Kernel, ELENA_BILINEAR_ARGS);
    } else if (EQUAL(format, "RGB")) {
      ELENA_LAUNCH(RGB_Bilinear_Kernel, ELENA_BILINEAR_ARGS);
    } else if (EQUAL(format, "GRAY")) {
      ELENA_LAUNCH(GRAY_Bilinear_Kernel, ELENA_BILINEAR_ARGS);
    } else if (EQUAL(format, "BGRA")) {
      ELENA_LAUNCH(BGRA_Bilinear_Kernel, ELENA_BILINEAR_ARGS);
    } else if (EQUAL(format, "NV12")) {
      ELENA_LAUNCH(NV12_Bilinear_Kernel, ELENA_BILINEAR_ARGS);
    } else if (EQUAL(format, "NV21")) {
      ELENA_LAUNCH(NV21_Bilinear_Kernel, ELENA_BILINEAR_ARGS);
    } else {
      ABORT("This format is not supported");
    }

    // The kernels read the tables asynchronously; wait for them before the
    // tables are released, and surface any execution error from the stream.
    ELENA_CHECK_ONCE(cudaStreamSynchronize(stream));
    ELENA_CHECK_ONCE(cudaFree(cubfh));
    ELENA_CHECK_ONCE(cudaFree(cubfw));
    ELENA_CHECK_ONCE(cudaFree(inth));
    ELENA_CHECK_ONCE(cudaFree(intw));
  } else {
    ABORT("This interpolation is not supported");
  }

#undef ELENA_LAUNCH
#undef ELENA_BILINEAR_ARGS
#undef ELENA_NEAREST_ARGS
#undef ELENA_COMMON_ARGS
#undef ELENA_CHECK_ONCE
}
// Type-erased entry point handed to REGISTER_FUSE_KERNEL: converts the plain
// int / void* arguments and forwards them to FuseKernelCU.
//
//   stream          - a cudaStream_t passed as void*
//   data_in         - device pointer to the source image
//   src_h, src_w    - source image size
//   format          - pixel format name understood by FuseKernelCU
//   resize_h/_w     - resized image size
//   interpolation   - "bilinear" selects bilinear resizing; any other value,
//                     including a null pointer, selects "nearest"
//   crop_*          - crop window
//   mean*/std*      - per-channel normalization constants
//   pad_*           - padding description and fill value
//   data_out        - device pointer to the float output
//   data_out_num    - number of output elements
//
// The padding values are passed in the order of FuseKernelCU's parameter list
// (pad_h, pad_w, pad_top, pad_left, pad_bottom, pad_right); this function's own
// signature lists them in a different order, so they must not be forwarded
// positionally as declared here.
void FuseFunc(void* stream, uint8_t* data_in, int src_h, int src_w, const char* format,
              int resize_h, int resize_w, const char* interpolation, int crop_top, int crop_left,
              int crop_h, int crop_w, float mean0, float mean1, float mean2, float std0, float std1,
              float std2, int pad_top, int pad_left, int pad_bottom, int pad_right, int pad_h,
              int pad_w, float pad_value, float* data_out, int data_out_num) {
  cudaStream_t stream_ = (cudaStream_t)stream;

  // Normalize the interpolation name; guard against a null pointer before
  // comparing.
  const char* interpolation_ = "nearest";
  if (interpolation != NULL && EQUAL(interpolation, "bilinear")) {
    interpolation_ = "bilinear";
  }

  FuseKernelCU(stream_, resize_h, resize_w, crop_h, crop_w, crop_top, crop_left, mean0, mean1,
               mean2, std0, std1, std2, pad_h, pad_w, pad_top, pad_left, pad_bottom, pad_right,
               pad_value, data_in, data_out, data_out_num, src_h, src_w, format, interpolation_);
}
// Invokes REGISTER_FUSE_KERNEL (a macro not defined in this file; HEAD includes
// "elena_registry.h") with an identifier, the same name as a string literal,
// and FuseFunc.
// NOTE(review): presumably this makes FuseFunc look-up-able under that name at
// runtime -- confirm against the registry header.
- REGISTER_FUSE_KERNEL(fa993066c1ec32a14d8d06ecfb8d5c54e7a04ec31ba833e1abf1f16c05e027c7_cuda, "fa993066c1ec32a14d8d06ecfb8d5c54e7a04ec31ba833e1abf1f16c05e027c7_cuda",
- FuseFunc);
- }
Editor
You can edit this paste and save as new: