-
-
Notifications
You must be signed in to change notification settings - Fork 56.5k
Closed
Labels
Description
Describe the feature and motivation
Currently, if architecture is not RV, there is only SSE4.1 and SIMD128 optimization instead of SIMD256 or more generic SIMD.
Additional context
opencv/modules/imgproc/src/imgwarp.cpp
Lines 2702 to 2739 in 450e741
| void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) | |
| { | |
| CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw); | |
| const int AB_BITS = MAX(10, (int)INTER_BITS); | |
| int x1 = 0; | |
| #if CV_TRY_SSE4_1 | |
| bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1; | |
| if( useSSE4_1 ) | |
| opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta, bdelta, xy, X0, Y0, bw); | |
| else | |
| #endif | |
| { | |
| #if CV_SIMD128 | |
| { | |
| v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0); | |
| int span = VTraits<v_uint16x8>::vlanes(); | |
| for( ; x1 <= bw - span; x1 += span ) | |
| { | |
| v_int16x8 v_dst[2]; | |
| #define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset))),\ | |
| v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset + 4)))) | |
| v_dst[0] = CV_CONVERT_MAP(adelta, x1, v_X0); | |
| v_dst[1] = CV_CONVERT_MAP(bdelta, x1, v_Y0); | |
| #undef CV_CONVERT_MAP | |
| v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]); | |
| } | |
| } | |
| #endif | |
| for( ; x1 < bw; x1++ ) | |
| { | |
| int X = (X0 + adelta[x1]) >> AB_BITS; | |
| int Y = (Y0 + bdelta[x1]) >> AB_BITS; | |
| xy[x1*2] = saturate_cast<short>(X); | |
| xy[x1*2+1] = saturate_cast<short>(Y); | |
| } | |
| } | |
| } |
Reactions are currently unavailable