Enable SIMD256 for warpAffineBlocklineNN #26185

@FantasqueX

Description

Describe the feature and motivation

Currently, unless the target architecture is RISC-V, warpAffineBlocklineNN only has an SSE4.1 path and a fixed-width SIMD128 path; there is no SIMD256 path, nor a width-agnostic universal-intrinsics implementation that would cover 256-bit and wider registers with a single code path. A sketch of such a rewrite is included after the code below.
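
For reference, OpenCV's width-agnostic universal intrinsics (v_int32, vx_load, vx_setall_s32, VTraits, ...) compile to the widest SIMD extension enabled at build time, so one code path can serve SSE, AVX2, NEON, and scalable RVV. A minimal sketch, assuming a recent OpenCV 4.x build where the VTraits-based API (already used in the code below) is available:

#include <cstdio>
#include "opencv2/core/hal/intrin.hpp"

int main()
{
#if (CV_SIMD || CV_SIMD_SCALABLE)
    // Width-agnostic v_int32: 4 lanes with 128-bit SSE/NEON, 8 lanes with
    // 256-bit AVX2, and a vector-length-dependent count with scalable RVV.
    std::printf("v_int32 lanes: %d\n", cv::VTraits<cv::v_int32>::vlanes());
#else
    std::printf("universal intrinsics are not available in this build\n");
#endif
    return 0;
}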

Additional context

void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw)
{
    CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw);
    const int AB_BITS = MAX(10, (int)INTER_BITS);
    int x1 = 0;
#if CV_TRY_SSE4_1
    bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
    if( useSSE4_1 )
        opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta, bdelta, xy, X0, Y0, bw);
    else
#endif
    {
#if CV_SIMD128
        {
            v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
            int span = VTraits<v_uint16x8>::vlanes();
            for( ; x1 <= bw - span; x1 += span )
            {
                v_int16x8 v_dst[2];
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset))),\
                                                v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset + 4))))
                v_dst[0] = CV_CONVERT_MAP(adelta, x1, v_X0);
                v_dst[1] = CV_CONVERT_MAP(bdelta, x1, v_Y0);
#undef CV_CONVERT_MAP
                v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
            }
        }
#endif
        for( ; x1 < bw; x1++ )
        {
            int X = (X0 + adelta[x1]) >> AB_BITS;
            int Y = (Y0 + bdelta[x1]) >> AB_BITS;
            xy[x1*2] = saturate_cast<short>(X);
            xy[x1*2+1] = saturate_cast<short>(Y);
        }
    }
}
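
A minimal sketch of the requested generalization, replacing the fixed 128-bit block with width-agnostic types so the same loop compiles to 256-bit (or scalable) registers where available. This is an untested illustration of the idea, not a verified patch:

#if (CV_SIMD || CV_SIMD_SCALABLE)
        {
            v_int32 v_X0 = vx_setall_s32(X0), v_Y0 = vx_setall_s32(Y0);
            const int span = VTraits<v_int16>::vlanes();   // 8 on SSE/NEON, 16 on AVX2
            const int half = VTraits<v_int32>::vlanes();   // span / 2
            for( ; x1 <= bw - span; x1 += span )
            {
                // Same computation as the SIMD128 block above, on the widest
                // vector the build supports; v_pack saturates like saturate_cast.
                v_int16 v_dst0 = v_pack(v_shr<AB_BITS>(v_add(v_X0, vx_load(adelta + x1))),
                                        v_shr<AB_BITS>(v_add(v_X0, vx_load(adelta + x1 + half))));
                v_int16 v_dst1 = v_pack(v_shr<AB_BITS>(v_add(v_Y0, vx_load(bdelta + x1))),
                                        v_shr<AB_BITS>(v_add(v_Y0, vx_load(bdelta + x1 + half))));
                v_store_interleave(xy + (x1 << 1), v_dst0, v_dst1);
            }
        }
#endif

The scalar tail loop would stay unchanged to handle the remaining bw % span elements, and the SSE4.1 specialization could then either be dropped or kept behind its existing guard.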
