Added Static build for Advanced augmentation operations #8
Conversation
src/modules/hip/hip_declarations.hpp
Outdated
|
|
||
| RppStatus | ||
| lut_hip_batch_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, Rpp8u *lut,rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info); | ||
| // RppStatus |
There was a problem hiding this comment.
Remove these 2 commented headers
src/modules/hip/hip_declarations.hpp
Outdated
| non_linear_blend_hip_batch_tensor_fp32(Rpp32f *srcPtr1, Rpp32f *srcPtr2, Rpp32f *dstPtr, rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info); | ||
| RppStatus | ||
| non_linear_blend_hip_batch_tensor_int8(Rpp8s *srcPtr1, Rpp8s *srcPtr2, Rpp8s *dstPtr, rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info); | ||
|
|
There was a problem hiding this comment.
Remove the blank lines between functions at lines 25, 34, 43, 52, 61, 70.
| /******************** lut ********************/ | ||
|
|
||
| // RppStatus | ||
| // lut_hip_batch(Rpp8u* srcPtr, Rpp8u* dstPtr, Rpp8u* lut, rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info) |
There was a problem hiding this comment.
Remove the commented lut_hip_batch function. Also remove the 6 other *_hip_batch functions. Let's just have the *_hip_batch_tensor for these 7 functions.
| } | ||
|
|
||
| RppStatus | ||
| lut_hip_batch_tensor_fp16(Rpp16f *srcPtr1, Rpp16f *srcPtr2, Rpp16f *dstPtr, Rpp8u* lut, rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info) |
There was a problem hiding this comment.
Just for lut, remove this lut_hip_batch_tensor_fp16 function
| } | ||
|
|
||
| RppStatus | ||
| lut_hip_batch_tensor_fp32(Rpp32f *srcPtr, Rpp32f *dstPtr, Rpp32f* lut, rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info) |
There was a problem hiding this comment.
Just for lut, remove this lut_hip_batch_tensor_fp32 function too
| RppStatus | ||
| non_linear_blend_hip_batch_tensor_fp16(Rpp16f *srcPtr1, Rpp16f *srcPtr2, Rpp16f *dstPtr, rpp::Handle &handle, RPPTensorFunctionMetaData &tensor_info) | ||
| { | ||
| // int in_plnpkdind = getplnpkdind(tensor_info._in_format), out_plnpkdind = getplnpkdind(tensor_info._out_format); |
There was a problem hiding this comment.
For all the fp16 functions (7-1=6 funcs), keep the contents of the function uncommented here in the higher level API. Let's just comment out the contents of the kernel and hip_exec func inside the respective kernel files.
| RppStatus hip_exec_color_cast_batch_int8(Rpp8s *srcPtr, Rpp8s *dstPtr, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
|
|
||
| RppStatus hip_exec_lut_batch_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, Rpp8u* lut, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
| // RppStatus hip_exec_lut_batch_fp16(Rpp16f *srcPtr, Rpp16f *dstPtr, Rpp16f* lut, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); |
There was a problem hiding this comment.
Remove the commented fp16, fp32 versions of lut too
| RppStatus hip_exec_color_cast_batch_fp32(Rpp32f *srcPtr, Rpp32f *dstPtr, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
| RppStatus hip_exec_color_cast_batch_int8(Rpp8s *srcPtr, Rpp8s *dstPtr, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
|
|
||
| RppStatus hip_exec_lut_batch_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, Rpp8u* lut, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); |
There was a problem hiding this comment.
Change this name to just hip_exec_lut_batch to match others
| RppStatus hip_exec_non_linear_blend_batch_fp16(Rpp16f *srcPtr1, Rpp16f *srcPtr2, Rpp16f *dstPtr, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
| RppStatus hip_exec_non_linear_blend_batch_fp32(Rpp32f *srcPtr1, Rpp32f *srcPtr2, Rpp32f *dstPtr, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
| RppStatus hip_exec_non_linear_blend_batch_int8(Rpp8s *srcPtr1, Rpp8s *srcPtr2, Rpp8s *dstPtr, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width); | ||
|
|
There was a problem hiding this comment.
Remove the gap lines at lines 14, 19, 24, 29, 34, 39.
| } | ||
|
|
||
| #if defined(STATIC) | ||
| RppStatus hip_exec_lut_batch_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, Rpp8u* lut, rpp::Handle& handle, RPPTensorFunctionMetaData &tensor_info, Rpp32s in_plnpkdind, Rpp32s out_plnpkdind, Rpp32u max_height, Rpp32u max_width) |
There was a problem hiding this comment.
Change this to hip_exec_lut_batch() too
| output[out_pix_index] = lutPtr[lutIndex]; | ||
| in_pix_index += inc[id_z]; | ||
| out_pix_index += dst_inc[id_z]; | ||
| extern "C" __global__ void look_up_table_batch_tensor(unsigned char *input, |
There was a problem hiding this comment.
I don't think the word 'tensor' appears in the lower-level files. Let's rename:
There was a problem hiding this comment.
Abishek, I tried to do it, but it throws a "conflicting types for 'look_up_table_batch'" error.
| int luptrIndex = id_z << 8; | ||
| for (int indextmp = 0; indextmp < channel; indextmp++) | ||
| { | ||
| int lutIndex = luptrIndex + input[in_pix_index]; |
|
|
||
| for (int indextmp = 0; indextmp < channel; indextmp++) | ||
| { | ||
| int lutIndex = luptrIndex + input[in_pix_index] + 128; |
| { | ||
| lut_hip_batch_tensor(static_cast<Rpp8u *>(srcPtr), static_cast<Rpp8u *>(dstPtr), static_cast<Rpp8u *>(lut), rpp::deref(rppHandle), tensor_info); | ||
| } | ||
| // else if (in_tensor_type == RPPTensorDataType::FP16) |
There was a problem hiding this comment.
Remove the commented-out FP16 and FP32 condition lines.
r-abishek
left a comment
There was a problem hiding this comment.
Needs a few changes as commented; ensure the unit tests run fine after the changes.
* Fix Bug in ColorTwist (r-abishek#6) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update
* Fix Bug in ColorTwist (r-abishek#6) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * Format files
* Changed Channel extract and channel combine function call * updated erode dilate kernals [OCL] * Non Working [FULLY BUILD] code for min_max_loc and mean_stddev * Updated Rain GPU kernel for multiple destination image calls [OCL] * Updated Median, Non Max and Histogram and added support for mean * Updated tensor [OCL] * updated table lookup [OCL] * small updates in mean and stddev [OCL] * Full functioning code for mean and standard deviation [OCL] * Added Support to Min Max Location [OCL] * Added support for gaussian_image_pyramid [OCL] * Added support for laplacian_image_pyramid [OCL] * small modification in LIP [OCL] * small modification in Min Max Location and Mean stddev [OCL] * box filter hisEq [OCL] * Added support for gaussian filter * Added support for bin in Histogram [OCL] * updated sobel [OCL] * Update in Temperature [CPU] * FIX SNP CPU half noise issue [OCL] * fin small change in Absolute difference [OCL] * Small changes in Custom convolution and table lookup [OCL} * Fix regressions due to scripting [cl & CPU]. * fix histogram [OCL] * Updated snow [OCL] * updated snow [CPU] * small update in Snow [OCL] * Modify filter_operations to add gaussian_filter with same backend as blur * Fix issue with rain Grey Scale [OCL] * Fix Rain GPU Transparancy [OCL] * Add Kernel Caching using Map/Kernelmanger * Resolved histogram grayscale issue in GPU * Resolved histogram grayscale issue in GPU * Fix the bug in warp affine planar call * Fix issue with resize crop validation [cl & CPU]. * Cl_enque_buffer, the argument is set to CL_FALSE * Fix Gamma correction [OCL] * minor changes to gamma_correction, vignette commons, flip functionalities * Fix Jitter with new Implementation [CPU & OCL] * Modify brightness bug that gave patches in output * Fix the buy with Lens correction [OCL]. 
* Fix Median filter issue * Modify rotate to match GPU functionality * Fix median filter * merge abi-dev-host-ms4 to main-hipcl-dev * Fix a round about fix for Hue and Saturation Shift * Modify scale to match GPU functionality * Fix a round about fix for Hue and Saturation Shift * Fix syntax error in hsvkernel * changed CL_False to CL_True in minmax location * Resolve merg * Modify warp affine to match GPU - inversion exists * Add validation for Warp Affine Matrix * changes in Warp Affine * Add Blocking calls [CL_TRUE flag is on] * Removed validation printf statements in the library * Removed a syntax error * Add extra validation for contrast * Fix issue with rain [CPU] * Added support to new Pixelate [OCL & CPU] * Fix issue with Fish eye [OCL] * Modify Histogram Implementation * Histogram Balance Fix * Update Readme.md Amended the list * Update Readme.md * Fix Histogram Planar Version * Add new support to Histogram [OCL] * Remove all files to include batch version * Move Mem-Mgmt_HIP branch files to master * Update Readme.md * Put all the recent changes or RPP here * Fix Border issues in crop mirror normalize and crop * Fix Crop mirror normalize border issue * Add RPP UnitTests * Add f32 support for crop_mirror_normalize * Add f32 support for crop * Add f32 support for resize_crop_mirror * Add f32 support for resize and resize_crop * Add f32 support for color_twist * Correct blur * Add f32 support for rotate * Add f16 host support for rotate, resize, resize_crop, crop, resize_crop_mirror, crop_mirror_normalize, color_twist * Major changes to host test suite * Separate host test suites for pkd3 and pln1 * modify rpp_unittests host * correct additional folder creation and readme * Minor correction in pln1/pkd3 host test scripts * Add basic float tensor support * Add FP32 and FP16 support for Crop function * Fix bug in crop * crop mirror normalize report * Float Support for Rotate GPU * Add Kernel Support in OCL for colorTwist and resize funtionalities * Add float 
support for ColorTwist and Resize Crop Mirror - FP16 and FP32 * Code Refactoring and Rotate Support for FP16 and FP32 * Fix Rotate Float issue * Fix FP32 Rotate Issue * Add Resize Function * Add Resize Crop Mirror in GPU OCL * Fix Typo * Add Resize Crop GPU FP16 and FP32 support * Update rppdefs.h * Crop Mirror Normalize Support is added * Support for ColorTwist in Float space * Update Colortwwist.cl - temp * Remove MIOPEN dependency in RPP build set-up * Update colortwist.cl * Fix Bug in ColorTwist * Fix Bug in ColorTwist (r-abishek#6) * API refactoring for fused_functions * Fix make_data-type bug and code formatting * Testsuite for Float Support Functions * Removed the brace in switchcase * Add free statements for unreleased memomry and f16 fix for colortwist * rename folders * Fix Resize for U8 case * minor change in BatchPD host * Fix type error in resize.cl * Fix float errors for resize fucntions * foramt file * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * update (r-abishek#10) * Fix Bug in ColorTwist (r-abishek#6) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * Format files * New Changes (r-abishek#11) * Fix Bug in ColorTwist (r-abishek#6) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * Format files * Correct f16 color twist host bug * Change test suite to input 0-1 normalized values for all f16/f32 functionalities * Refactor API code for geometry_transforms * Added Testsuite for Float Functions in OCL * AMD Docs * Create install.rst * Update index.rst * Add host support for u8->f16 and u8->f32 for resize, crop, crop_mirror_normalize * Add host support in test suite for u8->f16 and u8->f32 for resize, crop, crop_mirror_normalize * Add host support for i8 in resize, crop, cmn, rotate, resize_crop, resize_crop_mirror and color_twist * Add host test suite support for i8 * Add host support for u8->i8 in crop, resize, 
crop_mirror_normalize * modify test suite * Add host plan1 test suite to SOW3_HOST * crop mirror normalize full support in w.r.t type change and layout change * Add API calls for CMN function for new set of variations * Fix bug with respect to I8 * change type info in kernels * Fix cmn bub * Support I8 for Rotate * Int 8 support for colortwist and code refactoring * Add int8 support for resize crop mirror function * resize crop mirror int8 support is added * Crop various variations are added * Add crop support for all the conversions * Add host support for resize outputFormatToggle * Add host support for crop outputFormatToggle * Add host support for rotate outputFormatToggle * Add host support for resize_crop outputFormatToggle * Add host support for resize_crop_mirror outputFormatToggle * Add host support for crop_mirror_normalize outputFormatToggle * Add host support for color_twist outputFormatToggle and all other pln->pkd support * Add missing pln3 API for crop host * Major modifications in test suite and ReadMe for pkd3, pln3 and pln1 inputs for host * Modify resize kernel * Add outputtoggle in the API and functions * Add new changes to all the fused function w.r.t to outputFormatToggle * Add pln3 api for Crop on GPU * add missing API for resize cro * Fix compilation bugs * Remove unnecessary functions and fix build bug * Add ocl testing framework * Fix bug in rotate helper * Minor temp changes in test code to accomodate PKD3 input U8 cases with toggle format * Correct resize_u8_i8_pkd * Fix resize kenel issues for output toogle change * colortwist bug fix * Fix colortwist bug * resize tensor fix * Minor mods to both pln3 and pkd3 test suite to accomodate CMN's ability to do U8 format toggles * Corrections in PLN3 input funcitons for host * Fix bugs in Fused function new code * Add changes relatedd to planar format in padded * Fix issues with pln3 colortwist * Fix issue with test suite * Add pln3 testing and fix issues * Modify a few things in test script * 
Fix pln3 issue for FP16 for Rotate * Fix index issues with Test suit * Add output layout toggle for host API * ix pln3 issues in test suite Fix pln1 issues in testsuite Fix other minor bugs * Change paramerter order in resize pd pln host * remove print statements * Update README.MD * Codacy issues corrections in utilities/rpp-unittests * Codacy issues corrections for resize kernel * Codacy issues corrections in utilities/rpp-unittests OCL/HIP * Codacy issues corrections in utilities/rpp-unittests * Codacy issues corrections in utilities/rpp-unittests * Fix some codecy issues * Remove some Codecy issues in rpp unnittests * Remove a few codecy issues * Remove Print statements Co-authored-by: Muthukumaravel <muthukumaravel@multicorewareinc.com> Co-authored-by: shobana-mcw <shobana@multicorewareinc.com> Co-authored-by: r-abishekmcw <abishek@multicorewareinc.com> Co-authored-by: LokeshBonta <you@example.com> Co-authored-by: Reza <Seyedreza.Najafi@amd.com> Co-authored-by: Swetha B S <swetha@multicorewareinc.com>
* Modify phase for visualization * Pre-MS4 optimizations on arithmetic_operations * Pre-MS4 optimizations on arithmetic_operations * Pre-MS4 optimizations on morphological_transforms * Added support for table lookup [OCL] * Fix issues with pixelate greyscale. * Pre-MS4 optimizations on color_model_conversions * Modify sobel_filter functionality to match GPU impl. * mean and stddev base function [OCL] * Changed Channel extract and channel combine function call * updated erode dilate kernals [OCL] * Non Working [FULLY BUILD] code for min_max_loc and mean_stddev * Updated Rain GPU kernel for multiple destination image calls [OCL] * Updated Median, Non Max and Histogram and added support for mean * Updated tensor [OCL] * updated table lookup [OCL] * small updates in mean and stddev [OCL] * Full functioning code for mean and standard deviation [OCL] * Added Support to Min Max Location [OCL] * Added support for gaussian_image_pyramid [OCL] * Added support for laplacian_image_pyramid [OCL] * small modification in LIP [OCL] * small modification in Min Max Location and Mean stddev [OCL] * box filter hisEq [OCL] * Added support for gaussian filter * Added support for bin in Histogram [OCL] * updated sobel [OCL] * Update in Temperature [CPU] * FIX SNP CPU half noise issue [OCL] * fin small change in Absolute difference [OCL] * Small changes in Custom convolution and table lookup [OCL} * Fix regressions due to scripting [cl & CPU]. * fix histogram [OCL] * Updated snow [OCL] * updated snow [CPU] * small update in Snow [OCL] * Modify filter_operations to add gaussian_filter with same backend as blur * Fix issue with rain Grey Scale [OCL] * Fix Rain GPU Transparancy [OCL] * Add Kernel Caching using Map/Kernelmanger * Resolved histogram grayscale issue in GPU * Resolved histogram grayscale issue in GPU * Fix the bug in warp affine planar call * Fix issue with resize crop validation [cl & CPU]. 
* Cl_enque_buffer, the argument is set to CL_FALSE * Fix Gamma correction [OCL] * minor changes to gamma_correction, vignette commons, flip functionalities * Fix Jitter with new Implementation [CPU & OCL] * Modify brightness bug that gave patches in output * Fix the buy with Lens correction [OCL]. * Fix Median filter issue * Modify rotate to match GPU functionality * Fix median filter * merge abi-dev-host-ms4 to main-hipcl-dev * Fix a round about fix for Hue and Saturation Shift * Modify scale to match GPU functionality * Fix a round about fix for Hue and Saturation Shift * Fix syntax error in hsvkernel * changed CL_False to CL_True in minmax location * Resolve merg * Modify warp affine to match GPU - inversion exists * Add validation for Warp Affine Matrix * changes in Warp Affine * Add Blocking calls [CL_TRUE flag is on] * Removed validation printf statements in the library * Removed a syntax error * Add extra validation for contrast * Fix issue with rain [CPU] * Added support to new Pixelate [OCL & CPU] * Fix issue with Fish eye [OCL] * Modify Histogram Implementation * Histogram Balance Fix * Update Readme.md Amended the list * Update Readme.md * Fix Histogram Planar Version * Add new support to Histogram [OCL] * Remove all files to include batch version * Move Mem-Mgmt_HIP branch files to master * Update Readme.md * Put all the recent changes or RPP here * Fix Border issues in crop mirror normalize and crop * Fix Crop mirror normalize border issue * Add RPP UnitTests * Add f32 support for crop_mirror_normalize * Add f32 support for crop * Add f32 support for resize_crop_mirror * Add f32 support for resize and resize_crop * Add f32 support for color_twist * Correct blur * Add f32 support for rotate * Add f16 host support for rotate, resize, resize_crop, crop, resize_crop_mirror, crop_mirror_normalize, color_twist * Major changes to host test suite * Separate host test suites for pkd3 and pln1 * modify rpp_unittests host * correct additional folder creation and 
readme * Minor correction in pln1/pkd3 host test scripts * Add basic float tensor support * Add FP32 and FP16 support for Crop function * Fix bug in crop * crop mirror normalize report * Float Support for Rotate GPU * Add Kernel Support in OCL for colorTwist and resize funtionalities * Add float support for ColorTwist and Resize Crop Mirror - FP16 and FP32 * Code Refactoring and Rotate Support for FP16 and FP32 * Fix Rotate Float issue * Fix FP32 Rotate Issue * Add Resize Function * Add Resize Crop Mirror in GPU OCL * Fix Typo * Add Resize Crop GPU FP16 and FP32 support * Update rppdefs.h * Crop Mirror Normalize Support is added * Support for ColorTwist in Float space * Update Colortwwist.cl - temp * Update colortwist.cl * Fix Bug in ColorTwist * Fix Bug in ColorTwist (r-abishek#6) * API refactoring for fused_functions * Fix make_data-type bug and code formatting * Testsuite for Float Support Functions * Removed the brace in switchcase * Add free statements for unreleased memomry and f16 fix for colortwist * rename folders * Fix Resize for U8 case * minor change in BatchPD host * Fix type error in resize.cl * Fix float errors for resize fucntions * foramt file * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * update (r-abishek#10) * Fix Bug in ColorTwist (r-abishek#6) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * Format files * New Changes (r-abishek#11) * Fix Bug in ColorTwist (r-abishek#6) * Fix Bug in ColorTwist (r-abishek#6) (r-abishek#8) (r-abishek#9) * Update * Format files * Correct f16 color twist host bug * Change test suite to input 0-1 normalized values for all f16/f32 functionalities * Refactor API code for geometry_transforms * Added Testsuite for Float Functions in OCL * Add host support for u8->f16 and u8->f32 for resize, crop, crop_mirror_normalize * Add host support in test suite for u8->f16 and u8->f32 for resize, crop, 
crop_mirror_normalize * Add host support for i8 in resize, crop, cmn, rotate, resize_crop, resize_crop_mirror and color_twist * Add host test suite support for i8 * Add host support for u8->i8 in crop, resize, crop_mirror_normalize * modify test suite * Add host plan1 test suite to SOW3_HOST * crop mirror normalize full support in w.r.t type change and layout change * Add API calls for CMN function for new set of variations * Fix bug with respect to I8 * change type info in kernels * Fix cmn bub * Support I8 for Rotate * Int 8 support for colortwist and code refactoring * Add int8 support for resize crop mirror function * resize crop mirror int8 support is added * Crop various variations are added * Add crop support for all the conversions * Add host support for resize outputFormatToggle * Add host support for crop outputFormatToggle * Add host support for rotate outputFormatToggle * Add host support for resize_crop outputFormatToggle * Add host support for resize_crop_mirror outputFormatToggle * Add host support for crop_mirror_normalize outputFormatToggle * Add host support for color_twist outputFormatToggle and all other pln->pkd support * Add missing pln3 API for crop host * Major modifications in test suite and ReadMe for pkd3, pln3 and pln1 inputs for host * Modify resize kernel * Add outputtoggle in the API and functions * Add new changes to all the fused function w.r.t to outputFormatToggle * Add pln3 api for Crop on GPU * add missing API for resize cro * Fix compilation bugs * Remove unnecessary functions and fix build bug * Add ocl testing framework * Fix bug in rotate helper * Minor temp changes in test code to accomodate PKD3 input U8 cases with toggle format * Correct resize_u8_i8_pkd * Fix resize kenel issues for output toogle change * colortwist bug fix * Fix colortwist bug * resize tensor fix * Minor mods to both pln3 and pkd3 test suite to accomodate CMN's ability to do U8 format toggles * Corrections in PLN3 input funcitons for host * Fix bugs in 
Fused function new code * Add changes relatedd to planar format in padded * Fix issues with pln3 colortwist * Fix issue with test suite * Add pln3 testing and fix issues * Modify a few things in test script * Fix pln3 issue for FP16 for Rotate * Fix index issues with Test suit * Add output layout toggle for host API * ix pln3 issues in test suite Fix pln1 issues in testsuite Fix other minor bugs * Change paramerter order in resize pd pln host * remove print statements * Add unittest * Fix HIP backend issues * able to build hip * Changed cmakelists for linking issues * Change include hip/hip_hcc.h to hip/hip_ext.h to avoid warning Co-authored-by: Muthukumaravel <muthukumaravel@multicorewareinc.com> Co-authored-by: shobana-mcw <shobana@multicorewareinc.com> Co-authored-by: LokeshBonta <you@example.com> Co-authored-by: Reza <Seyedreza.Najafi@amd.com> Co-authored-by: LokeshBonta <lokeshpsn93@gmail.com> Co-authored-by: Lokesh Bonta <lokeswara@multicorewareinc.com> Co-authored-by: Swetha B S <swetha@multicorewareinc.com>
* Travis CI - key error fix * Fix Bug in ColorTwist (#6) (#8) (#9) * Added golden outputs and resolved HOST backend * Updated bin files for median filter and resize crop mirror * Updated bin files * Updated bin files for the next set of kernel F32 QA * Updated bin files for jpeg_compression_distortion * Fixed resize QA failures * Fix for Resize bilinear F32 QA HOST and HIP * Fix for lens correction QA f32 for HOST and HIP for 1e-4 precision * Fixed HIP rcm QA * updates for warp Affine F32 QA * Fix for RCM QA match for U8 and F32 updates AVX * Fix for lens correction AVX * Removed space * Fixed warp affine for every other varient with the updated changes * Add fixes to match precision in quantization * Fix Precision mismatches * Update default cutoff to 1e-5 and specialized cutoff to 1e-4 * F32 QA Fix * Made Quality percentage as arg from testsuite * Resolved copilot comments * Resolved the copilot comments * Resolved Codex comments * HOST and HIP - pinned buffers for respective API (ROCm#628) * Removed memcpy and used hipHostMalloc for allocation : blend * Removed memcpy and used hipHostMalloc for allocation : brightness * Removed memcpy and used hipHostMalloc for allocation : color cast * Removed memcpy and used hipHostMalloc for allocation : color twist * Removed memcpy and used hipHostMalloc for allocation : contrast * Removed memcpy and used hipHostMalloc for allocation : crop mirror normalize * Removed memcpy and used hipHostMalloc for allocation : Exposure * Removed memcpy and used hipHostMalloc for allocation : Gamma correction * Removed memcpy and used hipHostMalloc for allocation : gaussian filter * Removed memcpy and used hipHostMalloc for allocation : Noise * Removed memcpy and used hipHostMalloc for allocation : Non linear blend * Removed memcpy and used hipHostMalloc for allocation : Resize mirror normalize * Removed memcpy and used hipHostMalloc for allocation : Water * Added hipHostFree for all kernels in test suite * Added hipHostFree for all 
kernels in test suite * Removed memcpy and used hipHostMalloc for allocation : Flip, spatter, rcm, color temperature * Resolved copilot review comments * Updated version * Removed unused parameter * Updated version in cmakeList * removed the host to device mem copies for warp affine and rotate * Updated version * Removed comment * Updated Chnagelog file * Update patch version from 2.2.0 to 2.2.1 * Update CHANGELOG * Address copilot comments for HIP HOST consistent allocation * Documentation changes for updated memcpy changes * Update ricap outer API to use pinned memory and remove mem copy * Fix memory allocation and deallocation for permutationTensor * Update api/rppt_tensor_effects_augmentations.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix spelling of noiseProbability and saltProbability * Fix deallocation --------- Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com> Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com> Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com> Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * resolved review comments * minor comment change * Resolved copilot review comments * Update src/modules/tensor/cpu/kernel/resize.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/modules/tensor/cpu/kernel/resize.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/modules/tensor/hip/kernel/jpeg_compression_distortion.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Updated test suite and resoled review comments * Updated HIP for F32 QA reduction function cases --------- Co-authored-by: Kiriti Gowda <kiriti.nageshgowda@amd.com> Co-authored-by: Lokesh Bonta <lokeswara@multicorewareinc.com> Co-authored-by: sampath117 <snehaa@multicorewareinc.com> Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com> 
Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com> Co-authored-by: ManasaDattaT <tammisetti.manasadatta@multicorewareinc.com> Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com> Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Lakshmi Kumar <lakshmi.kumar@amd.com>
* Travis CI - key error fix * Fix Bug in ColorTwist (#6) (#8) (#9) * Added golden outputs and resolved HOST backend * Updated bin files for median filter and resize crop mirror * Updated bin files * Updated bin files for the next set of kernel F32 QA * Updated bin files for jpeg_compression_distortion * Fixed resize QA failures * Fix for Resize bilinear F32 QA HOST and HIP * Fix for lens correction QA f32 for HOST and HIP for 1e-4 precision * Fixed HIP rcm QA * updates for warp Affine F32 QA * Fix for RCM QA match for U8 and F32 updates AVX * Fix for lens correction AVX * Removed space * Fixed warp affine for every other varient with the updated changes * Add fixes to match precision in quantization * Fix Precision mismatches * Update default cutoff to 1e-5 and specialized cutoff to 1e-4 * F32 QA Fix * Made Quality percentage as arg from testsuite * Resolved copilot comments * Resolved the copilot comments * Resolved Codex comments * HOST and HIP - pinned buffers for respective API (ROCm#628) * Removed memcpy and used hipHostMalloc for allocation : blend * Removed memcpy and used hipHostMalloc for allocation : brightness * Removed memcpy and used hipHostMalloc for allocation : color cast * Removed memcpy and used hipHostMalloc for allocation : color twist * Removed memcpy and used hipHostMalloc for allocation : contrast * Removed memcpy and used hipHostMalloc for allocation : crop mirror normalize * Removed memcpy and used hipHostMalloc for allocation : Exposure * Removed memcpy and used hipHostMalloc for allocation : Gamma correction * Removed memcpy and used hipHostMalloc for allocation : gaussian filter * Removed memcpy and used hipHostMalloc for allocation : Noise * Removed memcpy and used hipHostMalloc for allocation : Non linear blend * Removed memcpy and used hipHostMalloc for allocation : Resize mirror normalize * Removed memcpy and used hipHostMalloc for allocation : Water * Added hipHostFree for all kernels in test suite * Added hipHostFree for all 
kernels in test suite * Removed memcpy and used hipHostMalloc for allocation : Flip, spatter, rcm, color temperature * Resolved copilot review comments * Updated version * Removed unused parameter * Updated version in cmakeList * removed the host to device mem copies for warp affine and rotate * Updated version * Removed comment * Updated Chnagelog file * Update patch version from 2.2.0 to 2.2.1 * Update CHANGELOG * Address copilot comments for HIP HOST consistent allocation * Documentation changes for updated memcpy changes * Update ricap outer API to use pinned memory and remove mem copy * Fix memory allocation and deallocation for permutationTensor * Update api/rppt_tensor_effects_augmentations.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix spelling of noiseProbability and saltProbability * Fix deallocation --------- Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com> Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com> Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com> Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * resolved review comments * minor comment change * Resolved copilot review comments * Update src/modules/tensor/cpu/kernel/resize.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/modules/tensor/cpu/kernel/resize.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/modules/tensor/hip/kernel/jpeg_compression_distortion.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Updated test suite and resoled review comments * Updated HIP for F32 QA reduction function cases --------- Co-authored-by: Kiriti Gowda <kiriti.nageshgowda@amd.com> Co-authored-by: Lokesh Bonta <lokeswara@multicorewareinc.com> Co-authored-by: sampath117 <snehaa@multicorewareinc.com> Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com> 
Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com> Co-authored-by: ManasaDattaT <tammisetti.manasadatta@multicorewareinc.com> Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com> Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Lakshmi Kumar <lakshmi.kumar@amd.com>
* add support for dilate in HOST backend
* minor fix in changelog
* added golden outputs
remove commented code
* resolve build errors
* Add padding changes in HIP backend
* fix SIGSEGV issues
* fix QA for 9x9 kernel
* Add if condition for pack function and template for unpack and signext function
* Fix the rename of preLoadRows and max Comments
* Fix rename of Loader and MorphVecLoader
* Fix empty space, dilate_row_hip_compute function, removed if & else and aligned indent R.
* Fix remove whitespace and restored all unnecessary changes.
* Fix remove precision line and reverted back to static cast.
* Fix remove empty line, rename of kernelSze & padPolicy and remove {} for single line condition
* Fix Indentation of IF condition.
* resolved review comments
* resolve review comments
* Test suite - Add QA pass/fail tests for F32 bit depth (ROCm#665)
* Travis CI - key error fix
* Fix Bug in ColorTwist (#6) (#8) (#9)
* Added golden outputs and resolved HOST backend
* Updated bin files for median filter and resize crop mirror
* Updated bin files
* Updated bin files for the next set of kernel F32 QA
* Updated bin files for jpeg_compression_distortion
* Fixed resize QA failures
* Fix for Resize bilinear F32 QA HOST and HIP
* Fix for lens correction QA f32 for HOST and HIP for 1e-4 precision
* Fixed HIP rcm QA
* updates for warp Affine F32 QA
* Fix for RCM QA match for U8 and F32 updates AVX
* Fix for lens correction AVX
* Removed space
* Fixed warp affine for every other variant with the updated changes
* Add fixes to match precision in quantization
* Fix Precision mismatches
* Update default cutoff to 1e-5 and specialized cutoff to 1e-4
* F32 QA Fix
* Made Quality percentage as arg from testsuite
* Resolved copilot comments
* Resolved the copilot comments
* Resolved Codex comments
* HOST and HIP - pinned buffers for respective API (ROCm#628)
* Removed memcpy and used hipHostMalloc for allocation : blend
* Removed memcpy and used hipHostMalloc for allocation : brightness
* Removed memcpy and used hipHostMalloc for allocation : color cast
* Removed memcpy and used hipHostMalloc for allocation : color twist
* Removed memcpy and used hipHostMalloc for allocation : contrast
* Removed memcpy and used hipHostMalloc for allocation : crop mirror normalize
* Removed memcpy and used hipHostMalloc for allocation : Exposure
* Removed memcpy and used hipHostMalloc for allocation : Gamma correction
* Removed memcpy and used hipHostMalloc for allocation : gaussian filter
* Removed memcpy and used hipHostMalloc for allocation : Noise
* Removed memcpy and used hipHostMalloc for allocation : Non linear blend
* Removed memcpy and used hipHostMalloc for allocation : Resize mirror normalize
* Removed memcpy and used hipHostMalloc for allocation : Water
* Added hipHostFree for all kernels in test suite
* Added hipHostFree for all kernels in test suite
* Removed memcpy and used hipHostMalloc for allocation : Flip, spatter, rcm, color temperature
* Resolved copilot review comments
* Updated version
* Removed unused parameter
* Updated version in cmakeList
* removed the host to device mem copies for warp affine and rotate
* Updated version
* Removed comment
* Updated Changelog file
* Update patch version from 2.2.0 to 2.2.1
* Update CHANGELOG
* Address copilot comments for HIP HOST consistent allocation
* Documentation changes for updated memcpy changes
* Update ricap outer API to use pinned memory and remove mem copy
* Fix memory allocation and deallocation for permutationTensor
* Update api/rppt_tensor_effects_augmentations.h
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Fix spelling of noiseProbability and saltProbability
* Fix deallocation
---------
Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com>
Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* resolved review comments
* minor comment change
* Resolved copilot review comments
* Update src/modules/tensor/cpu/kernel/resize.cpp
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update src/modules/tensor/cpu/kernel/resize.cpp
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update src/modules/tensor/hip/kernel/jpeg_compression_distortion.cpp
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Updated test suite and resolved review comments
* Updated HIP for F32 QA reduction function cases
---------
Co-authored-by: Kiriti Gowda <kiriti.nageshgowda@amd.com>
Co-authored-by: Lokesh Bonta <lokeswara@multicorewareinc.com>
Co-authored-by: sampath117 <snehaa@multicorewareinc.com>
Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com>
Co-authored-by: ManasaDattaT <tammisetti.manasadatta@multicorewareinc.com>
Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Lakshmi Kumar <lakshmi.kumar@amd.com>
* Erode - HOST and HIP update (ROCm#666)
* added initial api support for erode
* added support for U8 and I8 bitdepths for 3, 5, 7, 9 kernel sizes
* added F16 and F32 bitdepth support
* added generic kernel support
* added golden outputs
removed commented code
* fix build errors
* Fix build and test_suite errors
* revert padding changes
* updated erode HIP kernel with latest changes
* Add F32 QA
* minor formatting fixes
* minor comment fix
* resolve copilot comments
* resolve review comments
* resolved review comments
* Add unpack templating changes and fix segmentation issue
* Fix PKD to PKD kernel 9 for Pack and Unpack changes.
* Add and template signext function
* Fix min Comments
* Fix one min Comments
* Add unroll and rename of preLoadRows
* Fix rename of Loader and MorphVecLoader
* Add empty line before comment
* Fix remove empty line, rename of kernelSze & padPolicy and remove {} for single line condition
* resolved review comments
* fix build warnings
---------
Co-authored-by: sampath1117 <sampath.rachumallu@multicorewareinc.com>
Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Co-authored-by: Mukesh Jayakodi <mukesh.jayakodi@multicorewareinc.com>
Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
Co-authored-by: HazarathKumarM <119284987+HazarathKumarM@users.noreply.github.com>
Co-authored-by: Lakshmi Kumar <lakshmi.kumar@amd.com>
* fix build errors
---------
Co-authored-by: sampath1117 <sampath.rachumallu@multicorewareinc.com>
Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Co-authored-by: Mukesh Jayakodi <mukesh.jayakodi@multicorewareinc.com>
Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
Co-authored-by: HazarathKumarM <119284987+HazarathKumarM@users.noreply.github.com>
Co-authored-by: Kiriti Gowda <kiriti.nageshgowda@amd.com>
Co-authored-by: Lokesh Bonta <lokeswara@multicorewareinc.com>
Co-authored-by: sampath117 <snehaa@multicorewareinc.com>
Co-authored-by: Srihari-mcw <srihari@multicorewareinc.com>
Co-authored-by: ManasaDattaT <tammisetti.manasadatta@multicorewareinc.com>
Co-authored-by: hmaddise <HazarathKumar.Maddisetty@amd.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Lakshmi Kumar <lakshmi.kumar@amd.com>
No description provided.