-
Notifications
You must be signed in to change notification settings - Fork 15.5k
Closed
Closed
Copy link
Labels
Description
Git commit
Operating systems
Linux
GGML backends
HIP
Problem description & steps to reproduce
Impossible to build llama.cpp with GGML_HIP_ROCWMMA_FATTN=ON on Ubuntu 24.04 and ROCm 6.4.4 for
gfx1100 + gfx908
First Bad Commit
No response
Compile command
HIPCXX="$(hipconfig -l)/clang" \
HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build \
-DGGML_HIP=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DGGML_NATIVE=ON \
-DBUILD_SHARED_LIBS=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_SERVER=ON \
-DGGML_RPC=ON \
-DCMAKE_BUILD_TYPE=Release \
&& cmake --build build --config Release -- -j 48
Relevant log output
[ 10%] Building HIP object ggml/src/ggml-hip/CMakeFiles/ggml-hip.dir/__/ggml-cuda/fattn-wmma-f16.cu.o
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 16, 4, 64, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:570:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 16, float>' requested here
570 | ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 16, 4, 64, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:570:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 16, float>' requested here
570 | ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<80, 16, 4, 16, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:573:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<80, 16, float>' requested here
573 | ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<80, 16, 4, 16, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:573:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<80, 16, float>' requested here
573 | ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<96, 16, 4, 32, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:576:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<96, 16, float>' requested here
576 | ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<96, 16, 4, 32, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:576:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<96, 16, float>' requested here
576 | ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<112, 16, 4, 16, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:579:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<112, 16, float>' requested here
579 | ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<112, 16, 4, 16, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:579:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<112, 16, float>' requested here
579 | ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<128, 16, 4, 64, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:582:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<128, 16, float>' requested here
582 | ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<128, 16, 4, 64, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:582:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<128, 16, float>' requested here
582 | ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<256, 16, 4, 64, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:585:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<256, 16, float>' requested here
585 | ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<256, 16, 4, 64, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:585:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<256, 16, float>' requested here
585 | ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 32, 4, 64, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:595:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 32, float>' requested here
595 | ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<64, 32, 4, 64, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:595:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<64, 32, float>' requested here
595 | ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<80, 32, 4, 16, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:598:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<80, 32, float>' requested here
598 | ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<80, 32, 4, 16, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:598:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<80, 32, float>' requested here
598 | ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<96, 32, 4, 32, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:601:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<96, 32, float>' requested here
601 | ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:552:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<96, 32, 4, 32, float, true>' requested here
552 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:601:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<96, 32, float>' requested here
601 | ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:344:17: error: no matching function for call to 'fill_fragment'
344 | wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], static_cast<half>(0.0f));
| ^~~~~~~~~~~~~~~~~~~
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:548:24: note: in instantiation of function template specialization 'flash_attn_ext_f16<112, 32, 4, 16, float, false>' requested here
548 | fattn_kernel = flash_attn_ext_f16<
| ^
/home/iyanello/Projects/ML/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu:604:21: note: in instantiation of function template specialization 'ggml_cuda_flash_attn_ext_wmma_f16_case<112, 32, float>' requested here
604 | ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst);
| ^
/opt/rocm/include/rocwmma/rocwmma_impl.hpp:205:9: note: candidate template ignored: deduced conflicting types for parameter 'DataT' ('_Float16' vs. 'half' (aka '__half'))
205 | fill_fragment(fragment<MatrixT, BlockM, BlockN, BlockK, DataT, DataLayoutT>& frag,
| ^
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated when compiling for gfx1100.
gmake[2]: *** [ggml/src/ggml-hip/CMakeFiles/ggml-hip.dir/build.make:335: ggml/src/ggml-hip/CMakeFiles/ggml-hip.dir/__/ggml-cuda/fattn-wmma-f16.cu.o] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:622: ggml/src/ggml-hip/CMakeFiles/ggml-hip.dir/all] Error 2
gmake: *** [Makefile:136: all] Error 2
Reactions are currently unavailable