Guest User

Untitled

a guest
Jun 3rd, 2024
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.32 KB | None | 0 0
  1. cutlass/include/cutlass/gemm/warp/mma_tensor_op_policy.h(58): error: incomplete type is not allowed
  2. detected during:
  3. instantiation of class "cutlass::gemm::warp::MmaTensorOpPolicy<Operator_, OpDelta_> [with Operator_=cutlass::arch::Mma<cutlass::gemm::GemmShape<16, 8, 32>, 32, int8_t, cutlass::layout::RowMajor, cutlass::int4b_t, cutlass::layout::ColumnMajor, int32_t, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd>, OpDelta_=cutlass::MatrixShape<1, 1>]"
  4. /target/w4a8/cutlass/include/cutlass/gemm/warp/mma_tensor_op.h(194): here
  5. instantiation of class "cutlass::gemm::warp::MmaTensorOp<Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, Policy_, PartitionsK_, AccumulatorsInRowMajor, Enable> [with Shape_=cutlass::gemm::GemmShape<64, 64, 64>, ElementA_=int8_t, LayoutA_=cutlass::layout::RowMajorTensorOpMultiplicandCrosswise<8, 64>, ElementB_=cutlass::int4b_t, LayoutB_=cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise<4, 64>, ElementC_=int32_t, LayoutC_=cutlass::layout::RowMajor, Policy_=cutlass::gemm::warp::MmaTensorOpPolicy<cutlass::arch::Mma<cutlass::gemm::GemmShape<16, 8, 32>, 32, int8_t, cutlass::layout::RowMajor, cutlass::int4b_t, cutlass::layout::ColumnMajor, int32_t, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd>, cutlass::MatrixShape<1, 1>>, PartitionsK_=1, AccumulatorsInRowMajor=false, Enable=__nv_bool]"
  6. /target/w4a8/cutlass/include/cutlass/gemm/threadblock/mma_base.h(108): here
  7. instantiation of class "cutlass::gemm::threadblock::MmaBase<Shape_, Policy_, Stages, Enable> [with Shape_=cutlass::gemm::GemmShape<128, 128, 64>, Policy_=cutlass::gemm::threadblock::MmaPolicy<cutlass::gemm::warp::MmaTensorOp<cutlass::gemm::GemmShape<64, 64, 64>, int8_t, cutlass::layout::RowMajorTensorOpMultiplicandCrosswise<8, 64>, cutlass::int4b_t, cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise<4, 64>, int32_t, cutlass::layout::RowMajor, cutlass::gemm::warp::MmaTensorOpPolicy<cutlass::arch::Mma<cutlass::gemm::GemmShape<16, 8, 32>, 32, int8_t, cutlass::layout::RowMajor, cutlass::int4b_t, cutlass::layout::ColumnMajor, int32_t, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd>, cutlass::MatrixShape<1, 1>>, 1, false, __nv_bool>, cutlass::MatrixShape<0, 0>, cutlass::MatrixShape<0, 0>, 1>, Stages=4, Enable=__nv_bool]"
  8. /target/w4a8/cutlass/include/cutlass/gemm/threadblock/mma_multistage.h(92): here
  9. instantiation of class "cutlass::gemm::threadblock::MmaMultistage<Shape_, IteratorA_, SmemIteratorA_, CacheOpA, IteratorB_, SmemIteratorB_, CacheOpB, ElementC_, LayoutC_, Policy_, Stages, SharedMemoryClear, Enable> [with Shape_=cutlass::gemm::GemmShape<128, 128, 64>, IteratorA_=cutlass::transform::threadblock::PredicatedTileAccessIterator<cutlass::MatrixShape<128, 64>, int8_t, cutlass::layout::RowMajor, 1, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::PitchLinearShape<64, 128>, 128, cutlass::PitchLinearShape<4, 8>, 16>, cutlass::Array<int8_t, 16, false>, false, cutlass::layout::NoPermute>, SmemIteratorA_=cutlass::transform::threadblock::RegularTileAccessIterator<cutlass::MatrixShape<128, 64>, int8_t, cutlass::layout::RowMajorTensorOpMultiplicandCrosswise<8, 64>, 0, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::PitchLinearShape<64, 128>, 128, cutlass::PitchLinearShape<4, 8>, 16>, 16>, CacheOpA=cutlass::arch::CacheOperation::Global, IteratorB_=cutlass::transform::threadblock::PredicatedTileAccessIterator<cutlass::MatrixShape<64, 128>, cutlass::int4b_t, cutlass::layout::ColumnMajor, 0, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::PitchLinearShape<64, 128>, 128, cutlass::PitchLinearShape<2, 16>, 32>, cutlass::Array<cutlass::int4b_t, 32, false>, false, cutlass::layout::NoPermute>, SmemIteratorB_=cutlass::transform::threadblock::RegularTileAccessIterator<cutlass::MatrixShape<64, 128>, cutlass::int4b_t, cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise<4, 64>, 1, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::PitchLinearShape<64, 128>, 128, cutlass::PitchLinearShape<2, 16>, 32>, 16>, CacheOpB=cutlass::arch::CacheOperation::Global, ElementC_=int32_t, LayoutC_=cutlass::layout::RowMajor, Policy_=cutlass::gemm::threadblock::MmaPolicy<cutlass::gemm::warp::MmaTensorOp<cutlass::gemm::GemmShape<64, 64, 64>, int8_t, cutlass::layout::RowMajorTensorOpMultiplicandCrosswise<8, 64>, cutlass::int4b_t, cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise<4, 64>, int32_t, cutlass::layout::RowMajor, cutlass::gemm::warp::MmaTensorOpPolicy<cutlass::arch::Mma<cutlass::gemm::GemmShape<16, 8, 32>, 32, int8_t, cutlass::layout::RowMajor, cutlass::int4b_t, cutlass::layout::ColumnMajor, int32_t, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd>, cutlass::MatrixShape<1, 1>>, 1, false, __nv_bool>, cutlass::MatrixShape<0, 0>, cutlass::MatrixShape<0, 0>, 1>, Stages=4, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, Enable=__nv_bool]"
  10. /target/w4a8/cutlass/include/cutlass/gemm/kernel/default_gemm.h(375): here
  11. instantiation of class "cutlass::gemm::kernel::DefaultGemm<ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, LayoutC, ElementAccumulator, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm80, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, SplitKSerial, Operator, SharedMemoryClear, GatherA, GatherB, ScatterD, PermuteDLayout, PermuteALayout, PermuteBLayout, void> [with ElementA=int8_t, LayoutA=cutlass::layout::RowMajor, kAlignmentA=16, ElementB=cutlass::int4b_t, LayoutB=cutlass::layout::ColumnMajor, kAlignmentB=32, ElementC=float, LayoutC=cutlass::layout::RowMajor, ElementAccumulator=int32_t, ThreadblockShape=cutlass::gemm::GemmShape<128, 128, 64>, WarpShape=cutlass::gemm::GemmShape<64, 64, 64>, InstructionShape=cutlass::gemm::GemmShape<16, 8, 32>, EpilogueOutputOp=cutlass::epilogue::thread::LinearCombination<float, 4, int32_t, float, cutlass::epilogue::thread::ScaleType::Default, cutlass::FloatRoundStyle::round_to_nearest, float>, ThreadblockSwizzle=cutlass::gemm::threadblock::ThreadblockSwizzleStreamK, Stages=4, SplitKSerial=true, Operator=cutlass::arch::OpMultiplyAdd, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, GatherA=false, GatherB=false, ScatterD=false, PermuteDLayout=cutlass::layout::NoPermute, PermuteALayout=cutlass::layout::NoPermute, PermuteBLayout=cutlass::layout::NoPermute]"
  12. /target/w4a8/cutlass/include/cutlass/gemm/kernel/default_gemm_universal.h(241): here
  13. instantiation of class "cutlass::gemm::kernel::DefaultGemmUniversal<ElementA, LayoutA, cutlass::ComplexTransform::kNone, kAlignmentA, ElementB, LayoutB, cutlass::ComplexTransform::kNone, kAlignmentB, ElementC, LayoutC, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator, SharedMemoryClear, GatherA, GatherB, ScatterD, PermuteDLayout, PermuteALayout, PermuteBLayout, std::enable_if<<expression>, void>::type> [with ElementA=int8_t, LayoutA=cutlass::layout::RowMajor, kAlignmentA=16, ElementB=cutlass::int4b_t, LayoutB=cutlass::layout::ColumnMajor, kAlignmentB=32, ElementC=float, LayoutC=cutlass::layout::RowMajor, ElementAccumulator=int32_t, OperatorClass=cutlass::arch::OpClassTensorOp, ArchTag=cutlass::arch::Sm80, ThreadblockShape=cutlass::gemm::GemmShape<128, 128, 64>, WarpShape=cutlass::gemm::GemmShape<64, 64, 64>, InstructionShape=cutlass::gemm::GemmShape<16, 8, 32>, EpilogueOutputOp=cutlass::epilogue::thread::LinearCombination<float, 4, int32_t, float, cutlass::epilogue::thread::ScaleType::Default, cutlass::FloatRoundStyle::round_to_nearest, float>, ThreadblockSwizzle=cutlass::gemm::threadblock::ThreadblockSwizzleStreamK, Stages=4, Operator=cutlass::arch::OpMultiplyAdd, SharedMemoryClear=cutlass::gemm::SharedMemoryClearOption::kNone, GatherA=false, GatherB=false, ScatterD=false, PermuteDLayout=cutlass::layout::NoPermute, PermuteALayout=cutlass::layout::NoPermute, PermuteBLayout=cutlass::layout::NoPermute]"
  14. /target/w4a8/cutlass/include/cutlass/gemm/kernel/default_gemm_universal_with_visitor.h(120): here
  15. instantiation of class "cutlass::gemm::kernel::DefaultGemmWithVisitor<ElementA_, LayoutA_, TransformA, kAlignmentA, ElementB_, LayoutB_, TransformB, kAlignmentB, ElementC_, LayoutC_, kAlignmentC, ElementAccumulator, ElementEpilogue, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, FusionCallbacks, ThreadblockSwizzle, Stages, Operator, EpilogueStages> [with ElementA_=int8_t, LayoutA_=cutlass::layout::RowMajor, TransformA=cutlass::ComplexTransform::kNone, kAlignmentA=16, ElementB_=cutlass::int4b_t, LayoutB_=cutlass::layout::ColumnMajor, TransformB=cutlass::ComplexTransform::kNone, kAlignmentB=32, ElementC_=float, LayoutC_=cutlass::layout::RowMajor, kAlignmentC=4, ElementAccumulator=int32_t, ElementEpilogue=float, OperatorClass=cutlass::arch::OpClassTensorOp, ArchTag=cutlass::arch::Sm80, ThreadblockShape=cutlass::gemm::GemmShape<128, 128, 64>, WarpShape=cutlass::gemm::GemmShape<64, 64, 64>, InstructionShape=cutlass::gemm::GemmShape<16, 8, 32>, FusionCallbacks=cutlass::epilogue::threadblock::TreeVisitor2x<cutlass::epilogue::threadblock::VisitorAuxStore<cutlass::epilogue::threadblock::OutputTileThreadLayout<cutlass::gemm::GemmShape<128, 128, 64>, cutlass::gemm::GemmShape<64, 64, 64>, float, 4, 1>, float, cutlass::FloatRoundStyle::round_to_nearest, cute::tuple<int64_t, cute::C<1>, int64_t>>, cutlass::epilogue::threadblock::TreeVisitor2x<cutlass::epilogue::threadblock::VisitorCompute<cutlass::multiplies, float, float, cutlass::FloatRoundStyle::round_to_nearest, void>, cutlass::epilogue::threadblock::TreeVisitor2x<cutlass::epilogue::threadblock::VisitorCompute<cutlass::multiplies, float, float, cutlass::FloatRoundStyle::round_to_nearest, void>, cutlass::epilogue::threadblock::VisitorAccFetch, cutlass::epilogue::threadblock::VisitorColBroadcast<cutlass::epilogue::threadblock::OutputTileThreadLayout<cutlass::gemm::GemmShape<128, 128, 64>, cutlass::gemm::GemmShape<64, 64, 64>, float, 4, 1>, float, cute::tuple<int32_t, cute::_1, cute::_0>>>, cutlass::epilogue::threadblock::VisitorRowBroadcast<cutlass::epilogue::threadblock::OutputTileThreadLayout<cutlass::gemm::GemmShape<128, 128, 64>, cutlass::gemm::GemmShape<64, 64, 64>, float, 4, 1>, float, cute::tuple<cute::_0, cute::_1, int32_t>>>>, ThreadblockSwizzle=cutlass::gemm::threadblock::ThreadblockSwizzleStreamK, Stages=4, Operator=cutlass::arch::OpMultiplyAdd, EpilogueStages=1]"
Advertisement
Add Comment
Please, Sign In to add comment