Skip to content

Block is already a merge block for another header #3441

@ehsannas

Description

@ehsannas

The loop unrolling pass has recently been added to performance passes. We have received a couple of reports saying this has caused the following error to occur for some HLSL shaders compiled with DXC. It seems to be a corner case.

Here is a sample shader:

// Output buffer; main() stores into element 0 on every loop iteration.
RWStructuredBuffer<float> output;

// Value returned by getData(): boolFunction() tests `flags`, and main() reads
// `extended.x`.
struct Data {
  uint flags;
  float4 extended;	// Must be a vector -- scalar float does not repro
};

// No-op taking a float4 as an inout parameter; getData() passes data.extended to it.
// This function looks completely pointless, but without it there is no repro
// (obviously it does something in the actual shader!)
void emptyFunction(inout float4 param) {}

// Returns true when data.flags is non-zero and false otherwise.
// Both branches return, so the merge block of this if-else is unreachable in
// the generated SPIR-V.
bool boolFunction(Data data) {
  // This must be an if-else: a conditional expression, a cast, or a plain
  // if (without else) does not repro.
  if (data.flags)
    return true;
  else
    return false;
}

// Builds and returns a Data value: passes data.extended through emptyFunction(),
// then sets extended.x to 0 when boolFunction(data) returns true.
// NOTE(review): `data` is never initialized, so data.flags is read while still
// undefined (it shows up as OpUndef in the optimized SPIR-V) -- presumably
// acceptable for a minimal repro; confirm against the original shader.
Data getData() {
  Data data;
  emptyFunction(data.extended);
  if (boolFunction(data)) {
    // This must be a single component, setting the whole vector does not repro
    data.extended.x = 0;
  }
  return data;
}

// Number of loop iterations in main().
// Must be >= 2, 1 does not repro
#define LOOP_COUNT 2

// Compute entry point with a 64x1x1 thread group. Calls getData() LOOP_COUNT
// times and stores extended.x of each result into output[0].
[numthreads(64, 1, 1)]
void main() {
  // Must be unroll, [loop] does not repro
  [unroll]
  // The loop body is the single (unbraced) statement that follows.
  for(uint i = 0; i < LOOP_COUNT; ++i)
  output[0] = getData().extended.x;
}

If optimizations are disabled, we get the following SPIR-V:

; SPIR-V
; Version: 1.0
; Generator: Google spiregg; 0
; Bound: 78
; Schema: 0
               OpCapability Shader
               OpMemoryModel Logical GLSL450
               OpEntryPoint GLCompute %main "main"
               OpExecutionMode %main LocalSize 64 1 1
               OpSource HLSL 600
               OpName %type_RWStructuredBuffer_float "type.RWStructuredBuffer.float"
               OpName %output "output"
               OpName %type_ACSBuffer_counter "type.ACSBuffer.counter"
               OpMemberName %type_ACSBuffer_counter 0 "counter"
               OpName %counter_var_output "counter.var.output"
               OpName %main "main"
               OpName %src_main "src.main"
               OpName %bb_entry "bb.entry"
               OpName %i "i"
               OpName %Data "Data"
               OpMemberName %Data 0 "flags"
               OpMemberName %Data 1 "extended"
               OpName %temp_var_Data "temp.var.Data"
               OpName %for_check "for.check"
               OpName %for_body "for.body"
               OpName %for_continue "for.continue"
               OpName %for_merge "for.merge"
               OpName %getData "getData"
               OpName %bb_entry_0 "bb.entry"
               OpName %data "data"
               OpName %param_var_data "param.var.data"
               OpName %if_true "if.true"
               OpName %if_merge "if.merge"
               OpName %emptyFunction "emptyFunction"
               OpName %param "param"
               OpName %bb_entry_1 "bb.entry"
               OpName %boolFunction "boolFunction"
               OpName %data_0 "data"
               OpName %bb_entry_2 "bb.entry"
               OpName %if_true_0 "if.true"
               OpName %if_false "if.false"
               OpName %if_merge_0 "if.merge"
               OpDecorate %output DescriptorSet 0
               OpDecorate %output Binding 0
               OpDecorate %counter_var_output DescriptorSet 0
               OpDecorate %counter_var_output Binding 1
               OpDecorate %_runtimearr_float ArrayStride 4
               OpMemberDecorate %type_RWStructuredBuffer_float 0 Offset 0
               OpDecorate %type_RWStructuredBuffer_float BufferBlock
               OpMemberDecorate %type_ACSBuffer_counter 0 Offset 0
               OpDecorate %type_ACSBuffer_counter BufferBlock
       %uint = OpTypeInt 32 0
     %uint_0 = OpConstant %uint 0
     %uint_2 = OpConstant %uint 2
        %int = OpTypeInt 32 1
      %int_1 = OpConstant %int 1
      %int_0 = OpConstant %int 0
     %uint_1 = OpConstant %uint 1
      %float = OpTypeFloat 32
    %float_0 = OpConstant %float 0
       %bool = OpTypeBool
       %true = OpConstantTrue %bool
      %false = OpConstantFalse %bool
         %16 = OpConstantNull %bool
%_runtimearr_float = OpTypeRuntimeArray %float
%type_RWStructuredBuffer_float = OpTypeStruct %_runtimearr_float
%_ptr_Uniform_type_RWStructuredBuffer_float = OpTypePointer Uniform %type_RWStructuredBuffer_float
%type_ACSBuffer_counter = OpTypeStruct %int
%_ptr_Uniform_type_ACSBuffer_counter = OpTypePointer Uniform %type_ACSBuffer_counter
       %void = OpTypeVoid
         %23 = OpTypeFunction %void
%_ptr_Function_uint = OpTypePointer Function %uint
    %v4float = OpTypeVector %float 4
       %Data = OpTypeStruct %uint %v4float
%_ptr_Function_Data = OpTypePointer Function %Data
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Uniform_float = OpTypePointer Uniform %float
         %50 = OpTypeFunction %Data
%_ptr_Function_float = OpTypePointer Function %float
         %66 = OpTypeFunction %void %_ptr_Function_v4float
         %69 = OpTypeFunction %bool %_ptr_Function_Data
     %output = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_float Uniform
%counter_var_output = OpVariable %_ptr_Uniform_type_ACSBuffer_counter Uniform
       %main = OpFunction %void None %23
         %24 = OpLabel
         %25 = OpFunctionCall %void %src_main
               OpReturn
               OpFunctionEnd
   %src_main = OpFunction %void None %23
   %bb_entry = OpLabel
          %i = OpVariable %_ptr_Function_uint Function
%temp_var_Data = OpVariable %_ptr_Function_Data Function
               OpStore %i %uint_0
               OpBranch %for_check
  %for_check = OpLabel
         %35 = OpLoad %uint %i
         %36 = OpULessThan %bool %35 %uint_2
               OpLoopMerge %for_merge %for_continue Unroll
               OpBranchConditional %36 %for_body %for_merge
   %for_body = OpLabel
         %40 = OpFunctionCall %Data %getData
               OpStore %temp_var_Data %40
         %43 = OpAccessChain %_ptr_Function_v4float %temp_var_Data %int_1
         %44 = OpLoad %v4float %43
         %45 = OpCompositeExtract %float %44 0
         %47 = OpAccessChain %_ptr_Uniform_float %output %int_0 %uint_0
               OpStore %47 %45
               OpBranch %for_continue
%for_continue = OpLabel
         %48 = OpLoad %uint %i
         %49 = OpIAdd %uint %48 %uint_1
               OpStore %i %49
               OpBranch %for_check
  %for_merge = OpLabel
               OpReturn
               OpFunctionEnd
    %getData = OpFunction %Data None %50
 %bb_entry_0 = OpLabel
       %data = OpVariable %_ptr_Function_Data Function
%param_var_data = OpVariable %_ptr_Function_Data Function
         %54 = OpAccessChain %_ptr_Function_v4float %data %int_1
         %55 = OpFunctionCall %void %emptyFunction %54
         %57 = OpLoad %Data %data
               OpStore %param_var_data %57
         %58 = OpFunctionCall %bool %boolFunction %param_var_data
               OpSelectionMerge %if_merge None
               OpBranchConditional %58 %if_true %if_merge
    %if_true = OpLabel
         %62 = OpAccessChain %_ptr_Function_v4float %data %int_1
         %64 = OpAccessChain %_ptr_Function_float %62 %int_0
               OpStore %64 %float_0
               OpBranch %if_merge
   %if_merge = OpLabel
         %65 = OpLoad %Data %data
               OpReturnValue %65
               OpFunctionEnd
%emptyFunction = OpFunction %void None %66
      %param = OpFunctionParameter %_ptr_Function_v4float
 %bb_entry_1 = OpLabel
               OpReturn
               OpFunctionEnd
%boolFunction = OpFunction %bool None %69
     %data_0 = OpFunctionParameter %_ptr_Function_Data
 %bb_entry_2 = OpLabel
         %72 = OpAccessChain %_ptr_Function_uint %data_0 %int_0
         %73 = OpLoad %uint %72
         %74 = OpINotEqual %bool %73 %uint_0
               OpSelectionMerge %if_merge_0 None
               OpBranchConditional %74 %if_true_0 %if_false
  %if_true_0 = OpLabel
               OpReturnValue %true
   %if_false = OpLabel
               OpReturnValue %false
 %if_merge_0 = OpLabel
               OpReturnValue %16
               OpFunctionEnd

And the default compile (with optimizations enabled) results in the following error:

 Block 26[%26] is already a merge block for another header

With the default compile (optimizations enabled) and the spirv-val validation step silenced, we get:

; SPIR-V
; Version: 1.0
; Generator: Google spiregg; 0
; Bound: 46
; Schema: 0
               OpCapability Shader
               OpMemoryModel Logical GLSL450
               OpEntryPoint GLCompute %main "main"
               OpExecutionMode %main LocalSize 64 1 1
               OpSource HLSL 600
               OpName %type_RWStructuredBuffer_float "type.RWStructuredBuffer.float"
               OpName %output "output"
               OpName %main "main"
               OpDecorate %output DescriptorSet 0
               OpDecorate %output Binding 0
               OpDecorate %_runtimearr_float ArrayStride 4
               OpMemberDecorate %type_RWStructuredBuffer_float 0 Offset 0
               OpDecorate %type_RWStructuredBuffer_float BufferBlock
       %uint = OpTypeInt 32 0
     %uint_0 = OpConstant %uint 0
        %int = OpTypeInt 32 1
      %int_0 = OpConstant %int 0
      %float = OpTypeFloat 32
    %float_0 = OpConstant %float 0
       %bool = OpTypeBool
       %true = OpConstantTrue %bool
      %false = OpConstantFalse %bool
%_runtimearr_float = OpTypeRuntimeArray %float
%type_RWStructuredBuffer_float = OpTypeStruct %_runtimearr_float
%_ptr_Uniform_type_RWStructuredBuffer_float = OpTypePointer Uniform %type_RWStructuredBuffer_float
       %void = OpTypeVoid
         %16 = OpTypeFunction %void
    %v4float = OpTypeVector %float 4
%_ptr_Uniform_float = OpTypePointer Uniform %float
     %output = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_float Uniform
         %19 = OpUndef %uint
         %20 = OpUndef %v4float
         %21 = OpUndef %v4float
       %main = OpFunction %void None %16
         %22 = OpLabel
               OpSelectionMerge %23 None
               OpSwitch %uint_0 %24
         %24 = OpLabel
         %25 = OpINotEqual %bool %19 %uint_0
               OpSelectionMerge %26 None
               OpBranchConditional %25 %27 %28
         %28 = OpLabel
               OpBranch %23
         %27 = OpLabel
               OpBranch %23
         %26 = OpLabel
               OpUnreachable
         %23 = OpLabel
         %29 = OpPhi %bool %false %28 %true %27
               OpSelectionMerge %30 None
               OpBranchConditional %29 %31 %30
         %31 = OpLabel
         %32 = OpCompositeInsert %v4float %float_0 %21 0
               OpBranch %30
         %30 = OpLabel
         %33 = OpPhi %v4float %20 %23 %32 %31
         %34 = OpCompositeExtract %float %33 0
         %35 = OpAccessChain %_ptr_Uniform_float %output %int_0 %uint_0
               OpStore %35 %34
               OpSelectionMerge %36 None
               OpSwitch %uint_0 %37
         %37 = OpLabel
               OpSelectionMerge %26 None
               OpBranchConditional %25 %38 %39
         %39 = OpLabel
               OpBranch %36
         %38 = OpLabel
               OpBranch %36
         %36 = OpLabel
         %40 = OpPhi %bool %false %39 %true %38
               OpSelectionMerge %41 None
               OpBranchConditional %40 %42 %41
         %42 = OpLabel
         %43 = OpCompositeInsert %v4float %float_0 %21 0
               OpBranch %41
         %41 = OpLabel
         %44 = OpPhi %v4float %33 %36 %43 %42
         %45 = OpCompositeExtract %float %44 0
               OpStore %35 %45
               OpReturn
               OpFunctionEnd

where %26 is in fact used as the merge block of two different OpSelectionMerge instructions (one in block %24 and one in block %37).

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions