Skip to content

For loop incorrectly optimized away #1166

@antiagainst

Description

@antiagainst

For the following HLSL code:

// Resources used by the reproducer shader.
// Sampler bound to register s2 in space0.
SamplerState gSS  :register(s2, space0);
// 2D texture bound to register t0 in space0.
Texture2D    gTex :register(t0, space0);

// Constant buffer bound to register b0 in space2.
cbuffer MyCBuffer :register(b0, space2)
{
  // Multiplied by the loop counter in PS_BlurFilter_Box1 to form the sample UV.
  float2 scale;
  // Only size.x is read by PS_BlurFilter_Box1, as the loop bound (-size.x .. size.x).
  float3 size;
};

// Samples `tex` at coordinate `uv` with the explicit mip level `lod`,
// using `samplerState`, and returns the sampled float4.
float4 GetTexture2DLod(Texture2D tex, SamplerState samplerState, float2 uv, float lod)
{
  return tex.SampleLevel(samplerState, uv.xy, lod);
}

// Pixel shader entry point; the result is written to SV_Target.
// Accumulates texture samples taken at uv = i * scale (LOD 0) for
// i = -size.x, -size.x + 1, ... while i <= size.x, and returns the sum.
float4 PS_BlurFilter_Box1() : SV_Target
{
  float4 color = 0.0f;
  float i = 0.0f;

  // The loop bounds are read from the constant buffer (size.x), and the loop
  // counter is a float that is incremented by one each iteration.
  for (i = -size.x; i <= size.x; i++)
    color += GetTexture2DLod(gTex, gSS, i * scale, 0.f);

  return color;
}

DXC/Spiregg generates the following raw SPIR-V:

               OpCapability Shader
               OpMemoryModel Logical GLSL450
               OpEntryPoint Fragment %PS_BlurFilter_Box1 "PS_BlurFilter_Box1" %out_var_SV_Target
               OpExecutionMode %PS_BlurFilter_Box1 OriginUpperLeft
               OpName %bb_entry "bb.entry"
               OpName %for_check "for.check"
               OpName %for_body "for.body"
               OpName %for_continue "for.continue"
               OpName %for_merge "for.merge"
               OpName %bb_entry_0 "bb.entry"
               OpName %type_sampler "type.sampler"
               OpName %gSS "gSS"
               OpName %type_2d_image "type.2d.image"
               OpName %gTex "gTex"
               OpName %type_MyCBuffer "type.MyCBuffer"
               OpMemberName %type_MyCBuffer 0 "scale"
               OpMemberName %type_MyCBuffer 1 "size"
               OpName %var_MyCBuffer "var.MyCBuffer"
               OpName %src_PS_BlurFilter_Box1 "src.PS_BlurFilter_Box1"
               OpName %PS_BlurFilter_Box1 "PS_BlurFilter_Box1"
               OpName %out_var_SV_Target "out.var.SV_Target"
               OpName %color "color"
               OpName %i "i"
               OpName %param_var_tex "param.var.tex"
               OpName %param_var_samplerState "param.var.samplerState"
               OpName %param_var_uv "param.var.uv"
               OpName %param_var_lod "param.var.lod"
               OpName %GetTexture2DLod "GetTexture2DLod"
               OpName %tex "tex"
               OpName %samplerState "samplerState"
               OpName %uv "uv"
               OpName %lod "lod"
               OpName %type_sampled_image "type.sampled.image"
               OpMemberDecorate %type_MyCBuffer 0 Offset 0
               OpMemberDecorate %type_MyCBuffer 1 Offset 16
               OpDecorate %type_MyCBuffer Block
               OpDecorate %out_var_SV_Target Location 0
               OpDecorate %gSS DescriptorSet 0
               OpDecorate %gSS Binding 2
               OpDecorate %gTex DescriptorSet 0
               OpDecorate %gTex Binding 0
               OpDecorate %var_MyCBuffer DescriptorSet 2
               OpDecorate %var_MyCBuffer Binding 0
        %int = OpTypeInt 32 1
%type_sampler = OpTypeSampler
%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler
      %float = OpTypeFloat 32
%type_2d_image = OpTypeImage %float 2D 0 0 0 1 Unknown
%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
    %v2float = OpTypeVector %float 2
    %v3float = OpTypeVector %float 3
%type_MyCBuffer = OpTypeStruct %v2float %v3float
%_ptr_Uniform_type_MyCBuffer = OpTypePointer Uniform %type_MyCBuffer
       %void = OpTypeVoid
         %15 = OpTypeFunction %void
    %v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
         %22 = OpTypeFunction %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
%_ptr_Uniform_float = OpTypePointer Uniform %float
       %bool = OpTypeBool
%_ptr_Function_type_2d_image = OpTypePointer Function %type_2d_image
%_ptr_Function_type_sampler = OpTypePointer Function %type_sampler
%_ptr_Function_v2float = OpTypePointer Function %v2float
%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
         %71 = OpTypeFunction %v4float %_ptr_Function_type_2d_image %_ptr_Function_type_sampler %_ptr_Function_v2float %_ptr_Function_float
%type_sampled_image = OpTypeSampledImage %type_2d_image
    %float_0 = OpConstant %float 0
         %27 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
      %int_1 = OpConstant %int 1
      %int_0 = OpConstant %int 0
    %float_1 = OpConstant %float 1
        %gSS = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant
       %gTex = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
%var_MyCBuffer = OpVariable %_ptr_Uniform_type_MyCBuffer Uniform
%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output
%PS_BlurFilter_Box1 = OpFunction %void None %15
         %17 = OpLabel
         %19 = OpFunctionCall %v4float %src_PS_BlurFilter_Box1
               OpStore %out_var_SV_Target %19
               OpReturn
               OpFunctionEnd
%src_PS_BlurFilter_Box1 = OpFunction %v4float None %22
   %bb_entry = OpLabel
      %color = OpVariable %_ptr_Function_v4float Function
          %i = OpVariable %_ptr_Function_float Function
%param_var_tex = OpVariable %_ptr_Function_type_2d_image Function
%param_var_samplerState = OpVariable %_ptr_Function_type_sampler Function
%param_var_uv = OpVariable %_ptr_Function_v2float Function
%param_var_lod = OpVariable %_ptr_Function_float Function
               OpStore %color %27
               OpStore %i %float_0
         %37 = OpAccessChain %_ptr_Uniform_v3float %var_MyCBuffer %int_1
         %40 = OpAccessChain %_ptr_Uniform_float %37 %int_0
         %41 = OpLoad %float %40
         %42 = OpFNegate %float %41
               OpStore %i %42
               OpBranch %for_check
  %for_check = OpLabel
         %44 = OpLoad %float %i
         %45 = OpAccessChain %_ptr_Uniform_v3float %var_MyCBuffer %int_1
         %46 = OpAccessChain %_ptr_Uniform_float %45 %int_0
         %47 = OpLoad %float %46
         %48 = OpFOrdLessThanEqual %bool %44 %47
               OpLoopMerge %for_merge %for_continue None
               OpBranchConditional %48 %for_body %for_merge
   %for_body = OpLabel
         %51 = OpLoad %type_2d_image %gTex
               OpStore %param_var_tex %51
         %54 = OpLoad %type_sampler %gSS
               OpStore %param_var_samplerState %54
         %58 = OpAccessChain %_ptr_Uniform_v2float %var_MyCBuffer %int_0
         %59 = OpLoad %v2float %58
         %60 = OpLoad %float %i
         %61 = OpVectorTimesScalar %v2float %59 %60
               OpStore %param_var_uv %61
               OpStore %param_var_lod %float_0
         %64 = OpFunctionCall %v4float %GetTexture2DLod %param_var_tex %param_var_samplerState %param_var_uv %param_var_lod
         %65 = OpLoad %v4float %color
         %66 = OpFAdd %v4float %65 %64
               OpStore %color %66
               OpBranch %for_continue
%for_continue = OpLabel
         %67 = OpLoad %float %i
         %69 = OpFAdd %float %67 %float_1
               OpStore %i %69
               OpBranch %for_check
  %for_merge = OpLabel
         %70 = OpLoad %v4float %color
               OpReturnValue %70
               OpFunctionEnd
%GetTexture2DLod = OpFunction %v4float None %71
        %tex = OpFunctionParameter %_ptr_Function_type_2d_image
%samplerState = OpFunctionParameter %_ptr_Function_type_sampler
         %uv = OpFunctionParameter %_ptr_Function_v2float
        %lod = OpFunctionParameter %_ptr_Function_float
 %bb_entry_0 = OpLabel
         %77 = OpLoad %type_2d_image %tex
         %78 = OpLoad %type_sampler %samplerState
         %79 = OpLoad %v2float %uv
         %80 = OpLoad %float %lod
         %81 = OpSampledImage %type_sampled_image %77 %78
         %83 = OpImageSampleExplicitLod %v4float %81 %79 Lod %80
               OpReturnValue %83
               OpFunctionEnd

The legalization passes (RegisterLegalizationPasses()) turn it into:

               OpCapability Shader
               OpMemoryModel Logical GLSL450
               OpEntryPoint Fragment %PS_BlurFilter_Box1 "PS_BlurFilter_Box1" %out_var_SV_Target
               OpExecutionMode %PS_BlurFilter_Box1 OriginUpperLeft
               OpName %type_sampler "type.sampler"
               OpName %gSS "gSS"
               OpName %type_2d_image "type.2d.image"
               OpName %gTex "gTex"
               OpName %type_MyCBuffer "type.MyCBuffer"
               OpMemberName %type_MyCBuffer 0 "scale"
               OpMemberName %type_MyCBuffer 1 "size"
               OpName %var_MyCBuffer "var.MyCBuffer"
               OpName %PS_BlurFilter_Box1 "PS_BlurFilter_Box1"
               OpName %out_var_SV_Target "out.var.SV_Target"
               OpName %type_sampled_image "type.sampled.image"
               OpMemberDecorate %type_MyCBuffer 0 Offset 0
               OpMemberDecorate %type_MyCBuffer 1 Offset 16
               OpDecorate %type_MyCBuffer Block
               OpDecorate %out_var_SV_Target Location 0
               OpDecorate %gSS DescriptorSet 0
               OpDecorate %gSS Binding 2
               OpDecorate %gTex DescriptorSet 0
               OpDecorate %gTex Binding 0
               OpDecorate %var_MyCBuffer DescriptorSet 2
               OpDecorate %var_MyCBuffer Binding 0
        %int = OpTypeInt 32 1
%type_sampler = OpTypeSampler
%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler
      %float = OpTypeFloat 32
%type_2d_image = OpTypeImage %float 2D 0 0 0 1 Unknown
%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
    %v2float = OpTypeVector %float 2
    %v3float = OpTypeVector %float 3
%type_MyCBuffer = OpTypeStruct %v2float %v3float
%_ptr_Uniform_type_MyCBuffer = OpTypePointer Uniform %type_MyCBuffer
       %void = OpTypeVoid
         %15 = OpTypeFunction %void
    %v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
         %22 = OpTypeFunction %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
%_ptr_Uniform_float = OpTypePointer Uniform %float
       %bool = OpTypeBool
%_ptr_Function_type_2d_image = OpTypePointer Function %type_2d_image
%_ptr_Function_type_sampler = OpTypePointer Function %type_sampler
%_ptr_Function_v2float = OpTypePointer Function %v2float
%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
         %71 = OpTypeFunction %v4float %_ptr_Function_type_2d_image %_ptr_Function_type_sampler %_ptr_Function_v2float %_ptr_Function_float
%type_sampled_image = OpTypeSampledImage %type_2d_image
    %float_0 = OpConstant %float 0
         %27 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
      %int_1 = OpConstant %int 1
      %int_0 = OpConstant %int 0
    %float_1 = OpConstant %float 1
        %gSS = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant
       %gTex = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
%var_MyCBuffer = OpVariable %_ptr_Uniform_type_MyCBuffer Uniform
%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output
%PS_BlurFilter_Box1 = OpFunction %void None %15
         %17 = OpLabel
         %91 = OpAccessChain %_ptr_Uniform_v3float %var_MyCBuffer %int_1
         %92 = OpAccessChain %_ptr_Uniform_float %91 %int_0
         %93 = OpLoad %float %92
         %94 = OpFNegate %float %93
               OpBranch %95
         %95 = OpLabel
        %123 = OpPhi %v4float %27 %17 %112 %102
        %124 = OpPhi %float %94 %17 %114 %102
         %97 = OpAccessChain %_ptr_Uniform_v3float %var_MyCBuffer %int_1
         %98 = OpAccessChain %_ptr_Uniform_float %97 %int_0
         %99 = OpLoad %float %98
        %100 = OpFOrdLessThanEqual %bool %124 %99
               OpLoopMerge %101 %102 None
               OpBranchConditional %100 %103 %101
        %103 = OpLabel
        %104 = OpLoad %type_2d_image %gTex
        %105 = OpLoad %type_sampler %gSS
        %106 = OpAccessChain %_ptr_Uniform_v2float %var_MyCBuffer %int_0
        %107 = OpLoad %v2float %106
        %109 = OpVectorTimesScalar %v2float %107 %124
        %121 = OpSampledImage %type_sampled_image %104 %105
        %122 = OpImageSampleExplicitLod %v4float %121 %109 Lod %float_0
        %112 = OpFAdd %v4float %123 %122
               OpBranch %102
        %102 = OpLabel
        %114 = OpFAdd %float %124 %float_1
               OpBranch %95
        %101 = OpLabel
               OpStore %out_var_SV_Target %123
               OpReturn
               OpFunctionEnd

This seems correct. But the optimization passes (RegisterPerformancePasses()) turn it into:

               OpCapability Shader
               OpMemoryModel Logical GLSL450
               OpEntryPoint Fragment %PS_BlurFilter_Box1 "PS_BlurFilter_Box1" %out_var_SV_Target
               OpExecutionMode %PS_BlurFilter_Box1 OriginUpperLeft
               OpName %type_sampler "type.sampler"
               OpName %type_2d_image "type.2d.image"
               OpName %type_MyCBuffer "type.MyCBuffer"
               OpMemberName %type_MyCBuffer 0 "scale"
               OpMemberName %type_MyCBuffer 1 "size"
               OpName %PS_BlurFilter_Box1 "PS_BlurFilter_Box1"
               OpName %out_var_SV_Target "out.var.SV_Target"
               OpName %type_sampled_image "type.sampled.image"
               OpMemberDecorate %type_MyCBuffer 0 Offset 0
               OpMemberDecorate %type_MyCBuffer 1 Offset 16
               OpDecorate %type_MyCBuffer Block
               OpDecorate %out_var_SV_Target Location 0
        %int = OpTypeInt 32 1
%type_sampler = OpTypeSampler
%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler
      %float = OpTypeFloat 32
%type_2d_image = OpTypeImage %float 2D 0 0 0 1 Unknown
%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
    %v2float = OpTypeVector %float 2
    %v3float = OpTypeVector %float 3
%type_MyCBuffer = OpTypeStruct %v2float %v3float
%_ptr_Uniform_type_MyCBuffer = OpTypePointer Uniform %type_MyCBuffer
       %void = OpTypeVoid
         %15 = OpTypeFunction %void
    %v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
         %22 = OpTypeFunction %v4float
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
%_ptr_Uniform_float = OpTypePointer Uniform %float
       %bool = OpTypeBool
%_ptr_Function_type_2d_image = OpTypePointer Function %type_2d_image
%_ptr_Function_type_sampler = OpTypePointer Function %type_sampler
%_ptr_Function_v2float = OpTypePointer Function %v2float
%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
         %71 = OpTypeFunction %v4float %_ptr_Function_type_2d_image %_ptr_Function_type_sampler %_ptr_Function_v2float %_ptr_Function_float
%type_sampled_image = OpTypeSampledImage %type_2d_image
    %float_0 = OpConstant %float 0
         %27 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
      %int_1 = OpConstant %int 1
      %int_0 = OpConstant %int 0
    %float_1 = OpConstant %float 1
%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output
%PS_BlurFilter_Box1 = OpFunction %void None %15
         %17 = OpLabel
               OpStore %out_var_SV_Target %27
               OpReturn
               OpFunctionEnd

The for loop is completely gone: the entry point now just stores the all-zero constant (%27) to the output variable and returns.

Originally reported in microsoft/DirectXShaderCompiler#958.

Likely related to #1143, but I'm not certain.

Using the correct top of the tree: 702852b

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions