; Function Signature: mapreduce_impl(typeof(Base.identity), typeof(Base.add_sum), FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}}, Int64, Int64, Int64)
; @ reduce.jl:251 within `mapreduce_impl`
; Function Attrs: noinline
define double @julia_mapreduce_impl_7339(ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) %"A::FixedSizeArray", ptr nocapture readonly %.roots.A, i64 signext %"ifirst::Int64", i64 signext %"ilast::Int64", i64 signext %"blksize::Int64") local_unnamed_addr #0 {
top:
%gcframe1 = alloca [4 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 32, i1 true)
%0 = getelementptr inbounds nuw i8, ptr %gcframe1, i64 24
%1 = getelementptr inbounds nuw i8, ptr %gcframe1, i64 16
%pgcstack = call ptr inttoptr (i64 4377493260 to ptr)(i64 4377493296) #12
store i64 8, ptr %gcframe1, align 8
%task.gcstack = load ptr, ptr %pgcstack, align 8
%frame.prev = getelementptr inbounds nuw i8, ptr %gcframe1, i64 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %pgcstack, align 8
%memoryref_mem = load ptr, ptr %.roots.A, align 8
; @ reduce.jl:253 within `mapreduce_impl`
; ┌ @ promotion.jl:637 within `==`
%.not = icmp eq i64 %"ifirst::Int64", %"ilast::Int64"
; └
br i1 %.not, label %L19, label %L26
L19: ; preds = %top
; @ reduce.jl:254 within `mapreduce_impl`
; ┌ @ /Users/mose/.julia/packages/FixedSizeArrays/22QFl/src/FixedSizeArray.jl:167 within `getindex` @ essentials.jl:386
%memory_data_ptr = getelementptr inbounds nuw i8, ptr %memoryref_mem, i64 8
%memoryref_data = load ptr, ptr %memory_data_ptr, align 8
%2 = getelementptr double, ptr %memoryref_data, i64 %"ifirst::Int64"
%memoryref_data2 = getelementptr i8, ptr %2, i64 -8
%3 = load double, ptr %memoryref_data2, align 8
; └
; @ reduce.jl:255 within `mapreduce_impl`
br label %common.ret
common.ret: ; preds = %L142, %L95, %middle.block, %L45, %L19
%common.ret.op = phi double [ %3, %L19 ], [ %34, %L142 ], [ %8, %L45 ], [ %25, %middle.block ], [ %28, %L95 ]
%frame.prev96 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev96, ptr %pgcstack, align 8
; @ reduce.jl within `mapreduce_impl`
ret double %common.ret.op
L26: ; preds = %top
; @ reduce.jl:256 within `mapreduce_impl`
; ┌ @ int.jl:86 within `-`
%4 = sub i64 %"ilast::Int64", %"ifirst::Int64"
; └
; ┌ @ int.jl:83 within `<`
%.not72 = icmp slt i64 %4, %"blksize::Int64"
; └
br i1 %.not72, label %L45, label %L142
L45: ; preds = %L26
; @ reduce.jl:258 within `mapreduce_impl`
; ┌ @ /Users/mose/.julia/packages/FixedSizeArrays/22QFl/src/FixedSizeArray.jl:167 within `getindex` @ essentials.jl:386
%memory_data_ptr5 = getelementptr inbounds nuw i8, ptr %memoryref_mem, i64 8
%memoryref_data7 = load ptr, ptr %memory_data_ptr5, align 8
%5 = getelementptr double, ptr %memoryref_data7, i64 %"ifirst::Int64"
%memoryref_data12 = getelementptr i8, ptr %5, i64 -8
%6 = load double, ptr %memoryref_data12, align 8
; └
; @ reduce.jl:259 within `mapreduce_impl`
; ┌ @ /Users/mose/.julia/packages/FixedSizeArrays/22QFl/src/FixedSizeArray.jl:167 within `getindex` @ essentials.jl:386
%7 = load double, ptr %5, align 8
; └
; @ reduce.jl:260 within `mapreduce_impl`
; ┌ @ reduce.jl:19 within `add_sum`
; │┌ @ float.jl:492 within `+`
%8 = fadd double %6, %7
; └└
; @ reduce.jl:261 within `mapreduce_impl`
; ┌ @ simdloop.jl:69 within `macro expansion`
; │┌ @ int.jl:87 within `+`
%9 = add i64 %"ifirst::Int64", 2
; │└
; │┌ @ range.jl:5 within `Colon`
; ││┌ @ range.jl:415 within `UnitRange`
; │││┌ @ range.jl:426 within `unitrange_last`
; ││││┌ @ operators.jl:472 within `>=`
; │││││┌ @ int.jl:522 within `<=`
%.not73 = icmp sgt i64 %9, %"ilast::Int64"
; ││││└└
%10 = add i64 %"ifirst::Int64", 1
%value_phi = select i1 %.not73, i64 %10, i64 %"ilast::Int64"
; │└└└
; │ @ simdloop.jl:71 within `macro expansion`
; │┌ @ simdloop.jl:51 within `simd_inner_length`
; ││┌ @ range.jl:783 within `length`
; │││┌ @ int.jl:86 within `-`
%11 = sub i64 %value_phi, %9
%.not7475 = icmp ult i64 %11, 9223372036854775807
; │└└└
; │ @ simdloop.jl:72 within `macro expansion`
br i1 %.not7475, label %L95.lr.ph, label %common.ret
L95.lr.ph: ; preds = %L45
%12 = getelementptr double, ptr %memoryref_data7, i64 %9
; │ @ simdloop.jl:75 within `macro expansion`
%invariant.gep = getelementptr i8, ptr %12, i64 -8
%13 = xor i64 %"ifirst::Int64", -1
%14 = add i64 %value_phi, %13
%min.iters.check = icmp ult i64 %14, 8
br i1 %min.iters.check, label %L95, label %vector.ph
vector.ph: ; preds = %L95.lr.ph
%n.vec = and i64 %14, -8
%15 = insertelement <2 x double> <double poison, double -0.000000e+00>, double %8, i64 0
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <2 x double> [ %15, %vector.ph ], [ %20, %vector.body ]
%vec.phi78 = phi <2 x double> [ splat (double -0.000000e+00), %vector.ph ], [ %21, %vector.body ]
%vec.phi79 = phi <2 x double> [ splat (double -0.000000e+00), %vector.ph ], [ %22, %vector.body ]
%vec.phi80 = phi <2 x double> [ splat (double -0.000000e+00), %vector.ph ], [ %23, %vector.body ]
; │└└
; │ @ simdloop.jl:77 within `macro expansion` @ reduce.jl:262
; │┌ @ /Users/mose/.julia/packages/FixedSizeArrays/22QFl/src/FixedSizeArray.jl:167 within `getindex` @ essentials.jl:386
%16 = getelementptr double, ptr %invariant.gep, i64 %index
%17 = getelementptr i8, ptr %16, i64 16
%18 = getelementptr i8, ptr %16, i64 32
%19 = getelementptr i8, ptr %16, i64 48
%wide.load = load <2 x double>, ptr %16, align 8
%wide.load81 = load <2 x double>, ptr %17, align 8
%wide.load82 = load <2 x double>, ptr %18, align 8
%wide.load83 = load <2 x double>, ptr %19, align 8
; │└
; │ @ simdloop.jl:77 within `macro expansion` @ reduce.jl:263
; │┌ @ reduce.jl:19 within `add_sum`
; ││┌ @ float.jl:492 within `+`
%20 = fadd reassoc contract <2 x double> %vec.phi, %wide.load
%21 = fadd reassoc contract <2 x double> %vec.phi78, %wide.load81
%22 = fadd reassoc contract <2 x double> %vec.phi79, %wide.load82
%23 = fadd reassoc contract <2 x double> %vec.phi80, %wide.load83
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
%index.next = add nuw i64 %index, 8
%24 = icmp eq i64 %index.next, %n.vec
br i1 %24, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body
; │└└
; │ @ simdloop.jl:75 within `macro expansion`
%bin.rdx = fadd reassoc contract <2 x double> %21, %20
%bin.rdx84 = fadd reassoc contract <2 x double> %22, %bin.rdx
%bin.rdx85 = fadd reassoc contract <2 x double> %23, %bin.rdx84
%25 = call reassoc contract double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %bin.rdx85)
%cmp.n = icmp eq i64 %14, %n.vec
br i1 %cmp.n, label %common.ret, label %L95
L95: ; preds = %L95, %middle.block, %L95.lr.ph
%value_phi2377 = phi i64 [ %26, %L95 ], [ 0, %L95.lr.ph ], [ %n.vec, %middle.block ]
%value_phi2276 = phi double [ %28, %L95 ], [ %8, %L95.lr.ph ], [ %25, %middle.block ]
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
%26 = add nuw nsw i64 %value_phi2377, 1
; │└└
; │ @ simdloop.jl:77 within `macro expansion` @ reduce.jl:262
; │┌ @ /Users/mose/.julia/packages/FixedSizeArrays/22QFl/src/FixedSizeArray.jl:167 within `getindex` @ essentials.jl:386
%gep = getelementptr double, ptr %invariant.gep, i64 %value_phi2377
%27 = load double, ptr %gep, align 8
; │└
; │ @ simdloop.jl:77 within `macro expansion` @ reduce.jl:263
; │┌ @ reduce.jl:19 within `add_sum`
; ││┌ @ float.jl:492 within `+`
%28 = fadd reassoc contract double %value_phi2276, %27
; │└└
; │ @ simdloop.jl:75 within `macro expansion`
; │┌ @ int.jl:83 within `<`
%exitcond.not = icmp eq i64 %value_phi2377, %11
; │└
br i1 %exitcond.not, label %common.ret, label %L95
L142: ; preds = %L26
; └
; @ reduce.jl:268 within `mapreduce_impl`
; ┌ @ int.jl:542 within `>>` @ int.jl:535
%29 = ashr i64 %4, 1
; └
; ┌ @ int.jl:87 within `+`
%30 = add i64 %29, %"ifirst::Int64"
; └
; @ reduce.jl:269 within `mapreduce_impl`
store ptr %memoryref_mem, ptr %0, align 8
%31 = call double @julia_mapreduce_impl_7339(ptr nocapture readonly %"A::FixedSizeArray", ptr nocapture nonnull readonly %0, i64 signext %"ifirst::Int64", i64 signext %30, i64 signext %"blksize::Int64")
; @ reduce.jl:270 within `mapreduce_impl`
; ┌ @ int.jl:87 within `+`
%32 = add i64 %30, 1
; └
store ptr %memoryref_mem, ptr %1, align 8
%33 = call double @julia_mapreduce_impl_7339(ptr nocapture readonly %"A::FixedSizeArray", ptr nocapture nonnull readonly %1, i64 signext %32, i64 signext %"ilast::Int64", i64 signext %"blksize::Int64")
; @ reduce.jl:271 within `mapreduce_impl`
; ┌ @ reduce.jl:19 within `add_sum`
; │┌ @ float.jl:492 within `+`
%34 = fadd double %31, %33
; └└
br label %common.ret
}
Reported by @matthias314 on discourse,
summing aFixedSizeVectorseems to be slightly slower than aVector:The difference isn't dramatic, but mostly systematic. But what really puzzles me is that I can't tell why. The LLVM IR looks nearly identical for the two containers, except for the fact
FixedSizeVectorneeds to construct theMemoryRef, which is something which came up already in #70.LLVM IR for Vector
LLVM IR for FixedSizeVectorDefault
Base.mapreduce_implis a recursive function to do pairwaise summation, so even a small overhead at the beginning of the generated code for the function scales with the size of the input data.