Skip to content

Commit d940162

Browse files
Support vpinsrq in delocater (#1543)
Instead of generating portable code, gcc can be configured with e.g. -march=cpu-type, which allows it to generate code using instructions from the instruction sets supported by that cpu-type. Such builds fail because vpinsrq is not supported by the delocater. This PR adds support for vpinsrq, an instruction from the AVX512DQ set. I don't think there are other 4-argument instructions for which a GOT reloc can be emitted that we need to support right now. This also makes the implementation a tad easier, because we do not need to cater for the relocation being either the first or the second argument - it can only be the second. Otherwise, the implementation follows the one for the type instrThreeArg.
1 parent 4cd6d21 commit d940162

4 files changed

Lines changed: 138 additions & 2 deletions

File tree

util/fipstools/delocate/delocate.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,8 @@ const (
11891189
instrMemoryVectorCombine
11901190
// instrThreeArg merges two sources into a destination in some fashion.
11911191
instrThreeArg
1192+
// instrFourArg merges three sources into a destination in some fashion.
1193+
instrFourArg
11921194
// instrCompare takes two arguments and writes outputs to the flags register.
11931195
instrCompare
11941196
instrOther
@@ -1197,7 +1199,7 @@ const (
11971199
func (index instructionType) String() string {
11981200
return [...]string{"instrPush", "instrMove", "instrTransformingMove",
11991201
"instrJump", "instrConditionalMove", "instrCombine",
1200-
"instrMemoryVectorCombine", "instrThreeArg",
1202+
"instrMemoryVectorCombine", "instrThreeArg", "instrFourArg",
12011203
"instrCompare", "instrOther"}[index]
12021204
}
12031205

@@ -1238,6 +1240,11 @@ func classifyInstruction(instr string, args []*node32) instructionType {
12381240
return instrThreeArg
12391241
}
12401242

1243+
case "vpinsrq":
1244+
if len(args) == 4 {
1245+
return instrFourArg
1246+
}
1247+
12411248
case "vpbroadcastq":
12421249
if len(args) == 2 {
12431250
return instrTransformingMove
@@ -1346,6 +1353,13 @@ func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest s
13461353
}
13471354
}
13481355

1356+
// fourArgCombineOp returns a wrapperFunc that first invokes the wrapped
// continuation k and then writes one tab-indented, newline-terminated
// instruction line to w. The operands are emitted comma-separated in the
// order source1, source2, source3, dest.
func fourArgCombineOp(w stringWriter, instructionName, source1, source2, source3, dest string) wrapperFunc {
1357+
return func(k func()) {
1358+
k()
1359+
w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + source3 + ", " + dest + "\n")
1360+
}
1361+
}
1362+
13491363
func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
13501364
return func(k func()) {
13511365
k()
@@ -1484,7 +1498,7 @@ Args:
14841498
}
14851499

14861500
classification := classifyInstruction(instructionName, argNodes)
1487-
if classification != instrThreeArg && classification != instrCompare && i != 0 {
1501+
if classification != instrFourArg && classification != instrThreeArg && classification != instrCompare && i != 0 {
14881502
return nil, fmt.Errorf("GOT access must be source operand, %s", classification)
14891503
}
14901504

@@ -1565,6 +1579,29 @@ Args:
15651579
wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
15661580
}
15671581
targetReg = tempReg
1582+
case instrFourArg:
1583+
if n := len(argNodes); n != 4 {
1584+
return nil, fmt.Errorf("four-argument instruction has %d arguments", n)
1585+
}
1586+
// Only support vpinsrq where the second argument is the GOT reloc.
1587+
if i != 1 {
1588+
return nil, errors.New("GOT access must be from source operand")
1589+
}
1590+
1591+
// vpinsrq imm8, r64/m64, xmm2, xmm1
1592+
targetReg = d.contents(argNodes[3])
1593+
otherSource := d.contents(argNodes[2])
1594+
gotSource := d.contents(argNodes[1])
1595+
immediate := d.contents(argNodes[0])
1596+
1597+
// Choose free register and prepare stack.
1598+
saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, gotSource})
1599+
redzoneCleared = true
1600+
wrappers = append(wrappers, saveRegWrapper)
1601+
1602+
// Rewrite instruction arguments to use the free register.
1603+
wrappers = append(wrappers, fourArgCombineOp(d.output, instructionName, immediate, tempReg, otherSource, targetReg))
1604+
targetReg = tempReg
15681605
default:
15691606
return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
15701607
}

util/fipstools/delocate/delocate_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ var delocateTests = []delocateTest{
5555
{"x86_64-LabelRewrite", nil, []string{"in1.s", "in2.s"}, "out.s", true},
5656
{"x86_64-Sections", nil, []string{"in.s"}, "out.s", true},
5757
{"x86_64-ThreeArg", nil, []string{"in.s"}, "out.s", true},
58+
{"x86_64-FourArg", nil, []string{"in.s"}, "out.s", true},
5859
{"aarch64-Basic", nil, []string{"in.s"}, "out.s", true},
5960
}
6061

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
.type foo, @function
2+
.globl foo
3+
foo:
4+
movq %rbx, %rbx # instruction allowing delocator to detect architecture
5+
vpinsrq $0x08, kBoringSSLRSASqrtTwo@GOTPCREL(%rip), %xmm1, %xmm0
6+
vpinsrq $1, fooExternal@GOTPCREL(%rip), %xmm14, %xmm15
7+
8+
.type kBoringSSLRSASqrtTwo,@object # @kBoringSSLRSASqrtTwo
9+
.section .rodata,"a",@progbits,unique,760
10+
.globl kBoringSSLRSASqrtTwo
11+
.p2align 4
12+
kBoringSSLRSASqrtTwo:
13+
.quad -2404814165548301886 # 0xdea06241f7aa81c2
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
.text
2+
.file 1 "inserted_by_delocate.c"
3+
.loc 1 1 0
4+
BORINGSSL_bcm_text_start:
5+
.type foo, @function
6+
.globl foo
7+
.Lfoo_local_target:
8+
foo:
9+
movq %rbx, %rbx # instruction allowing delocator to detect architecture
10+
# WAS vpinsrq $0x08, kBoringSSLRSASqrtTwo@GOTPCREL(%rip), %xmm1, %xmm0
11+
leaq -128(%rsp), %rsp
12+
pushq %rax
13+
leaq .LkBoringSSLRSASqrtTwo_local_target(%rip), %rax
14+
vpinsrq $0x08, %rax, %xmm1, %xmm0
15+
popq %rax
16+
leaq 128(%rsp), %rsp
17+
# WAS vpinsrq $1, fooExternal@GOTPCREL(%rip), %xmm14, %xmm15
18+
leaq -128(%rsp), %rsp
19+
pushq %rax
20+
pushf
21+
leaq fooExternal_GOTPCREL_external(%rip), %rax
22+
addq (%rax), %rax
23+
movq (%rax), %rax
24+
popf
25+
vpinsrq $1, %rax, %xmm14, %xmm15
26+
popq %rax
27+
leaq 128(%rsp), %rsp
28+
29+
.type kBoringSSLRSASqrtTwo,@object # @kBoringSSLRSASqrtTwo
30+
# WAS .section .rodata,"a",@progbits,unique,760
31+
.text
32+
.globl kBoringSSLRSASqrtTwo
33+
.p2align 4
34+
.LkBoringSSLRSASqrtTwo_local_target:
35+
kBoringSSLRSASqrtTwo:
36+
.quad -2404814165548301886 # 0xdea06241f7aa81c2
37+
.text
38+
.loc 1 2 0
39+
BORINGSSL_bcm_text_end:
40+
.type fooExternal_GOTPCREL_external, @object
41+
.size fooExternal_GOTPCREL_external, 8
42+
fooExternal_GOTPCREL_external:
43+
.long fooExternal@GOTPCREL
44+
.long 0
45+
.type OPENSSL_ia32cap_get, @function
46+
.globl OPENSSL_ia32cap_get
47+
.LOPENSSL_ia32cap_get_local_target:
48+
OPENSSL_ia32cap_get:
49+
leaq OPENSSL_ia32cap_P(%rip), %rax
50+
ret
51+
.type BORINGSSL_bcm_text_hash, @object
52+
.size BORINGSSL_bcm_text_hash, 32
53+
BORINGSSL_bcm_text_hash:
54+
.byte 0xae
55+
.byte 0x2c
56+
.byte 0xea
57+
.byte 0x2a
58+
.byte 0xbd
59+
.byte 0xa6
60+
.byte 0xf3
61+
.byte 0xec
62+
.byte 0x97
63+
.byte 0x7f
64+
.byte 0x9b
65+
.byte 0xf6
66+
.byte 0x94
67+
.byte 0x9a
68+
.byte 0xfc
69+
.byte 0x83
70+
.byte 0x68
71+
.byte 0x27
72+
.byte 0xcb
73+
.byte 0xa0
74+
.byte 0xa0
75+
.byte 0x9f
76+
.byte 0x6b
77+
.byte 0x6f
78+
.byte 0xde
79+
.byte 0x52
80+
.byte 0xcd
81+
.byte 0xe2
82+
.byte 0xcd
83+
.byte 0xff
84+
.byte 0x31
85+
.byte 0x80

0 commit comments

Comments
 (0)