Skip to content

Commit ebc4919

Browse files
author
Mike Pall
committed
ARM64: Improve generation of immediates.
1 parent a4c9fc3 commit ebc4919

File tree

1 file changed

+33
-31
lines changed

1 file changed

+33
-31
lines changed

src/lj_emit_arm64.h

Lines changed: 33 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -194,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
194 194

195 195
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
196 196
{
197-
uint32_t k13 = emit_isk13(u64, is64);
198-
if (k13) { /* Can the constant be represented as a bitmask immediate? */
199-
emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
200-
} else {
201-
int i, zeros = 0, ones = 0, neg;
202-
if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
203-
/* Count homogeneous 16 bit fragments. */
204-
for (i = 0; i < 4; i++) {
205-
uint64_t frag = (u64 >> i*16) & 0xffff;
206-
zeros += (frag == 0);
207-
ones += (frag == 0xffff);
197+
int i, zeros = 0, ones = 0, neg;
198+
if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
199+
/* Count homogeneous 16 bit fragments. */
200+
for (i = 0; i < 4; i++) {
201+
uint64_t frag = (u64 >> i*16) & 0xffff;
202+
zeros += (frag == 0);
203+
ones += (frag == 0xffff);
204+
}
205+
neg = ones > zeros; /* Use MOVN if it pays off. */
206+
if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
207+
uint32_t k13 = emit_isk13(u64, is64);
208+
if (k13) {
209+
emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
210+
return;
208 211
}
209-
neg = ones > zeros; /* Use MOVN if it pays off. */
210-
if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
211-
int shift = 0, lshift = 0;
212-
uint64_t n64 = neg ? ~u64 : u64;
213-
if (n64 != 0) {
214-
/* Find first/last fragment to be filled. */
215-
shift = (63-emit_clz64(n64)) & ~15;
216-
lshift = emit_ctz64(n64) & ~15;
217-
}
218-
/* MOVK requires the original value (u64). */
219-
while (shift > lshift) {
220-
uint32_t u16 = (u64 >> shift) & 0xffff;
221-
/* Skip fragments that are correctly filled by MOVN/MOVZ. */
222-
if (u16 != (neg ? 0xffff : 0))
223-
emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
224-
shift -= 16;
225-
}
226-
/* But MOVN needs an inverted value (n64). */
227-
emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
228-
A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
212+
}
213+
if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
214+
int shift = 0, lshift = 0;
215+
uint64_t n64 = neg ? ~u64 : u64;
216+
if (n64 != 0) {
217+
/* Find first/last fragment to be filled. */
218+
shift = (63-emit_clz64(n64)) & ~15;
219+
lshift = emit_ctz64(n64) & ~15;
220+
}
221+
/* MOVK requires the original value (u64). */
222+
while (shift > lshift) {
223+
uint32_t u16 = (u64 >> shift) & 0xffff;
224+
/* Skip fragments that are correctly filled by MOVN/MOVZ. */
225+
if (u16 != (neg ? 0xffff : 0))
226+
emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
227+
shift -= 16;
229 228
}
229+
/* But MOVN needs an inverted value (n64). */
230+
emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
231+
A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
230 232
}
231 233
}
232 234

0 commit comments

Comments
 (0)