```csharp
class AdvSimd
{
// uint8x8_t vld1_lane_u8 (uint8_t const * ptr, uint8x8_t src, const int lane)
// A32: VLD1.8 { Dd[index] }, [Rn]
// A64: LD1 { Vt.B }[index], [Xn]
public static unsafe Vector64<byte> LoadAndInsertScalar(byte* address, Vector64<byte> value, byte index);
// int16x4_t vld1_lane_s16 (int16_t const * ptr, int16x4_t src, const int lane)
// A32: VLD1.16 { Dd[index] }, [Rn]
// A64: LD1 { Vt.H }[index], [Xn]
public static unsafe Vector64<short> LoadAndInsertScalar(short* address, Vector64<short> value, byte index);
// int32x2_t vld1_lane_s32 (int32_t const * ptr, int32x2_t src, const int lane)
// A32: VLD1.32 { Dd[index] }, [Rn]
// A64: LD1 { Vt.S }[index], [Xn]
public static unsafe Vector64<int> LoadAndInsertScalar(int* address, Vector64<int> value, byte index);
// int8x8_t vld1_lane_s8 (int8_t const * ptr, int8x8_t src, const int lane)
// A32: VLD1.8 { Dd[index] }, [Rn]
// A64: LD1 { Vt.B }[index], [Xn]
public static unsafe Vector64<sbyte> LoadAndInsertScalar(sbyte* address, Vector64<sbyte> value, byte index);
// float32x2_t vld1_lane_f32 (float32_t const * ptr, float32x2_t src, const int lane)
// A32: VLD1.32 { Dd[index] }, [Rn]
// A64: LD1 { Vt.S }[index], [Xn]
public static unsafe Vector64<float> LoadAndInsertScalar(float* address, Vector64<float> value, byte index);
// uint16x4_t vld1_lane_u16 (uint16_t const * ptr, uint16x4_t src, const int lane)
// A32: VLD1.16 { Dd[index] }, [Rn]
// A64: LD1 { Vt.H }[index], [Xn]
public static unsafe Vector64<ushort> LoadAndInsertScalar(ushort* address, Vector64<ushort> value, byte index);
// uint32x2_t vld1_lane_u32 (uint32_t const * ptr, uint32x2_t src, const int lane)
// A32: VLD1.32 { Dd[index] }, [Rn]
// A64: LD1 { Vt.S }[index], [Xn]
public static unsafe Vector64<uint> LoadAndInsertScalar(uint* address, Vector64<uint> value, byte index);
// uint8x16_t vld1q_lane_u8 (uint8_t const * ptr, uint8x16_t src, const int lane)
// A32: VLD1.8 { Dd[index] }, [Rn]
// A64: LD1 { Vt.B }[index], [Xn]
public static unsafe Vector128<byte> LoadAndInsertScalar(byte* address, Vector128<byte> value, byte index);
// float64x2_t vld1q_lane_f64 (float64_t const * ptr, float64x2_t src, const int lane)
// A32: VLDR.64 Dd, [Rn]
// A64: LD1 { Vt.D }[index], [Xn]
public static unsafe Vector128<double> LoadAndInsertScalar(double* address, Vector128<double> value, byte index);
// int16x8_t vld1q_lane_s16 (int16_t const * ptr, int16x8_t src, const int lane)
// A32: VLD1.16 { Dd[index] }, [Rn]
// A64: LD1 { Vt.H }[index], [Xn]
public static unsafe Vector128<short> LoadAndInsertScalar(short* address, Vector128<short> value, byte index);
// int32x4_t vld1q_lane_s32 (int32_t const * ptr, int32x4_t src, const int lane)
// A32: VLD1.32 { Dd[index] }, [Rn]
// A64: LD1 { Vt.S }[index], [Xn]
public static unsafe Vector128<int> LoadAndInsertScalar(int* address, Vector128<int> value, byte index);
// int64x2_t vld1q_lane_s64 (int64_t const * ptr, int64x2_t src, const int lane)
// A32: VLDR.64 Dd, [Rn]
// A64: LD1 { Vt.D }[index], [Xn]
public static unsafe Vector128<long> LoadAndInsertScalar(long* address, Vector128<long> value, byte index);
// int8x16_t vld1q_lane_s8 (int8_t const * ptr, int8x16_t src, const int lane)
// A32: VLD1.8 { Dd[index] }, [Rn]
// A64: LD1 { Vt.B }[index], [Xn]
public static unsafe Vector128<sbyte> LoadAndInsertScalar(sbyte* address, Vector128<sbyte> value, byte index);
// float32x4_t vld1q_lane_f32 (float32_t const * ptr, float32x4_t src, const int lane)
// A32: VLD1.32 { Dd[index] }, [Rn]
// A64: LD1 { Vt.S }[index], [Xn]
public static unsafe Vector128<float> LoadAndInsertScalar(float* address, Vector128<float> value, byte index);
// uint16x8_t vld1q_lane_u16 (uint16_t const * ptr, uint16x8_t src, const int lane)
// A32: VLD1.16 { Dd[index] }, [Rn]
// A64: LD1 { Vt.H }[index], [Xn]
public static unsafe Vector128<ushort> LoadAndInsertScalar(ushort* address, Vector128<ushort> value, byte index);
// uint32x4_t vld1q_lane_u32 (uint32_t const * ptr, uint32x4_t src, const int lane)
// A32: VLD1.32 { Dd[index] }, [Rn]
// A64: LD1 { Vt.S }[index], [Xn]
public static unsafe Vector128<uint> LoadAndInsertScalar(uint* address, Vector128<uint> value, byte index);
// uint64x2_t vld1q_lane_u64 (uint64_t const * ptr, uint64x2_t src, const int lane)
// A32: VLDR.64 Dd, [Rn]
// A64: LD1 { Vt.D }[index], [Xn]
public static unsafe Vector128<ulong> LoadAndInsertScalar(ulong* address, Vector128<ulong> value, byte index);
}
```

cc @TamarChristinaArm @CarolEidt @tannergooding
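For illustration, here is a minimal usage sketch, assuming the signatures exactly as proposed above (the final shape may differ after API review); the helper name `ReplaceLane2` is hypothetical:

```csharp
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

static unsafe class Example
{
    // Hypothetical helper: overwrite lane 2 of an existing Vector128<int> with a
    // value read from memory, leaving the other lanes untouched.
    public static Vector128<int> ReplaceLane2(int* source, Vector128<int> vector)
    {
        // Expected codegen per the table above:
        //   A64: LD1 { Vt.S }[2], [Xn]
        //   A32: VLD1.32 { Dd[2] }, [Rn]
        return AdvSimd.LoadAndInsertScalar(source, vector, 2);
    }
}
```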
Note that there is no LoadAndInsertScalar overload for Vector64<double>, Vector64<long>, or Vector64<ulong>, since those types hold only a single 64-bit element and the same functionality can be achieved with the corresponding LoadVector64 intrinsics.
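A minimal sketch of that equivalence, using the existing LoadVector64 API (the helper name and the unused `value` parameter are only there to mirror the LoadAndInsertScalar shape):

```csharp
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

static unsafe class SingleLaneLoad
{
    // Hypothetical helper: Vector64<long> has exactly one lane, so a plain
    // LoadVector64 already replaces the whole vector and no lane insert is needed.
    public static Vector64<long> LoadIntoOnlyLane(long* address, Vector64<long> value)
    {
        return AdvSimd.LoadVector64(address); // 'value' would be fully overwritten anyway
    }
}
```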
I believe all of them can be implemented on Arm32, even the ones that load a 64-bit element into a lane of Vector128<T>. However, for those we would need to emit either VLDR.64 Dd or VLDR.64 Dd+1, depending on the value of index. @TamarChristinaArm Is there a better way to do this?
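For clarity, here is a managed approximation of what that 64-bit-lane case computes, purely for illustration; this is not the proposed codegen, and the helper name is made up:

```csharp
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

static unsafe class Arm32Sketch
{
    // Hypothetical helper: load 64 bits from memory into lane 'index' (0 or 1) of a
    // Vector128<long>. The idea above is that on Arm32 this would lower to
    // VLDR.64 Dd when index == 0 and VLDR.64 Dd+1 when index == 1; here the result
    // is simply emulated with a plain load plus Insert.
    public static Vector128<long> LoadAndInsert64BitLane(long* address, Vector128<long> value, byte index)
    {
        return AdvSimd.Insert(value, index, *address);
    }
}
```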