@@ -323,7 +323,7 @@ public ByteVector unary(ByteVector xVec, int vectorOpcode) {
323323 case Bytecode .VECTOR_V128_NOT -> unop (x , I8X16 , VectorOperators .NOT );
324324 case Bytecode .VECTOR_I8X16_ABS -> unop (x , I8X16 , VectorOperators .ABS );
325325 case Bytecode .VECTOR_I8X16_NEG -> unop (x , I8X16 , VectorOperators .NEG );
326- case Bytecode .VECTOR_I8X16_POPCNT -> unop ( x , I8X16 , VectorOperators . BIT_COUNT );
326+ case Bytecode .VECTOR_I8X16_POPCNT -> i8x16_popcnt ( x ); // GR-68892
327327 case Bytecode .VECTOR_I16X8_EXTADD_PAIRWISE_I8X16_S -> extadd_pairwise (x , I8X16 , VectorOperators .B2S );
328328 case Bytecode .VECTOR_I16X8_EXTADD_PAIRWISE_I8X16_U -> extadd_pairwise (x , I8X16 , VectorOperators .ZERO_EXTEND_B2S );
329329 case Bytecode .VECTOR_I16X8_EXTEND_LOW_I8X16_S -> extend (x , 0 , I8X16 , VectorOperators .B2S );
@@ -366,16 +366,16 @@ public ByteVector unary(ByteVector xVec, int vectorOpcode) {
366366 case Bytecode .VECTOR_F64X2_TRUNC -> trunc (x , F64X2 , I64X2 , VectorOperators .REINTERPRET_D2L , VectorOperators .REINTERPRET_L2D ,
367367 Vector128OpsVectorAPI ::getExponentDoubles , DOUBLE_SIGNIFICAND_WIDTH , I64X2 .broadcast (DOUBLE_SIGNIF_BIT_MASK ));
368368 case Bytecode .VECTOR_F64X2_NEAREST -> nearest (x , F64X2 , 1L << (DOUBLE_SIGNIFICAND_WIDTH - 1 ));
369- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_S , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_S -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
370- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_U , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_U -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
369+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_S , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_S -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
370+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_U , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_U -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
371371 case Bytecode .VECTOR_F32X4_CONVERT_I32X4_S -> convert (x , I32X4 , VectorOperators .I2F );
372- case Bytecode .VECTOR_F32X4_CONVERT_I32X4_U -> f32x4_convert_i32x4_u ( x );
373- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_S_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_S_ZERO -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
374- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_U_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_U_ZERO -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
372+ case Bytecode .VECTOR_F32X4_CONVERT_I32X4_U -> fromArray ( fallbackOps . unary ( x . toArray (), vectorOpcode )); // GR-68843
373+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_S_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_S_ZERO -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
374+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_U_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_U_ZERO -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
375375 case Bytecode .VECTOR_F64X2_CONVERT_LOW_I32X4_S -> convert (x , I32X4 , VectorOperators .I2D );
376376 case Bytecode .VECTOR_F64X2_CONVERT_LOW_I32X4_U -> f64x2_convert_low_i32x4_u (x );
377- case Bytecode .VECTOR_F32X4_DEMOTE_F64X2_ZERO -> f32X4_demote_f64X2_zero ( x );
378- case Bytecode .VECTOR_F64X2_PROMOTE_LOW_F32X4 -> convert ( x , F32X4 , VectorOperators . F2D );
377+ case Bytecode .VECTOR_F32X4_DEMOTE_F64X2_ZERO -> fromArray ( fallbackOps . unary ( x . toArray (), vectorOpcode )); // GR-68843
378+ case Bytecode .VECTOR_F64X2_PROMOTE_LOW_F32X4 -> fromArray ( fallbackOps . unary ( x . toArray (), vectorOpcode )); // GR-68843
379379 default -> throw CompilerDirectives .shouldNotReachHere ();
380380 });
381381 }
@@ -441,30 +441,30 @@ public ByteVector binary(ByteVector xVec, ByteVector yVec, int vectorOpcode) {
441441 case Bytecode .VECTOR_I8X16_NARROW_I16X8_S -> narrow (x , y , I16X8 , I8X16 , Byte .MIN_VALUE , Byte .MAX_VALUE );
442442 case Bytecode .VECTOR_I8X16_NARROW_I16X8_U -> narrow (x , y , I16X8 , I8X16 , (short ) 0 , (short ) 0xff );
443443 case Bytecode .VECTOR_I8X16_ADD -> binop (x , y , I8X16 , VectorOperators .ADD );
444- case Bytecode .VECTOR_I8X16_ADD_SAT_S -> binop (x , y , I8X16 , VectorOperators .SADD );
445- case Bytecode .VECTOR_I8X16_ADD_SAT_U -> binop_sat_u (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .ADD , 0 , 0xff );
444+ case Bytecode .VECTOR_I8X16_ADD_SAT_S -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .B2S , VectorOperators . ADD , Byte . MIN_VALUE , Byte . MAX_VALUE ); // GR-68891
445+ case Bytecode .VECTOR_I8X16_ADD_SAT_U -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .ADD , 0 , 0xff ); // GR-68891
446446 case Bytecode .VECTOR_I8X16_SUB -> binop (x , y , I8X16 , VectorOperators .SUB );
447- case Bytecode .VECTOR_I8X16_SUB_SAT_S -> binop (x , y , I8X16 , VectorOperators .SSUB );
448- case Bytecode .VECTOR_I8X16_SUB_SAT_U -> binop_sat_u (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .SUB , 0 , 0xff );
447+ case Bytecode .VECTOR_I8X16_SUB_SAT_S -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .B2S , VectorOperators . SUB , Byte . MIN_VALUE , Byte . MAX_VALUE ); // GR-68891
448+ case Bytecode .VECTOR_I8X16_SUB_SAT_U -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .SUB , 0 , 0xff ); // GR-68891
449449 case Bytecode .VECTOR_I8X16_MIN_S -> binop (x , y , I8X16 , VectorOperators .MIN );
450- case Bytecode .VECTOR_I8X16_MIN_U -> binop ( x , y , I8X16 , VectorOperators . UMIN );
450+ case Bytecode .VECTOR_I8X16_MIN_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
451451 case Bytecode .VECTOR_I8X16_MAX_S -> binop (x , y , I8X16 , VectorOperators .MAX );
452- case Bytecode .VECTOR_I8X16_MAX_U -> binop ( x , y , I8X16 , VectorOperators . UMAX );
452+ case Bytecode .VECTOR_I8X16_MAX_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
453453 case Bytecode .VECTOR_I8X16_AVGR_U -> avgr_u (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S );
454454 case Bytecode .VECTOR_I16X8_NARROW_I32X4_S -> narrow (x , y , I32X4 , I16X8 , Short .MIN_VALUE , Short .MAX_VALUE );
455455 case Bytecode .VECTOR_I16X8_NARROW_I32X4_U -> narrow (x , y , I32X4 , I16X8 , 0 , 0xffff );
456456 case Bytecode .VECTOR_I16X8_Q15MULR_SAT_S , Bytecode .VECTOR_I16X8_RELAXED_Q15MULR_S -> i16x8_q15mulr_sat_s (x , y );
457457 case Bytecode .VECTOR_I16X8_ADD -> binop (x , y , I16X8 , VectorOperators .ADD );
458- case Bytecode .VECTOR_I16X8_ADD_SAT_S -> binop (x , y , I16X8 , VectorOperators .SADD );
459- case Bytecode .VECTOR_I16X8_ADD_SAT_U -> binop_sat_u (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .ADD , 0 , 0xffff );
458+ case Bytecode .VECTOR_I16X8_ADD_SAT_S -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .S2I , VectorOperators . ADD , Short . MIN_VALUE , Short . MAX_VALUE ); // GR-68891
459+ case Bytecode .VECTOR_I16X8_ADD_SAT_U -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .ADD , 0 , 0xffff ); // GR-68891
460460 case Bytecode .VECTOR_I16X8_SUB -> binop (x , y , I16X8 , VectorOperators .SUB );
461- case Bytecode .VECTOR_I16X8_SUB_SAT_S -> binop (x , y , I16X8 , VectorOperators .SSUB );
462- case Bytecode .VECTOR_I16X8_SUB_SAT_U -> binop_sat_u (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .SUB , 0 , 0xffff );
461+ case Bytecode .VECTOR_I16X8_SUB_SAT_S -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .S2I , VectorOperators . SUB , Short . MIN_VALUE , Short . MAX_VALUE ); // GR-68891
462+ case Bytecode .VECTOR_I16X8_SUB_SAT_U -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .SUB , 0 , 0xffff ); // GR-68891
463463 case Bytecode .VECTOR_I16X8_MUL -> binop (x , y , I16X8 , VectorOperators .MUL );
464464 case Bytecode .VECTOR_I16X8_MIN_S -> binop (x , y , I16X8 , VectorOperators .MIN );
465- case Bytecode .VECTOR_I16X8_MIN_U -> binop ( x , y , I16X8 , VectorOperators . UMIN );
465+ case Bytecode .VECTOR_I16X8_MIN_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
466466 case Bytecode .VECTOR_I16X8_MAX_S -> binop (x , y , I16X8 , VectorOperators .MAX );
467- case Bytecode .VECTOR_I16X8_MAX_U -> binop ( x , y , I16X8 , VectorOperators . UMAX );
467+ case Bytecode .VECTOR_I16X8_MAX_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
468468 case Bytecode .VECTOR_I16X8_AVGR_U -> avgr_u (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I );
469469 case Bytecode .VECTOR_I16X8_EXTMUL_LOW_I8X16_S -> extmul (x , y , I8X16 , VectorOperators .B2S , 0 );
470470 case Bytecode .VECTOR_I16X8_EXTMUL_LOW_I8X16_U -> extmul (x , y , I8X16 , VectorOperators .ZERO_EXTEND_B2S , 0 );
@@ -474,9 +474,9 @@ public ByteVector binary(ByteVector xVec, ByteVector yVec, int vectorOpcode) {
474474 case Bytecode .VECTOR_I32X4_SUB -> binop (x , y , I32X4 , VectorOperators .SUB );
475475 case Bytecode .VECTOR_I32X4_MUL -> binop (x , y , I32X4 , VectorOperators .MUL );
476476 case Bytecode .VECTOR_I32X4_MIN_S -> binop (x , y , I32X4 , VectorOperators .MIN );
477- case Bytecode .VECTOR_I32X4_MIN_U -> binop ( x , y , I32X4 , VectorOperators . UMIN );
477+ case Bytecode .VECTOR_I32X4_MIN_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
478478 case Bytecode .VECTOR_I32X4_MAX_S -> binop (x , y , I32X4 , VectorOperators .MAX );
479- case Bytecode .VECTOR_I32X4_MAX_U -> binop ( x , y , I32X4 , VectorOperators . UMAX );
479+ case Bytecode .VECTOR_I32X4_MAX_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
480480 case Bytecode .VECTOR_I32X4_DOT_I16X8_S -> i32x4_dot_i16x8_s (x , y );
481481 case Bytecode .VECTOR_I32X4_EXTMUL_LOW_I16X8_S -> extmul (x , y , I16X8 , VectorOperators .S2I , 0 );
482482 case Bytecode .VECTOR_I32X4_EXTMUL_LOW_I16X8_U -> extmul (x , y , I16X8 , VectorOperators .ZERO_EXTEND_S2I , 0 );
@@ -537,7 +537,7 @@ public int vectorToInt(ByteVector xVec, int vectorOpcode) {
537537 case Bytecode .VECTOR_I16X8_BITMASK -> bitmask (x , I16X8 );
538538 case Bytecode .VECTOR_I32X4_ALL_TRUE -> all_true (x , I32X4 );
539539 case Bytecode .VECTOR_I32X4_BITMASK -> bitmask (x , I32X4 );
540- case Bytecode .VECTOR_I64X2_ALL_TRUE -> all_true ( x , I64X2 );
540+ case Bytecode .VECTOR_I64X2_ALL_TRUE -> fallbackOps . vectorToInt ( x . toArray (), vectorOpcode ); // GR-68893
541541 case Bytecode .VECTOR_I64X2_BITMASK -> bitmask (x , I64X2 );
542542 default -> throw CompilerDirectives .shouldNotReachHere ();
543543 };
@@ -747,6 +747,13 @@ private static <E> ByteVector unop(ByteVector xBytes, Shape<E> shape, VectorOper
747747 return result .reinterpretAsBytes ();
748748 }
749749
750+ private static ByteVector i8x16_popcnt (ByteVector x ) {
751+ // Per-lane population count built from shifts, masks and adds, based on the same approach as Integer#bitCount but stopping at 8 bits because each lane is a byte.
752+ ByteVector popcnt = x .sub (x .lanewise (VectorOperators .LSHR , 1 ).and ((byte ) 0x55 )); // each 2-bit field now holds the number of set bits it originally contained (0..2)
753+ popcnt = popcnt .and ((byte ) 0x33 ).add (popcnt .lanewise (VectorOperators .LSHR , 2 ).and ((byte ) 0x33 )); // add neighbouring 2-bit counts, giving one count per 4-bit field (0..4)
754+ return popcnt .add (popcnt .lanewise (VectorOperators .LSHR , 4 )).and ((byte ) 0x0F ); // add high nibble count to low nibble count and mask away the leftover high nibble: result is 0..8 per lane
755+ }
756+
750757 private static <E , F > ByteVector extadd_pairwise (ByteVector xBytes , Shape <E > shape , VectorOperators .Conversion <E , F > conv ) {
751758 Vector <E > x = shape .reinterpret (xBytes );
752759 Vector <F > evens = x .compress (shape .evensMask ).convert (conv , 0 );
@@ -889,6 +896,7 @@ private static ByteVector i32x4_trunc_sat_f32x4_u(ByteVector xBytes) {
889896 return result .reinterpretAsBytes ();
890897 }
891898
899+ @ SuppressWarnings ("unused" )
892900 private static ByteVector f32x4_convert_i32x4_u (ByteVector xBytes ) {
893901 IntVector x = xBytes .reinterpretAsInts ();
894902 LongVector xUnsignedLow = castLong128 (x .convert (VectorOperators .ZERO_EXTEND_I2L , 0 ));
@@ -915,6 +923,7 @@ private static ByteVector f64x2_convert_low_i32x4_u(ByteVector xBytes) {
915923 return result .reinterpretAsBytes ();
916924 }
917925
926+ @ SuppressWarnings ("unused" )
918927 private static ByteVector f32X4_demote_f64X2_zero (ByteVector xBytes ) {
919928 DoubleVector x = F64X2 .reinterpret (xBytes );
920929 Vector <Float > result = compactGeneral (x , 0 , I64X2 , F32X4 , VectorOperators .D2F , VectorOperators .REINTERPRET_F2I , VectorOperators .ZERO_EXTEND_I2L );
@@ -1018,7 +1027,7 @@ private static <E, F> ByteVector narrow(ByteVector xBytes, ByteVector yBytes, Sh
10181027 return result .reinterpretAsBytes ();
10191028 }
10201029
1021- private static <E , F > ByteVector binop_sat_u (ByteVector xBytes , ByteVector yBytes ,
1030+ private static <E , F > ByteVector binop_sat (ByteVector xBytes , ByteVector yBytes ,
10221031 Shape <E > shape , Shape <F > extendedShape ,
10231032 VectorOperators .Conversion <E , F > upcast ,
10241033 VectorOperators .Binary op , long min , long max ) {
@@ -1033,8 +1042,7 @@ private static <E, F> ByteVector avgr_u(ByteVector xBytes, ByteVector yBytes,
10331042 Shape <E > shape , Shape <F > extendedShape ,
10341043 VectorOperators .Conversion <E , F > upcast ) {
10351044 Vector <F > one = extendedShape .broadcast (1 );
1036- Vector <F > two = extendedShape .broadcast (2 );
1037- return upcastBinopDowncast (xBytes , yBytes , shape , extendedShape , upcast , (x , y ) -> x .add (y ).add (one ).div (two ));
1045+ return upcastBinopDowncast (xBytes , yBytes , shape , extendedShape , upcast , (x , y ) -> x .add (y ).add (one ).lanewise (VectorOperators .LSHR , 1 ));
10381046 }
10391047
10401048 private static ByteVector i16x8_q15mulr_sat_s (ByteVector xBytes , ByteVector yBytes ) {
0 commit comments