@@ -3379,15 +3379,16 @@ static void ggml_vec_dot_turbo3_0_f32(int n, float * GGML_RESTRICT s, size_t bs,
33793379 GGML_UNUSED (bs ); GGML_UNUSED (bx ); GGML_UNUSED (by ); GGML_UNUSED (nrc );
33803380
33813381 // Dequantize turbo3 to f32 temp buffer, then dot
3382- float tmp [ 4096 ]; // max head_dim
3383- GGML_ASSERT (n <= 4096 );
3382+ float * tmp = ( float * ) malloc ( n * sizeof ( float ));
3383+ GGML_ASSERT (tmp != NULL );
33843384 ggml_get_type_traits (GGML_TYPE_TURBO3_0 )-> to_float (vx , tmp , n );
33853385
33863386 const float * y = (const float * )vy ;
33873387 float sum = 0.0f ;
33883388 for (int i = 0 ; i < n ; i ++ ) {
33893389 sum += tmp [i ] * y [i ];
33903390 }
3391+ free (tmp );
33913392 * s = sum ;
33923393}
33933394
@@ -3398,15 +3399,16 @@ static void ggml_vec_dot_turbo2_0_f32(int n, float * GGML_RESTRICT s, size_t bs,
33983399 GGML_ASSERT (nrc == 1 );
33993400 GGML_UNUSED (bs ); GGML_UNUSED (bx ); GGML_UNUSED (by ); GGML_UNUSED (nrc );
34003401
3401- float tmp [ 4096 ] ;
3402- GGML_ASSERT (n <= 4096 );
3402+ float * tmp = ( float * ) malloc ( n * sizeof ( float )) ;
3403+ GGML_ASSERT (tmp != NULL );
34033404 ggml_get_type_traits (GGML_TYPE_TURBO2_0 )-> to_float (vx , tmp , n );
34043405
34053406 const float * y = (const float * )vy ;
34063407 float sum = 0.0f ;
34073408 for (int i = 0 ; i < n ; i ++ ) {
34083409 sum += tmp [i ] * y [i ];
34093410 }
3411+ free (tmp );
34103412 * s = sum ;
34113413}
34123414
@@ -3417,15 +3419,16 @@ static void ggml_vec_dot_turbo4_0_f32(int n, float * GGML_RESTRICT s, size_t bs,
34173419 GGML_ASSERT (nrc == 1 );
34183420 GGML_UNUSED (bs ); GGML_UNUSED (bx ); GGML_UNUSED (by ); GGML_UNUSED (nrc );
34193421
3420- float tmp [ 4096 ] ;
3421- GGML_ASSERT (n <= 4096 );
3422+ float * tmp = ( float * ) malloc ( n * sizeof ( float )) ;
3423+ GGML_ASSERT (tmp != NULL );
34223424 ggml_get_type_traits (GGML_TYPE_TURBO4_0 )-> to_float (vx , tmp , n );
34233425
34243426 const float * y = (const float * )vy ;
34253427 float sum = 0.0f ;
34263428 for (int i = 0 ; i < n ; i ++ ) {
34273429 sum += tmp [i ] * y [i ];
34283430 }
3431+ free (tmp );
34293432 * s = sum ;
34303433}
34313434
@@ -3437,18 +3440,21 @@ static void ggml_vec_dot_tq3_1s_q8_0(int n, float * GGML_RESTRICT s, size_t bs,
34373440 GGML_ASSERT (nrc == 1 );
34383441 GGML_UNUSED (bs ); GGML_UNUSED (bx ); GGML_UNUSED (by ); GGML_UNUSED (nrc );
34393442
3440- float tmp [ 4096 ] ;
3441- GGML_ASSERT (n <= 4096 );
3443+ float * tmp = ( float * ) malloc ( n * sizeof ( float )) ;
3444+ GGML_ASSERT (tmp != NULL );
34423445 ggml_get_type_traits (GGML_TYPE_TQ3_1S )-> to_float (vx , tmp , n );
34433446
34443447 // Dequantize q8_0 and dot
3445- float tmp2 [4096 ];
3448+ float * tmp2 = (float * )malloc (n * sizeof (float ));
3449+ GGML_ASSERT (tmp2 != NULL );
34463450 ggml_get_type_traits (GGML_TYPE_Q8_0 )-> to_float (vy , tmp2 , n );
34473451
34483452 float sum = 0.0f ;
34493453 for (int i = 0 ; i < n ; i ++ ) {
34503454 sum += tmp [i ] * tmp2 [i ];
34513455 }
3456+ free (tmp );
3457+ free (tmp2 );
34523458 * s = sum ;
34533459}
34543460
@@ -3460,17 +3466,20 @@ static void ggml_vec_dot_tq4_1s_q8_0(int n, float * GGML_RESTRICT s, size_t bs,
34603466 GGML_ASSERT (nrc == 1 );
34613467 GGML_UNUSED (bs ); GGML_UNUSED (bx ); GGML_UNUSED (by ); GGML_UNUSED (nrc );
34623468
3463- float tmp [ 4096 ] ;
3464- GGML_ASSERT (n <= 4096 );
3469+ float * tmp = ( float * ) malloc ( n * sizeof ( float )) ;
3470+ GGML_ASSERT (tmp != NULL );
34653471 ggml_get_type_traits (GGML_TYPE_TQ4_1S )-> to_float (vx , tmp , n );
34663472
3467- float tmp2 [4096 ];
3473+ float * tmp2 = (float * )malloc (n * sizeof (float ));
3474+ GGML_ASSERT (tmp2 != NULL );
34683475 ggml_get_type_traits (GGML_TYPE_Q8_0 )-> to_float (vy , tmp2 , n );
34693476
34703477 float sum = 0.0f ;
34713478 for (int i = 0 ; i < n ; i ++ ) {
34723479 sum += tmp [i ] * tmp2 [i ];
34733480 }
3481+ free (tmp );
3482+ free (tmp2 );
34743483 * s = sum ;
34753484}
34763485
0 commit comments