@@ -325,6 +325,22 @@ struct jit_context {
325325/* Number of bytes that will be skipped on tailcall */
326326#define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE)
327327
/*
 * Emit "push r9" (0x41 0x51: REX.B + PUSH r/m) into the JIT stream.
 * Used to preserve the private-stack frame pointer held in r9 across
 * helper/kfunc calls.  NOTE: the EMIT* macros implicitly operate on a
 * local variable named 'prog', so that name must not change.
 */
static void push_r9(u8 **pprog)
{
	u8 *prog = *pprog;

	EMIT2(0x41, 0x51);   /* push r9 */
	*pprog = prog;
}
335+
/*
 * Emit "pop r9" (0x41 0x59: REX.B + POP r/m) into the JIT stream.
 * Restores the private-stack frame pointer in r9 after a call emitted
 * between push_r9()/pop_r9().  NOTE: the EMIT* macros implicitly operate
 * on a local variable named 'prog', so that name must not change.
 */
static void pop_r9(u8 **pprog)
{
	u8 *prog = *pprog;

	EMIT2(0x41, 0x59);   /* pop r9 */
	*pprog = prog;
}
343+
328344static void push_r12 (u8 * * pprog )
329345{
330346 u8 * prog = * pprog ;
@@ -1404,6 +1420,24 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
14041420 * pprog = prog ;
14051421}
14061422
/*
 * Load this CPU's private stack frame pointer into r9.
 *
 * Emits "movabs r9, priv_frame_ptr" carrying the per-CPU base address
 * (a __percpu cookie, split into high/low 32-bit halves for
 * emit_mov_imm64()).  On SMP, a gs-prefixed "add r9, gs:[this_cpu_off]"
 * is appended so r9 ends up pointing at the calling CPU's frame; on UP
 * the per-CPU offset is zero and the add is omitted.
 *
 * NOTE: the EMIT* macros implicitly operate on a local named 'prog'.
 */
static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
{
	u8 *prog = *pprog;

	/* movabs r9, priv_frame_ptr */
	emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32,
		       (u32) (__force long) priv_frame_ptr);

#ifdef CONFIG_SMP
	/* add <r9>, gs:[<off>] */
	EMIT2(0x65, 0x4c);
	EMIT3(0x03, 0x0c, 0x25);
	EMIT((u32)(unsigned long)&this_cpu_off, 4);
#endif

	*pprog = prog;
}
1440+
14071441#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
14081442
14091443#define __LOAD_TCC_PTR (off ) \
@@ -1412,6 +1446,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
14121446#define LOAD_TAIL_CALL_CNT_PTR (stack ) \
14131447 __LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
14141448
1449+ /* Memory size/value to protect private stack overflow/underflow */
1450+ #define PRIV_STACK_GUARD_SZ 8
1451+ #define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
1452+
14151453static int do_jit (struct bpf_prog * bpf_prog , int * addrs , u8 * image , u8 * rw_image ,
14161454 int oldproglen , struct jit_context * ctx , bool jmp_padding )
14171455{
@@ -1421,14 +1459,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
14211459 int insn_cnt = bpf_prog -> len ;
14221460 bool seen_exit = false;
14231461 u8 temp [BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY ];
1462+ void __percpu * priv_frame_ptr = NULL ;
14241463 u64 arena_vm_start , user_vm_start ;
1464+ void __percpu * priv_stack_ptr ;
14251465 int i , excnt = 0 ;
14261466 int ilen , proglen = 0 ;
14271467 u8 * prog = temp ;
14281468 u32 stack_depth ;
14291469 int err ;
14301470
14311471 stack_depth = bpf_prog -> aux -> stack_depth ;
1472+ priv_stack_ptr = bpf_prog -> aux -> priv_stack_ptr ;
1473+ if (priv_stack_ptr ) {
1474+ priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up (stack_depth , 8 );
1475+ stack_depth = 0 ;
1476+ }
14321477
14331478 arena_vm_start = bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena );
14341479 user_vm_start = bpf_arena_get_user_vm_start (bpf_prog -> aux -> arena );
@@ -1457,6 +1502,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
14571502 emit_mov_imm64 (& prog , X86_REG_R12 ,
14581503 arena_vm_start >> 32 , (u32 ) arena_vm_start );
14591504
1505+ if (priv_frame_ptr )
1506+ emit_priv_frame_ptr (& prog , priv_frame_ptr );
1507+
14601508 ilen = prog - temp ;
14611509 if (rw_image )
14621510 memcpy (rw_image + proglen , temp , ilen );
@@ -1476,6 +1524,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
14761524 u8 * func ;
14771525 int nops ;
14781526
1527+ if (priv_frame_ptr ) {
1528+ if (src_reg == BPF_REG_FP )
1529+ src_reg = X86_REG_R9 ;
1530+
1531+ if (dst_reg == BPF_REG_FP )
1532+ dst_reg = X86_REG_R9 ;
1533+ }
1534+
14791535 switch (insn -> code ) {
14801536 /* ALU */
14811537 case BPF_ALU | BPF_ADD | BPF_X :
@@ -2136,9 +2192,15 @@ st: if (is_imm8(insn->off))
21362192 }
21372193 if (!imm32 )
21382194 return - EINVAL ;
2195+ if (priv_frame_ptr ) {
2196+ push_r9 (& prog );
2197+ ip += 2 ;
2198+ }
21392199 ip += x86_call_depth_emit_accounting (& prog , func , ip );
21402200 if (emit_call (& prog , func , ip ))
21412201 return - EINVAL ;
2202+ if (priv_frame_ptr )
2203+ pop_r9 (& prog );
21422204 break ;
21432205 }
21442206
@@ -3306,6 +3368,42 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
33063368 return emit_bpf_dispatcher (& prog , 0 , num_funcs - 1 , funcs , image , buf );
33073369}
33083370
3371+ static const char * bpf_get_prog_name (struct bpf_prog * prog )
3372+ {
3373+ if (prog -> aux -> ksym .prog )
3374+ return prog -> aux -> ksym .name ;
3375+ return prog -> aux -> name ;
3376+ }
3377+
3378+ static void priv_stack_init_guard (void __percpu * priv_stack_ptr , int alloc_size )
3379+ {
3380+ int cpu , underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ ) >> 3 ;
3381+ u64 * stack_ptr ;
3382+
3383+ for_each_possible_cpu (cpu ) {
3384+ stack_ptr = per_cpu_ptr (priv_stack_ptr , cpu );
3385+ stack_ptr [0 ] = PRIV_STACK_GUARD_VAL ;
3386+ stack_ptr [underflow_idx ] = PRIV_STACK_GUARD_VAL ;
3387+ }
3388+ }
3389+
3390+ static void priv_stack_check_guard (void __percpu * priv_stack_ptr , int alloc_size ,
3391+ struct bpf_prog * prog )
3392+ {
3393+ int cpu , underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ ) >> 3 ;
3394+ u64 * stack_ptr ;
3395+
3396+ for_each_possible_cpu (cpu ) {
3397+ stack_ptr = per_cpu_ptr (priv_stack_ptr , cpu );
3398+ if (stack_ptr [0 ] != PRIV_STACK_GUARD_VAL ||
3399+ stack_ptr [underflow_idx ] != PRIV_STACK_GUARD_VAL ) {
3400+ pr_err ("BPF private stack overflow/underflow detected for prog %sx\n" ,
3401+ bpf_get_prog_name (prog ));
3402+ break ;
3403+ }
3404+ }
3405+ }
3406+
33093407struct x64_jit_data {
33103408 struct bpf_binary_header * rw_header ;
33113409 struct bpf_binary_header * header ;
@@ -3323,7 +3421,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
33233421 struct bpf_binary_header * rw_header = NULL ;
33243422 struct bpf_binary_header * header = NULL ;
33253423 struct bpf_prog * tmp , * orig_prog = prog ;
3424+ void __percpu * priv_stack_ptr = NULL ;
33263425 struct x64_jit_data * jit_data ;
3426+ int priv_stack_alloc_sz ;
33273427 int proglen , oldproglen = 0 ;
33283428 struct jit_context ctx = {};
33293429 bool tmp_blinded = false;
@@ -3359,6 +3459,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
33593459 }
33603460 prog -> aux -> jit_data = jit_data ;
33613461 }
3462+ priv_stack_ptr = prog -> aux -> priv_stack_ptr ;
3463+ if (!priv_stack_ptr && prog -> aux -> jits_use_priv_stack ) {
3464+ /* Allocate actual private stack size with verifier-calculated
3465+ * stack size plus two memory guards to protect overflow and
3466+ * underflow.
3467+ */
3468+ priv_stack_alloc_sz = round_up (prog -> aux -> stack_depth , 8 ) +
3469+ 2 * PRIV_STACK_GUARD_SZ ;
3470+ priv_stack_ptr = __alloc_percpu_gfp (priv_stack_alloc_sz , 8 , GFP_KERNEL );
3471+ if (!priv_stack_ptr ) {
3472+ prog = orig_prog ;
3473+ goto out_priv_stack ;
3474+ }
3475+
3476+ priv_stack_init_guard (priv_stack_ptr , priv_stack_alloc_sz );
3477+ prog -> aux -> priv_stack_ptr = priv_stack_ptr ;
3478+ }
33623479 addrs = jit_data -> addrs ;
33633480 if (addrs ) {
33643481 ctx = jit_data -> ctx ;
@@ -3494,6 +3611,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
34943611 bpf_prog_fill_jited_linfo (prog , addrs + 1 );
34953612out_addrs :
34963613 kvfree (addrs );
3614+ if (!image && priv_stack_ptr ) {
3615+ free_percpu (priv_stack_ptr );
3616+ prog -> aux -> priv_stack_ptr = NULL ;
3617+ }
3618+ out_priv_stack :
34973619 kfree (jit_data );
34983620 prog -> aux -> jit_data = NULL ;
34993621 }
@@ -3532,6 +3654,8 @@ void bpf_jit_free(struct bpf_prog *prog)
35323654 if (prog -> jited ) {
35333655 struct x64_jit_data * jit_data = prog -> aux -> jit_data ;
35343656 struct bpf_binary_header * hdr ;
3657+ void __percpu * priv_stack_ptr ;
3658+ int priv_stack_alloc_sz ;
35353659
35363660 /*
35373661 * If we fail the final pass of JIT (from jit_subprogs),
@@ -3547,6 +3671,13 @@ void bpf_jit_free(struct bpf_prog *prog)
35473671 prog -> bpf_func = (void * )prog -> bpf_func - cfi_get_offset ();
35483672 hdr = bpf_jit_binary_pack_hdr (prog );
35493673 bpf_jit_binary_pack_free (hdr , NULL );
3674+ priv_stack_ptr = prog -> aux -> priv_stack_ptr ;
3675+ if (priv_stack_ptr ) {
3676+ priv_stack_alloc_sz = round_up (prog -> aux -> stack_depth , 8 ) +
3677+ 2 * PRIV_STACK_GUARD_SZ ;
3678+ priv_stack_check_guard (priv_stack_ptr , priv_stack_alloc_sz , prog );
3679+ free_percpu (prog -> aux -> priv_stack_ptr );
3680+ }
35503681 WARN_ON_ONCE (!bpf_prog_kallsyms_verify_off (prog ));
35513682 }
35523683
@@ -3562,6 +3693,11 @@ bool bpf_jit_supports_exceptions(void)
35623693 return IS_ENABLED (CONFIG_UNWINDER_ORC );
35633694}
35643695
/*
 * The x86-64 JIT implements private-stack support: see
 * emit_priv_frame_ptr() and the r9-based BPF_REG_FP remapping in
 * do_jit(), plus the guard handling in bpf_int_jit_compile().
 */
bool bpf_jit_supports_private_stack(void)
{
	return true;
}
3700+
35653701void arch_bpf_stack_walk (bool (* consume_fn )(void * cookie , u64 ip , u64 sp , u64 bp ), void * cookie )
35663702{
35673703#if defined(CONFIG_UNWINDER_ORC )
0 commit comments