Skip to content

Commit 8544737

Browse files
committed
bpf: Workaround for netkit + L7 policy redirect failure
The previous commit changed our local delivery logic to only enforce policies at the source if we also do a bpf_redirect_peer. If we're not doing a bpf_redirect_peer to send the packet to its destination pod (and endpoint routes are enabled), then there's no need to enforce policies at the source: the packet will already go through ingress policy enforcement at the bpf_lxc program.

This change however breaks L7 policy enforcement in case of netkit devices. In case of L7 policies, we need to redirect packets from the bpf_lxc ingress program to bpf_host@cilium_net to prepare for a redirect to the userspace proxy. That redirection to cilium_net fails and the packet is dropped.

The following pwru trace shows this happening, with the source bpf_lxc program marked as 'x' and the destination bpf_lxc program marked as '*'.

      NETNS      MARK/x   IFACE            FUNC
    x 4026535635 0        ~c8d066b3393b5:7 skb_ensure_writable
    x 4026535635 0        ~c8d066b3393b5:7 skb_ensure_writable
    x 4026535635 0        ~c8d066b3393b5:7 skb_ensure_writable
    x 4026535635 0        ~c8d066b3393b5:7 skb_ensure_writable
    x 4026535635 0        ~c8d066b3393b5:7 skb_ensure_writable
    x 4026535635 1a670f00 ~c8d066b3393b5:7 skb_do_redirect
      4026535635 1a670f00 ~c8d066b3393b5:7 __bpf_redirect
      4026535635 1a670f00 ~c504d1e733445:9 __dev_queue_xmit
      4026535635 1a670f00 ~c504d1e733445:9 qdisc_pkt_len_init
      4026535635 1a670f00 ~c504d1e733445:9 netdev_core_pick_tx
      4026535635 1a670f00 ~c504d1e733445:9 validate_xmit_skb
      4026535635 1a670f00 ~c504d1e733445:9 netif_skb_features
      4026535635 1a670f00 ~c504d1e733445:9 passthru_features_check
      4026535635 1a670f00 ~c504d1e733445:9 skb_network_protocol
      4026535635 1a670f00 ~c504d1e733445:9 skb_csum_hwoffload_help
      4026535635 1a670f00 ~c504d1e733445:9 validate_xmit_xfrm
      4026535635 1a670f00 ~c504d1e733445:9 dev_hard_start_xmit
      4026535635 1a670f00 ~c504d1e733445:9 netkit_xmit
      4026535635 1a670f00 ~c504d1e733445:9 skb_scrub_packet
    * 4026536207 0        eth0:8           skb_ensure_writable
    * 4026536207 0        eth0:8           skb_ensure_writable
    * 4026536207 0        eth0:8           skb_ensure_writable
    * 4026536207 0        eth0:8           skb_ensure_writable
    * 4026536207 0        eth0:8           skb_ensure_writable
    * 4026536207 1330200  eth0:8           skb_do_redirect
      4026536207 1330200  eth0:8           sk_skb_reason_drop(SKB_DROP_REASON_NOT_SPECIFIED)

Because we're using netkit devices, our source bpf_lxc program performs a normal bpf_redirect to the destination lxc interface. We see the packet navigate from the source lxc device (~c8d066b3393b5:7) to the destination one (~c504d1e733445:9). It then traverses to the peer device in the pod netns (cf. call to netkit_xmit) [1], after which point the destination bpf_lxc program is executed [2]. We can see the last skb_do_redirect call is for the proxy preparation because the packet carries the MARK_MAGIC_TO_PROXY mark (0x200). That redirect fails because we're in the pod netns and the destination ifindex cannot be found [3].

Preventing this will require a fix upstream to perform the netkit netns switch after we run the BPF program. In the meantime, we need a workaround for netkit + L7 policies. This commit simply reverts the change from the previous commit for the specific case of netkit devices.

1 - https://elixir.bootlin.com/linux/v6.13.4/source/drivers/net/netkit.c#L99
2 - https://elixir.bootlin.com/linux/v6.13.4/source/drivers/net/netkit.c#L102
3 - https://elixir.bootlin.com/linux/v6.13.4/source/net/core/filter.c#L2489

Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
1 parent 94fe32e commit 8544737

6 files changed

Lines changed: 33 additions & 5 deletions

File tree

bpf/lib/local_delivery.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include "l3.h"
99
#include "token_bucket.h"
1010

11+
DECLARE_CONFIG(bool, enable_netkit, "Use netkit devices for pods")
12+
1113
/* Global map to jump into policy enforcement of sending endpoint */
1214
struct {
1315
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -146,7 +148,12 @@ local_delivery(struct __ctx_buff *ctx, __u32 seclabel,
146148
* policy (the cil_to_container BPF program) is bypassed.
147149
*/
148150
use_fast_redirect = should_fast_redirect(ctx, from_host);
149-
if (is_defined(USE_BPF_PROG_FOR_INGRESS_POLICY) && !use_fast_redirect) {
151+
if (is_defined(USE_BPF_PROG_FOR_INGRESS_POLICY) && !use_fast_redirect &&
152+
/* We need to enforce policies at the source in case of netkit
153+
* devices because we can't redirect to proxy from bpf_lxc. That
154+
* needs a fix upstream.
155+
*/
156+
(!CONFIG(enable_netkit) || ctx_get_ingress_ifindex(ctx) > 0)) {
150157
set_identity_mark(ctx, seclabel, magic);
151158

152159
# if !defined(ENABLE_NODEPORT)

pkg/datapath/config/host_config.go

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/datapath/config/lxc_config.go

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/datapath/config/overlay_config.go

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/datapath/config/wireguard_config.go

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/datapath/loader/loader.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/cilium/cilium/pkg/datapath/linux/safenetlink"
3030
"github.com/cilium/cilium/pkg/datapath/linux/sysctl"
3131
"github.com/cilium/cilium/pkg/datapath/loader/metrics"
32+
datapathOption "github.com/cilium/cilium/pkg/datapath/option"
3233
"github.com/cilium/cilium/pkg/datapath/tables"
3334
datapath "github.com/cilium/cilium/pkg/datapath/types"
3435
"github.com/cilium/cilium/pkg/defaults"
@@ -266,6 +267,8 @@ func netdevRewrites(ep datapath.EndpointConfiguration, lnc *datapath.LocalNodeCo
266267
cfg.EnableExtendedIPProtocols = option.Config.EnableExtendedIPProtocols
267268
cfg.HostEpID = uint16(lnc.HostEndpointID)
268269
cfg.EnableNoServiceEndpointsRoutable = lnc.SvcRouteConfig.EnableNoServiceEndpointsRoutable
270+
cfg.EnableNetkit = option.Config.DatapathMode == datapathOption.DatapathModeNetkit ||
271+
option.Config.DatapathMode == datapathOption.DatapathModeNetkitL2
269272

270273
if lnc.EnableWireguard {
271274
cfg.WgIfindex = lnc.WireguardIfIndex
@@ -417,6 +420,8 @@ func ciliumHostRewrites(ep datapath.EndpointConfiguration, lnc *datapath.LocalNo
417420
cfg.SecurityLabel = ep.GetIdentity().Uint32()
418421

419422
cfg.HostEpID = uint16(lnc.HostEndpointID)
423+
cfg.EnableNetkit = option.Config.DatapathMode == datapathOption.DatapathModeNetkit ||
424+
option.Config.DatapathMode == datapathOption.DatapathModeNetkitL2
420425

421426
if lnc.EnableWireguard {
422427
cfg.WgIfindex = lnc.WireguardIfIndex
@@ -501,6 +506,8 @@ func ciliumNetRewrites(ep datapath.EndpointConfiguration, lnc *datapath.LocalNod
501506

502507
cfg.EnableExtendedIPProtocols = option.Config.EnableExtendedIPProtocols
503508
cfg.EnableNoServiceEndpointsRoutable = lnc.SvcRouteConfig.EnableNoServiceEndpointsRoutable
509+
cfg.EnableNetkit = option.Config.DatapathMode == datapathOption.DatapathModeNetkit ||
510+
option.Config.DatapathMode == datapathOption.DatapathModeNetkitL2
504511

505512
ifindex := link.Attrs().Index
506513
cfg.InterfaceIfindex = uint32(ifindex)
@@ -679,6 +686,8 @@ func endpointRewrites(ep datapath.EndpointConfiguration, lnc *datapath.LocalNode
679686

680687
cfg.HostEpID = uint16(lnc.HostEndpointID)
681688
cfg.EnableNoServiceEndpointsRoutable = lnc.SvcRouteConfig.EnableNoServiceEndpointsRoutable
689+
cfg.EnableNetkit = option.Config.DatapathMode == datapathOption.DatapathModeNetkit ||
690+
option.Config.DatapathMode == datapathOption.DatapathModeNetkitL2
682691

683692
if option.Config.EnableVTEP {
684693
cfg.VtepMask = byteorder.NetIPv4ToHost32(net.IP(option.Config.VtepCidrMask))
@@ -797,6 +806,8 @@ func replaceOverlayDatapath(ctx context.Context, logger *slog.Logger, lnc *datap
797806

798807
cfg.EnableExtendedIPProtocols = option.Config.EnableExtendedIPProtocols
799808
cfg.EnableNoServiceEndpointsRoutable = lnc.SvcRouteConfig.EnableNoServiceEndpointsRoutable
809+
cfg.EnableNetkit = option.Config.DatapathMode == datapathOption.DatapathModeNetkit ||
810+
option.Config.DatapathMode == datapathOption.DatapathModeNetkitL2
800811

801812
if option.Config.EnableVTEP {
802813
cfg.VtepMask = byteorder.NetIPv4ToHost32(net.IP(option.Config.VtepCidrMask))
@@ -852,6 +863,8 @@ func replaceWireguardDatapath(ctx context.Context, logger *slog.Logger, lnc *dat
852863
}
853864

854865
cfg.EnableExtendedIPProtocols = option.Config.EnableExtendedIPProtocols
866+
cfg.EnableNetkit = option.Config.DatapathMode == datapathOption.DatapathModeNetkit ||
867+
option.Config.DatapathMode == datapathOption.DatapathModeNetkitL2
855868

856869
var obj wireguardObjects
857870
commit, err := bpf.LoadAndAssign(logger, &obj, spec, &bpf.CollectionOptions{

0 commit comments

Comments
 (0)