// SPDX-License-Identifier: GPL-2.0 // // piggyback.bpf.c — TC ingress + sk_lookup eBPF programs // // TC ingress: intercepts TCP packets on configured ports. If payload starts // with MAGIC bytes (or partial match in progress), steals the packet // (TC_ACT_STOLEN) and notifies userspace, or passes through (TC_ACT_OK). // // sk_lookup: steers new connections flagged by TC into the daemon's // SO_REUSEPORT socket instead of the application socket. // // Supports IPv4 + IPv6. Handles magic split across TCP segments via // per-connection LRU state map. #include #include #include #include #include #include #include #include #include #include // AF_ constants — cannot include glibc headers in BPF programs #define AF_INET 2 #define AF_INET6 10 // ── Configuration ───────────────────────────────────────────────────────────── #define LISTEN_PORT 80 // must match LISTEN_PORT in piggyback.c #define MAGIC "\xDE\xAD\xC0\xDE\xCA\xFE" #define MAGIC_LEN 6 // Signed header: 16 bytes fields + 64 bytes Ed25519 sig = 80 total // eBPF only checks structural validity; full Ed25519 verify is in userspace. #define HEADER_LEN 80 // ── Shared types ────────────────────────────────────────────────────────────── struct conn_key { __u8 src_ip[16]; // IPv4: first 4 bytes; IPv6: all 16 __u16 src_port; __u8 is_ipv6; __u8 pad; }; struct event { __u8 src_ip[16]; __u16 src_port; __u8 is_ipv6; __u8 flags; __u32 seq; __u32 ack_seq; __u8 header[HEADER_LEN]; __u8 header_valid; }; // ── Maps ────────────────────────────────────────────────────────────────────── struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 1 << 24); } events SEC(".maps"); // Per-connection magic match state: bytes matched so far (0..MAGIC_LEN) struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 4096); __type(key, struct conn_key); __type(value, __u8); } conn_state SEC(".maps"); // Connections awaiting sk_lookup steering: set by TC, cleared by sk_lookup struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 4096); __type(key, struct conn_key); __type(value, __u8); } pending SEC(".maps"); // Daemon's SO_REUSEPORT socket — populated by userspace after bind struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 1); __type(key, __u32); __type(value, __u32); } daemon_sock SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u64); } stats SEC(".maps"); #define STAT_TOTAL 0 #define STAT_MAGIC 1 #define STAT_PASSED 2 #define STAT_PARTIAL 3 static __always_inline void inc_stat(__u32 idx) { __u64 *v = bpf_map_lookup_elem(&stats, &idx); if (v) __sync_fetch_and_add(v, 1); } // ── Helpers ─────────────────────────────────────────────────────────────────── static __always_inline int port_watched(__u16 port_be) { return bpf_ntohs(port_be) == LISTEN_PORT; } static __always_inline __u8 magic_at(__u8 idx) { const __u8 magic[] = MAGIC; if (idx < MAGIC_LEN) return magic[idx]; return 0; } // ── TC ingress ──────────────────────────────────────────────────────────────── SEC("tc") int piggyback_ingress(struct __sk_buff *skb) { void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; inc_stat(STAT_TOTAL); struct ethhdr *eth = data; if ((void *)(eth + 1) > data_end) return TC_ACT_OK; __u8 is_ipv6 = 0; __u8 src_ip[16] = {}; void *transport; if (eth->h_proto == bpf_htons(ETH_P_IP)) { struct iphdr *ip = (void *)(eth + 1); if ((void *)(ip + 1) > data_end) return TC_ACT_OK; if (ip->protocol != IPPROTO_TCP) return TC_ACT_OK; __builtin_memcpy(src_ip, &ip->saddr, 4); transport = (void *)ip + ip->ihl * 4; } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { struct ipv6hdr *ip6 = (void *)(eth + 1); if ((void *)(ip6 + 1) > data_end) return TC_ACT_OK; if (ip6->nexthdr != IPPROTO_TCP) return TC_ACT_OK; __builtin_memcpy(src_ip, &ip6->saddr, 16); is_ipv6 = 1; transport = (void *)(ip6 + 1); } else { return TC_ACT_OK; } struct tcphdr *tcp = transport; if ((void *)(tcp + 1) > data_end) return TC_ACT_OK; if (!port_watched(tcp->dest)) return TC_ACT_OK; __u32 tcp_hlen = tcp->doff * 4; __u8 *payload = (__u8 *)tcp + tcp_hlen; if ((void *)payload > data_end) return TC_ACT_OK; __u32 payload_len = (__u8 *)data_end - payload; struct conn_key ck = {}; __builtin_memcpy(ck.src_ip, src_ip, 16); ck.src_port = tcp->source; ck.is_ipv6 = is_ipv6; // ── Multi-packet state machine ──────────────────────────────────────────── __u8 *state_p = bpf_map_lookup_elem(&conn_state, &ck); __u8 matched = state_p ? *state_p : 0; // Unrolled byte walk — verifier requires bounded, known iterations #define TRY_BYTE(i) \ if ((void *)(payload + (i) + 1) <= data_end) { \ __u8 b = payload[(i)]; \ if (b == magic_at(matched)) { \ matched++; \ } else { \ matched = (b == magic_at(0)) ? 1 : 0; \ } \ } TRY_BYTE(0) TRY_BYTE(1) TRY_BYTE(2) TRY_BYTE(3) TRY_BYTE(4) TRY_BYTE(5) #undef TRY_BYTE if (matched < MAGIC_LEN) { if (matched > 0) { bpf_map_update_elem(&conn_state, &ck, &matched, BPF_ANY); inc_stat(STAT_PARTIAL); } else { bpf_map_delete_elem(&conn_state, &ck); } goto pass; } // ── Magic matched ───────────────────────────────────────────────────────── bpf_map_delete_elem(&conn_state, &ck); { struct event *e = bpf_ringbuf_reserve(&events, sizeof(*e), 0); if (!e) return TC_ACT_OK; // ring buffer full — pass, don't drop silently __builtin_memcpy(e->src_ip, src_ip, 16); e->src_port = tcp->source; e->is_ipv6 = is_ipv6; e->flags = ((__u8 *)tcp)[13]; e->seq = tcp->seq; e->ack_seq = tcp->ack_seq; e->header_valid = 0; // Capture signed header bytes after magic for userspace Ed25519 verify if (payload_len >= MAGIC_LEN + HEADER_LEN) { __u32 hdr_offset = (__u8 *)payload - (__u8 *)data + MAGIC_LEN; if (bpf_skb_load_bytes(skb, hdr_offset, e->header, HEADER_LEN) == 0) e->header_valid = 1; } // Mark connection pending for sk_lookup steering __u8 one = 1; bpf_map_update_elem(&pending, &ck, &one, BPF_ANY); bpf_ringbuf_submit(e, 0); inc_stat(STAT_MAGIC); return TC_ACT_STOLEN; } pass: inc_stat(STAT_PASSED); return TC_ACT_OK; } // ── sk_lookup ───────────────────────────────────────────────────────────────── // // Runs when kernel looks up a socket for an incoming connection. // If the connection is in pending map (flagged by TC), redirect to daemon socket. SEC("sk_lookup") int piggyback_lookup(struct bpf_sk_lookup *ctx) { struct conn_key ck = {}; if (ctx->family == AF_INET) { __builtin_memcpy(ck.src_ip, &ctx->remote_ip4, 4); ck.is_ipv6 = 0; } else if (ctx->family == AF_INET6) { __builtin_memcpy(ck.src_ip, ctx->remote_ip6, 16); ck.is_ipv6 = 1; } else { return SK_PASS; } // remote_port in bpf_sk_lookup is __be16 (network byte order) ck.src_port = ctx->remote_port; __u8 *p = bpf_map_lookup_elem(&pending, &ck); if (!p) return SK_PASS; __u32 key = 0; struct bpf_sock *sk = bpf_map_lookup_elem(&daemon_sock, &key); if (!sk) return SK_PASS; long ret = bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); if (ret == 0) bpf_map_delete_elem(&pending, &ck); // SK_PASS after bpf_sk_assign means "use the assigned socket" return SK_PASS; } char LICENSE[] SEC("license") = "GPL";