// SPDX-License-Identifier: GPL-2.0
//
// piggyback.bpf.c — TC ingress + sk_lookup eBPF programs
//
// TC ingress: intercepts TCP packets on the configured ports. If the payload
// starts with the MAGIC bytes (or a partial match is in progress), it steals
// the packet (TC_ACT_STOLEN) and notifies userspace; otherwise it passes the
// packet through (TC_ACT_OK).
//
// sk_lookup: steers new connections flagged by TC into the daemon's
// SO_REUSEPORT socket instead of the application socket.
//
// Supports IPv4 and IPv6. Handles a magic sequence split across TCP segments
// via a per-connection LRU state map.
|
|
|
|
#include <linux/bpf.h>
|
|
#include <linux/pkt_cls.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/in.h>
|
|
#include <linux/in6.h>
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_endian.h>
|
|
|
|
// Address-family values used by the sk_lookup program. They are spelled out
// here because glibc headers cannot be included in BPF programs.
#define AF_INET   2
#define AF_INET6  10
|
|
|
|
// ── Configuration ─────────────────────────────────────────────────────────────
|
|
|
|
// TCP destination ports inspected by the TC program. PORTS_N must equal the
// number of entries in PORTS; the assertion below enforces that at build time.
#define PORTS { 80, 8080 }
#define PORTS_N 2

// Byte sequence that marks a piggyback packet. MAGIC_LEN is its length in
// bytes, excluding the string literal's implicit trailing NUL.
#define MAGIC "\xDE\xAD\xC0\xDE\xCA\xFE"
#define MAGIC_LEN 6

// Signed header: 16 bytes fields + 64 bytes Ed25519 sig = 80 total
// eBPF only checks structural validity; full Ed25519 verify is in userspace.
#define HEADER_LEN 80

// The lengths above are maintained by hand next to the data they describe.
// Fail the build instead of silently mis-matching if they drift apart.
_Static_assert(sizeof(MAGIC) - 1 == MAGIC_LEN,
               "MAGIC_LEN must equal the byte length of MAGIC");
_Static_assert(sizeof((const unsigned short[])PORTS) / sizeof(unsigned short) == PORTS_N,
               "PORTS_N must equal the number of entries in PORTS");
|
|
|
|
// ── Shared types ──────────────────────────────────────────────────────────────
|
|
|
|
struct conn_key {
|
|
__u8 src_ip[16]; // IPv4: first 4 bytes; IPv6: all 16
|
|
__u16 src_port;
|
|
__u8 is_ipv6;
|
|
__u8 pad;
|
|
};
|
|
|
|
struct event {
|
|
__u8 src_ip[16];
|
|
__u16 src_port;
|
|
__u8 is_ipv6;
|
|
__u8 flags;
|
|
__u32 seq;
|
|
__u32 ack_seq;
|
|
__u8 header[HEADER_LEN];
|
|
__u8 header_valid;
|
|
};
|
|
|
|
// ── Maps ──────────────────────────────────────────────────────────────────────
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_RINGBUF);
|
|
__uint(max_entries, 1 << 24);
|
|
} events SEC(".maps");
|
|
|
|
// Per-connection magic match state: bytes matched so far (0..MAGIC_LEN)
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_LRU_HASH);
|
|
__uint(max_entries, 4096);
|
|
__type(key, struct conn_key);
|
|
__type(value, __u8);
|
|
} conn_state SEC(".maps");
|
|
|
|
// Connections awaiting sk_lookup steering: set by TC, cleared by sk_lookup
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_LRU_HASH);
|
|
__uint(max_entries, 4096);
|
|
__type(key, struct conn_key);
|
|
__type(value, __u8);
|
|
} pending SEC(".maps");
|
|
|
|
// Daemon's SO_REUSEPORT socket — populated by userspace after bind
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_SOCKMAP);
|
|
__uint(max_entries, 1);
|
|
__type(key, __u32);
|
|
__type(value, __u32);
|
|
} daemon_sock SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_ARRAY);
|
|
__uint(max_entries, 4);
|
|
__type(key, __u32);
|
|
__type(value, __u64);
|
|
} stats SEC(".maps");
|
|
|
|
#define STAT_TOTAL 0
|
|
#define STAT_MAGIC 1
|
|
#define STAT_PASSED 2
|
|
#define STAT_PARTIAL 3
|
|
|
|
// Atomically add one to the counter stored at slot `idx` of the stats map.
// Does nothing when the slot lookup fails.
static __always_inline void inc_stat(__u32 idx) {
    __u64 *counter = bpf_map_lookup_elem(&stats, &idx);

    if (!counter)
        return;
    __sync_fetch_and_add(counter, 1);
}
|
|
|
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
// Report whether a TCP port is one of the configured PORTS.
//
// port_be: port in network byte order, as read from the TCP header.
// Returns 1 when the port is listed in PORTS, 0 otherwise.
static __always_inline int port_watched(__u16 port_be) {
    const __u16 ports[] = PORTS;
    const __u16 port = bpf_ntohs(port_be);

    // Walk the real array length rather than a fixed number of hand-written
    // comparisons, so every configured port is honoured and no index can
    // fall outside the array.
    _Static_assert(sizeof(ports) / sizeof(ports[0]) == PORTS_N,
                   "PORTS_N must equal the number of entries in PORTS");

    // Constant trip count; ask the compiler to unroll it fully.
#pragma unroll
    for (__u32 i = 0; i < sizeof(ports) / sizeof(ports[0]); i++) {
        if (port == ports[i])
            return 1;
    }
    return 0;
}
|
|
|
|
// Return byte `idx` of the MAGIC sequence, or 0 when `idx` is at or beyond
// MAGIC_LEN.
static __always_inline __u8 magic_at(__u8 idx) {
    const __u8 pattern[] = MAGIC;

    return (idx < MAGIC_LEN) ? pattern[idx] : 0;
}
|
|
|
|
// ── TC ingress ────────────────────────────────────────────────────────────────
|
|
|
|
// TC ingress classifier.
//
// Parses Ethernet → IPv4/IPv6 → TCP. Packets that are not TCP, or whose
// destination port is not watched, are returned with TC_ACT_OK immediately.
// For watched ports, the first MAGIC_LEN payload bytes are fed through a
// per-connection matcher whose progress is kept in conn_state, so the magic
// may be split across segments.
//
// When the match completes, an event is submitted to the `events` ring
// buffer, the connection is recorded in `pending`, and the packet is
// consumed (TC_ACT_STOLEN). In every other case the packet is passed on
// (TC_ACT_OK).
//
// Only bytes between skb->data and skb->data_end are examined.
SEC("tc")
int piggyback_ingress(struct __sk_buff *skb) {
    void *data     = (void *)(long)skb->data;
    void *data_end = (void *)(long)skb->data_end;

    inc_stat(STAT_TOTAL);

    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end) return TC_ACT_OK;

    __u8 is_ipv6 = 0;
    __u8 src_ip[16] = {};
    void *transport;

    if (eth->h_proto == bpf_htons(ETH_P_IP)) {
        struct iphdr *ip = (void *)(eth + 1);
        if ((void *)(ip + 1) > data_end) return TC_ACT_OK;
        if (ip->protocol != IPPROTO_TCP) return TC_ACT_OK;
        // A header length below 5 words is malformed and would place the
        // "TCP header" inside the IP header.
        if (ip->ihl < 5) return TC_ACT_OK;
        // Non-first fragments (fragment offset != 0) carry no TCP header.
        if (ip->frag_off & bpf_htons(0x1FFF)) return TC_ACT_OK;
        __builtin_memcpy(src_ip, &ip->saddr, 4);
        transport = (__u8 *)ip + ip->ihl * 4;
    } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
        struct ipv6hdr *ip6 = (void *)(eth + 1);
        if ((void *)(ip6 + 1) > data_end) return TC_ACT_OK;
        // Only TCP directly after the fixed header is handled; packets with
        // extension headers are passed through.
        if (ip6->nexthdr != IPPROTO_TCP) return TC_ACT_OK;
        __builtin_memcpy(src_ip, &ip6->saddr, 16);
        is_ipv6 = 1;
        transport = (void *)(ip6 + 1);
    } else {
        return TC_ACT_OK;
    }

    struct tcphdr *tcp = transport;
    if ((void *)(tcp + 1) > data_end) return TC_ACT_OK;
    if (!port_watched(tcp->dest)) return TC_ACT_OK;

    __u32 tcp_hlen = tcp->doff * 4;
    // A data offset below 5 words is malformed: the payload pointer would
    // land inside the TCP header itself.
    if (tcp_hlen < sizeof(*tcp)) goto pass;

    __u8 *payload = (__u8 *)tcp + tcp_hlen;
    if ((void *)payload > data_end) return TC_ACT_OK;
    __u32 payload_len = (__u8 *)data_end - payload;

    // Segments without payload (SYN, bare ACK, ...) cannot advance the match;
    // leave the stored state untouched and do not count them as partial.
    if (payload_len == 0) goto pass;

    struct conn_key ck = {};
    __builtin_memcpy(ck.src_ip, src_ip, 16);
    ck.src_port = tcp->source;
    ck.is_ipv6 = is_ipv6;

    // ── Multi-packet state machine ────────────────────────────────────────────
    __u8 *state_p = bpf_map_lookup_elem(&conn_state, &ck);
    __u8 matched = state_p ? *state_p : 0;
    // Stored progress is expected to be below MAGIC_LEN; treat anything else
    // as "no progress" rather than as an instant match.
    if (matched >= MAGIC_LEN) matched = 0;

    // Number of payload bytes of THIS packet consumed when the magic
    // completed; the signed header starts at that payload offset.
    __u32 magic_end = 0;

    // Unrolled byte walk — verifier requires bounded, known iterations.
    // The walk stops as soon as the magic is complete: magic_at() returns 0
    // past the end, so continuing would reset a finished match.
    // NOTE(review): on a mismatch the matcher only retries the current byte
    // against magic[0]; it does not fall back to a longer partial prefix.
#define TRY_BYTE(i)                                                     \
    do {                                                                \
        if (matched < MAGIC_LEN &&                                      \
            (void *)(payload + (i) + 1) <= data_end) {                  \
            __u8 b = payload[(i)];                                      \
            if (b == magic_at(matched)) {                               \
                if (++matched == MAGIC_LEN)                             \
                    magic_end = (i) + 1;                                \
            } else {                                                    \
                matched = (b == magic_at(0)) ? 1 : 0;                   \
            }                                                           \
        }                                                               \
    } while (0)

    TRY_BYTE(0);
    TRY_BYTE(1);
    TRY_BYTE(2);
    TRY_BYTE(3);
    TRY_BYTE(4);
    TRY_BYTE(5);

#undef TRY_BYTE

    if (matched < MAGIC_LEN) {
        if (matched > 0) {
            // Remember the progress for the next segment of this connection.
            bpf_map_update_elem(&conn_state, &ck, &matched, BPF_ANY);
            inc_stat(STAT_PARTIAL);
        } else {
            bpf_map_delete_elem(&conn_state, &ck);
        }
        goto pass;
    }

    // ── Magic matched ─────────────────────────────────────────────────────────
    bpf_map_delete_elem(&conn_state, &ck);

    {
        struct event *e = bpf_ringbuf_reserve(&events, sizeof(*e), 0);
        if (!e) goto pass;  // ring buffer full — pass, don't drop silently

        // Clear the whole record first so that header[] (when not captured)
        // and the struct's tail padding never expose stale buffer contents.
        __builtin_memset(e, 0, sizeof(*e));

        __builtin_memcpy(e->src_ip, src_ip, 16);
        e->src_port = tcp->source;
        e->is_ipv6  = is_ipv6;
        e->flags    = ((__u8 *)tcp)[13];
        e->seq      = tcp->seq;
        e->ack_seq  = tcp->ack_seq;

        // Capture signed header bytes after magic for userspace Ed25519 verify.
        // The header begins where the magic ended in this packet, which is
        // not necessarily MAGIC_LEN when the magic was split across segments.
        if (payload_len >= magic_end + HEADER_LEN) {
            __u32 hdr_offset = (__u8 *)payload - (__u8 *)data + magic_end;
            if (bpf_skb_load_bytes(skb, hdr_offset, e->header, HEADER_LEN) == 0)
                e->header_valid = 1;
        }

        // Mark connection pending for sk_lookup steering
        __u8 one = 1;
        bpf_map_update_elem(&pending, &ck, &one, BPF_ANY);

        bpf_ringbuf_submit(e, 0);
        inc_stat(STAT_MAGIC);
        return TC_ACT_STOLEN;
    }

pass:
    inc_stat(STAT_PASSED);
    return TC_ACT_OK;
}
|
|
|
|
// ── sk_lookup ─────────────────────────────────────────────────────────────────
|
|
//
|
|
// Runs when kernel looks up a socket for an incoming connection.
|
|
// If the connection is in pending map (flagged by TC), redirect to daemon socket.
|
|
|
|
// sk_lookup program.
//
// Builds a conn_key from the remote address and port of the lookup and checks
// it against the `pending` map filled by the TC program. On a hit, the socket
// stored at slot 0 of `daemon_sock` is assigned to the lookup and, if the
// assignment succeeded, the pending entry is removed. Always returns SK_PASS.
SEC("sk_lookup")
int piggyback_lookup(struct bpf_sk_lookup *ctx) {
    struct conn_key flow = {};

    switch (ctx->family) {
    case AF_INET:
        __builtin_memcpy(flow.src_ip, &ctx->remote_ip4, 4);
        break;
    case AF_INET6:
        __builtin_memcpy(flow.src_ip, ctx->remote_ip6, 16);
        flow.is_ipv6 = 1;
        break;
    default:
        // Other address families are never steered.
        return SK_PASS;
    }

    // remote_port in bpf_sk_lookup is __be16 (network byte order)
    flow.src_port = ctx->remote_port;

    // Not flagged by the TC program: leave the normal lookup alone.
    if (!bpf_map_lookup_elem(&pending, &flow))
        return SK_PASS;

    __u32 slot = 0;
    struct bpf_sock *daemon = bpf_map_lookup_elem(&daemon_sock, &slot);
    if (!daemon)
        return SK_PASS;

    long rc = bpf_sk_assign(ctx, daemon, 0);
    // Drop the reference obtained from the socket map lookup.
    bpf_sk_release(daemon);

    // Keep the pending entry when the assignment failed.
    if (!rc)
        bpf_map_delete_elem(&pending, &flow);

    // SK_PASS after bpf_sk_assign means "use the assigned socket"
    return SK_PASS;
}
|
|
|
|
// License string placed in the object's "license" section.
char LICENSE[] SEC("license") = "GPL";
|