Netfilter 的 Hook 方式初探

Netfilter,Linux 核心裡面的封包處理子系統。這篇文章是一時心血來潮,看了一小段 code 以後所作的整理。

kernel: 2.6.23.14

下圖是 Linux 核心的網路封包處理流程:



讓我們到 ip_rcv 這隻函式來看一下,ip_rcv 是處理 IP 封包的主要進入函式。

net/ipv4/ip_input.c:

/*
* Main IP Receive routine.
*
* Sanity-checks an incoming IPv4 packet and, if it passes, hands it to the
* NF_IP_PRE_ROUTING netfilter hook with ip_rcv_finish as the continuation.
*
* skb: the received packet buffer.
* dev: receiving device; passed to NF_HOOK as the input device.
* pt, orig_dev: not used in this function body.
*
* Returns the value of NF_HOOK() when the packet is accepted for further
* processing, NET_RX_DROP when it is rejected here.
*/
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
....struct iphdr *iph;
....u32 len;

..../* When the interface is in promisc. mode, drop all the crap
....* that it receives, do not try to analyse it.
....*/
....if (skb->pkt_type == PACKET_OTHERHOST)
........goto drop;

....IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);

..../* On failure skb is NULL, so jump past kfree_skb() straight to "out". */
....if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
........IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
........goto out;
....}

....if (!pskb_may_pull(skb, sizeof(struct iphdr)))
........goto inhdr_error;

....iph = ip_hdr(skb);

..../*
....* RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
....*
....* Is the datagram acceptable?
....*
....* 1. Length at least the size of an ip header
....* 2. Version of 4
....* 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
....* 4. Doesn't have a bogus length
....*/
..../* ihl is counted in 32-bit words (see the ihl*4 byte lengths below),
....* so 5 is the minimum 20-byte header.
....*/
....if (iph->ihl < 5 || iph->version != 4)
........goto inhdr_error;

..../* Make sure the full header, including any options, is available. */
....if (!pskb_may_pull(skb, iph->ihl*4))
........goto inhdr_error;

..../* Re-read the header pointer after the pull; presumably
....* pskb_may_pull() can relocate the header data -- confirm.
....*/
....iph = ip_hdr(skb);

....if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
........goto inhdr_error;

..../* Reject a total length that exceeds the buffer or is shorter than
....* the header itself.
....*/
....len = ntohs(iph->tot_len);
....if (skb->len < len || len < (iph->ihl*4))
........goto inhdr_error;

..../* Our transport medium may have padded the buffer out. Now we know it
....* is IP we can trim to the true length of the frame.
....* Note this now means skb->len holds ntohs(iph->tot_len).
....*/
....if (pskb_trim_rcsum(skb, len)) {
........IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
........goto drop;
....}

..../* Remove any debris in the socket control block */
....memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

..../* Netfilter interception point: ip_rcv_finish is the okfn argument. */
....return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);

....inhdr_error:
....IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
....drop:
....kfree_skb(skb);
....out:
....return NET_RX_DROP;
}

當然根本不用貼這麼多程式碼啦,因為大部分都在作 Error Check 的動作(不過就是讓自己順便看一下囉)
比較重要的是紅色的部份(也就是最後 return 的那一行),那隻 NF_HOOK 的 Macro。
NF 就是 netfilter 的縮寫,在這裡我們可以清楚看到在 Linux 核心裡面是使用 Hook 的技術來加上攔截點的功能。

那我們來看看那隻 Macro 囉~

include/linux/netfilter.h

#ifdef CONFIG_NETFILTER

/* Netfilter enabled: forward to NF_HOOK_THRESH with thresh = INT_MIN. */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
	NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)

#else /* !CONFIG_NETFILTER */

/* Netfilter disabled: call the continuation okfn on skb directly;
 * pf, hook, indev and outdev are ignored. */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)

#endif /* CONFIG_NETFILTER */


在這裡我們可以看到兩個定義,一個是有啟用 netfilter Option的,另一個則是沒有。我們要觀察的當然是有啟用的囉。在這裡附帶一提,okfn 是 function pointer,以上面的例子來說,就是 ip_rcv_finish,這樣就會繼續核心的封包處理流程。

繼續追蹤下去:

include/linux/netfilter.h

/*
 * NF_HOOK_THRESH - call nf_hook_thresh() and, if it allows, the continuation.
 *
 * Calls nf_hook_thresh() with cond fixed to 1. When that returns 1 the
 * macro calls okfn(skb) and evaluates to okfn's return value; otherwise
 * it evaluates to whatever nf_hook_thresh() returned.
 *
 * The address of skb is taken (&(skb)), so the skb argument must be an
 * lvalue. The body is a GNU statement expression, ({ ... }).
 */
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
({int __ret; \
if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
__ret = (okfn)(skb); \
__ret;})

/*
 * nf_hook_thresh - inline fast path in front of nf_hook_slow().
 *
 * Returns 1 immediately when cond is zero, or (unless
 * CONFIG_NETFILTER_DEBUG is defined) when the list nf_hooks[pf][hook] is
 * empty. Otherwise returns the result of nf_hook_slow() called with the
 * same pf, hook, pskb, indev, outdev, okfn and thresh.
 */
static inline int nf_hook_thresh(..int pf, unsigned int hook,
....................................................struct sk_buff **pskb,
....................................................struct net_device *indev,
....................................................struct net_device *outdev,
....................................................int (*okfn)(struct sk_buff *), int thresh,
....................................................int cond)
{
..../* cond == 0: skip the hook lookup entirely. */
....if (!cond)
........return 1;
....
..../* Empty hook list: return 1 without calling nf_hook_slow().
....* This shortcut is compiled out when CONFIG_NETFILTER_DEBUG is set.
....*/
....#ifndef CONFIG_NETFILTER_DEBUG
....if (list_empty(&nf_hooks[pf][hook]))
........return 1;
....#endif

....return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
}

從這裡可以看得出來,NF_HOOK_THRESH 會先呼叫 nf_hook_thresh:如果這個 hook 點上沒有註冊任何 hook function,nf_hook_thresh 會直接回傳 1,核心就會進行原本的流程(呼叫 okfn),不然則會交由 nf_hook_slow 來處理。Let's go~

net/netfilter/core.c:

/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise.
*
* Calls nf_iterate() on the list nf_hooks[pf][hook] and maps the verdict
* it returns to the return value above. The whole call runs between
* rcu_read_lock() and rcu_read_unlock().
*/
int nf_hook_slow( int pf, unsigned int hook, struct sk_buff **pskb,
............................struct net_device *indev,
............................struct net_device *outdev,
............................int (*okfn)(struct sk_buff *),
............................int hook_thresh)
{
....struct list_head *elem;
....unsigned int verdict;
....int ret = 0;

..../* We may already have this, but read-locks nest anyway */
....rcu_read_lock();

..../* Start from the head of the hook list for this (pf, hook). */
....elem = &nf_hooks[pf][hook];

next_hook:

..../* elem is passed by address; presumably nf_iterate() advances it so
....* that a jump back to next_hook resumes after the last hook run --
....* confirm against nf_iterate().
....*/
....verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
....outdev, &elem, okfn, hook_thresh);
....if (verdict == NF_ACCEPT || verdict == NF_STOP) {
......../* Caller should go on and run okfn(). */
........ret = 1;
........goto unlock;
....} else if (verdict == NF_DROP) {
......../* Free the packet here; falls through to unlock. */
........kfree_skb(*pskb);
........ret = -EPERM;
....} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
......../* The bits above NF_VERDICT_BITS are passed on to nf_queue().
........* If nf_queue() returns 0, iteration resumes at next_hook;
........* otherwise ret stays 0.
........*/
........NFDEBUG("nf_hook: Verdict = QUEUE.\n");
........if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
............verdict >> NF_VERDICT_BITS))
............goto next_hook;
....}
..../* Any other verdict leaves ret at 0. */
unlock:
....rcu_read_unlock();
....return ret;
}

那就先停在這裡吧~下次再繼續追蹤~

PS:
如果要了解 thresh 和 cond,請參考下面的文章:
http://blog.chinaunix.net/u/12313/showart_185315.html

留言

  1. 你好,可不可以请教一下,你的流程图使用什么software做的啊?谢谢!!

    回覆刪除
  2. 其實這張圖是在網路上找到的,並不是我畫的唷

    (我個人流程圖都是用 Dia 畫)

    回覆刪除

張貼留言

這個網誌中的熱門文章

如何將Linux打造成OpenFlow Switch:Openvswitch

我弟家的新居感恩禮拜分享:善頌善禱

Linux Virtual Interface: TUN/TAP