diff options
author | Guy Harris <guy@alum.mit.edu> | 2013-12-18 12:29:35 -0800 |
---|---|---|
committer | Guy Harris <guy@alum.mit.edu> | 2013-12-18 12:29:35 -0800 |
commit | 91758e29467459dc45f25f0e558e3e76afcee66d (patch) | |
tree | 496ce91dd66508f72027b2f928051932ad9c7566 /pcap-linux.c | |
parent | 0bfad74b57fe342603e2f73b88f53fb57b077f86 (diff) |
Count *ring buffer blocks*, not *packets* to be filtered in userland.
We count the number of free blocks in the ring buffer, and calculate the
number of blocks in the ring buffer containing packets. With TPACKET_V1
and TPACKET_V2, there's one packet per block, but, with TPACKET_V3,
there can be more than one packet per block, so we need to count blocks,
not packets, to decide when we no longer need to filter packets in
userland.
Don't overload the "filtering in kernel" flag to hold the count of
blocks; instead, have a "filter in userland" flag, indicating that we
have to filter packets in userland (whether that's because the filter
wasn't accepted by the kernel or because we need to deal with packets
that passed an earlier kernel filter but wouldn't have passed the new
kernel filter) and a count of blocks to filter in userland.
After counting the number of free blocks, if we find any, decrement that
count by 1, just in case some packet that had passed the old filter got
added to the ring after we counted the free blocks. (For TPACKET_V1 and
TPACKET_V2, that would mark the block as no longer free, so the window
in which that could happen is narrower than for TPACKET_V3, where the
block might not be full and the timeout might not have expired, so it
could take some time before that block is handed to userland.)
Should fix GitHub issue #331.
(While we're at it, use the term "block" consistently for a fixed-length
item in the ring.)
Diffstat (limited to 'pcap-linux.c')
-rw-r--r-- | pcap-linux.c | 129 |
1 files changed, 108 insertions, 21 deletions
diff --git a/pcap-linux.c b/pcap-linux.c index 48a2cde2..74de6a3f 100644 --- a/pcap-linux.c +++ b/pcap-linux.c @@ -289,7 +289,8 @@ struct pcap_linux { struct pcap_stat stat; char *device; /* device name */ - int filtering_in_kernel; /* using kernel filter */ + int filter_in_userland; /* must filter in userland */ + int blocks_to_filter_in_userland; int must_do_on_close; /* stuff we must do when we close */ int timeout; /* timeout for buffering */ int sock_packet; /* using Linux 2.0 compatible interface */ @@ -1703,7 +1704,7 @@ pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata) caplen = handle->snapshot; /* Run the packet filter if not using kernel filter */ - if (!handlep->filtering_in_kernel && handle->fcode.bf_insns) { + if (handlep->filter_in_userland && handle->fcode.bf_insns) { if (bpf_filter(handle->fcode.bf_insns, bp, packet_len, caplen) == 0) { @@ -2387,7 +2388,7 @@ pcap_setfilter_linux_common(pcap_t *handle, struct bpf_program *filter, * Run user level packet filter by default. Will be overriden if * installing a kernel filter succeeds. */ - handlep->filtering_in_kernel = 0; + handlep->filter_in_userland = 1; /* Install kernel level filter if possible */ @@ -2475,8 +2476,11 @@ pcap_setfilter_linux_common(pcap_t *handle, struct bpf_program *filter, if (can_filter_in_kernel) { if ((err = set_kernel_filter(handle, &fcode)) == 0) { - /* Installation succeded - using kernel filter. */ - handlep->filtering_in_kernel = 1; + /* + * Installation succeded - using kernel filter, + * so userland filtering not needed. + */ + handlep->filter_in_userland = 0; } else if (err == -1) /* Non-fatal error */ { @@ -2501,7 +2505,7 @@ pcap_setfilter_linux_common(pcap_t *handle, struct bpf_program *filter, * calling "pcap_setfilter()". Otherwise, the kernel filter may * filter out packets that would pass the new userland filter. */ - if (!handlep->filtering_in_kernel) + if (handlep->filter_in_userland) reset_kernel_filter(handle); /* @@ -4198,7 +4202,6 @@ static int pcap_handle_packet_mmap( { struct pcap_linux *handlep = handle->priv; unsigned char *bp; - int run_bpf; struct sockaddr_ll *sll; struct pcap_pkthdr pcaphdr; @@ -4215,15 +4218,13 @@ static int pcap_handle_packet_mmap( * If the kernel filtering is enabled we need to run the * filter until all the frames present into the ring * at filter creation time are processed. - * In such case filtering_in_kernel is used as a counter for the - * packet we need to filter. + * In this case, blocks_to_filter_in_userland is used + * as a counter for the packet we need to filter. * Note: alternatively it could be possible to stop applying * the filter when the ring became empty, but it can possibly * happen a lot later... */ bp = frame + tp_mac; - run_bpf = (!handlep->filtering_in_kernel) || - ((handlep->filtering_in_kernel>1) && handlep->filtering_in_kernel--); - if (run_bpf && handle->fcode.bf_insns && + if (handlep->filter_in_userland && handle->fcode.bf_insns && (bpf_filter(handle->fcode.bf_insns, bp, tp_len, tp_snaplen) == 0)) return 0; @@ -4361,9 +4362,25 @@ pcap_read_linux_mmap_v1(pcap_t *handle, int max_packets, pcap_handler callback, return ret; } - /* next packet */ + /* + * Hand this block back to the kernel, and, if we're + * counting blocks that need to be filtered in userland + * after having been filtered by the kernel, count + * the one we've just processed. + */ h.h1->tp_status = TP_STATUS_KERNEL; + if (handlep->blocks_to_filter_in_userland > 0) { + handlep->blocks_to_filter_in_userland--; + if (handlep->blocks_to_filter_in_userland == 0) { + /* + * No more blocks need to be filtered + * in userland. + */ + handlep->filter_in_userland = 0; + } + } + /* next block */ if (++handle->offset >= handle->cc) handle->offset = 0; @@ -4423,9 +4440,25 @@ pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback, return ret; } - /* next packet */ + /* + * Hand this block back to the kernel, and, if we're + * counting blocks that need to be filtered in userland + * after having been filtered by the kernel, count + * the one we've just processed. + */ h.h2->tp_status = TP_STATUS_KERNEL; + if (handlep->blocks_to_filter_in_userland > 0) { + handlep->blocks_to_filter_in_userland--; + if (handlep->blocks_to_filter_in_userland == 0) { + /* + * No more blocks need to be filtered + * in userland. + */ + handlep->filter_in_userland = 0; + } + } + /* next block */ if (++handle->offset >= handle->cc) handle->offset = 0; @@ -4507,9 +4540,26 @@ pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback, } if (handlep->packets_left <= 0) { - /* next block */ + /* + * Hand this block back to the kernel, and, if + * we're counting blocks that need to be + * filtered in userland after having been + * filtered by the kernel, count the one we've + * just processed. + */ h.h3->hdr.bh1.block_status = TP_STATUS_KERNEL; + if (handlep->blocks_to_filter_in_userland > 0) { + handlep->blocks_to_filter_in_userland--; + if (handlep->blocks_to_filter_in_userland == 0) { + /* + * No more blocks need to be filtered + * in userland. + */ + handlep->filter_in_userland = 0; + } + } + /* next block */ if (++handle->offset >= handle->cc) handle->offset = 0; @@ -4543,13 +4593,22 @@ pcap_setfilter_linux_mmap(pcap_t *handle, struct bpf_program *filter) if (ret < 0) return ret; - /* if the kernel filter is enabled, we need to apply the filter on - * all packets present into the ring. Get an upper bound of their number + /* + * If we're filtering in userland, there's nothing to do; + * the new filter will be used for the next packet. */ - if (!handlep->filtering_in_kernel) + if (handlep->filter_in_userland) return ret; - /* walk the ring backward and count the free slot */ + /* + * We're filtering in the kernel; the packets present in + * all blocks currently in the ring were already filtered + * by the old filter, and so will need to be filtered in + * userland by the new filter. + * + * Get an upper bound for the number of such blocks; first, + * walk the ring backward and count the free blocks. + */ offset = handle->offset; if (--handle->offset < 0) handle->offset = handle->cc - 1; @@ -4560,11 +4619,39 @@ pcap_setfilter_linux_mmap(pcap_t *handle, struct bpf_program *filter) break; } + /* + * If we found free blocks, decrement the count of free + * blocks by 1, just in case we lost a race with another + * thread of control that was adding a packet while + * we were counting and that had run the filter before + * we changed it. + * + * XXX - could there be more than one block added in + * this fashion? + * + * XXX - is there a way to avoid that race, e.g. somehow + * wait for all packets that passed the old filter to + * be added to the ring? + */ + if (n != 0) + n--; + /* be careful to not change current ring position */ handle->offset = offset; - /* store the number of packets currently present in the ring */ - handlep->filtering_in_kernel = 1 + (handle->cc - n); + /* + * Set the count of blocks worth of packets to filter + * in userland to the total number of blocks in the + * ring minus the number of free blocks we found, and + * turn on userland filtering. (The count of blocks + * worth of packets to filter in userland is guaranteed + * not to be zero - n, above, couldn't be set to a + * value > handle->cc, and if it were equal to + * handle->cc, it wouldn't be zero, and thus would + * be decremented to handle->cc - 1.) + */ + handlep->blocks_to_filter_in_userland = handle->cc - n; + handlep->filter_in_userland = 1; return ret; } |