diff options
Diffstat (limited to 'vendor/github.com/google/gopacket/afpacket/afpacket.go')
-rw-r--r-- | vendor/github.com/google/gopacket/afpacket/afpacket.go | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/vendor/github.com/google/gopacket/afpacket/afpacket.go b/vendor/github.com/google/gopacket/afpacket/afpacket.go new file mode 100644 index 0000000..13937c1 --- /dev/null +++ b/vendor/github.com/google/gopacket/afpacket/afpacket.go @@ -0,0 +1,476 @@ +// Copyright 2012 Google, Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. + +// +build linux + +// Package afpacket provides Go bindings for MMap'd AF_PACKET socket reading. +package afpacket + +// Couldn't have done this without: +// http://lxr.free-electrons.com/source/Documentation/networking/packet_mmap.txt +// http://codemonkeytips.blogspot.co.uk/2011/07/asynchronous-packet-socket-reading-with.html + +import ( + "errors" + "fmt" + "net" + "runtime" + "sync" + "sync/atomic" + "time" + "unsafe" + + "golang.org/x/net/bpf" + "golang.org/x/sys/unix" + + "github.com/google/gopacket" +) + +/* +#include <linux/if_packet.h> // AF_PACKET, sockaddr_ll +#include <linux/if_ether.h> // ETH_P_ALL +#include <sys/socket.h> // socket() +#include <unistd.h> // close() +#include <arpa/inet.h> // htons() +#include <sys/mman.h> // mmap(), munmap() +#include <poll.h> // poll() +*/ +import "C" + +var pageSize = unix.Getpagesize() +var tpacketAlignment = uint(C.TPACKET_ALIGNMENT) + +// ErrPoll returned by poll +var ErrPoll = errors.New("packet poll failed") + +// ErrTimeout returned on poll timeout +var ErrTimeout = errors.New("packet poll timeout expired") + +func tpacketAlign(v int) int { + return int((uint(v) + tpacketAlignment - 1) & ((^tpacketAlignment) - 1)) +} + +// Stats is a set of counters detailing the work TPacket has done so far. +type Stats struct { + // Packets is the total number of packets returned to the caller. + Packets int64 + // Polls is the number of blocking syscalls made waiting for packets. + // This should always be <= Packets, since with TPacket one syscall + // can (and often does) return many results. + Polls int64 +} + +// SocketStats is a struct where socket stats are stored +type SocketStats C.struct_tpacket_stats + +// SocketStatsV3 is a struct where socket stats for TPacketV3 are stored +type SocketStatsV3 C.struct_tpacket_stats_v3 + +// TPacket implements packet receiving for Linux AF_PACKET versions 1, 2, and 3. +type TPacket struct { + // fd is the C file descriptor. + fd int + // ring points to the memory space of the ring buffer shared by tpacket and the kernel. + ring []byte + // rawring is the unsafe pointer that we use to poll for packets + rawring unsafe.Pointer + // opts contains read-only options for the TPacket object. + opts options + mu sync.Mutex // guards below + // offset is the offset into the ring of the current header. + offset int + // current is the current header. + current header + // pollset is used by TPacket for its poll() call. + pollset unix.PollFd + // shouldReleasePacket is set to true whenever we return packet data, to make sure we remember to release that data back to the kernel. + shouldReleasePacket bool + // headerNextNeeded is set to true when header need to move to the next packet. No need to move it case of poll error. + headerNextNeeded bool + // tpVersion is the version of TPacket actually in use, set by setRequestedTPacketVersion. + tpVersion OptTPacketVersion + // Hackity hack hack hack. We need to return a pointer to the header with + // getTPacketHeader, and we don't want to allocate a v3wrapper every time, + // so we leave it in the TPacket object and return a pointer to it. + v3 v3wrapper + + statsMu sync.Mutex // guards stats below + // stats is simple statistics on TPacket's run. + stats Stats + // socketStats contains stats from the socket + socketStats SocketStats + // same as socketStats, but with an extra field freeze_q_cnt + socketStatsV3 SocketStatsV3 +} + +var _ gopacket.ZeroCopyPacketDataSource = &TPacket{} + +// bindToInterface binds the TPacket socket to a particular named interface. +func (h *TPacket) bindToInterface(ifaceName string) error { + ifIndex := 0 + // An empty string here means to listen to all interfaces + if ifaceName != "" { + iface, err := net.InterfaceByName(ifaceName) + if err != nil { + return fmt.Errorf("InterfaceByName: %v", err) + } + ifIndex = iface.Index + } + s := &unix.SockaddrLinklayer{ + Protocol: htons(uint16(unix.ETH_P_ALL)), + Ifindex: ifIndex, + } + return unix.Bind(h.fd, s) +} + +// setTPacketVersion asks the kernel to set TPacket to a particular version, and returns an error on failure. +func (h *TPacket) setTPacketVersion(version OptTPacketVersion) error { + if err := unix.SetsockoptInt(h.fd, unix.SOL_PACKET, unix.PACKET_VERSION, int(version)); err != nil { + return fmt.Errorf("setsockopt packet_version: %v", err) + } + return nil +} + +// setRequestedTPacketVersion tries to set TPacket to the requested version or versions. +func (h *TPacket) setRequestedTPacketVersion() error { + switch { + case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion3) && h.setTPacketVersion(TPacketVersion3) == nil: + h.tpVersion = TPacketVersion3 + case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion2) && h.setTPacketVersion(TPacketVersion2) == nil: + h.tpVersion = TPacketVersion2 + case (h.opts.version == TPacketVersionHighestAvailable || h.opts.version == TPacketVersion1) && h.setTPacketVersion(TPacketVersion1) == nil: + h.tpVersion = TPacketVersion1 + default: + return errors.New("no known tpacket versions work on this machine") + } + return nil +} + +// setUpRing sets up the shared-memory ring buffer between the user process and the kernel. +func (h *TPacket) setUpRing() (err error) { + totalSize := int(h.opts.framesPerBlock * h.opts.numBlocks * h.opts.frameSize) + switch h.tpVersion { + case TPacketVersion1, TPacketVersion2: + var tp C.struct_tpacket_req + tp.tp_block_size = C.uint(h.opts.blockSize) + tp.tp_block_nr = C.uint(h.opts.numBlocks) + tp.tp_frame_size = C.uint(h.opts.frameSize) + tp.tp_frame_nr = C.uint(h.opts.framesPerBlock * h.opts.numBlocks) + if err := setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_RX_RING, unsafe.Pointer(&tp), unsafe.Sizeof(tp)); err != nil { + return fmt.Errorf("setsockopt packet_rx_ring: %v", err) + } + case TPacketVersion3: + var tp C.struct_tpacket_req3 + tp.tp_block_size = C.uint(h.opts.blockSize) + tp.tp_block_nr = C.uint(h.opts.numBlocks) + tp.tp_frame_size = C.uint(h.opts.frameSize) + tp.tp_frame_nr = C.uint(h.opts.framesPerBlock * h.opts.numBlocks) + tp.tp_retire_blk_tov = C.uint(h.opts.blockTimeout / time.Millisecond) + if err := setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_RX_RING, unsafe.Pointer(&tp), unsafe.Sizeof(tp)); err != nil { + return fmt.Errorf("setsockopt packet_rx_ring v3: %v", err) + } + default: + return errors.New("invalid tpVersion") + } + h.ring, err = unix.Mmap(h.fd, 0, totalSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) + if err != nil { + return err + } + if h.ring == nil { + return errors.New("no ring") + } + h.rawring = unsafe.Pointer(&h.ring[0]) + return nil +} + +// Close cleans up the TPacket. It should not be used after the Close call. +func (h *TPacket) Close() { + if h.fd == -1 { + return // already closed. + } + if h.ring != nil { + unix.Munmap(h.ring) + } + h.ring = nil + unix.Close(h.fd) + h.fd = -1 + runtime.SetFinalizer(h, nil) +} + +// NewTPacket returns a new TPacket object for reading packets off the wire. +// Its behavior may be modified by passing in any/all of afpacket.Opt* to this +// function. +// If this function succeeds, the user should be sure to Close the returned +// TPacket when finished with it. +func NewTPacket(opts ...interface{}) (h *TPacket, err error) { + h = &TPacket{} + if h.opts, err = parseOptions(opts...); err != nil { + return nil, err + } + fd, err := unix.Socket(unix.AF_PACKET, int(h.opts.socktype), int(htons(unix.ETH_P_ALL))) + if err != nil { + return nil, err + } + h.fd = fd + if err = h.bindToInterface(h.opts.iface); err != nil { + goto errlbl + } + if err = h.setRequestedTPacketVersion(); err != nil { + goto errlbl + } + if err = h.setUpRing(); err != nil { + goto errlbl + } + // Clear stat counter from socket + if err = h.InitSocketStats(); err != nil { + goto errlbl + } + runtime.SetFinalizer(h, (*TPacket).Close) + return h, nil +errlbl: + h.Close() + return nil, err +} + +// SetBPF attaches a BPF filter to the underlying socket +func (h *TPacket) SetBPF(filter []bpf.RawInstruction) error { + var p unix.SockFprog + if len(filter) > int(^uint16(0)) { + return errors.New("filter too large") + } + p.Len = uint16(len(filter)) + p.Filter = (*unix.SockFilter)(unsafe.Pointer(&filter[0])) + + return setsockopt(h.fd, unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, unsafe.Pointer(&p), unix.SizeofSockFprog) +} + +func (h *TPacket) releaseCurrentPacket() error { + h.current.clearStatus() + h.offset++ + h.shouldReleasePacket = false + return nil +} + +// ZeroCopyReadPacketData reads the next packet off the wire, and returns its data. +// The slice returned by ZeroCopyReadPacketData points to bytes owned by the +// TPacket. Each call to ZeroCopyReadPacketData invalidates any data previously +// returned by ZeroCopyReadPacketData. Care must be taken not to keep pointers +// to old bytes when using ZeroCopyReadPacketData... if you need to keep data past +// the next time you call ZeroCopyReadPacketData, use ReadPacketData, which copies +// the bytes into a new buffer for you. +// tp, _ := NewTPacket(...) +// data1, _, _ := tp.ZeroCopyReadPacketData() +// // do everything you want with data1 here, copying bytes out of it if you'd like to keep them around. +// data2, _, _ := tp.ZeroCopyReadPacketData() // invalidates bytes in data1 +func (h *TPacket) ZeroCopyReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) { + h.mu.Lock() +retry: + if h.current == nil || !h.headerNextNeeded || !h.current.next() { + if h.shouldReleasePacket { + h.releaseCurrentPacket() + } + h.current = h.getTPacketHeader() + if err = h.pollForFirstPacket(h.current); err != nil { + h.headerNextNeeded = false + h.mu.Unlock() + return + } + // We received an empty block + if h.current.getLength() == 0 { + goto retry + } + } + data = h.current.getData() + ci.Timestamp = h.current.getTime() + ci.CaptureLength = len(data) + ci.Length = h.current.getLength() + ci.InterfaceIndex = h.current.getIfaceIndex() + atomic.AddInt64(&h.stats.Packets, 1) + h.headerNextNeeded = true + h.mu.Unlock() + + return +} + +// Stats returns statistics on the packets the TPacket has seen so far. +func (h *TPacket) Stats() (Stats, error) { + return Stats{ + Polls: atomic.LoadInt64(&h.stats.Polls), + Packets: atomic.LoadInt64(&h.stats.Packets), + }, nil +} + +// InitSocketStats clears socket counters and return empty stats. +func (h *TPacket) InitSocketStats() error { + if h.tpVersion == TPacketVersion3 { + socklen := unsafe.Sizeof(h.socketStatsV3) + slt := C.socklen_t(socklen) + var ssv3 SocketStatsV3 + + err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ssv3), uintptr(unsafe.Pointer(&slt))) + if err != nil { + return err + } + h.socketStatsV3 = SocketStatsV3{} + } else { + socklen := unsafe.Sizeof(h.socketStats) + slt := C.socklen_t(socklen) + var ss SocketStats + + err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ss), uintptr(unsafe.Pointer(&slt))) + if err != nil { + return err + } + h.socketStats = SocketStats{} + } + return nil +} + +// SocketStats saves stats from the socket to the TPacket instance. +func (h *TPacket) SocketStats() (SocketStats, SocketStatsV3, error) { + h.statsMu.Lock() + defer h.statsMu.Unlock() + // We need to save the counters since asking for the stats will clear them + if h.tpVersion == TPacketVersion3 { + socklen := unsafe.Sizeof(h.socketStatsV3) + slt := C.socklen_t(socklen) + var ssv3 SocketStatsV3 + + err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ssv3), uintptr(unsafe.Pointer(&slt))) + if err != nil { + return SocketStats{}, SocketStatsV3{}, err + } + + h.socketStatsV3.tp_packets += ssv3.tp_packets + h.socketStatsV3.tp_drops += ssv3.tp_drops + h.socketStatsV3.tp_freeze_q_cnt += ssv3.tp_freeze_q_cnt + return h.socketStats, h.socketStatsV3, nil + } + socklen := unsafe.Sizeof(h.socketStats) + slt := C.socklen_t(socklen) + var ss SocketStats + + err := getsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_STATISTICS, unsafe.Pointer(&ss), uintptr(unsafe.Pointer(&slt))) + if err != nil { + return SocketStats{}, SocketStatsV3{}, err + } + + h.socketStats.tp_packets += ss.tp_packets + h.socketStats.tp_drops += ss.tp_drops + return h.socketStats, h.socketStatsV3, nil +} + +// ReadPacketDataTo reads packet data into a user-supplied buffer. +// This function reads up to the length of the passed-in slice. +// The number of bytes read into data will be returned in ci.CaptureLength, +// which is the minimum of the size of the passed-in buffer and the size of +// the captured packet. +func (h *TPacket) ReadPacketDataTo(data []byte) (ci gopacket.CaptureInfo, err error) { + var d []byte + d, ci, err = h.ZeroCopyReadPacketData() + if err != nil { + return + } + ci.CaptureLength = copy(data, d) + return +} + +// ReadPacketData reads the next packet, copies it into a new buffer, and returns +// that buffer. Since the buffer is allocated by ReadPacketData, it is safe for long-term +// use. This implements gopacket.PacketDataSource. +func (h *TPacket) ReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) { + var d []byte + d, ci, err = h.ZeroCopyReadPacketData() + if err != nil { + return + } + data = make([]byte, len(d)) + copy(data, d) + return +} + +func (h *TPacket) getTPacketHeader() header { + switch h.tpVersion { + case TPacketVersion1: + if h.offset >= h.opts.framesPerBlock*h.opts.numBlocks { + h.offset = 0 + } + position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset) + return (*v1header)(unsafe.Pointer(position)) + case TPacketVersion2: + if h.offset >= h.opts.framesPerBlock*h.opts.numBlocks { + h.offset = 0 + } + position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset) + return (*v2header)(unsafe.Pointer(position)) + case TPacketVersion3: + // TPacket3 uses each block to return values, instead of each frame. Hence we need to rotate when we hit #blocks, not #frames. + if h.offset >= h.opts.numBlocks { + h.offset = 0 + } + position := uintptr(h.rawring) + uintptr(h.opts.frameSize*h.offset*h.opts.framesPerBlock) + h.v3 = initV3Wrapper(unsafe.Pointer(position)) + return &h.v3 + } + panic("handle tpacket version is invalid") +} + +func (h *TPacket) pollForFirstPacket(hdr header) error { + tm := int(h.opts.pollTimeout / time.Millisecond) + for hdr.getStatus()&C.TP_STATUS_USER == 0 { + h.pollset.Fd = int32(h.fd) + h.pollset.Events = unix.POLLIN + h.pollset.Revents = 0 + n, err := unix.Poll([]unix.PollFd{h.pollset}, tm) + if n == 0 { + return ErrTimeout + } + + atomic.AddInt64(&h.stats.Polls, 1) + if h.pollset.Revents&unix.POLLERR > 0 { + return ErrPoll + } + if err != nil { + return err + } + } + + h.shouldReleasePacket = true + return nil +} + +// FanoutType determines the type of fanout to use with a TPacket SetFanout call. +type FanoutType int + +// FanoutType values. +const ( + FanoutHash FanoutType = 0 + // It appears that defrag only works with FanoutHash, see: + // http://lxr.free-electrons.com/source/net/packet/af_packet.c#L1204 + FanoutHashWithDefrag FanoutType = 0x8000 + FanoutLoadBalance FanoutType = 1 + FanoutCPU FanoutType = 2 +) + +// SetFanout activates TPacket's fanout ability. +// Use of Fanout requires creating multiple TPacket objects and the same id/type to +// a SetFanout call on each. Note that this can be done cross-process, so if two +// different processes both call SetFanout with the same type/id, they'll share +// packets between them. The same should work for multiple TPacket objects within +// the same process. +func (h *TPacket) SetFanout(t FanoutType, id uint16) error { + h.mu.Lock() + defer h.mu.Unlock() + arg := C.int(t) << 16 + arg |= C.int(id) + return setsockopt(h.fd, unix.SOL_PACKET, unix.PACKET_FANOUT, unsafe.Pointer(&arg), unsafe.Sizeof(arg)) +} + +// WritePacketData transmits a raw packet. +func (h *TPacket) WritePacketData(pkt []byte) error { + _, err := unix.Write(h.fd, pkt) + return err +} |