aboutsummaryrefslogtreecommitdiffstats
path: root/test/packetdrill/socket.h
blob: 55e43b03c5f7945c4df022af6263b3bda2b48e29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
/*
 * Copyright 2013 Google Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
/*
 * Author: ncardwell@google.com (Neal Cardwell)
 *
 * Interface for tracking sockets in the kernel under test.
 */

#ifndef __SOCKET_H__
#define __SOCKET_H__

#include "types.h"

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include "config.h"
#include "fd_state.h"
#include "hash_map.h"
#include "logging.h"
#include "packet.h"

/* Every state a tracked socket can be in. The numeric values are the
 * same ones the compiler would assign implicitly (0, 1, 2, ...); they
 * are spelled out here only to make the numbering visible.
 */
enum socket_state_t {
	/* Not yet set up. */
	SOCKET_INIT			= 0,	/* uninitialized */
	SOCKET_NEW			= 1,	/* after socket() call */

	/* Passive (listening) side. */
	SOCKET_PASSIVE_LISTENING	= 2,	/* after listen() call */
	SOCKET_PASSIVE_PACKET_RECEIVED	= 3,	/* after receiving first packet */
	SOCKET_PASSIVE_SYNACK_SENT	= 4,	/* after sending SYNACK */
	SOCKET_PASSIVE_SYNACK_ACKED	= 5,	/* after server's SYN is ACKed */

	/* Active (connecting) side. */
	SOCKET_ACTIVE_CONNECTING	= 6,	/* after connect() call */
	SOCKET_ACTIVE_SYN_SENT		= 7,	/* after sending client's SYN */
	SOCKET_ACTIVE_SYN_ACKED		= 8,	/* after client's SYN is ACKed */
};

/* A TCP/UDP/IP address for an endpoint: one IP address plus one
 * transport-layer port. Used as each half of a struct tuple.
 */
struct endpoint {
	struct ip_address ip;		/* IP address */
	__be16 port;			/* TCP/UDP port (network order) */
};

/* The 4-tuple for a TCP/UDP/IP packet: source and destination
 * address/port pairs.
 *
 * is_equal_tuple() compares two of these with memcmp() over the whole
 * struct, so every byte of the object is significant, not just the
 * named fields.
 */
struct tuple {
	struct endpoint src;		/* sender of the packet */
	struct endpoint dst;		/* receiver of the packet */
};

/* The scripted or live aspects of socket state.
 * struct socket holds two of these: one for the values written in the
 * test script ("script") and one for the values seen at runtime ("live").
 */
struct socket_state {
	struct endpoint local;		/* local endpoint address */
	u32 local_isn;			/* initial TCP sequence (host order) */
	struct endpoint remote;		/* remote endpoint address */
	u32 remote_isn;			/* initial TCP sequence (host order) */
};

/* Flowlabel mapping between script and live: a single pair associating
 * the flowlabel value used in the script with the one used at runtime.
 * NOTE(review): presumably the IPv6 flow label; byte order is not
 * visible from this header -- confirm against the users of this struct.
 */
struct flowlabel_map {
	u32 flowlabel_script;		/* value as written in the script */
	u32 flowlabel_live;		/* corresponding live value */
};

/* The runtime state for a socket.
 *
 * A socket is one "flavor" of file descriptor: it embeds a struct
 * fd_state as its first member, which is what lets fd_to_socket() cast
 * a struct fd_state pointer to a struct socket pointer.
 */
struct socket {
	/* NOTE: struct fd_state must be first field in all fd flavors. */
	struct fd_state fd;		/* info about fd for this socket */

	enum socket_state_t state;	/* current state of socket */
	int address_family;		/* AF_INET or AF_INET6 */
	int type;			/* e.g. SOCK_STREAM, SOCK_DGRAM */
	int protocol;			/* IPPROTO_UDP or IPPROTO_TCP */

	/* The "canned" info from the test script */
	struct socket_state script;

	/* The "live" info at runtime while executing the test */
	struct socket_state live;

	/* We look at outgoing TCP timestamp values and learn the
	 * mapping between script values and live values. We store
	 * this mapping in a hash map mapping outgoing TCP timestamp
	 * values from scripted value to live value. Then we use this
	 * to map incoming TCP timestamp echo replies from their
	 * script value to their live value.
	 */
	struct hash_map *ts_val_map;

	/* Baseline to map TCP timestamp val from live to script space.
	 * NOTE(review): presumably found_first_tcp_ts becomes true once the
	 * four first_* values below have been recorded, and "actual" means
	 * the live value -- confirm against the code that fills these in.
	 */
	bool found_first_tcp_ts;
	u32 first_script_ts_val;
	u32 first_actual_ts_val;
	u32 first_script_ts_ecr;
	u32 first_actual_ts_ecr;

	/* We remember the last inbound/outbound TCP header so we can send a
	 * RST packet that the kernel will accept for this socket, in
	 * order to induce the kernel to free the socket.
	 * NOTE(review): last_injected_tcp_payload_len is presumably the
	 * payload length of that last injected (inbound) segment -- confirm.
	 */
	struct tcp last_outbound_tcp_header;
	struct tcp last_injected_tcp_header;
	u32 last_injected_tcp_payload_len;

	/* flowlabel mapping */
	struct flowlabel_map flowlabel_map;
};

/* Downcast a generic fd to the socket that contains it.
 *
 * Returns NULL when fd is NULL or when fd's ops report a type other
 * than FD_SOCKET. The cast is valid because struct fd_state is the
 * first member of struct socket.
 */
static inline struct socket *fd_to_socket(struct fd_state *fd)
{
	if (fd == NULL || fd->ops->type != FD_SOCKET)
		return NULL;

	return (struct socket *)fd;
}

/* Forward declaration: this header only passes struct state by pointer,
 * so the full definition is not needed here.
 */
struct state;

/* Allocate and return a new socket object.
 * Defined outside this header.
 * NOTE(review): presumably the result is released with socket_free()
 * -- confirm against the implementation.
 */
extern struct socket *socket_new(struct state *state);

/* Deallocate a socket.
 * Defined outside this header.
 */
extern void socket_free(struct socket *socket);

/* Get the tuple we expect to see in outbound packets from this socket. */
static inline void socket_get_outbound(
	const struct socket_state *socket_state, struct tuple *tuple)
{
	memset(tuple, 0, sizeof(*tuple));
	tuple->src = socket_state->local;
	tuple->dst = socket_state->remote;
}

/* Get the tuple we expect to see in inbound packets from this socket. */
static inline void socket_get_inbound(
	const struct socket_state *socket_state, struct tuple *tuple)
{
	memset(tuple, 0, sizeof(*tuple));
	tuple->src = socket_state->remote;
	tuple->dst = socket_state->local;
}

/* Return true iff the two tuples are equal. */
static inline bool is_equal_tuple(const struct tuple *a,
				  const struct tuple *b)
{
	return memcmp(a, b, sizeof(*a)) == 0;
}

/* Fill in the *dst_tuple with the tuple for packet flow in the
 * direction opposite that of *src_tuple
 */
static inline void reverse_tuple(const struct tuple *src_tuple,
				 struct tuple *dst_tuple)
{
	dst_tuple->src.ip	= src_tuple->dst.ip;
	dst_tuple->dst.ip	= src_tuple->src.ip;
	dst_tuple->src.port	= src_tuple->dst.port;
	dst_tuple->dst.port	= src_tuple->src.port;
}

/* Extract the address/port 4-tuple from a packet into *tuple.
 *
 * The output is zeroed first. Addresses come from the IPv4 header if
 * present, otherwise from the IPv6 header; a packet with neither trips
 * the assertion. Ports come from the TCP header if present, otherwise
 * from the UDP header; with neither, the ports stay zero.
 */
static inline void get_packet_tuple(const struct packet *packet,
				    struct tuple *tuple)
{
	memset(tuple, 0, sizeof(struct tuple));

	/* Network layer: addresses. */
	if (packet->ipv4) {
		ip_from_ipv4(&packet->ipv4->src_ip, &tuple->src.ip);
		ip_from_ipv4(&packet->ipv4->dst_ip, &tuple->dst.ip);
	} else if (packet->ipv6) {
		ip_from_ipv6(&packet->ipv6->src_ip, &tuple->src.ip);
		ip_from_ipv6(&packet->ipv6->dst_ip, &tuple->dst.ip);
	} else {
		assert(!"bad IP version in packet");
	}

	/* Transport layer: ports. TCP takes precedence over UDP. */
	if (packet->tcp) {
		tuple->src.port	= packet->tcp->src_port;
		tuple->dst.port	= packet->tcp->dst_port;
		return;
	}
	if (packet->udp) {
		tuple->src.port	= packet->udp->src_port;
		tuple->dst.port	= packet->udp->dst_port;
	}
}

/* Set the tuple inside some TCP/IPv4 or TCP/IPv6 headers. */
static inline void set_headers_tuple(struct ipv4 *ipv4,
				     struct ipv6 *ipv6,
				     struct tcp *tcp,
				     struct udp *udp,
				     const struct tuple *tuple)
{
	if (ipv4 != NULL) {
		ip_to_ipv4(&tuple->src.ip, &ipv4->src_ip);
		ip_to_ipv4(&tuple->dst.ip, &ipv4->dst_ip);
	} else if (ipv6 != NULL) {
		ip_to_ipv6(&tuple->src.ip, &ipv6->src_ip);
		ip_to_ipv6(&tuple->dst.ip, &ipv6->dst_ip);
	} else {
		assert(!"bad IP version in packet");
	}
	if (tcp != NULL) {
		tcp->src_port = tuple->src.port;
		tcp->dst_port = tuple->dst.port;
	} else if (udp != NULL) {
		udp->src_port = tuple->src.port;
		udp->dst_port = tuple->dst.port;
	}
}

/* Set the tuple for a packet header echoed inside an ICMPv4/ICMPv6 message. */
static inline void set_icmp_echoed_tuple(struct packet *packet,
					 const struct tuple *tuple)
{
	/* All currently supported ICMP message types include a copy
	 * of the outbound IP header and the first few bytes inside,
	 * which so far always means the first ICMP_ECHO_BYTES of
	 * TCP header.
	 */
	DEBUGP("set_icmp_echoed_tuple");

	/* Flip the direction of the tuple, since the ICMP message is
	 * flowing in the direction opposite that of the echoed TCP/IP
	 * packet, and then fill in the fields of the echoed packet.
	 */
	struct tuple echoed_tuple;
	reverse_tuple(tuple, &echoed_tuple);
	set_headers_tuple(packet_echoed_ipv4_header(packet),
			  packet_echoed_ipv6_header(packet),
			  packet_echoed_tcp_header(packet),
			  packet_echoed_udp_header(packet),
			  &echoed_tuple);
}

/* Write a tuple into a packet's own IP and TCP/UDP headers and, when the
 * packet carries an ICMPv4 or ICMPv6 header, into the headers echoed
 * inside the ICMP message as well.
 */
static inline void set_packet_tuple(struct packet *packet,
				    const struct tuple *tuple)
{
	set_headers_tuple(packet->ipv4, packet->ipv6,
			  packet->tcp, packet->udp, tuple);

	/* Nothing more to do unless this is an ICMP packet. */
	if (packet->icmpv4 == NULL && packet->icmpv6 == NULL)
		return;

	set_icmp_echoed_tuple(packet, tuple);
}


/* Helpers for translating between script and live sequence numbers.
 *
 * We try to interpret sequence numbers in scripts in
 * a manner that is similar to tcpdump output: sequence numbers and
 * ACK numbers in all packets with the SYN flag set are absolute, and
 * for other packets the sequence numbers and ACK numbers are relative
 * to the first SYN.
 *
 * Using this approach has several advantages:
 *
 * o tcpdump output may be more easily converted into packetdrill scripts.
 *
 * o we follow the principle of least surprise: it's basically what
 *   tcpdump does, so users should be more used to that and thus it
 *   should lead to fewer bugs and it should require less
 *   documentation.
 *
 * o it gives convenience and expressiveness in allowing arbitrary
 *   ISNs without requiring a command line argument, so tests can be
 *   more self-contained.
 *
 * The code below differs between the remote and local cases because the
 * packetdrill tool gets to pick the live ISN for remote packets but the
 * local kernel under test always gets to pick its live ISN.
 */

/* Offset to add to a scripted remote sequence number to get the live
 * one: 0 for a SYN packet, otherwise the live remote ISN.
 */
static inline u32 remote_seq_script_to_live_offset(struct socket *socket,
						   bool is_syn)
{
	if (is_syn)
		return 0;

	return socket->live.remote_isn;
}

/* Offset to add to a live remote sequence number to get the scripted
 * one: the negation of the script-to-live offset.
 */
static inline u32 remote_seq_live_to_script_offset(struct socket *socket,
						   bool is_syn)
{
	const u32 forward = remote_seq_script_to_live_offset(socket, is_syn);

	return -forward;
}

/* Offset to add to a scripted local sequence number to get the live
 * one: for a SYN packet, the difference between the live and scripted
 * local ISNs; otherwise the live local ISN.
 */
static inline u32 local_seq_script_to_live_offset(struct socket *socket,
						  bool is_syn)
{
	if (!is_syn)
		return socket->live.local_isn;

	return socket->live.local_isn - socket->script.local_isn;
}

/* Offset to add to a live local sequence number to get the scripted
 * one: the negation of the script-to-live offset.
 */
static inline u32 local_seq_live_to_script_offset(struct socket *socket,
						  bool is_syn)
{
	const u32 forward = local_seq_script_to_live_offset(socket, is_syn);

	return -forward;
}

#endif /* __SOCKET_H__ */