summaryrefslogtreecommitdiffstats
path: root/drivers/net/tap/tap_bpf_program.c
blob: 1cb73822f3990d06415caae092714fd9cf42811d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <asm/types.h>
#include <linux/in.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_tunnel.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include "tap_rss.h"

/** Create IPv4 address */
#define IPv4(a, b, c, d) ((__u32)(((a) & 0xff) << 24) | \
		(((b) & 0xff) << 16) | \
		(((c) & 0xff) << 8)  | \
		((d) & 0xff))

#define PORT(a, b) ((__u16)(((a) & 0xff) << 8) | \
		((b) & 0xff))

/*
 * The queue number is offset by a unique QUEUE_OFFSET, to distinguish
 * packets that have gone through this rule (skb->cb[1] != 0) from others.
 */
#define QUEUE_OFFSET		0x7cafe800
#define PIN_GLOBAL_NS		2

#define KEY_IDX			0
#define BPF_MAP_ID_KEY	1

struct vlan_hdr {
	__be16 proto;
	__be16 tci;
};

struct bpf_elf_map __attribute__((section("maps"), used))
map_keys = {
	.type           =       BPF_MAP_TYPE_HASH,
	.id             =       BPF_MAP_ID_KEY,
	.size_key       =       sizeof(__u32),
	.size_value     =       sizeof(struct rss_key),
	.max_elem       =       256,
	.pinning        =       PIN_GLOBAL_NS,
};

__section("cls_q") int
match_q(struct __sk_buff *skb)
{
	__u32 queue = skb->cb[1];
	volatile __u32 q = 0xdeadbeef;
	__u32 match_queue = QUEUE_OFFSET + q;

	/* printt("match_q$i() queue = %d\n", queue); */

	if (queue != match_queue)
		return TC_ACT_OK;

	/* queue match */
	skb->cb[1] = 0;
	return TC_ACT_UNSPEC;
}


struct ipv4_l3_l4_tuple {
	__u32    src_addr;
	__u32    dst_addr;
	__u16    dport;
	__u16    sport;
} __attribute__((packed));

struct ipv6_l3_l4_tuple {
	__u8        src_addr[16];
	__u8        dst_addr[16];
	__u16       dport;
	__u16       sport;
} __attribute__((packed));

static const __u8 def_rss_key[TAP_RSS_HASH_KEY_SIZE] = {
	0xd1, 0x81, 0xc6, 0x2c,
	0xf7, 0xf4, 0xdb, 0x5b,
	0x19, 0x83, 0xa2, 0xfc,
	0x94, 0x3e, 0x1a, 0xdb,
	0xd9, 0x38, 0x9e, 0x6b,
	0xd1, 0x03, 0x9c, 0x2c,
	0xa7, 0x44, 0x99, 0xad,
	0x59, 0x3d, 0x56, 0xd9,
	0xf3, 0x25, 0x3c, 0x06,
	0x2a, 0xdc, 0x1f, 0xfc,
};

static __u32  __attribute__((always_inline))
rte_softrss_be(const __u32 *input_tuple, const uint8_t *rss_key,
		__u8 input_len)
{
	__u32 i, j, hash = 0;
#pragma unroll
	for (j = 0; j < input_len; j++) {
#pragma unroll
		for (i = 0; i < 32; i++) {
			if (input_tuple[j] & (1 << (31 - i))) {
				hash ^= ((const __u32 *)def_rss_key)[j] << i |
				(__u32)((uint64_t)
				(((const __u32 *)def_rss_key)[j + 1])
					>> (32 - i));
			}
		}
	}
	return hash;
}

static int __attribute__((always_inline))
rss_l3_l4(struct __sk_buff *skb)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	__u16 proto = (__u16)skb->protocol;
	__u32 key_idx = 0xdeadbeef;
	__u32 hash;
	struct rss_key *rsskey;
	__u64 off = ETH_HLEN;
	int j;
	__u8 *key = 0;
	__u32 len;
	__u32 queue = 0;

	rsskey = map_lookup_elem(&map_keys, &key_idx);
	if (!rsskey) {
		printt("hash(): rss key is not configured\n");
		return TC_ACT_OK;
	}
	key = (__u8 *)rsskey->key;

	/* Get correct proto for 802.1ad */
	if (skb->vlan_present && skb->vlan_proto == htons(ETH_P_8021AD)) {
		if (data + ETH_ALEN * 2 + sizeof(struct vlan_hdr) +
		    sizeof(proto) > data_end)
			return TC_ACT_OK;
		proto = *(__u16 *)(data + ETH_ALEN * 2 +
				   sizeof(struct vlan_hdr));
		off += sizeof(struct vlan_hdr);
	}

	if (proto == htons(ETH_P_IP)) {
		if (data + off + sizeof(struct iphdr) + sizeof(__u32)
			> data_end)
			return TC_ACT_OK;

		__u8 *src_dst_addr = data + off + offsetof(struct iphdr, saddr);
		__u8 *src_dst_port = data + off + sizeof(struct iphdr);
		struct ipv4_l3_l4_tuple v4_tuple = {
			.src_addr = IPv4(*(src_dst_addr + 0),
					*(src_dst_addr + 1),
					*(src_dst_addr + 2),
					*(src_dst_addr + 3)),
			.dst_addr = IPv4(*(src_dst_addr + 4),
					*(src_dst_addr + 5),
					*(src_dst_addr + 6),
					*(src_dst_addr + 7)),
			.sport = PORT(*(src_dst_port + 0),
					*(src_dst_port + 1)),
			.dport = PORT(*(src_dst_port + 2),
					*(src_dst_port + 3)),
		};
		__u8 input_len = sizeof(v4_tuple) / sizeof(__u32);
		if (rsskey->hash_fields & (1 << HASH_FIELD_IPV4_L3))
			input_len--;
		hash = rte_softrss_be((__u32 *)&v4_tuple, key, 3);
	} else if (proto == htons(ETH_P_IPV6)) {
		if (data + off + sizeof(struct ipv6hdr) +
					sizeof(__u32) > data_end)
			return TC_ACT_OK;
		__u8 *src_dst_addr = data + off +
					offsetof(struct ipv6hdr, saddr);
		__u8 *src_dst_port = data + off +
					sizeof(struct ipv6hdr);
		struct ipv6_l3_l4_tuple v6_tuple;
		for (j = 0; j < 4; j++)
			*((uint32_t *)&v6_tuple.src_addr + j) =
				__builtin_bswap32(*((uint32_t *)
						src_dst_addr + j));
		for (j = 0; j < 4; j++)
			*((uint32_t *)&v6_tuple.dst_addr + j) =
				__builtin_bswap32(*((uint32_t *)
						src_dst_addr + 4 + j));
		v6_tuple.sport = PORT(*(src_dst_port + 0),
			      *(src_dst_port + 1));
		v6_tuple.dport = PORT(*(src_dst_port + 2),
			      *(src_dst_port + 3));

		__u8 input_len = sizeof(v6_tuple) / sizeof(__u32);
		if (rsskey->hash_fields & (1 << HASH_FIELD_IPV6_L3))
			input_len--;
		hash = rte_softrss_be((__u32 *)&v6_tuple, key, 9);
	} else {
		return TC_ACT_PIPE;
	}

	queue = rsskey->queues[(hash % rsskey->nb_queues) &
				       (TAP_MAX_QUEUES - 1)];
	skb->cb[1] = QUEUE_OFFSET + queue;
	/* printt(">>>>> rss_l3_l4 hash=0x%x queue=%u\n", hash, queue); */

	return TC_ACT_RECLASSIFY;
}

#define RSS(L)						\
	__section(#L) int				\
		L ## _hash(struct __sk_buff *skb)	\
	{						\
		return rss_ ## L (skb);			\
	}

RSS(l3_l4)

BPF_LICENSE("Dual BSD/GPL");