diff options
author | Alexander Chernavin <achernavin@netgate.com> | 2023-03-29 16:09:37 +0000 |
---|---|---|
committer | Matthew Smith <mgsmith@netgate.com> | 2023-06-02 14:41:53 +0000 |
commit | f2b6edb149a32f455ce3ee13aff1acd0a3c2ab1d (patch) | |
tree | f5e5671d46cf901aab4e6eeb5239b164ddac02b2 | |
parent | b1239c48871a3ff3ab6bd6df71d922898a935c4d (diff) |
wireguard: add support for chained buffers
Type: feature
With this change, packets that are larger than what a single buffer can
hold will be able to be sent and received over a Wireguard tunnel. Also,
cover this with tests.
Signed-off-by: Alexander Chernavin <achernavin@netgate.com>
Change-Id: Ifaf7325676d728580097bc389b51a9be39e44d88
-rw-r--r-- | src/plugins/wireguard/wireguard.h | 3 | ||||
-rw-r--r-- | src/plugins/wireguard/wireguard_input.c | 194 | ||||
-rw-r--r-- | src/plugins/wireguard/wireguard_output_tun.c | 235 | ||||
-rw-r--r-- | test/test_wireguard.py | 227 |
4 files changed, 580 insertions, 79 deletions
diff --git a/src/plugins/wireguard/wireguard.h b/src/plugins/wireguard/wireguard.h index 3a6248ba6b5..05cefc4f073 100644 --- a/src/plugins/wireguard/wireguard.h +++ b/src/plugins/wireguard/wireguard.h @@ -31,9 +31,12 @@ typedef struct wg_per_thread_data_t_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); vnet_crypto_op_t *crypto_ops; + vnet_crypto_op_t *chained_crypto_ops; + vnet_crypto_op_chunk_t *chunks; vnet_crypto_async_frame_t **async_frames; u8 data[WG_DEFAULT_DATA_SIZE]; } wg_per_thread_data_t; + typedef struct { /* convenience */ diff --git a/src/plugins/wireguard/wireguard_input.c b/src/plugins/wireguard/wireguard_input.c index 6b8c803c97d..db37fa54175 100644 --- a/src/plugins/wireguard/wireguard_input.c +++ b/src/plugins/wireguard/wireguard_input.c @@ -34,7 +34,7 @@ _ (HANDSHAKE_RECEIVE, "Failed while receiving Handshake") \ _ (COOKIE_DECRYPTION, "Failed during Cookie decryption") \ _ (COOKIE_SEND, "Failed during sending Cookie") \ - _ (TOO_BIG, "Packet too big") \ + _ (NO_BUFFERS, "No buffers") \ _ (UNDEFINED, "Undefined error") \ _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") @@ -340,6 +340,7 @@ wg_input_post_process (vlib_main_t *vm, vlib_buffer_t *b, u16 *next, { next[0] = WG_INPUT_NEXT_PUNT; noise_keypair_t *kp; + vlib_buffer_t *lb; if ((kp = wg_get_active_keypair (&peer->remote, data->receiver_index)) == NULL) @@ -350,11 +351,16 @@ wg_input_post_process (vlib_main_t *vm, vlib_buffer_t *b, u16 *next, return -1; } - u16 encr_len = b->current_length - sizeof (message_data_t); + lb = b; + /* Find last buffer in the chain */ + while (lb->flags & VLIB_BUFFER_NEXT_PRESENT) + lb = vlib_get_buffer (vm, lb->next_buffer); + + u16 encr_len = vlib_buffer_length_in_chain (vm, b) - sizeof (message_data_t); u16 decr_len = encr_len - NOISE_AUTHTAG_LEN; vlib_buffer_advance (b, sizeof (message_data_t)); - b->current_length = decr_len; + vlib_buffer_chain_increase_length (b, lb, -NOISE_AUTHTAG_LEN); vnet_buffer_offload_flags_clear (b, 
VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); /* Keepalive packet has zero length */ @@ -433,9 +439,75 @@ wg_input_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node, } } +static_always_inline void +wg_input_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_crypto_op_t *ops, vlib_buffer_t *b[], + u16 *nexts, vnet_crypto_op_chunk_t *chunks, + u16 drop_next) +{ + u32 n_fail, n_ops = vec_len (ops); + vnet_crypto_op_t *op = ops; + + if (n_ops == 0) + return; + + n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops); + + while (n_fail) + { + ASSERT (op - ops < n_ops); + + if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED) + { + u32 bi = op->user_data; + b[bi]->error = node->errors[WG_INPUT_ERROR_DECRYPTION]; + nexts[bi] = drop_next; + n_fail--; + } + op++; + } +} + +static_always_inline void +wg_input_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd, + vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start, + u32 start_len, u16 *n_ch) +{ + vnet_crypto_op_chunk_t *ch; + vlib_buffer_t *cb = b; + u32 n_chunks = 1; + + vec_add2 (ptd->chunks, ch, 1); + ch->len = start_len; + ch->src = ch->dst = start; + cb = vlib_get_buffer (vm, cb->next_buffer); + + while (1) + { + vec_add2 (ptd->chunks, ch, 1); + n_chunks += 1; + if (lb == cb) + ch->len = cb->current_length - NOISE_AUTHTAG_LEN; + else + ch->len = cb->current_length; + + ch->src = ch->dst = vlib_buffer_get_current (cb); + + if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + + cb = vlib_get_buffer (vm, cb->next_buffer); + } + + if (n_ch) + *n_ch = n_chunks; +} + always_inline void -wg_prepare_sync_dec_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, - u8 *src, u32 src_len, u8 *dst, u8 *aad, u32 aad_len, +wg_prepare_sync_dec_op (vlib_main_t *vm, wg_per_thread_data_t *ptd, + vlib_buffer_t *b, vlib_buffer_t *lb, + vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len, + u8 *dst, u8 *aad, u32 aad_len, vnet_crypto_key_index_t key_index, u32 bi, u8 *iv) { vnet_crypto_op_t _op, *op = &_op; @@ 
-445,16 +517,28 @@ wg_prepare_sync_dec_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC); op->tag_len = NOISE_AUTHTAG_LEN; - op->tag = src + src_len; - op->src = !src ? src_ : src; - op->len = src_len; - op->dst = dst; + op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN; op->key_index = key_index; op->aad = aad; op->aad_len = aad_len; op->iv = iv; op->user_data = bi; op->flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK; + + if (b != lb) + { + /* Chained buffers */ + op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + op->chunk_index = vec_len (ptd->chunks); + wg_input_chain_crypto (vm, ptd, b, lb, src, src_len + NOISE_AUTHTAG_LEN, + &op->n_chunks); + } + else + { + op->src = !src ? src_ : src; + op->len = src_len; + op->dst = dst; + } } static_always_inline void @@ -485,10 +569,10 @@ static_always_inline enum noise_state_crypt wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd, vnet_crypto_op_t **crypto_ops, vnet_crypto_async_frame_t **async_frame, vlib_buffer_t *b, - u32 buf_idx, noise_remote_t *r, uint32_t r_idx, - uint64_t nonce, uint8_t *src, size_t srclen, uint8_t *dst, - u32 from_idx, u8 *iv, f64 time, u8 is_async, - u16 async_next_node) + vlib_buffer_t *lb, u32 buf_idx, noise_remote_t *r, + uint32_t r_idx, uint64_t nonce, uint8_t *src, size_t srclen, + size_t srclen_total, uint8_t *dst, u32 from_idx, u8 *iv, + f64 time, u8 is_async, u16 async_next_node) { noise_keypair_t *kp; enum noise_state_crypt ret = SC_FAILED; @@ -516,6 +600,12 @@ wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd, if (is_async) { + u8 flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK; + u8 *tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN; + + if (b != lb) + flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + if (NULL == *async_frame || vnet_crypto_async_frame_is_full (*async_frame)) { @@ -525,14 +615,14 @@ wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd, vec_add1 (ptd->async_frames, *async_frame); } - 
wg_input_add_to_frame (vm, *async_frame, kp->kp_recv_index, srclen, - src - b->data, buf_idx, async_next_node, iv, - src + srclen, VNET_CRYPTO_OP_FLAG_HMAC_CHECK); + wg_input_add_to_frame (vm, *async_frame, kp->kp_recv_index, srclen_total, + src - b->data, buf_idx, async_next_node, iv, tag, + flags); } else { - wg_prepare_sync_dec_op (vm, crypto_ops, src, srclen, dst, NULL, 0, - kp->kp_recv_index, from_idx, iv); + wg_prepare_sync_dec_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst, + NULL, 0, kp->kp_recv_index, from_idx, iv); } /* If we've received the handshake confirming data packet then move the @@ -605,8 +695,9 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 n_left_from = frame->n_vectors; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + vlib_buffer_t *lb; u32 thread_index = vm->thread_index; - vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops; + vnet_crypto_op_t **crypto_ops; const u16 drop_next = WG_INPUT_NEXT_PUNT; message_type_t header_type; vlib_buffer_t *data_bufs[VLIB_FRAME_SIZE]; @@ -620,6 +711,8 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_get_buffers (vm, from, bufs, n_left_from); vec_reset_length (ptd->crypto_ops); + vec_reset_length (ptd->chained_crypto_ops); + vec_reset_length (ptd->chunks); vec_reset_length (ptd->async_frames); f64 time = clib_time_now (&vm->clib_time) + vm->time_offset; @@ -655,6 +748,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, message_data_t *data = vlib_buffer_get_current (b[0]); u8 *iv_data = b[0]->pre_data; u32 buf_idx = from[b - bufs]; + u32 n_bufs; peer_idx = wg_index_table_lookup (&wmp->index_table, data->receiver_index); @@ -701,21 +795,63 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, goto next; } - u16 encr_len = b[0]->current_length - sizeof (message_data_t); - u16 decr_len = encr_len - NOISE_AUTHTAG_LEN; - if (PREDICT_FALSE (decr_len >= WG_DEFAULT_DATA_SIZE)) + lb = b[0]; + n_bufs = vlib_buffer_chain_linearize (vm, b[0]); + if 
(n_bufs == 0) { - b[0]->error = node->errors[WG_INPUT_ERROR_TOO_BIG]; + other_next[n_other] = WG_INPUT_NEXT_ERROR; + b[0]->error = node->errors[WG_INPUT_ERROR_NO_BUFFERS]; other_bi[n_other] = buf_idx; n_other += 1; goto out; } - enum noise_state_crypt state_cr = wg_input_process ( - vm, ptd, crypto_ops, &async_frame, b[0], buf_idx, &peer->remote, - data->receiver_index, data->counter, data->encrypted_data, - decr_len, data->encrypted_data, n_data, iv_data, time, is_async, - async_next_node); + if (n_bufs > 1) + { + vlib_buffer_t *before_last = b[0]; + + /* Find last and before last buffer in the chain */ + while (lb->flags & VLIB_BUFFER_NEXT_PRESENT) + { + before_last = lb; + lb = vlib_get_buffer (vm, lb->next_buffer); + } + + /* Ensure auth tag is contiguous and not splitted into two last + * buffers */ + if (PREDICT_FALSE (lb->current_length < NOISE_AUTHTAG_LEN)) + { + u32 len_diff = NOISE_AUTHTAG_LEN - lb->current_length; + + before_last->current_length -= len_diff; + if (before_last == b[0]) + before_last->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + + vlib_buffer_advance (lb, (signed) -len_diff); + + clib_memcpy_fast (vlib_buffer_get_current (lb), + vlib_buffer_get_tail (before_last), + len_diff); + } + } + + u16 encr_len = b[0]->current_length - sizeof (message_data_t); + u16 decr_len = encr_len - NOISE_AUTHTAG_LEN; + u16 encr_len_total = + vlib_buffer_length_in_chain (vm, b[0]) - sizeof (message_data_t); + u16 decr_len_total = encr_len_total - NOISE_AUTHTAG_LEN; + + if (lb != b[0]) + crypto_ops = &ptd->chained_crypto_ops; + else + crypto_ops = &ptd->crypto_ops; + + enum noise_state_crypt state_cr = + wg_input_process (vm, ptd, crypto_ops, &async_frame, b[0], lb, + buf_idx, &peer->remote, data->receiver_index, + data->counter, data->encrypted_data, decr_len, + decr_len_total, data->encrypted_data, n_data, + iv_data, time, is_async, async_next_node); if (PREDICT_FALSE (state_cr == SC_FAILED)) { @@ -796,6 +932,8 @@ wg_input_inline (vlib_main_t *vm, 
vlib_node_runtime_t *node, /* decrypt packets */ wg_input_process_ops (vm, node, ptd->crypto_ops, data_bufs, data_nexts, drop_next); + wg_input_process_chained_ops (vm, node, ptd->chained_crypto_ops, data_bufs, + data_nexts, ptd->chunks, drop_next); /* process after decryption */ b = data_bufs; diff --git a/src/plugins/wireguard/wireguard_output_tun.c b/src/plugins/wireguard/wireguard_output_tun.c index 4d85a5912d6..4ff1621b4a3 100644 --- a/src/plugins/wireguard/wireguard_output_tun.c +++ b/src/plugins/wireguard/wireguard_output_tun.c @@ -25,7 +25,7 @@ _ (NONE, "No error") \ _ (PEER, "Peer error") \ _ (KEYPAIR, "Keypair error") \ - _ (TOO_BIG, "packet too big") \ + _ (NO_BUFFERS, "No buffers") \ _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") typedef enum @@ -115,10 +115,46 @@ format_wg_output_tun_post_trace (u8 *s, va_list *args) } static_always_inline void -wg_prepare_sync_enc_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, - u8 *src, u32 src_len, u8 *dst, u8 *aad, u32 aad_len, - u64 nonce, vnet_crypto_key_index_t key_index, u32 bi, - u8 *iv) +wg_output_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd, + vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start, + u32 start_len, u16 *n_ch) +{ + vnet_crypto_op_chunk_t *ch; + vlib_buffer_t *cb = b; + u32 n_chunks = 1; + + vec_add2 (ptd->chunks, ch, 1); + ch->len = start_len; + ch->src = ch->dst = start; + cb = vlib_get_buffer (vm, cb->next_buffer); + + while (1) + { + vec_add2 (ptd->chunks, ch, 1); + n_chunks += 1; + if (lb == cb) + ch->len = cb->current_length - NOISE_AUTHTAG_LEN; + else + ch->len = cb->current_length; + + ch->src = ch->dst = vlib_buffer_get_current (cb); + + if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + + cb = vlib_get_buffer (vm, cb->next_buffer); + } + + if (n_ch) + *n_ch = n_chunks; +} + +static_always_inline void +wg_prepare_sync_enc_op (vlib_main_t *vm, wg_per_thread_data_t *ptd, + vlib_buffer_t *b, vlib_buffer_t *lb, + vnet_crypto_op_t **crypto_ops, u8 *src, u32 
src_len, + u8 *dst, u8 *aad, u32 aad_len, u64 nonce, + vnet_crypto_key_index_t key_index, u32 bi, u8 *iv) { vnet_crypto_op_t _op, *op = &_op; u8 src_[] = {}; @@ -130,15 +166,55 @@ wg_prepare_sync_enc_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC); op->tag_len = NOISE_AUTHTAG_LEN; - op->tag = dst + src_len; - op->src = !src ? src_ : src; - op->len = src_len; - op->dst = dst; + op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN; op->key_index = key_index; op->aad = aad; op->aad_len = aad_len; op->iv = iv; op->user_data = bi; + + if (b != lb) + { + /* Chained buffers */ + op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + op->chunk_index = vec_len (ptd->chunks); + wg_output_chain_crypto (vm, ptd, b, lb, src, src_len, &op->n_chunks); + } + else + { + op->src = !src ? src_ : src; + op->len = src_len; + op->dst = dst; + } +} + +static_always_inline void +wg_output_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_crypto_op_t *ops, vlib_buffer_t *b[], + u16 *nexts, vnet_crypto_op_chunk_t *chunks, + u16 drop_next) +{ + u32 n_fail, n_ops = vec_len (ops); + vnet_crypto_op_t *op = ops; + + if (n_ops == 0) + return; + + n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops); + + while (n_fail) + { + ASSERT (op - ops < n_ops); + + if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED) + { + u32 bi = op->user_data; + b[bi]->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR]; + nexts[bi] = drop_next; + n_fail--; + } + op++; + } } static_always_inline void @@ -194,10 +270,11 @@ wg_output_tun_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f, } static_always_inline enum noise_state_crypt -wq_output_tun_process (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, - noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce, - uint8_t *src, size_t srclen, uint8_t *dst, u32 bi, - u8 *iv, f64 time) +wg_output_tun_process (vlib_main_t *vm, wg_per_thread_data_t *ptd, + 
vlib_buffer_t *b, vlib_buffer_t *lb, + vnet_crypto_op_t **crypto_ops, noise_remote_t *r, + uint32_t *r_idx, uint64_t *nonce, uint8_t *src, + size_t srclen, uint8_t *dst, u32 bi, u8 *iv, f64 time) { noise_keypair_t *kp; enum noise_state_crypt ret = SC_FAILED; @@ -223,8 +300,8 @@ wq_output_tun_process (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, * are passed back out to the caller through the provided data pointer. */ *r_idx = kp->kp_remote_index; - wg_prepare_sync_enc_op (vm, crypto_ops, src, srclen, dst, NULL, 0, *nonce, - kp->kp_send_index, bi, iv); + wg_prepare_sync_enc_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst, NULL, + 0, *nonce, kp->kp_send_index, bi, iv); /* If our values are still within tolerances, but we are approaching * the tolerances, we notify the caller with ESTALE that they should @@ -247,12 +324,14 @@ error: static_always_inline enum noise_state_crypt wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd, vnet_crypto_async_frame_t **async_frame, - vlib_buffer_t *b, u8 *payload, u32 payload_len, u32 bi, - u16 next, u16 async_next, noise_remote_t *r, - uint32_t *r_idx, uint64_t *nonce, u8 *iv, f64 time) + vlib_buffer_t *b, vlib_buffer_t *lb, u8 *payload, + u32 payload_len, u32 bi, u16 next, u16 async_next, + noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce, + u8 *iv, f64 time) { wg_post_data_t *post = wg_post_data (b); u8 flag = 0; + u8 *tag; noise_keypair_t *kp; post->next_index = next; @@ -293,10 +372,15 @@ wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd, vec_add1 (ptd->async_frames, *async_frame); } + if (b != lb) + flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + + tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN; + /* this always succeeds because we know the frame is not full */ wg_output_tun_add_to_frame (vm, *async_frame, kp->kp_send_index, payload_len, - payload - b->data, bi, async_next, iv, - payload + payload_len, flag); + payload - b->data, bi, async_next, iv, tag, + flag); /* If our values are 
still within tolerances, but we are approaching * the tolerances, we notify the caller with ESTALE that they should @@ -346,7 +430,8 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, ip6_udp_wg_header_t *hdr6_out = NULL; message_data_t *message_data_wg = NULL; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops; + vlib_buffer_t *lb; + vnet_crypto_op_t **crypto_ops; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE]; u32 thread_index = vm->thread_index; @@ -362,6 +447,8 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_get_buffers (vm, from, bufs, n_left_from); vec_reset_length (ptd->crypto_ops); + vec_reset_length (ptd->chained_crypto_ops); + vec_reset_length (ptd->chunks); vec_reset_length (ptd->async_frames); wg_peer_t *peer = NULL; @@ -377,6 +464,10 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u8 is_ip4_out = 1; u8 *plain_data; u16 plain_data_len; + u16 plain_data_len_total; + u16 n_bufs; + u16 b_space_left_at_beginning; + u32 bi = from[b - bufs]; if (n_left_from > 2) { @@ -432,34 +523,72 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, goto out; } - iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length; - plain_data_len = vlib_buffer_length_in_chain (vm, b[0]) - iph_offset; - u8 *iv_data = b[0]->pre_data; + lb = b[0]; + n_bufs = vlib_buffer_chain_linearize (vm, b[0]); + if (n_bufs == 0) + { + b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS]; + goto out; + } - size_t encrypted_packet_len = message_data_len (plain_data_len); + if (n_bufs > 1) + { + /* Find last buffer in the chain */ + while (lb->flags & VLIB_BUFFER_NEXT_PRESENT) + lb = vlib_get_buffer (vm, lb->next_buffer); + } - /* - * Ensure there is enough space to write the encrypted data - * into the packet + /* Ensure there is enough free space at the beginning of the first buffer + * to write ethernet header (e.g. 
IPv6 VxLAN over IPv6 Wireguard will + * trigger this) */ - if (PREDICT_FALSE (encrypted_packet_len >= WG_DEFAULT_DATA_SIZE) || - PREDICT_FALSE ((iph_offset + encrypted_packet_len) >= - vlib_buffer_get_default_data_size (vm))) + ASSERT ((signed) b[0]->current_data >= + (signed) -VLIB_BUFFER_PRE_DATA_SIZE); + b_space_left_at_beginning = + b[0]->current_data + VLIB_BUFFER_PRE_DATA_SIZE; + if (PREDICT_FALSE (b_space_left_at_beginning < + sizeof (ethernet_header_t))) { - b[0]->error = node->errors[WG_OUTPUT_ERROR_TOO_BIG]; - goto out; + u32 size_diff = + sizeof (ethernet_header_t) - b_space_left_at_beginning; + + /* Can only move buffer when it's single and has enough free space*/ + if (lb == b[0] && + vlib_buffer_space_left_at_end (vm, b[0]) >= size_diff) + { + vlib_buffer_move (vm, b[0], + b[0]->current_data + (signed) size_diff); + } + else + { + b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS]; + goto out; + } } /* - * Move the buffer to fit ethernet header - */ - if (b[0]->current_data + VLIB_BUFFER_PRE_DATA_SIZE < - sizeof (ethernet_header_t)) + * Ensure there is enough free space at the end of the last buffer to + * write auth tag */ + if (PREDICT_FALSE (vlib_buffer_space_left_at_end (vm, lb) < + NOISE_AUTHTAG_LEN)) { - vlib_buffer_move (vm, b[0], 0); + u32 tmp_bi = 0; + if (vlib_buffer_alloc (vm, &tmp_bi, 1) != 1) + { + b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS]; + goto out; + } + lb = vlib_buffer_chain_buffer (vm, lb, tmp_bi); } + iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length; plain_data = vlib_buffer_get_current (b[0]) + iph_offset; + plain_data_len = b[0]->current_length - iph_offset; + plain_data_len_total = + vlib_buffer_length_in_chain (vm, b[0]) - iph_offset; + size_t encrypted_packet_len = message_data_len (plain_data_len_total); + vlib_buffer_chain_increase_length (b[0], lb, NOISE_AUTHTAG_LEN); + u8 *iv_data = b[0]->pre_data; is_ip4_out = ip46_address_is_ip4 (&peer->src.addr); if (is_ip4_out) @@ -484,22 +613,27 @@ 
wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, /* Here we are sure that can send packet to next node */ next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT; + if (lb != b[0]) + crypto_ops = &ptd->chained_crypto_ops; + else + crypto_ops = &ptd->crypto_ops; + enum noise_state_crypt state; if (is_async) { state = wg_add_to_async_frame ( - vm, ptd, &async_frame, b[0], plain_data, plain_data_len, - from[b - bufs], next[0], async_next_node, &peer->remote, + vm, ptd, &async_frame, b[0], lb, plain_data, plain_data_len_total, + bi, next[0], async_next_node, &peer->remote, &message_data_wg->receiver_index, &message_data_wg->counter, iv_data, time); } else { - state = wq_output_tun_process ( - vm, crypto_ops, &peer->remote, &message_data_wg->receiver_index, - &message_data_wg->counter, plain_data, plain_data_len, plain_data, - n_sync, iv_data, time); + state = wg_output_tun_process ( + vm, ptd, b[0], lb, crypto_ops, &peer->remote, + &message_data_wg->receiver_index, &message_data_wg->counter, + plain_data, plain_data_len, plain_data, n_sync, iv_data, time); } if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH)) @@ -522,10 +656,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, hdr4_out->wg.header.type = MESSAGE_DATA; hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len + sizeof (udp_header_t)); - b[0]->current_length = - (encrypted_packet_len + sizeof (ip4_udp_header_t)); ip4_header_set_len_w_chksum ( - &hdr4_out->ip4, clib_host_to_net_u16 (b[0]->current_length)); + &hdr4_out->ip4, clib_host_to_net_u16 (encrypted_packet_len + + sizeof (ip4_udp_header_t))); } else { @@ -533,8 +666,6 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, hdr6_out->ip6.payload_length = hdr6_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len + sizeof (udp_header_t)); - b[0]->current_length = - (encrypted_packet_len + sizeof (ip6_udp_header_t)); } out: @@ -555,14 +686,14 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t 
*node, next: if (PREDICT_FALSE (err != WG_OUTPUT_NEXT_INTERFACE_OUTPUT)) { - noop_bi[n_noop] = from[b - bufs]; + noop_bi[n_noop] = bi; n_noop++; noop_next++; goto next_left; } if (!is_async) { - sync_bi[n_sync] = from[b - bufs]; + sync_bi[n_sync] = bi; sync_bufs[n_sync] = b[0]; n_sync += 1; next += 1; @@ -581,6 +712,8 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, /* wg-output-process-ops */ wg_output_process_ops (vm, node, ptd->crypto_ops, sync_bufs, nexts, drop_next); + wg_output_process_chained_ops (vm, node, ptd->chained_crypto_ops, + sync_bufs, nexts, ptd->chunks, drop_next); int n_left_from_sync_bufs = n_sync; while (n_left_from_sync_bufs > 0) diff --git a/test/test_wireguard.py b/test/test_wireguard.py index b9713f6fc08..e63508af459 100644 --- a/test/test_wireguard.py +++ b/test/test_wireguard.py @@ -11,6 +11,7 @@ from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP from scapy.layers.inet import IP, UDP from scapy.layers.inet6 import IPv6 +from scapy.layers.vxlan import VXLAN from scapy.contrib.wireguard import ( Wireguard, WireguardResponse, @@ -40,6 +41,8 @@ from vpp_ipip_tun_interface import VppIpIpTunInterface from vpp_interface import VppInterface from vpp_pg_interface import is_ipv6_misc from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_l2 import VppBridgeDomain, VppBridgeDomainPort +from vpp_vxlan_tunnel import VppVxlanTunnel from vpp_object import VppObject from vpp_papi import VppEnum from framework import is_distro_ubuntu2204, is_distro_debian11, tag_fixme_vpp_debug @@ -470,6 +473,7 @@ class VppWgPeer(VppObject): return self.noise.encrypt(bytes(p)) def validate_encapped(self, rxs, tx, is_tunnel_ip6=False, is_transport_ip6=False): + ret_rxs = [] for rx in rxs: rx = self.decrypt_transport(rx, is_tunnel_ip6) if is_transport_ip6 is False: @@ -482,6 +486,8 @@ class VppWgPeer(VppObject): # check the original packet is present self._test.assertEqual(rx[IPv6].dst, tx[IPv6].dst) 
self._test.assertEqual(rx[IPv6].hlim, tx[IPv6].hlim - 1) + ret_rxs.append(rx) + return ret_rxs def want_events(self): self._test.vapi.want_wireguard_peer_events( @@ -2510,6 +2516,227 @@ class TestWg(VppTestCase): peer_1.remove_vpp_config() wg0.remove_vpp_config() + def _test_wg_large_packet_tmpl(self, is_async, is_ip6): + self.vapi.wg_set_async_mode(is_async) + port = 12323 + + # create wg interface + if is_ip6: + wg0 = VppWgInterface(self, self.pg1.local_ip6, port).add_vpp_config() + wg0.admin_up() + wg0.config_ip6() + else: + wg0 = VppWgInterface(self, self.pg1.local_ip4, port).add_vpp_config() + wg0.admin_up() + wg0.config_ip4() + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # create a peer + if is_ip6: + peer_1 = VppWgPeer( + self, wg0, self.pg1.remote_ip6, port + 1, ["1::3:0/112"] + ).add_vpp_config() + else: + peer_1 = VppWgPeer( + self, wg0, self.pg1.remote_ip4, port + 1, ["10.11.3.0/24"] + ).add_vpp_config() + self.assertEqual(len(self.vapi.wireguard_peers_dump()), 1) + + # create a route to rewrite traffic into the wg interface + if is_ip6: + r1 = VppIpRoute( + self, "1::3:0", 112, [VppRoutePath("1::3:1", wg0.sw_if_index)] + ).add_vpp_config() + else: + r1 = VppIpRoute( + self, "10.11.3.0", 24, [VppRoutePath("10.11.3.1", wg0.sw_if_index)] + ).add_vpp_config() + + # wait for the peer to send a handshake initiation + rxs = self.pg1.get_capture(1, timeout=2) + + # prepare and send a handshake response + # expect a keepalive message + resp = peer_1.consume_init(rxs[0], self.pg1, is_ip6=is_ip6) + rxs = self.send_and_expect(self.pg1, [resp], self.pg1) + + # verify the keepalive message + b = peer_1.decrypt_transport(rxs[0], is_ip6=is_ip6) + self.assertEqual(0, len(b)) + + # prepare and send data packets + # expect to receive them decrypted + if is_ip6: + ip_header = IPv6(src="1::3:1", dst=self.pg0.remote_ip6, hlim=20) + else: + ip_header = IP(src="10.11.3.1", dst=self.pg0.remote_ip4, ttl=20) + packet_len_opts = ( + 2500, # two buffers + 
1500, # one buffer + 4500, # three buffers + 1910 if is_ip6 else 1950, # auth tag is not contiguous + ) + txs = [] + for l in packet_len_opts: + txs.append( + peer_1.mk_tunnel_header(self.pg1, is_ip6=is_ip6) + / Wireguard(message_type=4, reserved_zero=0) + / WireguardTransport( + receiver_index=peer_1.sender, + counter=len(txs), + encrypted_encapsulated_packet=peer_1.encrypt_transport( + ip_header / UDP(sport=222, dport=223) / Raw(b"\xfe" * l) + ), + ) + ) + rxs = self.send_and_expect(self.pg1, txs, self.pg0) + + # verify decrypted packets + for i, l in enumerate(packet_len_opts): + if is_ip6: + self.assertEqual(rxs[i][IPv6].dst, self.pg0.remote_ip6) + self.assertEqual(rxs[i][IPv6].hlim, ip_header.hlim - 1) + else: + self.assertEqual(rxs[i][IP].dst, self.pg0.remote_ip4) + self.assertEqual(rxs[i][IP].ttl, ip_header.ttl - 1) + self.assertEqual(len(rxs[i][Raw]), l) + self.assertEqual(bytes(rxs[i][Raw]), b"\xfe" * l) + + # prepare and send packets that will be rewritten into the wg interface + # expect data packets sent + if is_ip6: + ip_header = IPv6(src=self.pg0.remote_ip6, dst="1::3:2") + else: + ip_header = IP(src=self.pg0.remote_ip4, dst="10.11.3.2") + packet_len_opts = ( + 2500, # two buffers + 1500, # one buffer + 4500, # three buffers + 1980 if is_ip6 else 2000, # no free space to write auth tag + ) + txs = [] + for l in packet_len_opts: + txs.append( + Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) + / ip_header + / UDP(sport=555, dport=556) + / Raw(b"\xfe" * l) + ) + rxs = self.send_and_expect(self.pg0, txs, self.pg1) + + # verify the data packets + rxs_decrypted = peer_1.validate_encapped( + rxs, ip_header, is_tunnel_ip6=is_ip6, is_transport_ip6=is_ip6 + ) + + for i, l in enumerate(packet_len_opts): + self.assertEqual(len(rxs_decrypted[i][Raw]), l) + self.assertEqual(bytes(rxs_decrypted[i][Raw]), b"\xfe" * l) + + # remove configs + r1.remove_vpp_config() + peer_1.remove_vpp_config() + wg0.remove_vpp_config() + + def 
test_wg_large_packet_v4_sync(self): + """Large packet (v4, sync)""" + self._test_wg_large_packet_tmpl(is_async=False, is_ip6=False) + + def test_wg_large_packet_v6_sync(self): + """Large packet (v6, sync)""" + self._test_wg_large_packet_tmpl(is_async=False, is_ip6=True) + + def test_wg_large_packet_v4_async(self): + """Large packet (v4, async)""" + self._test_wg_large_packet_tmpl(is_async=True, is_ip6=False) + + def test_wg_large_packet_v6_async(self): + """Large packet (v6, async)""" + self._test_wg_large_packet_tmpl(is_async=True, is_ip6=True) + + def test_wg_lack_of_buf_headroom(self): + """Lack of buffer's headroom (v6 vxlan over v6 wg)""" + port = 12323 + + # create wg interface + wg0 = VppWgInterface(self, self.pg1.local_ip6, port).add_vpp_config() + wg0.admin_up() + wg0.config_ip6() + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # create a peer + peer_1 = VppWgPeer( + self, wg0, self.pg1.remote_ip6, port + 1, ["::/0"] + ).add_vpp_config() + self.assertEqual(len(self.vapi.wireguard_peers_dump()), 1) + + # create a route to enable communication between wg interface addresses + r1 = VppIpRoute( + self, wg0.remote_ip6, 128, [VppRoutePath("0.0.0.0", wg0.sw_if_index)] + ).add_vpp_config() + + # wait for the peer to send a handshake initiation + rxs = self.pg1.get_capture(1, timeout=2) + + # prepare and send a handshake response + # expect a keepalive message + resp = peer_1.consume_init(rxs[0], self.pg1, is_ip6=True) + rxs = self.send_and_expect(self.pg1, [resp], self.pg1) + + # verify the keepalive message + b = peer_1.decrypt_transport(rxs[0], is_ip6=True) + self.assertEqual(0, len(b)) + + # create vxlan interface over the wg interface + vxlan0 = VppVxlanTunnel(self, src=wg0.local_ip6, dst=wg0.remote_ip6, vni=1111) + vxlan0.add_vpp_config() + + # create bridge domain + bd1 = VppBridgeDomain(self, bd_id=1) + bd1.add_vpp_config() + + # add the vxlan interface and pg0 to the bridge domain + bd1_ports = ( + VppBridgeDomainPort(self, bd1, 
vxlan0).add_vpp_config(), + VppBridgeDomainPort(self, bd1, self.pg0).add_vpp_config(), + ) + + # prepare and send packets that will be rewritten into the vxlan interface + # expect they to be rewritten into the wg interface then and data packets sent + tx = ( + Ether(dst="00:00:00:00:00:01", src="00:00:00:00:00:02") + / IPv6(src="::1", dst="::2", hlim=20) + / UDP(sport=1111, dport=1112) + / Raw(b"\xfe" * 1900) + ) + rxs = self.send_and_expect(self.pg0, [tx] * 5, self.pg1) + + # verify the data packet + for rx in rxs: + rx_decrypted = IPv6(peer_1.decrypt_transport(rx, is_ip6=True)) + + self.assertEqual(rx_decrypted[VXLAN].vni, vxlan0.vni) + inner = rx_decrypted[VXLAN].payload + + # check the original packet is present + self.assertEqual(inner[IPv6].dst, tx[IPv6].dst) + self.assertEqual(inner[IPv6].hlim, tx[IPv6].hlim) + self.assertEqual(len(inner[Raw]), len(tx[Raw])) + self.assertEqual(bytes(inner[Raw]), bytes(tx[Raw])) + + # remove configs + for bdp in bd1_ports: + bdp.remove_vpp_config() + bd1.remove_vpp_config() + vxlan0.remove_vpp_config() + r1.remove_vpp_config() + peer_1.remove_vpp_config() + wg0.remove_vpp_config() + @tag_fixme_vpp_debug class WireguardHandoffTests(TestWg): |