From 243a0433ff05a37113d6890bbeb163bbea033687 Mon Sep 17 00:00:00 2001 From: Dou Chao Date: Tue, 29 Nov 2022 19:41:34 +0800 Subject: vcl: enable gso for 'sendmsg' in LDP mode. Some upper-layer apps (e.g. Nginx-quic) pack several of their protocol buffers into a struct msg which is a combination of gso_buffer and gso_size. But if the host stack disregards the gso_size of the buffer and splits the buffer using the default mss, the peer client fails to parse the packets. Type: improvement Signed-off-by: Dou Chao Change-Id: I805eb642be826038ba96d1b85dad8ec0c0f6c459 Signed-off-by: Dou Chao --- src/plugins/quic/quic.c | 1 + src/vcl/ldp.c | 32 +++++++++++++++++++++++++++----- src/vcl/vppcom.c | 28 +++++++++++++++++----------- src/vcl/vppcom.h | 18 ++++++++++++++++++ src/vnet/session/application_interface.h | 14 +++++++++----- src/vnet/session/session_node.c | 5 +++++ src/vnet/session/session_types.h | 6 +++--- 7 files changed, 80 insertions(+), 24 deletions(-) diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c index e4ea38cfb48..898846d8ea4 100644 --- a/src/plugins/quic/quic.c +++ b/src/plugins/quic/quic.c @@ -676,6 +676,7 @@ quic_send_datagram (session_t *udp_session, struct iovec *packet, hdr.is_ip4 = tc->is_ip4; clib_memcpy (&hdr.lcl_ip, &tc->lcl_ip, sizeof (ip46_address_t)); hdr.lcl_port = tc->lcl_port; + hdr.gso_size = 0; /* Read dest address from quicly-provided sockaddr */ if (hdr.is_ip4) diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index 73a5bc20cb4..522e85d9719 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -1539,13 +1540,19 @@ __recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags) return recv (fd, buf, n, flags); } -static int -ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags, +static inline int +ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, + vppcom_endpt_tlv_t *ep_tlv, int flags, __CONST_SOCKADDR_ARG addr, socklen_t addr_len) { 
vppcom_endpt_t *ep = 0; vppcom_endpt_t _ep; + if (ep_tlv) + { + _ep.app_data = *ep_tlv; + } + if (addr) { ep = &_ep; @@ -1614,7 +1621,7 @@ sendto (int fd, const void *buf, size_t n, int flags, vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { - size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len); + size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len); if (size < 0) { errno = -size; @@ -1670,11 +1677,26 @@ sendmsg (int fd, const struct msghdr * msg, int flags) struct iovec *iov = msg->msg_iov; ssize_t total = 0; int i, rv = 0; + struct cmsghdr *cmsg; + uint16_t *valp; + vppcom_endpt_tlv_t _app_data; + vppcom_endpt_tlv_t *p_app_data = NULL; + + cmsg = CMSG_FIRSTHDR (msg); + if (cmsg && cmsg->cmsg_type == UDP_SEGMENT) + { + p_app_data = &_app_data; + valp = (void *) CMSG_DATA (cmsg); + p_app_data->data_type = VCL_UDP_SEGMENT; + p_app_data->data_len = sizeof (*valp); + p_app_data->value = *valp; + } for (i = 0; i < msg->msg_iovlen; ++i) { - rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags, - msg->msg_name, msg->msg_namelen); + rv = + ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, p_app_data, + flags, msg->msg_name, msg->msg_namelen); if (rv < 0) break; else diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 3538a36f508..57355967900 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -2227,8 +2227,8 @@ vcl_fifo_is_writeable (svm_fifo_t * f, u32 len, u8 is_dgram) } always_inline int -vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf, - size_t n, u8 is_flush, u8 is_dgram) +vppcom_session_write_inline (vcl_worker_t *wrk, vcl_session_t *s, void *buf, + size_t n, u16 gso_size, u8 is_flush, u8 is_dgram) { int n_write, is_nonblocking; session_evt_type_t et; @@ -2293,9 +2293,9 @@ vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf, et = SESSION_IO_EVT_TX_FLUSH; if (is_dgram) - n_write = app_send_dgram_raw (tx_fifo, &s->transport, - s->vpp_evt_q, buf, n, et, - 0 /* 
do_evt */ , SVM_Q_WAIT); + n_write = + app_send_dgram_raw_gso (tx_fifo, &s->transport, s->vpp_evt_q, buf, n, + gso_size, et, 0 /* do_evt */, SVM_Q_WAIT); else n_write = app_send_stream_raw (tx_fifo, s->vpp_evt_q, buf, n, et, 0 /* do_evt */ , SVM_Q_WAIT); @@ -2324,8 +2324,8 @@ vppcom_session_write (uint32_t session_handle, void *buf, size_t n) if (PREDICT_FALSE (!s)) return VPPCOM_EBADFD; - return vppcom_session_write_inline (wrk, s, buf, n, - 0 /* is_flush */ , s->is_dgram ? 1 : 0); + return vppcom_session_write_inline (wrk, s, buf, n, 0, 0 /* is_flush */, + s->is_dgram ? 1 : 0); } int @@ -2338,8 +2338,8 @@ vppcom_session_write_msg (uint32_t session_handle, void *buf, size_t n) if (PREDICT_FALSE (!s)) return VPPCOM_EBADFD; - return vppcom_session_write_inline (wrk, s, buf, n, - 1 /* is_flush */ , s->is_dgram ? 1 : 0); + return vppcom_session_write_inline (wrk, s, buf, n, 0, 1 /* is_flush */, + s->is_dgram ? 1 : 0); } #define vcl_fifo_rx_evt_valid_or_break(_s) \ @@ -4008,7 +4008,6 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, VDBG (2, "VPPCOM_ATTR_GET_TCP_USER_MSS: %d, buflen %d", *(int *) buffer, *buflen); break; - case VPPCOM_ATTR_SET_TCP_USER_MSS: if (!(buffer && buflen && (*buflen == sizeof (u32)))) { @@ -4151,6 +4150,7 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer, { vcl_worker_t *wrk = vcl_worker_get_current (); vcl_session_t *s; + u16 gso_size = 0; s = vcl_session_get_w_handle (wrk, session_handle); if (PREDICT_FALSE (!s)) @@ -4165,6 +4165,12 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer, s->transport.rmt_port = ep->port; vcl_ip_copy_from_ep (&s->transport.rmt_ip, ep); + vppcom_endpt_tlv_t *p_app_data = &ep->app_data; + + if (p_app_data && (p_app_data->data_type == VCL_UDP_SEGMENT)) + { + gso_size = p_app_data->value; + } /* Session not connected/bound in vpp. 
Create it by 'connecting' it */ if (PREDICT_FALSE (s->session_state == VCL_STATE_CLOSED)) { @@ -4188,7 +4194,7 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer, VDBG (2, "handling flags 0x%u (%d) not implemented yet.", flags, flags); } - return (vppcom_session_write_inline (wrk, s, buffer, buflen, 1, + return (vppcom_session_write_inline (wrk, s, buffer, buflen, gso_size, 1, s->is_dgram ? 1 : 0)); } diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h index 08724cc31d8..c9f7b8b9403 100644 --- a/src/vcl/vppcom.h +++ b/src/vcl/vppcom.h @@ -63,6 +63,23 @@ extern "C" VPPCOM_IS_IP4, } vppcom_is_ip4_t; +#define VCL_UDP_OPTS_BASE (VPPCOM_PROTO_UDP << 16) +#define VCL_UDP_SEGMENT (VCL_UDP_OPTS_BASE + 0) + + typedef struct vppcom_endpt_tlv_t_ + { + uint32_t data_type; + uint32_t data_len; + union + { + /* data */ + uint64_t value; + uint32_t as_u32[2]; + uint16_t as_u16[4]; + uint8_t as_u8[8]; + }; + } vppcom_endpt_tlv_t; + typedef struct vppcom_endpt_t_ { uint8_t is_cut_thru; @@ -70,6 +87,7 @@ extern "C" uint8_t *ip; uint16_t port; uint64_t parent_handle; + vppcom_endpt_tlv_t app_data; } vppcom_endpt_t; typedef uint32_t vcl_session_handle_t; diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index e634a06b532..138953b4ed4 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -643,14 +643,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type, } } +#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \ + noblock) \ + app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \ + noblock) + always_inline int -app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at, - svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type, - u8 do_evt, u8 noblock) +app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at, + svm_msg_q_t *vpp_evt_q, u8 *data, u32 len, + u16 gso_size, u8 evt_type, u8 do_evt, u8 
noblock) { session_dgram_hdr_t hdr; int rv; - if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len)) return 0; @@ -661,7 +665,7 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at, hdr.rmt_port = at->rmt_port; clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t)); hdr.lcl_port = at->lcl_port; - + hdr.gso_size = gso_size; /* *INDENT-OFF* */ svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }}; /* *INDENT-ON* */ diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index be00925bb00..150da259da6 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -1222,6 +1222,11 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset); len = ctx->hdr.data_length - ctx->hdr.data_offset; + if (ctx->hdr.gso_size) + { + ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size); + } + /* Process multiple dgrams if smaller than min (buf_space, mss). * This avoids handling multiple dgrams if they require buffer * chains */ diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h index 95a88c5ab6e..dcbbd72ef8e 100644 --- a/src/vnet/session/session_types.h +++ b/src/vnet/session/session_types.h @@ -461,12 +461,12 @@ typedef struct session_dgram_header_ u16 rmt_port; u16 lcl_port; u8 is_ip4; + u16 gso_size; } __clib_packed session_dgram_hdr_t; #define SESSION_CONN_ID_LEN 37 -#define SESSION_CONN_HDR_LEN 45 - -STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), +#define SESSION_CONN_HDR_LEN 47 +STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10), "session conn id wrong length"); #define foreach_session_error \ -- cgit 1.2.3-korg