aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDou Chao <chao.dou@intel.com>2022-11-29 19:41:34 +0800
committerFlorin Coras <florin.coras@gmail.com>2022-12-14 16:57:14 +0000
commit243a0433ff05a37113d6890bbeb163bbea033687 (patch)
tree1ac3ec8b63ed22649a30f1c6666ac5460a1ff9b3
parent4b9935cd54e5ca31c192cb9113e3056016f0b728 (diff)
vcl: enable gso for 'sendmsg' in LDP mode.
Some upper-layer apps (e.g. Nginx-quic) pack several protocol buffers into a single message (struct msghdr) that combines a GSO buffer with a gso_size. If HostStack ignores the gso_size attached to the buffer and splits the buffer using the default MSS instead, the peer client fails to parse the packets. Type: improvement Signed-off-by: Dou Chao <chao.dou@intel.com> Change-Id: I805eb642be826038ba96d1b85dad8ec0c0f6c459 Signed-off-by: Dou Chao <chao.dou@intel.com>
-rw-r--r--src/plugins/quic/quic.c1
-rw-r--r--src/vcl/ldp.c32
-rw-r--r--src/vcl/vppcom.c28
-rw-r--r--src/vcl/vppcom.h18
-rw-r--r--src/vnet/session/application_interface.h14
-rw-r--r--src/vnet/session/session_node.c5
-rw-r--r--src/vnet/session/session_types.h6
7 files changed, 80 insertions, 24 deletions
diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c
index e4ea38cfb48..898846d8ea4 100644
--- a/src/plugins/quic/quic.c
+++ b/src/plugins/quic/quic.c
@@ -676,6 +676,7 @@ quic_send_datagram (session_t *udp_session, struct iovec *packet,
hdr.is_ip4 = tc->is_ip4;
clib_memcpy (&hdr.lcl_ip, &tc->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = tc->lcl_port;
+ hdr.gso_size = 0;
/* Read dest address from quicly-provided sockaddr */
if (hdr.is_ip4)
diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c
index 73a5bc20cb4..522e85d9719 100644
--- a/src/vcl/ldp.c
+++ b/src/vcl/ldp.c
@@ -21,6 +21,7 @@
#include <stdarg.h>
#include <sys/resource.h>
#include <netinet/tcp.h>
+#include <linux/udp.h>
#include <vcl/ldp_socket_wrapper.h>
#include <vcl/ldp.h>
@@ -1539,13 +1540,19 @@ __recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
return recv (fd, buf, n, flags);
}
-static int
-ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
+static inline int
+ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
+ vppcom_endpt_tlv_t *ep_tlv, int flags,
__CONST_SOCKADDR_ARG addr, socklen_t addr_len)
{
vppcom_endpt_t *ep = 0;
vppcom_endpt_t _ep;
+ if (ep_tlv)
+ {
+ _ep.app_data = *ep_tlv;
+ }
+
if (addr)
{
ep = &_ep;
@@ -1614,7 +1621,7 @@ sendto (int fd, const void *buf, size_t n, int flags,
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len);
+ size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len);
if (size < 0)
{
errno = -size;
@@ -1670,11 +1677,26 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
struct iovec *iov = msg->msg_iov;
ssize_t total = 0;
int i, rv = 0;
+ struct cmsghdr *cmsg;
+ uint16_t *valp;
+ vppcom_endpt_tlv_t _app_data;
+ vppcom_endpt_tlv_t *p_app_data = NULL;
+
+ cmsg = CMSG_FIRSTHDR (msg);
+ if (cmsg && cmsg->cmsg_type == UDP_SEGMENT)
+ {
+ p_app_data = &_app_data;
+ valp = (void *) CMSG_DATA (cmsg);
+ p_app_data->data_type = VCL_UDP_SEGMENT;
+ p_app_data->data_len = sizeof (*valp);
+ p_app_data->value = *valp;
+ }
for (i = 0; i < msg->msg_iovlen; ++i)
{
- rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
- msg->msg_name, msg->msg_namelen);
+ rv =
+ ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, p_app_data,
+ flags, msg->msg_name, msg->msg_namelen);
if (rv < 0)
break;
else
diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c
index 3538a36f508..57355967900 100644
--- a/src/vcl/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -2227,8 +2227,8 @@ vcl_fifo_is_writeable (svm_fifo_t * f, u32 len, u8 is_dgram)
}
always_inline int
-vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf,
- size_t n, u8 is_flush, u8 is_dgram)
+vppcom_session_write_inline (vcl_worker_t *wrk, vcl_session_t *s, void *buf,
+ size_t n, u16 gso_size, u8 is_flush, u8 is_dgram)
{
int n_write, is_nonblocking;
session_evt_type_t et;
@@ -2293,9 +2293,9 @@ vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf,
et = SESSION_IO_EVT_TX_FLUSH;
if (is_dgram)
- n_write = app_send_dgram_raw (tx_fifo, &s->transport,
- s->vpp_evt_q, buf, n, et,
- 0 /* do_evt */ , SVM_Q_WAIT);
+ n_write =
+ app_send_dgram_raw_gso (tx_fifo, &s->transport, s->vpp_evt_q, buf, n,
+ gso_size, et, 0 /* do_evt */, SVM_Q_WAIT);
else
n_write = app_send_stream_raw (tx_fifo, s->vpp_evt_q, buf, n, et,
0 /* do_evt */ , SVM_Q_WAIT);
@@ -2324,8 +2324,8 @@ vppcom_session_write (uint32_t session_handle, void *buf, size_t n)
if (PREDICT_FALSE (!s))
return VPPCOM_EBADFD;
- return vppcom_session_write_inline (wrk, s, buf, n,
- 0 /* is_flush */ , s->is_dgram ? 1 : 0);
+ return vppcom_session_write_inline (wrk, s, buf, n, 0, 0 /* is_flush */,
+ s->is_dgram ? 1 : 0);
}
int
@@ -2338,8 +2338,8 @@ vppcom_session_write_msg (uint32_t session_handle, void *buf, size_t n)
if (PREDICT_FALSE (!s))
return VPPCOM_EBADFD;
- return vppcom_session_write_inline (wrk, s, buf, n,
- 1 /* is_flush */ , s->is_dgram ? 1 : 0);
+ return vppcom_session_write_inline (wrk, s, buf, n, 0, 1 /* is_flush */,
+ s->is_dgram ? 1 : 0);
}
#define vcl_fifo_rx_evt_valid_or_break(_s) \
@@ -4008,7 +4008,6 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
VDBG (2, "VPPCOM_ATTR_GET_TCP_USER_MSS: %d, buflen %d", *(int *) buffer,
*buflen);
break;
-
case VPPCOM_ATTR_SET_TCP_USER_MSS:
if (!(buffer && buflen && (*buflen == sizeof (u32))))
{
@@ -4151,6 +4150,7 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer,
{
vcl_worker_t *wrk = vcl_worker_get_current ();
vcl_session_t *s;
+ u16 gso_size = 0;
s = vcl_session_get_w_handle (wrk, session_handle);
if (PREDICT_FALSE (!s))
@@ -4165,6 +4165,12 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer,
s->transport.rmt_port = ep->port;
vcl_ip_copy_from_ep (&s->transport.rmt_ip, ep);
+ vppcom_endpt_tlv_t *p_app_data = &ep->app_data;
+
+ if (p_app_data && (p_app_data->data_type == VCL_UDP_SEGMENT))
+ {
+ gso_size = p_app_data->value;
+ }
/* Session not connected/bound in vpp. Create it by 'connecting' it */
if (PREDICT_FALSE (s->session_state == VCL_STATE_CLOSED))
{
@@ -4188,7 +4194,7 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer,
VDBG (2, "handling flags 0x%u (%d) not implemented yet.", flags, flags);
}
- return (vppcom_session_write_inline (wrk, s, buffer, buflen, 1,
+ return (vppcom_session_write_inline (wrk, s, buffer, buflen, gso_size, 1,
s->is_dgram ? 1 : 0));
}
diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h
index 08724cc31d8..c9f7b8b9403 100644
--- a/src/vcl/vppcom.h
+++ b/src/vcl/vppcom.h
@@ -63,6 +63,23 @@ extern "C"
VPPCOM_IS_IP4,
} vppcom_is_ip4_t;
+#define VCL_UDP_OPTS_BASE (VPPCOM_PROTO_UDP << 16)
+#define VCL_UDP_SEGMENT (VCL_UDP_OPTS_BASE + 0)
+
+ typedef struct vppcom_endpt_tlv_t_
+ {
+ uint32_t data_type;
+ uint32_t data_len;
+ union
+ {
+ /* data */
+ uint64_t value;
+ uint32_t as_u32[2];
+ uint16_t as_u16[4];
+ uint8_t as_u8[8];
+ };
+ } vppcom_endpt_tlv_t;
+
typedef struct vppcom_endpt_t_
{
uint8_t is_cut_thru;
@@ -70,6 +87,7 @@ extern "C"
uint8_t *ip;
uint16_t port;
uint64_t parent_handle;
+ vppcom_endpt_tlv_t app_data;
} vppcom_endpt_t;
typedef uint32_t vcl_session_handle_t;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index e634a06b532..138953b4ed4 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -643,14 +643,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
}
}
+#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \
+ noblock) \
+ app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \
+ noblock)
+
always_inline int
-app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
- svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type,
- u8 do_evt, u8 noblock)
+app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at,
+ svm_msg_q_t *vpp_evt_q, u8 *data, u32 len,
+ u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock)
{
session_dgram_hdr_t hdr;
int rv;
-
if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len))
return 0;
@@ -661,7 +665,7 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
hdr.rmt_port = at->rmt_port;
clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = at->lcl_port;
-
+ hdr.gso_size = gso_size;
/* *INDENT-OFF* */
svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }};
/* *INDENT-ON* */
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index be00925bb00..150da259da6 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -1222,6 +1222,11 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset);
len = ctx->hdr.data_length - ctx->hdr.data_offset;
+ if (ctx->hdr.gso_size)
+ {
+ ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size);
+ }
+
/* Process multiple dgrams if smaller than min (buf_space, mss).
* This avoids handling multiple dgrams if they require buffer
* chains */
diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h
index 95a88c5ab6e..dcbbd72ef8e 100644
--- a/src/vnet/session/session_types.h
+++ b/src/vnet/session/session_types.h
@@ -461,12 +461,12 @@ typedef struct session_dgram_header_
u16 rmt_port;
u16 lcl_port;
u8 is_ip4;
+ u16 gso_size;
} __clib_packed session_dgram_hdr_t;
#define SESSION_CONN_ID_LEN 37
-#define SESSION_CONN_HDR_LEN 45
-
-STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
+#define SESSION_CONN_HDR_LEN 47
+STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10),
"session conn id wrong length");
#define foreach_session_error \