From 74f0f97ddfd9ce079d68bb333d2206a789a36b6c Mon Sep 17 00:00:00 2001
From: Florin Coras
Date: Wed, 4 Aug 2021 18:20:24 -0700
Subject: tls: picotls optimize writes

Type: improvement

Signed-off-by: Florin Coras
Change-Id: I76a1cd516849cfe5bc87ed2b3707c6f2257126d2
---
Review changes on top of the original posting (hunk counts and diffstat
were updated accordingly; blob ids on the index lines were not):
 - ptls_app_to_tcp_write: read pm->rx_bufs[thread_index] into app_buf only
   after vec_validate, which may move the vector
 - ptls_compute_deq_len: return 0 instead of wrapping u32 when the
   destination is smaller than the record overhead; the caller stops
   writing in that case
 - doc comments for the new helpers

 src/plugins/tlspicotls/tls_picotls.c | 323 +++++++++++++++++++++++------------
 src/plugins/tlspicotls/tls_picotls.h |  10 +-
 2 files changed, 220 insertions(+), 113 deletions(-)

diff --git a/src/plugins/tlspicotls/tls_picotls.c b/src/plugins/tlspicotls/tls_picotls.c
index 876836675be..84c6d789928 100644
--- a/src/plugins/tlspicotls/tls_picotls.c
+++ b/src/plugins/tlspicotls/tls_picotls.c
@@ -49,7 +49,6 @@ picotls_ctx_free (tls_ctx_t * ctx)
   picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
   vec_free (ptls_ctx->rx_content);
   ptls_free (ptls_ctx->tls);
-  vec_free (ptls_ctx->write_content);
   pool_put_index (picotls_main.ctx_pool[ctx->c_thread_index],
 		  ptls_ctx->ptls_ctx_idx);
 }
@@ -270,6 +269,47 @@ picotls_do_handshake (picotls_ctx_t *ptls_ctx, session_t *tcp_session)
   return write;
 }
 
+/**
+ * Copy the trailing @c to_copy bytes of @c buf into the fifo segments
+ * @c fs, starting with segment @c *fs_idx and stopping once segment index
+ * @c max_fs is reached. A partially filled segment has its data/len advanced
+ * past the copied bytes. On return @c *fs_idx is the next segment to use.
+ *
+ * @return number of bytes left uncopied, 0 if everything was copied
+ */
+static inline int
+ptls_copy_buf_to_fs (ptls_buffer_t *buf, u32 to_copy, svm_fifo_seg_t *fs,
+		     u32 *fs_idx, u32 max_fs)
+{
+  u32 idx = *fs_idx;
+
+  while (to_copy)
+    {
+      if (fs[idx].len <= to_copy)
+	{
+	  clib_memcpy_fast (fs[idx].data, buf->base + (buf->off - to_copy),
+			    fs[idx].len);
+	  to_copy -= fs[idx].len;
+	  idx += 1;
+	  /* ran out of destination segments */
+	  if (idx == max_fs)
+	    break;
+	}
+      else
+	{
+	  clib_memcpy_fast (fs[idx].data, buf->base + (buf->off - to_copy),
+			    to_copy);
+	  fs[idx].len -= to_copy;
+	  fs[idx].data += to_copy;
+	  to_copy = 0;
+	}
+    }
+
+  *fs_idx = idx;
+
+  return to_copy;
+}
+
 static inline int
 picotls_ctx_read (tls_ctx_t *ctx, session_t *tcp_session)
 {
@@ -378,146 +418,217 @@ final_checks:
   return off;
 }
 
-static inline int
-picotls_content_process (picotls_ctx_t * ptls_ctx, svm_fifo_t * src_fifo,
-			 svm_fifo_t * dst_fifo, int content_len,
-			 int total_record_overhead, int is_no_copy)
+/**
+ * Compute how many plaintext bytes to encrypt so that the resulting
+ * records, including per-record overhead, fit in the destination.
+ *
+ * Sets @c *is_nocopy to 1 when @c dst_chunk alone is deemed big enough and
+ * sizes the write against it; otherwise sizes it against @c dst_space.
+ * Returns 0 if the destination cannot even hold the record overhead.
+ */
+static inline u32
+ptls_compute_deq_len (picotls_ctx_t *ptls_ctx, u32 dst_chunk, u32 src_chunk,
+		      u32 dst_space, u8 *is_nocopy)
 {
-  ptls_buffer_t *buf = &ptls_ctx->write_buffer;
-  int total_length = content_len + total_record_overhead;
-  int to_dst_len;
-  if (is_no_copy)
-    {
-      ptls_buffer_init (buf, svm_fifo_tail (dst_fifo), total_length);
-      ptls_send (ptls_ctx->tls, buf, svm_fifo_head (src_fifo), content_len);
-
-      assert (!buf->is_allocated);
-      assert (buf->base == svm_fifo_tail (dst_fifo));
+  int record_overhead = ptls_get_record_overhead (ptls_ctx->tls);
+  int num_records;
+  u32 deq_len, total_overhead;
 
-      svm_fifo_dequeue_drop (src_fifo, content_len);
-      svm_fifo_enqueue_nocopy (dst_fifo, buf->off);
-      to_dst_len = buf->off;
+  if (dst_chunk >= clib_min (8192, src_chunk + record_overhead))
+    {
+      *is_nocopy = 1;
+      deq_len = clib_min (src_chunk, dst_chunk);
+      num_records = ceil ((f64) deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
+      total_overhead = num_records * record_overhead;
+      if (deq_len + total_overhead > dst_chunk)
+	deq_len = dst_chunk > total_overhead ? dst_chunk - total_overhead : 0;
     }
   else
     {
-      assert (!TLS_WRITE_IS_LEFT (ptls_ctx));
-      vec_validate (ptls_ctx->write_content, total_length);
-      ptls_buffer_init (buf, ptls_ctx->write_content, total_length);
-
-      ptls_send (ptls_ctx->tls, buf, svm_fifo_head (src_fifo), content_len);
-      svm_fifo_dequeue_drop (src_fifo, content_len);
-
-      to_dst_len = svm_fifo_enqueue (dst_fifo, buf->off, buf->base);
+      deq_len = clib_min (src_chunk, dst_space);
+      num_records = ceil ((f64) deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
+      total_overhead = num_records * record_overhead;
+      if (deq_len + total_overhead > dst_space)
+	deq_len = dst_space > total_overhead ? dst_space - total_overhead : 0;
     }
-  ptls_ctx->write_buffer_offset += to_dst_len;
-  return to_dst_len;
+
+  return deq_len;
 }
 
-static inline int
-picotls_ctx_write (tls_ctx_t * ctx, session_t * app_session,
-		   transport_send_params_t * sp)
+/**
+ * Encrypt up to @c max_len bytes from the app session's tx fifo into
+ * @c tcp_tx_fifo. Records are written straight into the tcp fifo chunks
+ * when possible and through a per-thread staging buffer otherwise.
+ *
+ * @return number of ciphertext bytes enqueued to @c tcp_tx_fifo
+ */
+static u32
+ptls_app_to_tcp_write (picotls_ctx_t *ptls_ctx, session_t *app_session,
+		       svm_fifo_t *tcp_tx_fifo, u32 max_len)
 {
-  picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
-  u32 deq_max, deq_now;
-  u32 enq_max, enq_now;
-  int from_app_len = 0, to_tls_len = 0, is_nocopy = 0;
-  svm_fifo_t *tls_tx_fifo, *app_tx_fifo;
-  session_t *tls_session;
-
-  int record_overhead = ptls_get_record_overhead (ptls_ctx->tls);
-  int num_records, total_overhead;
+  u32 wrote = 0, max_enq, thread_index, app_buf_len, left, ti = 0;
+  int read = 0, rv, i = 0, len, n_tcp_segs = 4, deq_len;
+  const int n_app_segs = 2, min_chunk = 2048;
+  svm_fifo_seg_t app_fs[n_app_segs], tcp_fs[n_tcp_segs];
+  picotls_main_t *pm = &picotls_main;
+  ptls_buffer_t _buf, *buf = &_buf;
+  svm_fifo_t *app_tx_fifo;
+  u8 is_nocopy, *app_buf;
+  u32 first_chunk_len;
 
-  tls_session = session_get_from_handle (ctx->tls_session_handle);
-  tls_tx_fifo = tls_session->tx_fifo;
+  thread_index = app_session->thread_index;
   app_tx_fifo = app_session->tx_fifo;
 
-  if (PREDICT_FALSE (TLS_WRITE_IS_LEFT (ptls_ctx)))
+  len = svm_fifo_segments (app_tx_fifo, 0, app_fs, n_app_segs, max_len);
+  if (len <= 0)
+    return 0;
+
+  n_tcp_segs = svm_fifo_provision_chunks (tcp_tx_fifo, tcp_fs, n_tcp_segs,
+					  1000 + max_len);
+  if (n_tcp_segs <= 0)
+    return 0;
+
+  while ((left = len - read) && ti < n_tcp_segs)
     {
-      enq_max = svm_fifo_max_enqueue_prod (tls_tx_fifo);
-      int to_write = clib_min (enq_max,
-			       ptls_ctx->write_buffer.off -
-			       ptls_ctx->write_buffer_offset);
-      to_tls_len =
-	svm_fifo_enqueue (tls_tx_fifo, to_write, TLS_WRITE_OFFSET (ptls_ctx));
-      if (to_tls_len < 0)
-	{
-	  app_session->flags |= SESSION_F_CUSTOM_TX;
-	  return 0;
-	}
-      ptls_ctx->write_buffer_offset += to_tls_len;
+      /* If we wrote something and are left with few bytes, postpone write
+       * as we may be able to encrypt a bigger chunk next time */
+      if (wrote && left < min_chunk)
+	break;
 
-      if (TLS_WRITE_IS_LEFT (ptls_ctx))
+      /* Avoid short records if possible */
+      if (app_fs[i].len < min_chunk && min_chunk < left)
 	{
-	  app_session->flags |= SESSION_F_CUSTOM_TX;
-	  return to_tls_len;
+	  app_buf_len = app_fs[i].len + app_fs[i + 1].len;
+	  vec_validate (pm->rx_bufs[thread_index], app_buf_len);
+	  /* Take the pointer after validate, which may reallocate */
+	  app_buf = pm->rx_bufs[thread_index];
+	  clib_memcpy_fast (app_buf, app_fs[i].data, app_fs[i].len);
+	  clib_memcpy_fast (app_buf + app_fs[i].len, app_fs[i + 1].data,
+			    app_buf_len - app_fs[i].len);
+	  first_chunk_len = app_fs[i].len;
+	  i += 1;
 	}
       else
 	{
-	  ptls_buffer_init (&ptls_ctx->write_buffer, "", 0);
-	  ptls_ctx->write_buffer_offset = 0;
+	  app_buf = app_fs[i].data;
+	  app_buf_len = app_fs[i].len;
+	  first_chunk_len = 0;
 	}
-    }
 
-  deq_max = svm_fifo_max_dequeue_cons (app_tx_fifo);
-  if (!deq_max)
-    return deq_max;
+      is_nocopy = 0;
+      max_enq = tcp_fs[ti].len;
+      max_enq += ti < (n_tcp_segs - 1) ? tcp_fs[ti + 1].len : 0;
 
-  deq_now = clib_min (deq_max, sp->max_burst_size);
-  deq_now = clib_min (deq_now, svm_fifo_max_read_chunk (app_tx_fifo));
+      deq_len = ptls_compute_deq_len (ptls_ctx, tcp_fs[ti].len, app_buf_len,
+				      max_enq, &is_nocopy);
+      /* Provisioned space cannot hold even one record, retry later */
+      if (!deq_len)
+	break;
+      if (is_nocopy)
+	{
+	  ptls_buffer_init (buf, tcp_fs[ti].data, tcp_fs[ti].len);
+	  rv = ptls_send (ptls_ctx->tls, buf, app_buf, deq_len);
 
-  enq_max = svm_fifo_max_enqueue_prod (tls_tx_fifo);
-  /** There is no engough enqueue space for one record **/
-  if (enq_max <= record_overhead)
-    {
-      app_session->flags |= SESSION_F_CUSTOM_TX;
-      return 0;
-    }
+	  assert (rv == 0);
+	  wrote += buf->off;
 
-  enq_now = clib_min (enq_max, svm_fifo_max_write_chunk (tls_tx_fifo));
+	  tcp_fs[ti].len -= buf->off;
+	  tcp_fs[ti].data += buf->off;
+	  if (!tcp_fs[ti].len)
+	    ti += 1;
+	}
+      else
+	{
+	  vec_validate (pm->tx_bufs[thread_index], max_enq);
+	  ptls_buffer_init (buf, pm->tx_bufs[thread_index], max_enq);
+	  rv = ptls_send (ptls_ctx->tls, buf, app_buf, deq_len);
 
-  /** Allowed to execute no-copy crypto operation **/
-  if (enq_now > record_overhead)
-    {
-      is_nocopy = 1;
-      from_app_len = clib_min (deq_now, enq_now);
-      num_records =
-	ceil ((f64) from_app_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
-      total_overhead = num_records * record_overhead;
-      if (from_app_len + total_overhead > enq_now)
-	from_app_len = enq_now - total_overhead;
+	  assert (rv == 0);
+	  wrote += buf->off;
+
+	  left = ptls_copy_buf_to_fs (buf, buf->off, tcp_fs, &ti, n_tcp_segs);
+	  assert (left == 0);
+	}
+
+      read += deq_len;
+      ASSERT (deq_len >= first_chunk_len);
+
+      if (deq_len == app_buf_len)
+	{
+	  i += 1;
+	}
+      else
+	{
+	  app_fs[i].len -= deq_len - first_chunk_len;
+	  app_fs[i].data += deq_len - first_chunk_len;
+	}
     }
-  else
+
+  if (read)
     {
-      from_app_len = clib_min (deq_now, enq_max);
-      num_records =
-	ceil ((f64) from_app_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
-      total_overhead = num_records * record_overhead;
-      if (from_app_len + total_overhead > enq_max)
-	from_app_len = enq_max - total_overhead;
+      svm_fifo_dequeue_drop (app_tx_fifo, read);
+      if (svm_fifo_needs_deq_ntf (app_tx_fifo, read))
+	session_dequeue_notify (app_session);
     }
 
-  to_tls_len =
-    picotls_content_process (ptls_ctx, app_tx_fifo, tls_tx_fifo,
-			     from_app_len, total_overhead, is_nocopy);
-  if (!TLS_WRITE_IS_LEFT (ptls_ctx))
+  if (wrote)
     {
-      ptls_ctx->write_buffer_offset = 0;
-      ptls_buffer_init (&ptls_ctx->write_buffer, "", 0);
+      svm_fifo_enqueue_nocopy (tcp_tx_fifo, wrote);
+      if (svm_fifo_set_event (tcp_tx_fifo))
+	session_send_io_evt_to_thread (tcp_tx_fifo, SESSION_IO_EVT_TX);
     }
 
-  if (svm_fifo_needs_deq_ntf (app_tx_fifo, from_app_len))
-    session_dequeue_notify (app_session);
+  return wrote;
+}
+
+/**
+ * Encrypt pending app tx data into the tcp session's tx fifo, then either
+ * deschedule and wait for a dequeue notification or request a tx reschedule.
+ *
+ * @return number of ciphertext bytes written to the tcp tx fifo
+ */
+static inline int
+picotls_ctx_write (tls_ctx_t *ctx, session_t *app_session,
+		   transport_send_params_t *sp)
+{
+  picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
+  u32 deq_max, deq_now, enq_max, enq_buf, wrote = 0;
+  svm_fifo_t *tcp_tx_fifo;
+  session_t *tcp_session;
 
-  if (to_tls_len)
-    tls_add_vpp_q_tx_evt (tls_session);
+  tcp_session = session_get_from_handle (ctx->tls_session_handle);
+  tcp_tx_fifo = tcp_session->tx_fifo;
 
-  if (from_app_len < deq_max || TLS_WRITE_IS_LEFT (ptls_ctx))
-    app_session->flags |= SESSION_F_CUSTOM_TX;
+  enq_max = svm_fifo_max_enqueue_prod (tcp_tx_fifo);
+  if (enq_max < 2048)
+    goto check_tls_fifo;
+
+  deq_max = svm_fifo_max_dequeue_cons (app_session->tx_fifo);
+  deq_max = clib_min (deq_max, enq_max);
+  if (!deq_max)
+    goto check_tls_fifo;
+
+  deq_now = clib_min (deq_max, sp->max_burst_size);
+  wrote = ptls_app_to_tcp_write (ptls_ctx, app_session, tcp_tx_fifo, deq_now);
+
+check_tls_fifo:
 
   if (ctx->app_closed)
     picotls_app_close (ctx);
 
-  return to_tls_len;
+  /* Deschedule and wait for deq notification if fifo is almost full */
+  enq_buf = clib_min (svm_fifo_size (tcp_tx_fifo) / 2, TLSP_MIN_ENQ_SPACE);
+  if (enq_max < wrote + enq_buf)
+    {
+      svm_fifo_add_want_deq_ntf (tcp_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+      transport_connection_deschedule (&ctx->connection);
+      sp->flags |= TRANSPORT_SND_F_DESCHED;
+    }
+  else
+    /* Request tx reschedule of the app session */
+    app_session->flags |= SESSION_F_CUSTOM_TX;
+
+  return wrote;
 }
 
 static int
@@ -538,7 +649,6 @@ picotls_ctx_init_server (tls_ctx_t * ctx)
 
   ptls_ctx->rx_len = 0;
   ptls_ctx->rx_offset = 0;
-  ptls_ctx->write_buffer_offset = 0;
 
   return 0;
 }
@@ -562,7 +672,6 @@ picotls_ctx_init_client (tls_ctx_t *ctx)
 
   ptls_ctx->rx_len = 0;
   ptls_ctx->rx_offset = 0;
-  ptls_ctx->write_buffer_offset = 0;
 
   ptls_buffer_init (&hs_buf, "", 0);
   if (ptls_handshake (ptls_ctx->tls, &hs_buf, NULL, NULL, &hsprop) !=
@@ -629,6 +738,8 @@ tls_picotls_init (vlib_main_t * vm)
 
   vec_validate (pm->ctx_pool, num_threads - 1);
   vec_validate (pm->rx_bufs, num_threads - 1);
+  vec_validate (pm->tx_bufs, num_threads - 1);
+
   clib_rwlock_init (&picotls_main.crypto_keys_rw_lock);
 
   tls_register_engine (&picotls_engine, CRYPTO_ENGINE_PICOTLS);
diff --git a/src/plugins/tlspicotls/tls_picotls.h b/src/plugins/tlspicotls/tls_picotls.h
index e86b30aedfc..d24d7e2f172 100644
--- a/src/plugins/tlspicotls/tls_picotls.h
+++ b/src/plugins/tlspicotls/tls_picotls.h
@@ -16,9 +16,7 @@
 
 #define TLS_READ_IS_LEFT(x) ((x)->read_buffer.off != 0 && (x)->read_buffer.off != (x)->read_buffer_offset)
 #define TLS_READ_LEFT_LEN(x) ((x)->read_buffer.off - (x)->read_buffer_offset)
-#define TLS_WRITE_OFFSET(x) ((x)->write_buffer.base + (x)->write_buffer_offset)
-#define TLS_WRITE_IS_LEFT(x) ((x)->write_buffer.off != 0 && (x)->write_buffer.off != (x)->write_buffer_offset)
-
+#define TLSP_MIN_ENQ_SPACE (1 << 16)
 
 typedef struct tls_ctx_picotls_
 {
@@ -29,10 +27,7 @@ typedef struct tls_ctx_picotls_
   int rx_offset;
   int rx_len;
   ptls_buffer_t read_buffer;
-  ptls_buffer_t write_buffer;
-  uint8_t *write_content;
   int read_buffer_offset;
-  int write_buffer_offset;
 } picotls_ctx_t;
 
 typedef struct tls_listen_ctx_picotls_
@@ -44,8 +39,9 @@ typedef struct tls_listen_ctx_picotls_
 typedef struct picotls_main_
 {
   picotls_ctx_t ***ctx_pool;
-  u8 **rx_bufs;
   picotls_listen_ctx_t *lctx_pool;
+  u8 **tx_bufs;
+  u8 **rx_bufs;
   ptls_context_t *client_ptls_ctx;
   clib_rwlock_t crypto_keys_rw_lock;
 } picotls_main_t;
-- 
cgit 1.2.3-korg