summaryrefslogtreecommitdiffstats
path: root/src/vnet/session/session.c
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2017-09-05 14:03:37 -0400
committerDamjan Marion <dmarion.lists@gmail.com>2017-09-12 11:41:10 +0000
commit4eeeaaf5e822718eb222e6c49abd82e1bcb566fd (patch)
treef8ceca24b5f954bc615f1ef2e9383652b035052b /src/vnet/session/session.c
parent2504ac699e423f1ca840a63247ce55cb27735e0a (diff)
tcp: horizontal scaling improvements
- do not scale syn-ack window - fix the max number of outstanding syns in builtin client - fix syn-sent ack validation to use modulo arithmetic - improve retransmit timer handler - fix output buffer allocator leakage - improved debugging Change-Id: Iac3bc0eadf7d0b494a93e22d210a3153b61b3273 Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/session/session.c')
-rw-r--r--src/vnet/session/session.c21
1 files changed, 11 insertions, 10 deletions
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 17644e292a9..4544f9a0f93 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -456,13 +456,16 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
st);
if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE)
{
- clib_warning ("This can't be good!");
+ clib_warning ("half-open was removed!");
return -1;
}
+ /* Cleanup half-open table */
+ stream_session_half_open_table_del (tc);
+
/* Get the app's index from the handle we stored when opening connection
* and the opaque (api_context for external apps) from transport session
- * index*/
+ * index */
app = application_get_if_valid (handle >> 32);
if (!app)
return -1;
@@ -499,9 +502,6 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
new_s->session_state = SESSION_STATE_READY;
}
- /* Cleanup session lookup */
- stream_session_half_open_table_del (tc);
-
return error;
}
@@ -535,7 +535,7 @@ stream_session_disconnect_notify (transport_connection_t * tc)
}
/**
- * Cleans up session and associated app if needed.
+ * Cleans up session and lookup table.
*/
void
stream_session_delete (stream_session_t * s)
@@ -559,9 +559,10 @@ stream_session_delete (stream_session_t * s)
/**
* Notification from transport that connection is being deleted
*
- * This should be called only on previously fully established sessions. For
- * instance failed connects should call stream_session_connect_notify and
- * indicate that the connect has failed.
+ * This removes the session if it is still valid. It should be called only on
+ * previously fully established sessions. For instance failed connects should
+ * call stream_session_connect_notify and indicate that the connect has
+ * failed.
*/
void
stream_session_delete_notify (transport_connection_t * tc)
@@ -748,7 +749,7 @@ session_send_session_evt_to_thread (u64 session_handle,
if (PREDICT_TRUE (q->cursize < q->maxsize))
{
if (unix_shared_memory_queue_add (q, (u8 *) & evt,
- 1 /* do wait for mutex */ ))
+ 0 /* do wait for mutex */ ))
{
clib_warning ("failed to enqueue evt");
}
> 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
#ifndef _FA_NODE_H_
#define _FA_NODE_H_

#include <stddef.h>
#include <vppinfra/bihash_16_8.h>
#include <vppinfra/bihash_40_8.h>

#include <plugins/acl/exported_types.h>

// #define FA_NODE_VERBOSE_DEBUG 3

/* Individual TCP flag bits, one define per bit of an 8-bit flags value. */
#define TCP_FLAG_FIN    0x01
#define TCP_FLAG_SYN    0x02
#define TCP_FLAG_RST    0x04
#define TCP_FLAG_PUSH   0x08
#define TCP_FLAG_ACK    0x10
#define TCP_FLAG_URG    0x20
#define TCP_FLAG_ECE    0x40
#define TCP_FLAG_CWR    0x80
/* Combined masks: every flag is a distinct bit, so OR-ing them gives the same value as summing. */
#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK)
#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN | TCP_FLAG_ACK)

/* Default sizing of the connection table: 64K buckets, 1 GiB (2^30 bytes) of hash memory, 500000 entries. */
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (1 << 16)
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1ULL << 30)
#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 500000

/*
 * Per-packet metadata packed into one 64-bit word. The size is pinned to
 * sizeof(u64) by a compile-time assertion in this header, and inside
 * fa_5tuple_t this union occupies the slot that lines up with the bihash
 * key/value "value" field.
 */
typedef union {
  u64 as_u64;                 /* the whole record viewed as a single word */
  struct {
    u32 lc_index;
    u16 mask_type_index_lsb;
    u8 tcp_flags;
    /* single-bit flags; together with flags_reserved they fill one byte */
    u8 tcp_flags_valid:1;     /* presumably: tcp_flags holds meaningful data - confirm */
    u8 l4_valid:1;
    u8 is_nonfirst_fragment:1;
    u8 is_ip6:1;
    u8 flags_reserved:4;
  };
} fa_packet_info_t;

/* Bit flags stored in the l4_flags byte of fa_session_l4_key_t. */
typedef enum {
  FA_SK_L4_FLAG_IS_INPUT    = 0x01,
  FA_SK_L4_FLAG_IS_SLOWPATH = 0x02,
} fa_session_l4_key_l4_flags_t;

/*
 * L4 part of the session key, packed into one 64-bit word (the size is
 * pinned to sizeof(u64) by a compile-time assertion in this header).
 */
typedef union {
  u64 as_u64;                     /* the whole key viewed as a single word */
  struct {
    u16 port[2];                  /* printed as "port[0] -> port[1]" by the formatter */
    union {
      struct {
        u8 proto;
        u8 l4_flags;              /* bits from fa_session_l4_key_l4_flags_t */
        u16 lsb_of_sw_if_index;
      };
      u32 non_port_l4_data;       /* the three fields above viewed as one u32 */
    };
  };
} fa_session_l4_key_t;


/*
 * Report whether a packed L4 key has the slow-path flag set.
 *
 * l4key - an fa_session_l4_key_t in its as_u64 representation
 * Returns 1 when FA_SK_L4_FLAG_IS_SLOWPATH is set in l4_flags, 0 otherwise.
 */
static_always_inline
int is_session_l4_key_u64_slowpath(u64 l4key) {
  fa_session_l4_key_t key;

  key.as_u64 = l4key;
  return (key.l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) != 0;
}

/*
 * Session lookup key plus per-packet info, laid out so the same storage can
 * be handed to either bihash flavour:
 *   - kv_40_8 overlays the whole union (both IPv6 addresses + l4 as the key,
 *     pkt as the value);
 *   - kv_16_8 starts after three u64 of padding, so its key covers the two
 *     IPv4 addresses + l4 and its value again lands on pkt.
 * The offsets and sizes relied upon here are verified by the compile-time
 * assertions further down in this header.
 */
typedef union {
  struct {
    union {
      struct {
        /* we put the IPv4 addresses
           after padding so we can still
           use them as (shorter) key together with
           L4 info */
        u32 l3_zero_pad[6];
        ip4_address_t ip4_addr[2];
      };
      ip6_address_t ip6_addr[2];
    };
    fa_session_l4_key_t l4;
    /* This field should align with u64 value in bihash_40_8 and bihash_16_8 keyvalue struct */
    fa_packet_info_t pkt;
  };
  /* view of the full union as a 40-byte-key bihash key/value pair */
  clib_bihash_kv_40_8_t kv_40_8;
  struct {
    /* skips the leading 24 bytes so kv_16_8 begins at ip4_addr */
    u64 padding_for_kv_16_8[3];
    clib_bihash_kv_16_8_t kv_16_8;
  };
} fa_5tuple_t;

/*
 * format() helper that appends a textual dump of an L4 session key to s.
 *
 * args supplies a single fa_session_l4_key_t pointer. The output lists the
 * sw_if_index LSBs, protocol, the input and slow-path flag bits decoded as
 * 0/1, the raw l4_flags byte and both ports.
 * Returns the vector returned by format().
 */
static_always_inline u8 *
format_fa_session_l4_key(u8 * s, va_list * args)
{
  fa_session_l4_key_t *key = va_arg (*args, fa_session_l4_key_t *);
  int input_bit = (key->l4_flags & FA_SK_L4_FLAG_IS_INPUT) != 0;
  int slowpath_bit = (key->l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) != 0;

  s = format (s, "l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_flags 0x%02x port %d -> %d",
              key->lsb_of_sw_if_index,
              key->proto, input_bit, slowpath_bit,
              key->l4_flags, key->port[0], key->port[1]);
  return s;
}

/*
 * One session entry as stored in a worker's session pool.
 *
 * The trailing comments keep a running byte count: the first group of
 * fields adds up to 64 bytes and the second group to another 64, for a
 * total of 128 bytes. That total is enforced by a compile-time assertion
 * further down in this header, so any field change must keep it intact.
 */
typedef struct {
  fa_5tuple_t info; /* (5+1)*8 = 48 bytes */
  u64 last_active_time;   /* +8 bytes = 56 */
  u32 sw_if_index;        /* +4 bytes = 60 */
  union {
    u8 as_u8[2];
    u16 as_u16;
  } tcp_flags_seen;       /* +2 bytes = 62 */
  u16 thread_index;          /* +2 bytes = 64 */
  u64 link_enqueue_time;  /* 8 byte = 8 */
  u32 link_prev_idx;      /* +4 bytes = 12 */
  u32 link_next_idx;      /* +4 bytes = 16 */
  u8 link_list_id;        /* +1 bytes = 17 */
  u8 deleted;             /* +1 bytes = 18 */
  u8 is_ip6;              /* +1 bytes = 19 */
  u8 reserved1[5];        /* +5 bytes = 24 */
  u64 reserved2[5];       /* +5*8 bytes = 64 */
} fa_session_t;

/* input policy epochs have the MSB (bit 15) set ... */
#define FA_POLICY_EPOCH_IS_INPUT 0x8000
/* ... and the mask covers the remaining low 15 bits */
#define FA_POLICY_EPOCH_MASK 0x7fff


/* This structure is used to fill in the u64 value
   in the per-sw-if-index hash table.
   Its size is pinned to sizeof(u64) by a compile-time assertion
   further down in this header. */
typedef struct {
  union {
    u64 as_u64;                 /* the whole id viewed as a single word */
    struct {
      u32 session_index;
      u16 thread_index;
      u16 intf_policy_epoch;    /* see FA_POLICY_EPOCH_MASK / FA_POLICY_EPOCH_IS_INPUT */
    };
  };
} fa_full_session_id_t;

/*
 * A few compile-time constraints on the size and the layout of the union, to ensure
 * it makes sense both for bihash and for us.
 *
 * CT_ASSERT_EQUAL(name, x, y) declares an array typedef whose size is 1 when
 * x == y and -1 otherwise, so a mismatch stops the build with an error that
 * mentions assert_<name>_compile_time_assertion_failed. A passing assertion
 * uses size 1 (not 0) because zero-length arrays are a compiler extension
 * rather than valid ISO C.
 */

#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y)) ? 1 : -1]
/* pkt must sit exactly where the bihash "value" lives, for both key widths */
CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value));
CT_ASSERT_EQUAL(fa_ip6_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_40_8.value));
CT_ASSERT_EQUAL(fa_ip4_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_16_8.value));
/* the L4 key and the packet info are each exactly one u64 */
CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64));
CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64));
/* the whole 5-tuple is the size of one 40_8 key/value pair */
CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t));
/* the IPv4 addresses begin where the 16_8 key begins, and both kv views share the value slot */
CT_ASSERT_EQUAL(fa_ip4_starts_at_kv16_key, offsetof(fa_5tuple_t, ip4_addr), offsetof(fa_5tuple_t, kv_16_8));
CT_ASSERT_EQUAL(fa_ip4_and_ip6_kv_value_match, offsetof(fa_5tuple_t, kv_16_8.value), offsetof(fa_5tuple_t, kv_40_8.value));

/* Let's try to fit within two cachelines */
CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128);

/* Session ID MUST be the same as u64 */
CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64));

/* the opaque 5-tuple type (declared outside this header) must be the same size as fa_5tuple_t */
CT_ASSERT_EQUAL(fa_5tuple_opaque_t_must_match_5tuple, sizeof(fa_5tuple_opaque_t), sizeof(fa_5tuple_t));
/* the helper is only meant for the checks above */
#undef CT_ASSERT_EQUAL

/* All-ones marker value (int ~0); parenthesized so it expands as a single operand. */
#define FA_SESSION_BOGUS_INDEX (~0)

/*
 * State kept separately for every worker thread: the worker's session pool,
 * queues of session change requests coming from other workers, the
 * per-timeout connection lists with their expiry bookkeeping, statistics
 * counters, interrupt coordination flags, and frame-sized scratch arrays
 * used while a frame is being processed.
 */
typedef struct {
  /* The pool of sessions managed by this worker */
  fa_session_t *fa_sessions_pool;
  /* incoming session change requests from other workers */
  clib_spinlock_t pending_session_change_request_lock;
  u64 *pending_session_change_requests;
  u64 *wip_session_change_requests;
  u64 rcvd_session_change_requests;
  u64 sent_session_change_requests;
  /* per-worker ACL_N_TIMEOUTS of conn lists */
  u32 *fa_conn_list_head;
  u32 *fa_conn_list_tail;
  /* expiry time set whenever an element is enqueued */
  u64 *fa_conn_list_head_expiry_time;
  /* adds and deletes per-worker-per-interface */
  u64 *fa_session_dels_by_sw_if_index;
  u64 *fa_session_adds_by_sw_if_index;
  /* sessions deleted due to epoch change */
  u64 *fa_session_epoch_change_by_sw_if_index;
  /* Vector of expired connections retrieved from lists */
  u32 *expired;
  /* the earliest next expiry time */
  u64 next_expiry_time;
  /* if not zero, look at all the elements until their enqueue timestamp is after below one */
  u64 requeue_until_time;
  /* Current time between the checks */
  u64 current_time_wait_interval;
  /* Counter of how many sessions we did delete */
  u64 cnt_deleted_sessions;
  /* Counter of already deleted sessions being deleted - should not increment unless a bug */
  u64 cnt_already_deleted_sessions;
  /* Number of times we requeued a session to a head of the list */
  u64 cnt_session_timer_restarted;
  /* swipe up to this enqueue time, rather than following the timeouts */
  u64 swipe_end_time;
  /* bitmap of sw_if_index serviced by this worker */
  uword *serviced_sw_if_index_bitmap;
  /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */
  uword *pending_clear_sw_if_index_bitmap;
  /* atomic, indicates that the swipe-deletion of connections is in progress */
  u32 clear_in_process;
  /* Interrupt is pending from main thread */
  int interrupt_is_pending;
  /*
   * Interrupt node on the worker thread sets this if it knows there is
   * more work to do, but it has to finish to avoid hogging the
   * core for too long.
   */
  int interrupt_is_needed;
  /*
   * Set to indicate that the interrupt node wants to get less interrupts
   * because there is not enough work for the current rate.
   */
  int interrupt_is_unwanted;
  /*
   * Set to copy of a "generation" counter in main thread so we can sync the interrupts.
   */
  int interrupt_generation;
  /*
   * work in progress data for the pipelined node operation;
   * every array holds one slot per packet of a frame (VLIB_FRAME_SIZE)
   */
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
  u32 sw_if_indices[VLIB_FRAME_SIZE];
  fa_5tuple_t fa_5tuples[VLIB_FRAME_SIZE];
  u64 hashes[VLIB_FRAME_SIZE];
  u16 nexts[VLIB_FRAME_SIZE];

} acl_fa_per_worker_data_t;


/* Next-node indices; ACL_FA_N_NEXT stays last so it equals the number of entries. */
typedef enum {
  ACL_FA_ERROR_DROP = 0,
  ACL_FA_N_NEXT,
} acl_fa_next_t;


/* Event codes for the cleaner process; numbering starts at 1. */
typedef enum {
  ACL_FA_CLEANER_RESCHEDULE            = 1,
  ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX = 2,
} acl_fa_cleaner_process_event_e;

/* Functions declared here are defined outside this header. */

/* NOTE(review): judging by the names, turns processing on or off
   (enable_disable) for one interface (sw_if_index) in one direction
   (is_input) - confirm against the definition. */
void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable);

/* NOTE(review): presumably prints the session hash table via vm, with
   verbose selecting the level of detail - confirm against the definition. */
void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose);

/* format()-style callback (takes the output vector and a va_list);
   NOTE(review): the expected va_list contents are not visible here. */
u8 *format_acl_plugin_5tuple (u8 * s, va_list * args);

/*
 * Log one value to the event log, tagged with the calling thread's index,
 * but only when am->trace_sessions is non-zero.
 *
 *   am                          - pointer expression whose trace_sessions member gates the trace
 *   acl_elog_trace_format_label - string literal appended to the "(%02d) " format prefix
 *   acl_elog_trace_format_args  - string literal appended to the "i2" format_args prefix
 *   acl_elog_val1               - value to store; its type determines the field size
 *
 * static_check is a size guard: the array dimension is 18 minus the size of
 * the value, i.e. the room left next to the u16 thread field.
 * The macro declares locals named static_check, thread_index, w, e and ed,
 * so argument expressions must not use those identifiers.
 *
 * use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value);
 */

#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1)              \
do {                                                                                                                     \
  if ((am)->trace_sessions) {                                                                                            \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check);                               \
    u16 thread_index = os_get_thread_index ();                                                                           \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                                                       \
    ELOG_TYPE_DECLARE (e) =                                                                                              \
      {                                                                                                                  \
        .format = "(%02d) " acl_elog_trace_format_label,                                                                 \
        .format_args = "i2" acl_elog_trace_format_args,                                                                  \
      };                                                                                                                 \
    CLIB_PACKED(struct                                                                                                   \
      {                                                                                                                  \
        u16 thread;                                                                                                      \
        typeof(acl_elog_val1) val1;                                                                                      \
      }) *ed;                                                                                                            \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                                                \
    ed->thread = thread_index;                                                                                           \
    ed->val1 = acl_elog_val1;                                                                                            \
  }                                                                                                                      \
} while (0)


/*
 * Two-value variant: log two values to the event log, tagged with the
 * calling thread's index, but only when am->trace_sessions is non-zero.
 *
 *   am                          - pointer expression whose trace_sessions member gates the trace
 *   acl_elog_trace_format_label - string literal appended to the "(%02d) " format prefix
 *   acl_elog_trace_format_args  - string literal appended to the "i2" format_args prefix
 *   acl_elog_val1, acl_elog_val2 - values to store; their types determine the field sizes
 *
 * static_check is a size guard: the array dimension is 18 minus the combined
 * size of the values, i.e. the room left next to the u16 thread field.
 * The macro declares locals named static_check, thread_index, w, e and ed,
 * so argument expressions must not use those identifiers.
 *
 * use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value);
 */

#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args,                             \
                                                                                           acl_elog_val1, acl_elog_val2) \
do {                                                                                                                     \
  if ((am)->trace_sessions) {                                                                                            \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check);       \
    u16 thread_index = os_get_thread_index ();                                                                           \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                                                       \
    ELOG_TYPE_DECLARE (e) =                                                                                              \
      {                                                                                                                  \
        .format = "(%02d) " acl_elog_trace_format_label,                                                                 \
        .format_args = "i2" acl_elog_trace_format_args,                                                                  \
      };                                                                                                                 \
    CLIB_PACKED(struct                                                                                                   \
      {                                                                                                                  \
        u16 thread;                                                                                                      \
        typeof(acl_elog_val1) val1;                                                                                      \
        typeof(acl_elog_val2) val2;                                                                                      \
      }) *ed;                                                                                                            \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                                                \
    ed->thread = thread_index;                                                                                           \
    ed->val1 = acl_elog_val1;                                                                                            \
    ed->val2 = acl_elog_val2;                                                                                            \
  }                                                                                                                      \
} while (0)


/*
 * Three-value variant: log three values to the event log, tagged with the
 * calling thread's index, but only when am->trace_sessions is non-zero.
 *
 *   am                          - pointer expression whose trace_sessions member gates the trace
 *   acl_elog_trace_format_label - string literal appended to the "(%02d) " format prefix
 *   acl_elog_trace_format_args  - string literal appended to the "i2" format_args prefix
 *   acl_elog_val1 .. acl_elog_val3 - values to store; their types determine the field sizes
 *
 * static_check is a size guard: the array dimension is 18 minus the combined
 * size of the values, i.e. the room left next to the u16 thread field.
 * The macro declares locals named static_check, thread_index, w, e and ed,
 * so argument expressions must not use those identifiers.
 *
 * use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value);
 */

#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1,              \
                                                                                           acl_elog_val2, acl_elog_val3) \
do {                                                                                                                     \
  if ((am)->trace_sessions) {                                                                                            \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)                           \
                                               - sizeof(acl_elog_val3)]; } *static_check);                               \
    u16 thread_index = os_get_thread_index ();                                                                           \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                                                       \
    ELOG_TYPE_DECLARE (e) =                                                                                              \
      {                                                                                                                  \
        .format = "(%02d) " acl_elog_trace_format_label,                                                                 \
        .format_args = "i2" acl_elog_trace_format_args,                                                                  \
      };                                                                                                                 \
    CLIB_PACKED(struct                                                                                                   \
      {                                                                                                                  \
        u16 thread;                                                                                                      \
        typeof(acl_elog_val1) val1;                                                                                      \
        typeof(acl_elog_val2) val2;                                                                                      \
        typeof(acl_elog_val3) val3;                                                                                      \
      }) *ed;                                                                                                            \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                                                \
    ed->thread = thread_index;                                                                                           \
    ed->val1 = acl_elog_val1;                                                                                            \
    ed->val2 = acl_elog_val2;                                                                                            \
    ed->val3 = acl_elog_val3;                                                                                            \
  }                                                                                                                      \
} while (0)


/*
 * Four-value variant: log four values to the event log, tagged with the
 * calling thread's index, but only when am->trace_sessions is non-zero.
 *
 *   am                          - pointer expression whose trace_sessions member gates the trace
 *   acl_elog_trace_format_label - string literal appended to the "(%02d) " format prefix
 *   acl_elog_trace_format_args  - string literal appended to the "i2" format_args prefix
 *   acl_elog_val1 .. acl_elog_val4 - values to store; their types determine the field sizes
 *
 * static_check is a size guard: the array dimension is 18 minus the combined
 * size of the values, i.e. the room left next to the u16 thread field.
 * The macro declares locals named static_check, thread_index, w, e and ed,
 * so argument expressions must not use those identifiers.
 *
 * use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value);
 */

#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1,              \
                                                                            acl_elog_val2, acl_elog_val3, acl_elog_val4) \
do {                                                                                                                     \
  if ((am)->trace_sessions) {                                                                                            \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)                           \
                                               - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check);        \
    u16 thread_index = os_get_thread_index ();                                                                           \
    vlib_worker_thread_t * w = vlib_worker_threads + thread_index;                                                       \
    ELOG_TYPE_DECLARE (e) =                                                                                              \
      {                                                                                                                  \
        .format = "(%02d) " acl_elog_trace_format_label,                                                                 \
        .format_args = "i2" acl_elog_trace_format_args,                                                                  \
      };                                                                                                                 \
    CLIB_PACKED(struct                                                                                                   \
      {                                                                                                                  \
        u16 thread;                                                                                                      \
        typeof(acl_elog_val1) val1;                                                                                      \
        typeof(acl_elog_val2) val2;                                                                                      \
        typeof(acl_elog_val3) val3;                                                                                      \
        typeof(acl_elog_val4) val4;                                                                                      \
      }) *ed;                                                                                                            \
    ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);                                                \
    ed->thread = thread_index;                                                                                           \
    ed->val1 = acl_elog_val1;                                                                                            \
    ed->val2 = acl_elog_val2;                                                                                            \
    ed->val3 = acl_elog_val3;                                                                                            \
    ed->val4 = acl_elog_val4;                                                                                            \
  }                                                                                                                      \
} while (0)


#endif