summaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp/tcp_output.c
diff options
context:
space:
mode:
author Florin Coras <fcoras@cisco.com> 2019-02-21 16:46:24 -0800
committer Damjan Marion <dmarion@me.com> 2019-02-22 10:55:27 +0000
commit e5b17918e78c974b43fe41300d2f5d817e89c30b (patch)
tree efa59300186940fcada3b4018443e14743bb38ed /src/vnet/tcp/tcp_output.c
parent 78b5fa6398d02af4f4f92e4bc9cc22c010ae24f9 (diff)
tcp: send enough dupacks to cover all sack holes
Make sure we send enough dupacks to cover all the holes created in the last frame received. Also make sure we send all the blocks, not just the first.

Change-Id: I9597a34ac14473d1cc3ad07d65bc37043e3d0582
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp_output.c')
-rw-r--r-- src/vnet/tcp/tcp_output.c 44
1 files changed, 33 insertions, 11 deletions
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 725ffec0852..4de479c344b 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -253,14 +253,12 @@ tcp_options_write (u8 * data, tcp_options_t * opts)
if (tcp_opts_sack (opts))
{
int i;
- u32 n_sack_blocks = clib_min (vec_len (opts->sacks),
- TCP_OPTS_MAX_SACK_BLOCKS);
- if (n_sack_blocks != 0)
+ if (opts->n_sack_blocks != 0)
{
*data++ = TCP_OPTION_SACK_BLOCK;
- *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
- for (i = 0; i < n_sack_blocks; i++)
+ *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
+ for (i = 0; i < opts->n_sack_blocks; i++)
{
buf = clib_host_to_net_u32 (opts->sacks[i].start);
clib_memcpy_fast (data, &buf, seq_len);
@@ -269,7 +267,7 @@ tcp_options_write (u8 * data, tcp_options_t * opts)
clib_memcpy_fast (data, &buf, seq_len);
data += seq_len;
}
- opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
+ opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
}
}
@@ -372,9 +370,13 @@ tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
if (vec_len (tc->snd_sacks))
{
opts->flags |= TCP_OPTS_FLAG_SACK;
- opts->sacks = tc->snd_sacks;
- opts->n_sack_blocks = clib_min (vec_len (tc->snd_sacks),
+ if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
+ tc->snd_sack_pos = 0;
+ opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
+ opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
+ opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
TCP_OPTS_MAX_SACK_BLOCKS);
+ tc->snd_sack_pos += opts->n_sack_blocks;
len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
}
}
@@ -1250,14 +1252,34 @@ tcp_send_acks (tcp_worker_ctx_t * wrk)
{
tc = tcp_connection_get (pending_acks[i], thread_index);
tc->flags &= ~TCP_CONN_SNDACK;
- n_acks = clib_max (1, tc->pending_dupacks);
+ if (!tc->pending_dupacks)
+ {
+ tcp_send_ack (tc);
+ continue;
+ }
+
/* If we're supposed to send dupacks but have no ooo data
* send only one ack */
- if (tc->pending_dupacks && !vec_len (tc->snd_sacks))
- n_acks = 1;
+ if (!vec_len (tc->snd_sacks))
+ {
+ tcp_send_ack (tc);
+ continue;
+ }
+
+ /* Start with first sack block */
+ tc->snd_sack_pos = 0;
+
+ /* Generate enough dupacks to cover all sack blocks. Do not generate
+ * more sacks than the number of packets received. But do generate at
+ * least 3, i.e., the number needed to signal congestion, if needed. */
+ n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
+ n_acks = clib_min (n_acks, tc->pending_dupacks);
+ n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
for (j = 0; j < n_acks; j++)
tcp_send_ack (tc);
+
tc->pending_dupacks = 0;
+ tc->snd_sack_pos = 0;
}
_vec_len (wrk->pending_acks) = 0;
}
0'>310
/*
 *------------------------------------------------------------------
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#ifndef _AVF_H_
#define _AVF_H_

#include <avf/virtchnl.h>

#include <vlib/log.h>

/*
 * X-macro table of per-device flags. Every row has the form
 *   _(bit position, flag name suffix, display string)
 * A user defines _() to pick the columns it needs, expands the table,
 * then undefines _() again.
 */
#define foreach_avf_device_flags		\
  _(0, INITIALIZED, "initialized")		\
  _(1, ERROR, "error")				\
  _(2, ADMIN_UP, "admin-up")			\
  _(3, IOVA, "iova")				\
  _(4, LINK_UP, "link-up")			\
  _(5, SHARED_TXQ_LOCK, "shared-txq-lock")	\
  _(6, ELOG, "elog")

/* One AVF_DEVICE_F_<name> bit mask per table row: 1 << bit position. */
enum
{
#define _(bit, name, str) AVF_DEVICE_F_##name = (1 << bit),
  foreach_avf_device_flags
#undef _
};

/*
 * Receive descriptor.
 *
 * The anonymous union exposes the same storage three ways: as named
 * bit-fields, as four 64-bit words (qword[]), and - only when
 * CLIB_HAVE_VEC256 is defined - as a single u64x4 vector.
 * The whole type is volatile-qualified.
 */
typedef volatile struct
{
  union
  {
    struct
    {
      /* 13 + 3 + 16 + 32 = 64 bits of bit-fields */
      u64 mirr:13;
      u64 rsv1:3;
      u64 l2tag1:16;
      u64 filter_status:32;
      /* 19 + 8 + 3 + 8 + 26 = a further 64 bits of bit-fields */
      u64 status:19;
      u64 error:8;
      u64 rsv2:3;
      u64 ptype:8;
      u64 length:26;
    };
    /* raw view; the bit-fields above only name the first 128 bits */
    u64 qword[4];
#ifdef CLIB_HAVE_VEC256
    u64x4 as_u64x4;
#endif
  };
} avf_rx_desc_t;

/* Build fails unless the descriptor is exactly 32 bytes. */
STATIC_ASSERT_SIZEOF (avf_rx_desc_t, 32);

/*
 * Transmit descriptor.
 *
 * Stored as two 64-bit words; when CLIB_HAVE_VEC128 is defined the same
 * storage can also be accessed as one u64x2 vector.
 * The whole type is volatile-qualified.
 */
typedef volatile struct
{
  union
  {
    u64 qword[2];
#ifdef CLIB_HAVE_VEC128
    u64x2 as_u64x2;
#endif
  };
} avf_tx_desc_t;

/* Build fails unless the descriptor is exactly 16 bytes. */
STATIC_ASSERT_SIZEOF (avf_tx_desc_t, 16);

/*
 * Per receive-queue state. The structure starts with a cache-line
 * alignment marker.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* NOTE(review): presumably the queue's RX tail register - confirm */
  volatile u32 *qrx_tail;
  /* NOTE(review): presumably the next ring slot to process - confirm */
  u16 next;
  /* NOTE(review): presumably the number of slots in the ring - confirm */
  u16 size;
  /* RX descriptors (avf_rx_desc_t) */
  avf_rx_desc_t *descs;
  /* NOTE(review): presumably buffer indices, one per ring slot - confirm */
  u32 *bufs;
  u16 n_enqueued;
  /* NOTE(review): presumably non-zero when in interrupt mode - confirm */
  u8 int_mode;
} avf_rxq_t;

/*
 * Per transmit-queue state. The structure starts with a cache-line
 * alignment marker.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* NOTE(review): presumably the queue's TX tail register - confirm */
  volatile u32 *qtx_tail;
  /* NOTE(review): presumably the next ring slot to fill - confirm */
  u16 next;
  /* NOTE(review): presumably the number of slots in the ring - confirm */
  u16 size;
  /* NOTE(review): presumably used when the device has the
   * AVF_DEVICE_F_SHARED_TXQ_LOCK flag set - confirm against the TX path */
  clib_spinlock_t lock;
  /* TX descriptors (avf_tx_desc_t) */
  avf_tx_desc_t *descs;
  /* NOTE(review): presumably buffer indices, one per ring slot - confirm */
  u32 *bufs;
  u16 n_enqueued;
} avf_txq_t;

/*
 * Per-device state for one AVF interface. The structure starts with a
 * cache-line alignment marker.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* NOTE(review): presumably a mask of AVF_DEVICE_F_* bits - confirm */
  u32 flags;
  u32 per_interface_next_index;

  u32 dev_instance;
  u32 sw_if_index;
  u32 hw_if_index;
  vlib_pci_dev_handle_t pci_dev_handle;
  /* base address that avf_reg_read () / avf_reg_write () add the
   * register offset to */
  void *bar0;

  /* RX / TX queues and their counts */
  avf_rxq_t *rxqs;
  avf_txq_t *txqs;
  u16 n_tx_queues;
  u16 n_rx_queues;

  /* Admin queues: descriptor rings, their buffers, the buffers' _pa
   * values (NOTE(review): presumably physical addresses - confirm) and
   * the next slot index of each ring */
  avf_aq_desc_t *atq;
  avf_aq_desc_t *arq;
  void *atq_bufs;
  void *arq_bufs;
  u64 atq_bufs_pa;
  u64 arq_bufs_pa;
  u16 atq_next_slot;
  u16 arq_next_slot;
  virtchnl_pf_event_t *events;

  u16 vsi_id;
  u32 feature_bitmap;
  /* 6-byte hardware address */
  u8 hwaddr[6];
  u16 num_queue_pairs;
  u16 max_vectors;
  u16 max_mtu;
  u32 rss_key_size;
  u32 rss_lut_size;
  virtchnl_link_speed_t link_speed;

  /* stats */
  virtchnl_eth_stats_t eth_stats;

  /* error */
  clib_error_t *error;
} avf_device_t;

/*
 * Compact per-packet RX record (4 + 2 + 1 + 1 = 8 bytes).
 * NOTE(review): the member names match bit-fields of avf_rx_desc_t, so
 * this is presumably a narrowed copy of those descriptor fields -
 * confirm against the RX path.
 */
typedef struct
{
  u32 status;
  u16 length;
  u8 ptype;
  u8 error;
} avf_rx_vector_entry_t;

/* Build fails unless the entry is exactly 8 bytes. */
STATIC_ASSERT_SIZEOF (avf_rx_vector_entry_t, 8);

/* Number of entries in the per-thread rx_vector array; tied to
 * VLIB_FRAME_SIZE. */
#define AVF_RX_VECTOR_SZ VLIB_FRAME_SIZE

/*
 * Event codes for the AVF process node.
 *
 * This must be a typedef: without the keyword the declaration defines a
 * global *variable* named avf_process_event_t in every translation unit
 * that includes this header, which results in duplicate-definition link
 * errors when building with -fno-common.
 */
typedef enum
{
  AVF_PROCESS_EVENT_START = 1,
  AVF_PROCESS_EVENT_STOP = 2,
  AVF_PROCESS_EVENT_AQ_INT = 3,
} avf_process_event_t;

/*
 * Per-thread scratch data (avf_main_t keeps a pointer to these in
 * per_thread_data). The structure starts with a cache-line alignment
 * marker.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* AVF_RX_VECTOR_SZ compact RX records */
  avf_rx_vector_entry_t rx_vector[AVF_RX_VECTOR_SZ];
  /* NOTE(review): presumably buffer indices queued for freeing - confirm */
  u32 *to_free;
  /* NOTE(review): presumably copied into freshly received buffers -
   * confirm against the RX path */
  vlib_buffer_t buffer_template;
} avf_per_thread_data_t;

/*
 * Per packet-type lookup entry (avf_main_t.ptypes points to an array of
 * these).
 */
typedef struct
{
  /* next node index, stored in 8 bits */
  u8 next_node;
  /* signed byte count; NOTE(review): presumably applied to the buffer's
   * current-data offset - confirm */
  i8 buffer_advance;
  u32 flags;
} avf_ptype_t;

/* NOTE(review): presumably guards that a device-input next-node index
 * always fits in the u8 next_node member above - confirm */
STATIC_ASSERT (VNET_DEVICE_INPUT_N_NEXT_NODES < 256, "too many next nodes");

/* Plugin-wide state; a single instance, avf_main, is declared below. */
typedef struct
{
  u16 msg_id_base;

  /* all devices and the per-thread scratch data */
  avf_device_t *devices;
  avf_per_thread_data_t *per_thread_data;
  vlib_physmem_region_index_t physmem_region;
  /* NOTE(review): presumably non-zero once physmem_region has been
   * allocated - confirm */
  int physmem_region_alloc;

  vlib_log_class_t log_class;

  /* 256 element array for ptype based lookup */
  avf_ptype_t *ptypes;
} avf_main_t;

/* Declaration only; the definition lives in another source file. */
extern avf_main_t avf_main;

/*
 * Argument block for avf_create_if (): request parameters first, then
 * the members listed under "return".
 */
typedef struct
{
  /* PCI address of the device */
  vlib_pci_addr_t addr;
  /* NOTE(review): presumably maps to the AVF_DEVICE_F_ELOG flag - confirm */
  int enable_elog;
  /* requested RX / TX queue sizes */
  u16 rxq_size;
  u16 txq_size;
  /* return */
  int rv;
  u32 sw_if_index;
  clib_error_t *error;
} avf_create_if_args_t;

/*
 * Interface create / delete entry points; implementations are not in
 * this header.
 * NOTE(review): avf_create_if presumably reports its outcome through the
 * "return" members of avf_create_if_args_t (rv, sw_if_index, error) -
 * confirm against the implementation.
 */
void avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args);
void avf_delete_if (vlib_main_t * vm, avf_device_t * ad);

/* Node and device-class registrations defined in other source files. */
extern vlib_node_registration_t avf_input_node;
extern vnet_device_class_t avf_device_class;

/* format.c */
format_function_t format_avf_device;
format_function_t format_avf_device_name;
format_function_t format_avf_input_trace;

static inline u32
avf_get_u32 (void *start, int offset)
{
  return *(u32 *) (((u8 *) start) + offset);
}

static inline u64
avf_get_u64 (void *start, int offset)
{
  return *(u64 *) (((u8 *) start) + offset);
}

static inline u32
avf_get_u32_bits (void *start, int offset, int first, int last)
{
  u32 value = avf_get_u32 (start, offset);
  if ((last == 0) && (first == 31))
    return value;
  value >>= last;
  value &= (1 << (first - last + 1)) - 1;
  return value;
}

static inline u64
avf_get_u64_bits (void *start, int offset, int first, int last)
{
  u64 value = avf_get_u64 (start, offset);
  if ((last == 0) && (first == 63))
    return value;
  value >>= last;
  value &= (1 << (first - last + 1)) - 1;
  return value;
}

/* Store `value` into the u32 located `offset` bytes past `start`. */
static inline void
avf_set_u32 (void *start, int offset, u32 value)
{
  u8 *base = (u8 *) start;
  u32 *word = (u32 *) (base + offset);

  *word = value;
}

/*
 * Write `val` to the 32-bit device register at byte offset `addr` from
 * the device's bar0 base. The store goes through a volatile pointer.
 */
static inline void
avf_reg_write (avf_device_t * ad, u32 addr, u32 val)
{
  volatile u32 *reg = (volatile u32 *) ((u8 *) ad->bar0 + addr);

  *reg = val;
}

/*
 * Read the 32-bit device register at byte offset `addr` from the
 * device's bar0 base. The load goes through a volatile pointer.
 */
static inline u32
avf_reg_read (avf_device_t * ad, u32 addr)
{
  /* Cast to u8 * before adding the offset, as avf_reg_write () does:
   * arithmetic directly on void * is a GNU extension, not standard C. */
  return *(volatile u32 *) ((u8 *) ad->bar0 + addr);
}

/*
 * Read the AVFGEN_RSTAT register (the value is discarded) and then issue
 * a compiler-level memory barrier.
 * NOTE(review): the read presumably forces previously posted register
 * writes out to the device - confirm against the hardware documentation.
 */
static inline void
avf_reg_flush (avf_device_t * ad)
{
  avf_reg_read (ad, AVFGEN_RSTAT);
  /* empty asm with a "memory" clobber: stops the compiler from moving
   * memory accesses across this point; emits no instruction */
  asm volatile ("":::"memory");
}

/*
 * Per-packet trace record.
 * NOTE(review): presumably written by the avf input node and rendered by
 * format_avf_input_trace - confirm.
 */
typedef struct
{
  u32 next_index;
  u32 hw_if_index;
  /* compact RX record for the traced packet */
  avf_rx_vector_entry_t rxve;
} avf_input_trace_t;

/*
 * X-macro table of TX error counters. Every row has the form
 *   _(symbol suffix, description string)
 */
#define foreach_avf_tx_func_error \
  _(NO_FREE_SLOTS, "no free tx slots")

/*
 * One AVF_TX_ERROR_<symbol> value per table row, numbered from 0, followed
 * by AVF_TX_N_ERROR which therefore equals the number of rows.
 */
typedef enum
{
#define _(sym, str) AVF_TX_ERROR_##sym,
  foreach_avf_tx_func_error
#undef _
  AVF_TX_N_ERROR,
} avf_tx_func_error_t;

#endif /* _AVF_H_ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */