summaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp/tcp_output.c
AgeCommit message (Collapse)AuthorFilesLines
2018-06-26tcp: cleanup functionsFlorin Coras1-68/+67
- sprinkle statics for functions - move some inlines from header files to corresponding .c files - replace some always_inlines with statics where inlining is not performance critical Change-Id: I371dbf63431ce7e27e4ebbbdd844a9546a1f1849 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-06-20tcp: add per worker ctx structureFlorin Coras1-21/+24
Change-Id: I28d3c31bdc4255a4ca223d80bcf44709fb39f4ed Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-06-19tcp: optimize tcp outputFlorin Coras1-162/+158
Change-Id: Idf17a0633a1618b12c22b1119e40c2e9d3192df9 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-06-12tcp: add close-wait state flagsFlorin Coras1-0/+1
Change-Id: I35508d5251633396393f52842d1b58bc1c1463f6 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-06-11tcp: cleanup connection/session fixesFlorin Coras1-2/+3
- Cleanup session state after last ack and avoid using a cleanup timer. - Change session cleanup to free the session as opposed to waiting for delete notify. - When in close-wait, postpone sending the fin on close until all outstanding data has been sent. - Don't flush rx fifo unless in closed state Change-Id: Ic2a4f0d5568b65c83f4b55b6c469a7b24b947f39 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-06-10tcp: fix timer based recovery exit conditionFlorin Coras1-1/+2
Change-Id: I3f36e5760fd2935cc29d22601d4c0a1d2a22ba84 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-05-26tcp: loss recovery improvements/fixesFlorin Coras1-11/+15
- fix newreno cwnd computation - reset snd_una_max on entering recovery - accept acks beyond snd_nxt but less than snd_congestion when in recovery - avoid entering fast recovery multiple times when using sacks - avoid as much as possible sending small segments when doing fast retransmit - more event logging Change-Id: I19dd151d7704e39d4eae06de3a26f5e124875366 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-05-23tcp: cc improvements and fixesFlorin Coras1-5/+7
Change-Id: I6615bb612bcc3f795b5f822ea55209bb30ef35b5 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-05-21tcp: unlock link-local adjacencies on connection cleanupFlorin Coras1-24/+9
Change-Id: I37705fb572045f42be4c2dabbd8460c8f8872167 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-05-18session: add session process nodeFlorin Coras1-0/+2
Add a session process node that handles main thread tx and retransmit in order to avoid having a polling input node. Change-Id: I3357e987c023a84b533b32793e37ab4204420f64 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-05-17tcp: handle link-local addressesFlorin Coras1-6/+61
Change-Id: I9ede6bc861350c7d9e78fa4d96cd584c2816d06f Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-05-10tcp: fix jumbo retransmitsFlorin Coras1-42/+42
Change-Id: I1c8a14d4d51aa730f0edcf491e3c4725e2d8bd66 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-04-20tcp: improve statsFlorin Coras1-0/+6
Change-Id: I9ab11ba9f958c679112eb22c8db39cb269a29dc7 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-03-29tcp: fix fib index buffer taggingFlorin Coras1-13/+17
Change-Id: I373cc252df3621d44879b8eca70aed17d7752a2a Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-03-23tcp/session: sprinkle prefetchesFlorin Coras1-0/+8
Change-Id: Idef3c665580c13d72e99f43d16b8b13cc6ab746f Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-03-02session: first approximation implementation of tlsFlorin Coras1-1/+1
It consists of two main parts. First, add an application transport type whereby applications can offer transport to other applications. For instance, a tls app can offer transport services to other applications. And second, a tls transport app that leverages the mbedtls library for tls protocol implementation. Change-Id: I616996c6e6539a9e2368fab8a1ac874d7c5d9838 Signed-off-by: Florin Coras <fcoras@cisco.com>
2018-02-07Refactor vlib_buffer flagsDamjan Marion1-2/+2
Change-Id: I853386aebfe488ebb10328435b81b6e3403c5dd0 Signed-off-by: Damjan Marion <damarion@cisco.com>
2018-02-01tcp: tcp_output.c failed to compile when VLIB_BUFFER_TRACE_TRAJECTORY is enabledSteven1-1/+1
Fixed a typo in tcp_push_header(). The typo only kicks in when the macro VLIB_BUFFER_TRACE_TRAJECTORY is enabled. Change-Id: I62832a4932ec5b14e3063d5eac113780851aae59 Signed-off-by: Steven <sluong@cisco.com>
2017-12-11session: generalize handling of network transportsFlorin Coras1-0/+1
- compute session type out of transport and network protos - make session, session lookup and session queue code network protocol agnostic This does not update the session layer to support non-ip network layer protocols Change-Id: Ifc2f92845e158b649d59462eb7d51c12af536691 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-11-29session: fix preallocation of local endpoint tableFlorin Coras1-3/+1
Change-Id: I67a73e31bda9e497859297fcc1765e880572884a Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-11-28tcp: fix retransmissions under buffer shortageFlorin Coras1-28/+28
- add debugging scaffolding for simulating buffer shortage Change-Id: Ice519d74f9c4e4094c4586c548185135b7bb5f2d Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-10-10session: add support for application namespacingFlorin Coras1-2/+1
Applications are now provided the option to select the namespace they are to be attached to and the scope of their attachement. Application namespaces are meant to: 1) constrain the scope of communication through the network by association with source interfaces and/or fib tables that provide the source ips to be used and limit the scope of routing 2) provide a namespace local scope to session layer communication, as opposed to the global scope provided by 1). That is, sessions can be established without assistance from transport and network layers. Albeit, zero/local-host ip addresses must still be provided in session establishment messages due to existing application idiosyncrasies. This mode of communication uses shared-memory fifos (cut-through sessions) exclusively. If applications request no namespace, they are assigned to the default one, which at its turn uses the default fib. Applications can request access to both local and global scopes for a namespace. If no scope is specified, session layer defaults to the global one. When a sw_if_index is provided for a namespace, zero-ip (INADDR_ANY) binds are converted to binds to the requested interface. Change-Id: Ia0f660bbf7eec7f89673f75b4821fc7c3d58e3d1 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-10-04[aarch64] Fixes CLI crashes on dpaa2 platform.Christophe Fontaine1-1/+1
- always use 'va_args' as pointer in all format_* functions - u32 for all 'indent' params as it's declaration was inconsistent Change-Id: Ic5799309a6b104c9b50fec309cba789c8da99e79 Signed-off-by: Christophe Fontaine <christophe.fontaine@enea.com>
2017-10-03tcp: updates to connection closing procedure (VPP-996)Florin Coras1-2/+13
- add separate TIME_WAIT time constant - fix output node for TIME_WAIT acks - ensure snd_nxt is snd_una_max after retransmitting fin - debugging improvements Change-Id: Ic947153346979853f2526824b229126e47aead86 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-09-26tcp: update snd_nxt after congestion recoveryFlorin Coras1-9/+5
Change-Id: I2cf4c4850b9c3c093a7dce0cec89b9f710f69393 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-09-25tcp: do not sample rtt for retransmitted segmentsFlorin Coras1-2/+19
Change-Id: I365c31607332a944ef498369881332b515894ed7 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-09-19session/tcp: improve preallocated segment handlingFlorin Coras1-18/+73
- add preallocated segment flag - don't remove pre-allocated segments except if application detaches - when preallocating fifos in multiple segments, completely fill a segment before moving to the next - detach server application from segment-managers when deleting app - batch syn/syn-ack/fin (re)transmissions - loosen up close-wait and time-wait times Change-Id: I412f53ce601cc83b3acc26aeffd7fa2d52d73b03 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-09-15dpdk: cli to check for buffer leakageFlorin Coras1-4/+6
Use buffer pre_data and existing buffer trace trajectory code to find out dpdk buffer leakages. Change-Id: I26a5d8bd2f23d01cb6070ffc3ddcc6d3d863b575 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-09-12tcp: horizontal scaling improvmentsFlorin Coras1-113/+99
- do not scale syn-ack window - fix the max number of outstanding syns in builtin client - fix syn-sent ack validation to use modulo arithmetic - improve retransmit timer handler - fix output buffer allocator leakeage - improved debugging Change-Id: Iac3bc0eadf7d0b494a93e22d210a3153b61b3273 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-09-01Add fixed-size, preallocated pool supportDave Barach1-14/+28
Simply call pool_init_fixed(...) before using the pool. Note that fixed, preallocated pools live in individually-mmap'ed address segments, except for the free element bitmap. A large fixed pool can exceed 4gb. Fix tcp buffer allocator leak, remove broken assert Change-Id: I4421082e12a77c41c6e20f7747f3150dcd01fc26 Signed-off-by: Dave Barach <dave@barachs.net>
2017-08-30tcp: re-enable persist timer if no data available to sendFlorin Coras1-4/+18
Additionally, flush rx fifos for closed sessions. Change-Id: If2cc563fbda0451e7572650e98b15f0a694a0ff9 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-08-25tcp: retransmit and multi-buffer segment fixes and improvementsFlorin Coras1-112/+152
- set session state as closed on session manager delete - enable retransmit as opposed to persist timer after persist timer completes - properly discard buffer chain bytes when new data overlaps ooo segments - don't use rxt bytes in snd space estimate used on tx path Change-Id: Id9cab686e532e5fe70c775d5440260e8eb890a9f Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-08-18session: fix multi-buffer segmentsFlorin Coras1-14/+17
Change-Id: I0e22c85ea570b934b9c78dc5e86d86d690bdae5e Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-08-16tcp: fix v6 sessionsroot1-4/+2
Change-Id: Ia6dd5e948b17b2f3866fe70838eabb09e35415e1 Signed-off-by: Dave Barach <dbarach@cisco.com> Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-08-14TCP: Update time_now once per burstFlorin Coras1-1/+1
Change-Id: I58089d7a9867ede9d8a36b2aea62edef04cb5b81 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-08-11Fix tcp multi buffer segments retransmissionFlorin Coras1-106/+184
- Fix tcp/udp sw checksum computation - Fix allocation of multi buffer tcp segments for retransmits - Send FIN only if/when tx fifo is empty Change-Id: I2e43a14b87a72c9e547b4339b9a51811cf5732c4 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-08-02Fix tcp tx buffer allocationFlorin Coras1-61/+127
- Make tcp output buffer allocation macro an inline function - Use per ip version per thread tx frames for retransmits and timer events - Fix / parameterize tcp data structure preallocation - Add a couple of gdb-callable show commands - Fix local endpoint cleanup Change-Id: I67b47b7570aa14cb4634b6fd93c57cd2eacbfa29 Signed-off-by: Florin Coras <fcoras@cisco.com> Signed-off-by: Dave Barach <dave@barachs.net>
2017-08-02Make ip csum configurable in vlib buffer functionsFlorin Coras1-6/+7
Also fixes csum computation for lisp control plane 4o6 encapsulated control messages. Change-Id: I991e0b5c0d16dc51e0b5bdc79e1d752270b34765 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-07-30Make tcp active open data structures thread safeFlorin Coras1-3/+16
- Cleanup half-open connections and timers on the right thread - Ensure half-open connection and transport endpoint pools are thread safe - Enqueue TX events to the correct vpp thread in the builtin client - Use transport proto in transport connections instead of session type Change-Id: Id13239a206afbff6f34a38afa510fe014e4b2049 Signed-off-by: Florin Coras <fcoras@cisco.com> Signed-off-by: Dave Barach <dave@barachs.net>
2017-07-21Improvements to tcp rx path and debuggingFlorin Coras1-20/+31
- Increment rcv_nxt for fin packets - Call tcp_segment_rcv only if buffer has data - Parse rcv opts before deleting half-open connection - Fix initial rcv_wnd - Improved event logging Change-Id: I9b83c04f432c4cec832c480b03e534deff02c3b1 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-07-18TCP/UDP checksum offload APIDave Barach1-3/+13
Change-Id: I2cb6ce4e29813f6602b14e6e61713fb381fbcef8 Signed-off-by: Dave Barach <dave@barachs.net>
2017-07-14vnet_buffer_t flags cleanupDamjan Marion1-4/+4
Change-Id: I123eccea98abafeb31f25d2a162501e2eded60d4 Signed-off-by: Damjan Marion <damarion@cisco.com>
2017-07-11Horizontal (nSessions) scaling draftDave Barach1-19/+34
- Data structure preallocation. - Input state machine fixes for mid-stream 3-way handshake retries. - Batch connections in the builtin_client - Multiple private fifo segment support - Fix elog simultaneous event type registration - Fix sacks when segment hole is added after highest sacked - Add "accepting" session state for sessions pending accept - Add ssvm non-recursive locking - Estimate RTT for syn-ack - Don't init fifo pointers. We're using relative offsets for ooo segments - CLI to dump individual session Change-Id: Ie0598563fd246537bafba4feed7985478ea1d415 Signed-off-by: Dave Barach <dbarach@cisco.com> Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-06-22Improve svm fifo and tcp tx path performance (VPP-846)Florin Coras1-8/+21
- multiarch on svm fifo - avoid ip lookup on tx Change-Id: Iab0d85204a710979417bca1d692cc47877131203 Signed-off-by: Florin Coras <fcoras@cisco.com> Signed-off-by: Dave Barach <dbarach@cisco.com>
2017-06-19Overall tcp performance improvements (VPP-846)Florin Coras1-6/+4
- limit minimum rto per connection - cleanup sack scoreboard - switched svm fifo out-of-order data handling from absolute offsets to relative offsets. - improve cwnd handling when using sacks - add cc event debug stats - improved uri tcp test client/server: bugfixes and added half-duplex mode - expanded builtin client/server - updated uri socket client/server code to work in half-duplex - ensure session node unsets fifo event for empty fifo - fix session detach Change-Id: Ia446972340e32a65e0694ee2844355167d0c170d Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-06-09Implement sack based tcp loss recovery (RFC 6675)Florin Coras1-98/+189
- refactor existing congestion control code (RFC 6582/5681). Handling of ack feedback now consists of: ack parsing, cc event detection, event handling, congestion control update - extend sack scoreboard to support sack based retransmissions - basic implementation of Eifel detection algorithm (RFC 3522) for detecting spurious retransmissions - actually initialize the per-thread frame freelist hash tables - increase worker stack size to 2mb - fix session queue node out-of-buffer handling - ensure that the local buffer cache vec_len matches reality - avoid 2x spurious event requeues when short of buffers - count out-of-buffer events - make the builtin server thread-safe - fix bihash template threading issue: need to paint -1 across uninitialized working_copy_length vector elements (via rebase from master) Change-Id: I646cb9f1add9a67d08f4a87badbcb117980ebfc4 Signed-off-by: Florin Coras <fcoras@cisco.com> Signed-off-by: Dave Barach <dbarach@cisco.com>
2017-05-20Improve session debuggingFlorin Coras1-2/+10
Also improves builtin client code. Change-Id: I8bca1aa632028f95c373726efb0abf2ee0eff414 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-05-10Improve TCP option handling, VPP-757Florin Coras1-4/+12
Change-Id: Ica634536387d1196366ec96c52770287fcab0768 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-05-10Ignore data in RST segments, VPP-723Florin Coras1-44/+30
Change-Id: I01650b3b10b79ec549be0d5eceb0e318c06fcb50 Signed-off-by: Florin Coras <fcoras@cisco.com>
2017-05-09Add support for tcp/session buffer chainsFlorin Coras1-2/+2
Change-Id: I01c6e3dc3a1b2785df37bb66b19c4b5cbb8f3211 Signed-off-by: Florin Coras <fcoras@cisco.com>
pan class="p">; if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6) { hash_unset_mem_free (&gm->tunnel_by_key6, &key->gtk_v6); } else { hash_unset_mem_free (&gm->tunnel_by_key4, &key->gtk_v4); } } /** * gre_tunnel_stack * * 'stack' (resolve the recursion for) the tunnel's midchain adjacency */ void gre_tunnel_stack (adj_index_t ai) { gre_main_t *gm = &gre_main; ip_adjacency_t *adj; gre_tunnel_t *gt; u32 sw_if_index; adj = adj_get (ai); sw_if_index = adj->rewrite_header.sw_if_index; if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) || (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) return; gt = pool_elt_at_index (gm->tunnels, gm->tunnel_index_by_sw_if_index[sw_if_index]); if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) & VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) { adj_midchain_delegate_unstack (ai); } else { adj_midchain_delegate_stack (ai, gt->outer_fib_index, &gt->tunnel_dst); } } /** * mgre_tunnel_stack * * 'stack' (resolve the recursion for) the tunnel's midchain adjacency */ static void mgre_tunnel_stack (adj_index_t ai) { gre_main_t *gm = &gre_main; const ip_adjacency_t *adj; const gre_tunnel_t *gt; u32 sw_if_index; adj = adj_get (ai); sw_if_index = adj->rewrite_header.sw_if_index; if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) || (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) return; gt = pool_elt_at_index (gm->tunnels, gm->tunnel_index_by_sw_if_index[sw_if_index]); if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) & VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) { adj_midchain_delegate_unstack (ai); } else { const teib_entry_t *ne; ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto, &adj->sub_type.nbr.next_hop); if (NULL != ne) teib_entry_adj_stack (ne, ai); } } /** * @brief Call back when restacking all adjacencies on a GRE interface */ static adj_walk_rc_t gre_adj_walk_cb (adj_index_t ai, void *ctx) { gre_tunnel_stack (ai); return (ADJ_WALK_RC_CONTINUE); } static adj_walk_rc_t mgre_adj_walk_cb (adj_index_t ai, void *ctx) { mgre_tunnel_stack (ai); return (ADJ_WALK_RC_CONTINUE); } static void gre_tunnel_restack (gre_tunnel_t * gt) { fib_protocol_t proto; /* * walk all the adjacencies on th GRE interface and restack them */ FOR_EACH_FIB_IP_PROTOCOL (proto) { switch (gt->mode) { case TUNNEL_MODE_P2P: adj_nbr_walk (gt->sw_if_index, proto, gre_adj_walk_cb, NULL); break; case TUNNEL_MODE_MP: adj_nbr_walk (gt->sw_if_index, proto, mgre_adj_walk_cb, NULL); break; } } } static void gre_teib_mk_key (const gre_tunnel_t * t, const teib_entry_t * ne, gre_tunnel_key_t * key) { const fib_prefix_t *nh; nh = teib_entry_get_nh (ne); /* construct the key using mode P2P so it can be found in the DP */ if (FIB_PROTOCOL_IP4 == nh->fp_proto) gre_mk_key4 (t->tunnel_src.ip4, nh->fp_addr.ip4, teib_entry_get_fib_index (ne), t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v4); else gre_mk_key6 (&t->tunnel_src.ip6, &nh->fp_addr.ip6, teib_entry_get_fib_index (ne), t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v6); } /** * An TEIB entry has been added */ static void gre_teib_entry_added (const teib_entry_t * ne) { gre_main_t *gm = &gre_main; const ip_address_t *nh; gre_tunnel_key_t key; gre_tunnel_t *t; u32 sw_if_index; u32 t_idx; sw_if_index = teib_entry_get_sw_if_index (ne); if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index) return; t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index]; if (INDEX_INVALID == t_idx) return; /* entry has been added on an interface for which there is a GRE tunnel */ t = pool_elt_at_index (gm->tunnels, t_idx); if (t->mode != TUNNEL_MODE_MP) return; /* the next-hop (underlay) of the NHRP entry will form part of the key for * ingress lookup to match packets to this interface */ gre_teib_mk_key (t, ne, &key); gre_tunnel_db_add (t, &key); /* update the rewrites for each of the adjacencies for this peer (overlay) * using the next-hop (underlay) */ mgre_walk_ctx_t ctx = { .t = t, .ne = ne }; nh = teib_entry_get_peer (ne); adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne), (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6), &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx); } static void gre_teib_entry_deleted (const teib_entry_t * ne) { gre_main_t *gm = &gre_main; const ip_address_t *nh; gre_tunnel_key_t key; gre_tunnel_t *t; u32 sw_if_index; u32 t_idx; sw_if_index = teib_entry_get_sw_if_index (ne); if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index) return; t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index]; if (INDEX_INVALID == t_idx) return; t = pool_elt_at_index (gm->tunnels, t_idx); /* remove the next-hop as an ingress lookup key */ gre_teib_mk_key (t, ne, &key); gre_tunnel_db_remove (t, &key); nh = teib_entry_get_peer (ne); /* make all the adjacencies incomplete */ adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne), (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6), &ip_addr_46 (nh), mgre_mk_incomplete_walk, t); } static walk_rc_t gre_tunnel_delete_teib_walk (index_t nei, void *ctx) { gre_tunnel_t *t = ctx; gre_tunnel_key_t key; gre_teib_mk_key (t, teib_entry_get (nei), &key); gre_tunnel_db_remove (t, &key); return (WALK_CONTINUE); } static walk_rc_t gre_tunnel_add_teib_walk (index_t nei, void *ctx) { gre_tunnel_t *t = ctx; gre_tunnel_key_t key; gre_teib_mk_key (t, teib_entry_get (nei), &key); gre_tunnel_db_add (t, &key); return (WALK_CONTINUE); } static int vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a, u32 outer_fib_index, u32 * sw_if_indexp) { gre_main_t *gm = &gre_main; vnet_main_t *vnm = gm->vnet_main; gre_tunnel_t *t; vnet_hw_interface_t *hi; u32 hw_if_index, sw_if_index; u8 is_ipv6 = a->is_ipv6; gre_tunnel_key_t key; t = gre_tunnel_db_find (a, outer_fib_index, &key); if (NULL != t) return VNET_API_ERROR_IF_ALREADY_EXISTS; pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); clib_memset (t, 0, sizeof (*t)); /* Reconcile the real dev_instance and a possible requested instance */ u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */ u32 u_idx = a->instance; /* user specified instance */ if (u_idx == ~0) u_idx = t_idx; if (hash_get (gm->instance_used, u_idx)) { pool_put (gm->tunnels, t); return VNET_API_ERROR_INSTANCE_IN_USE; } hash_set (gm->instance_used, u_idx, 1); t->dev_instance = t_idx; /* actual */ t->user_instance = u_idx; /* name */ t->type = a->type; t->mode = a->mode; t->flags = a->flags; if (t->type == GRE_TUNNEL_TYPE_ERSPAN) t->session_id = a->session_id; if (t->type == GRE_TUNNEL_TYPE_L3) { if (t->mode == TUNNEL_MODE_P2P) hw_if_index = vnet_register_interface (vnm, gre_device_class.index, t_idx, gre_hw_interface_class.index, t_idx); else hw_if_index = vnet_register_interface (vnm, gre_device_class.index, t_idx, mgre_hw_interface_class.index, t_idx); } else { vnet_eth_interface_registration_t eir = {}; /* Default MAC address (d00b:eed0:0000 + sw_if_index) */ u8 address[6] = { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx }; eir.dev_class_index = gre_device_class.index; eir.dev_instance = t_idx; eir.address = address; hw_if_index = vnet_eth_register_interface (vnm, &eir); } /* Set GRE tunnel interface output node (not used for L3 payload) */ if (GRE_TUNNEL_TYPE_ERSPAN == t->type) vnet_set_interface_output_node (vnm, hw_if_index, gre_erspan_encap_node.index); else vnet_set_interface_output_node (vnm, hw_if_index, gre_teb_encap_node.index); hi = vnet_get_hw_interface (vnm, hw_if_index); sw_if_index = hi->sw_if_index; t->hw_if_index = hw_if_index; t->outer_fib_index = outer_fib_index; t->sw_if_index = sw_if_index; t->l2_adj_index = ADJ_INDEX_INVALID; vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx; if (!is_ipv6) { hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t); } else { hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip6_header_t); } /* Standard default gre MTU. */ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000); /* * source the FIB entry for the tunnel's destination * and become a child thereof. The tunnel will then get poked * when the forwarding for the entry updates, and the tunnel can * re-stack accordingly */ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); t->tunnel_dst.fp_len = !is_ipv6 ? 32 : 128; t->tunnel_dst.fp_proto = !is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; t->tunnel_dst.fp_addr = a->dst; gre_tunnel_db_add (t, &key); if (t->mode == TUNNEL_MODE_MP) teib_walk_itf (t->sw_if_index, gre_tunnel_add_teib_walk, t); if (t->type == GRE_TUNNEL_TYPE_ERSPAN) { gre_sn_key_t skey; gre_sn_t *gre_sn; gre_mk_sn_key (t, &skey); gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey); if (gre_sn != NULL) { gre_sn->ref_count++; t->gre_sn = gre_sn; } else { gre_sn = clib_mem_alloc (sizeof (gre_sn_t)); gre_sn->seq_num = 0; gre_sn->ref_count = 1; t->gre_sn = gre_sn; hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn); } } if (t->type != GRE_TUNNEL_TYPE_L3) { t->l2_adj_index = adj_nbr_add_or_lock (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index); vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index, (u8 *) "tunnel-output-no-count"); gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index); } else { vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index, (u8 *) "tunnel-output"); } if (sw_if_indexp) *sw_if_indexp = sw_if_index; /* register gre46-input nodes */ ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index); ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index); return 0; } static int vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a, u32 outer_fib_index, u32 * sw_if_indexp) { gre_main_t *gm = &gre_main; vnet_main_t *vnm = gm->vnet_main; gre_tunnel_t *t; gre_tunnel_key_t key; u32 sw_if_index; t = gre_tunnel_db_find (a, outer_fib_index, &key); if (NULL == t) return VNET_API_ERROR_NO_SUCH_ENTRY; if (t->mode == TUNNEL_MODE_MP) teib_walk_itf (t->sw_if_index, gre_tunnel_delete_teib_walk, t); sw_if_index = t->sw_if_index; vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ ); /* make sure tunnel is removed from l2 bd or xconnect */ set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, L2_BD_PORT_TYPE_NORMAL, 0, 0); gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; if (t->type == GRE_TUNNEL_TYPE_L3) vnet_delete_hw_interface (vnm, t->hw_if_index); else ethernet_delete_interface (vnm, t->hw_if_index); if (t->l2_adj_index != ADJ_INDEX_INVALID) { adj_midchain_delegate_unstack (t->l2_adj_index); adj_unlock (t->l2_adj_index); } ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL)); if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1)) { gre_sn_key_t skey; gre_mk_sn_key (t, &skey); hash_unset_mem_free (&gm->seq_num_by_key, &skey); clib_mem_free (t->gre_sn); } vnet_reset_interface_l3_output_node (gm->vlib_main, sw_if_index); hash_unset (gm->instance_used, t->user_instance); gre_tunnel_db_remove (t, &key); pool_put (gm->tunnels, t); if (sw_if_indexp) *sw_if_indexp = sw_if_index; return 0; } int vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a, u32 * sw_if_indexp) { u32 outer_fib_index; outer_fib_index = fib_table_find ((a->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4), a->outer_table_id); if (~0 == outer_fib_index) return VNET_API_ERROR_NO_SUCH_FIB; if (a->session_id > GTK_SESSION_ID_MAX) return VNET_API_ERROR_INVALID_SESSION_ID; if (a->mode == TUNNEL_MODE_MP && !ip46_address_is_zero (&a->dst)) return (VNET_API_ERROR_INVALID_DST_ADDRESS); if (a->is_add) return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp)); else return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp)); } clib_error_t * gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { gre_main_t *gm = &gre_main; vnet_hw_interface_t *hi; gre_tunnel_t *t; u32 ti; hi = vnet_get_hw_interface (vnm, hw_if_index); if (NULL == gm->tunnel_index_by_sw_if_index || hi->sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) return (NULL); ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index]; if (~0 == ti) /* not one of ours */ return (NULL); t = pool_elt_at_index (gm->tunnels, ti); if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); else vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ ); gre_tunnel_restack (t); return /* no error */ 0; } static clib_error_t * create_gre_tunnel_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; vnet_gre_tunnel_add_del_args_t _a, *a = &_a; ip46_address_t src = ip46_address_initializer, dst = ip46_address_initializer; u32 instance = ~0; u32 outer_table_id = 0; gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3; tunnel_mode_t t_mode = TUNNEL_MODE_P2P; tunnel_encap_decap_flags_t flags = TUNNEL_ENCAP_DECAP_FLAG_NONE; u32 session_id = 0; int rv; u8 is_add = 1; u32 sw_if_index; clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "del")) is_add = 0; else if (unformat (line_input, "instance %d", &instance)) ; else if (unformat (line_input, "src %U", unformat_ip46_address, &src)) ; else if (unformat (line_input, "dst %U", unformat_ip46_address, &dst)) ; else if (unformat (line_input, "outer-table-id %d", &outer_table_id)) ; else if (unformat (line_input, "multipoint")) t_mode = TUNNEL_MODE_MP; else if (unformat (line_input, "teb")) t_type = GRE_TUNNEL_TYPE_TEB; else if (unformat (line_input, "erspan %d", &session_id)) t_type = GRE_TUNNEL_TYPE_ERSPAN; else if (unformat (line_input, "flags %U", unformat_tunnel_encap_decap_flags, &flags)) ; else { error = clib_error_return (0, "unknown input `%U'", format_unformat_error, line_input); goto done; } } if (ip46_address_is_equal (&src, &dst)) { error = clib_error_return (0, "src and dst are identical"); goto done; } if (t_mode != TUNNEL_MODE_MP && ip46_address_is_zero (&dst)) { error = clib_error_return (0, "destination address not specified"); goto done; } if (ip46_address_is_zero (&src)) { error = clib_error_return (0, "source address not specified"); goto done; } if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst)) { error = clib_error_return (0, "src and dst address must be the same AF"); goto done; } clib_memset (a, 0, sizeof (*a)); a->is_add = is_add; a->outer_table_id = outer_table_id; a->type = t_type; a->mode = t_mode; a->session_id = session_id; a->is_ipv6 = !ip46_address_is_ip4 (&src); a->instance = instance; a->flags = flags; clib_memcpy (&a->src, &src, sizeof (a->src)); clib_memcpy (&a->dst, &dst, sizeof (a->dst)); rv = vnet_gre_tunnel_add_del (a, &sw_if_index); switch (rv) { case 0: vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); break; case VNET_API_ERROR_IF_ALREADY_EXISTS: error = clib_error_return (0, "GRE tunnel already exists..."); goto done; case VNET_API_ERROR_NO_SUCH_FIB: error = clib_error_return (0, "outer table ID %d doesn't exist\n", outer_table_id); goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: error = clib_error_return (0, "GRE tunnel doesn't exist"); goto done; case VNET_API_ERROR_INVALID_SESSION_ID: error = clib_error_return (0, "session ID %d out of range\n", session_id); goto done; case VNET_API_ERROR_INSTANCE_IN_USE: error = clib_error_return (0, "Instance is in use"); goto done; default: error = clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv); goto done; } done: unformat_free (line_input); return error; } /* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { .path = "create gre tunnel", .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] " "[outer-fib-id <fib>] [teb | erspan <session-id>] [del] " "[multipoint]", .function = create_gre_tunnel_command_fn, }; /* *INDENT-ON* */ static clib_error_t * show_gre_tunnel_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { gre_main_t *gm = &gre_main; gre_tunnel_t *t; u32 ti = ~0; if (pool_elts (gm->tunnels) == 0) vlib_cli_output (vm, "No GRE tunnels configured..."); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "%d", &ti)) ; else break; } if (~0 == ti) { /* *INDENT-OFF* */ pool_foreach (t, gm->tunnels) { vlib_cli_output (vm, "%U", format_gre_tunnel, t); } /* *INDENT-ON* */ } else { t = pool_elt_at_index (gm->tunnels, ti); vlib_cli_output (vm, "%U", format_gre_tunnel, t); } return 0; } /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = { .path = "show gre tunnel", .function = show_gre_tunnel_command_fn, }; /* *INDENT-ON* */ const static teib_vft_t gre_teib_vft = { .nv_added = gre_teib_entry_added, .nv_deleted = gre_teib_entry_deleted, }; /* force inclusion from application's main.c */ clib_error_t * gre_interface_init (vlib_main_t * vm) { teib_register (&gre_teib_vft); return (NULL); } VLIB_INIT_FUNCTION (gre_interface_init); /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */