summaryrefslogtreecommitdiffstats
path: root/src/plugins/snat/in2out.c
diff options
context:
space:
mode:
authorJuraj Sloboda <jsloboda@cisco.com>2017-04-03 08:08:48 +0200
committerDamjan Marion <dmarion.lists@gmail.com>2017-04-10 20:00:43 +0000
commitb498eeb6a75fda19e54c712ec176f78f95c7a754 (patch)
treeb56275ed18039e4a6bf9393134ccdd45686236cc /src/plugins/snat/in2out.c
parent3747c75a215f082bc52198a7229e1b1e529d7666 (diff)
Refactor SNAT code
Change-Id: I71f34dc64d4ddc5f2ec1164cb3c353d0fe2d95ab Signed-off-by: Juraj Sloboda <jsloboda@cisco.com>
Diffstat (limited to 'src/plugins/snat/in2out.c')
-rw-r--r--src/plugins/snat/in2out.c133
1 files changed, 69 insertions, 64 deletions
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c
index a70892fd31d..f5443762336 100644
--- a/src/plugins/snat/in2out.c
+++ b/src/plugins/snat/in2out.c
@@ -134,13 +134,10 @@ typedef enum {
* @returns 0 if packet should be translated otherwise 1
*/
static inline int
-snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
- ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0)
+snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
+ u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
+ u32 rx_fib_index0)
{
- ip4_address_t * first_int_addr;
- udp_header_t * udp0 = ip4_next_header (ip0);
- snat_session_key_t key0, sm0;
- clib_bihash_kv_8_8_t kv0, value0;
fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP4,
@@ -150,39 +147,11 @@ snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
},
};
- if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
- {
- first_int_addr =
- ip4_interface_first_address (sm->ip4_main, sw_if_index0,
- 0 /* just want the address */);
- rt->cached_sw_if_index = sw_if_index0;
- if (first_int_addr)
- rt->cached_ip4_address = first_int_addr->as_u32;
- else
- rt->cached_ip4_address = 0;
- }
-
/* Don't NAT packet aimed at the intfc address */
- if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
+ if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
+ ip0->dst_address.as_u32)))
return 1;
- key0.addr = ip0->dst_address;
- key0.port = udp0->dst_port;
- key0.protocol = proto0;
- key0.fib_index = sm->outside_fib_index;
- kv0.key = key0.as_u64;
-
- /* NAT packet aimed at external address if */
- /* has active sessions */
- if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
- {
- /* or is static mappings */
- if (!snat_static_mapping_match(sm, key0, &sm0, 1))
- return 0;
- }
- else
- return 0;
-
fei = fib_table_lookup (rx_fib_index0, &pfx);
if (FIB_NODE_INDEX_INVALID != fei)
{
@@ -205,6 +174,36 @@ snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0,
return 1;
}
+static inline int
+snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
+ u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
+ u32 rx_fib_index0)
+{
+ udp_header_t * udp0 = ip4_next_header (ip0);
+ snat_session_key_t key0, sm0;
+ clib_bihash_kv_8_8_t kv0, value0;
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = sm->outside_fib_index;
+ kv0.key = key0.as_u64;
+
+ /* NAT packet aimed at external address if */
+ /* has active sessions */
+ if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
+ {
+ /* or is static mappings */
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1))
+ return 0;
+ }
+ else
+ return 0;
+
+ return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
+ rx_fib_index0);
+}
+
static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
ip4_header_t * ip0,
u32 rx_fib_index0,
@@ -432,8 +431,6 @@ snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
if (!icmp_is_error_message (icmp0))
{
- if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
- return SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE;
key0.protocol = SNAT_PROTOCOL_ICMP;
key0.addr = ip0->src_address;
key0.port = echo0->identifier;
@@ -471,18 +468,16 @@ snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
* @param[in,out] node SNAT node runtime
* @param[in] thread_index thread index
* @param[in,out] b0 buffer containing packet to be translated
- * @param[out] p_key address and port before NAT translation
+ * @param[out] p_proto protocol used for matching
* @param[out] p_value address and port after NAT translation
* @param[out] p_dont_translate if packet should not be translated
* @param d optional parameter
*/
u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
- u32 thread_index, vlib_buffer_t *b0,
- snat_session_key_t *p_key,
+ u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
snat_session_key_t *p_value,
u8 *p_dont_translate, void *d)
{
- snat_runtime_t *rt;
ip4_header_t *ip0;
icmp46_header_t *icmp0;
u32 sw_if_index0;
@@ -494,7 +489,6 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
u32 next0 = ~0;
int err;
- rt = (snat_runtime_t *) node->runtime_data;
ip0 = vlib_buffer_get_current (b0);
icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
@@ -513,7 +507,7 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
{
- if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
IP_PROTOCOL_ICMP, rx_fib_index0)))
{
dont_translate = 1;
@@ -536,8 +530,16 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
value0.value);
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+ !icmp_is_error_message (icmp0)))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+
out:
- *p_key = key0;
+ *p_proto = key0.protocol;
if (s0)
*p_value = s0->out2in;
*p_dont_translate = dont_translate;
@@ -553,18 +555,16 @@ out:
* @param[in,out] node SNAT node runtime
* @param[in] thread_index thread index
* @param[in,out] b0 buffer containing packet to be translated
- * @param[out] p_key address and port before NAT translation
+ * @param[out] p_proto protocol used for matching
* @param[out] p_value address and port after NAT translation
* @param[out] p_dont_translate if packet should not be translated
* @param d optional parameter
*/
u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
- u32 thread_index, vlib_buffer_t *b0,
- snat_session_key_t *p_key,
+ u32 thread_index, vlib_buffer_t *b0, u8 *p_proto,
snat_session_key_t *p_value,
u8 *p_dont_translate, void *d)
{
- snat_runtime_t *rt;
ip4_header_t *ip0;
icmp46_header_t *icmp0;
u32 sw_if_index0;
@@ -575,7 +575,6 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
u32 next0 = ~0;
int err;
- rt = (snat_runtime_t *) node->runtime_data;
ip0 = vlib_buffer_get_current (b0);
icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
@@ -592,7 +591,7 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
if (snat_static_mapping_match(sm, key0, &sm0, 0))
{
- if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
+ if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
IP_PROTOCOL_ICMP, rx_fib_index0)))
{
dont_translate = 1;
@@ -610,10 +609,21 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
goto out;
}
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+ !icmp_is_error_message (icmp0)))
+ {
+ if (icmp0->type != ICMP4_echo_reply || key0.port != sm0.port)
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ next0 = SNAT_IN2OUT_NEXT_DROP;
+ goto out;
+ }
+ }
+
out:
*p_value = sm0;
out2:
- *p_key = key0;
+ *p_proto = key0.protocol;
*p_dont_translate = dont_translate;
return next0;
}
@@ -629,7 +639,8 @@ static inline u32 icmp_in2out (snat_main_t *sm,
u32 thread_index,
void *d)
{
- snat_session_key_t key0, sm0;
+ snat_session_key_t sm0;
+ u8 protocol;
icmp_echo_header_t *echo0, *inner_echo0 = 0;
ip4_header_t *inner_ip0;
void *l4_header = 0;
@@ -644,7 +655,7 @@ static inline u32 icmp_in2out (snat_main_t *sm,
echo0 = (icmp_echo_header_t *)(icmp0+1);
next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
- &key0, &sm0, &dont_translate, d);
+ &protocol, &sm0, &dont_translate, d);
if (next0_tmp != ~0)
next0 = next0_tmp;
if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
@@ -703,7 +714,7 @@ static inline u32 icmp_in2out (snat_main_t *sm,
dst_address /* changed member */);
icmp0->checksum = ip_csum_fold (sum0);
- switch (key0.protocol)
+ switch (protocol)
{
case SNAT_PROTOCOL_ICMP:
inner_icmp0 = (icmp46_header_t*)l4_header;
@@ -883,7 +894,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
snat_in2out_next_t next_index;
u32 pkts_processed = 0;
snat_main_t * sm = &snat_main;
- snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
f64 now = vlib_time_now (vm);
u32 stats_node_index;
u32 thread_index = vlib_get_thread_index ();
@@ -1003,7 +1013,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
{
if (is_slow_path)
{
- if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
proto0, rx_fib_index0)))
goto trace00;
@@ -1143,7 +1153,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
{
if (is_slow_path)
{
- if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1,
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
proto1, rx_fib_index1)))
goto trace01;
@@ -1318,7 +1328,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
{
if (is_slow_path)
{
- if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
+ if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
proto0, rx_fib_index0)))
goto trace0;
@@ -2197,7 +2207,6 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm,
snat_in2out_next_t next_index;
u32 pkts_processed = 0;
snat_main_t * sm = &snat_main;
- snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
u32 stats_node_index;
stats_node_index = snat_in2out_fast_node.index;
@@ -2266,10 +2275,6 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm,
if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
{
- if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0,
- proto0, rx_fib_index0)))
- goto trace0;
-
next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
rx_fib_index0, node, next0, ~0, 0);
goto trace0;
highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm 
{ color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __included_vnet_bonding_node_h__
#define __included_vnet_bonding_node_h__

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vppinfra/format.h>
#include <vppinfra/hash.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface.h>

/*
 * LACP periodic intervals and the timeouts derived from them: each
 * timeout is three times the matching periodic interval (3.0 and 90.0).
 * NOTE(review): values are presumably in seconds -- confirm against the
 * timer code that consumes them.
 */
#define LACP_FAST_PERIODIC_TIMER        1.0
#define LACP_SHORT_TIMOUT_TIME          (LACP_FAST_PERIODIC_TIMER * 3)
#define LACP_SLOW_PERIODIC_TIMER        30.0
#define LACP_LONG_TIMOUT_TIME           (LACP_SLOW_PERIODIC_TIMER * 3)

/*
 * Fallback MIN, defined only when no earlier header already provides one.
 * Each argument can be evaluated twice, so do not pass expressions with
 * side effects.
 */
#ifndef MIN
#define MIN(x,y) (((x)<(y))?(x):(y))
#endif

/*
 * Expands to is_pow2 (a).
 * NOTE(review): presumably used to decide whether a modulo can be replaced
 * by a cheaper operation -- confirm at the call sites.
 */
#define BOND_MODULO_SHORTCUT(a) \
  (is_pow2 (a))

/*
 * X-macro list of bond modes. Each entry is
 *   _ (numeric value, enum suffix, mode name string)
 * and is expanded by defining _() before use: into BOND_MODE_<suffix>
 * enumerators, and into the parse/print code of unformat_bond_mode () and
 * format_bond_mode ().
 */
#define foreach_bond_mode	    \
  _ (1, ROUND_ROBIN, "round-robin") \
  _ (2, ACTIVE_BACKUP, "active-backup") \
  _ (3, XOR, "xor") \
  _ (4, BROADCAST, "broadcast") \
  _ (5, LACP, "lacp")

/*
 * Bond operating modes: one BOND_MODE_<suffix> enumerator per
 * foreach_bond_mode entry, carrying the numeric value given in that entry.
 */
typedef enum
{
#define _(v, f, s) BOND_MODE_##f = v,
  foreach_bond_mode
#undef _
} bond_mode_t;

/*
 * configurable load-balances
 *
 * X-macro list with entries _ (numeric value, enum suffix, name string,
 * lower-case identifier). This is the subset of foreach_bond_lb_algo that
 * unformat_bond_load_balance () accepts as input.
 * NOTE(review): "l23" and "l34" are listed ahead of "l2", presumably so the
 * longer names are tried first during parsing -- confirm before reordering.
 */
#define foreach_bond_lb	  \
  _ (2, L23, "l23", l23)  \
  _ (1, L34 , "l34", l34) \
  _ (0, L2, "l2", l2)

/*
 * load-balance functions implemented in bond-output
 *
 * X-macro list with entries _ (numeric value, enum suffix, name string,
 * lower-case identifier). It generates the BOND_LB_<suffix> enumerators
 * and the names printed by format_bond_load_balance ().
 * NOTE(review): the fourth field is not used in this header; presumably it
 * names the per-algorithm function elsewhere -- confirm.
 */
#define foreach_bond_lb_algo			 \
  _ (0, L2, "l2", l2)                            \
  _ (1, L34 , "l34", l34)                        \
  _ (2, L23, "l23", l23)                         \
  _ (3, RR, "round-robin", round_robin)          \
  _ (4, BC, "broadcast", broadcast)              \
  _ (5, AB, "active-backup", active_backup)

/*
 * Load-balance algorithm identifiers: one BOND_LB_<suffix> enumerator per
 * foreach_bond_lb_algo entry, carrying the numeric value from that entry.
 */
typedef enum
{
#define _(v, f, s, p) BOND_LB_##f = v,
  foreach_bond_lb_algo
#undef _
} bond_load_balance_t;

/*
 * Event codes for the "send GARP/NA" process; only one event is defined.
 * NOTE(review): presumably signalled to bond_process_node to trigger
 * gratuitous ARP / neighbor advertisement transmission -- confirm.
 */
typedef enum
{
  BOND_SEND_GARP_NA = 1,
} bond_send_garp_na_process_event_t;

/*
 * Argument block passed to bond_create_if (). Fields above the "return"
 * marker are inputs; fields below it carry the result.
 */
typedef struct
{
  /* requested bond interface ID */
  u32 id;
  /* presumably non-zero when hw_addr holds a caller-supplied MAC -- confirm */
  u8 hw_addr_set;
  u8 hw_addr[6];
  /* presumably a bond_mode_t value -- confirm */
  u8 mode;
  /* presumably a bond_load_balance_t value -- confirm */
  u8 lb;
  /* return */
  u32 sw_if_index;
  int rv;
  clib_error_t *error;
} bond_create_if_args_t;

/*
 * Argument block passed to bond_enslave (). Fields above the "return"
 * marker are inputs; fields below it carry the result.
 */
typedef struct
{
  /* slave's sw_if_index */
  u32 slave;
  /* bond's sw_if_index */
  u32 group;
  u8 is_passive;
  u8 is_long_timeout;
  /* return */
  int rv;
  clib_error_t *error;
} bond_enslave_args_t;

/*
 * Argument block passed to bond_detach_slave (). The slave field is the
 * input; fields below the "return" marker carry the result.
 */
typedef struct
{
  /* presumably the slave's sw_if_index, as in bond_enslave_args_t -- confirm */
  u32 slave;
  /* return */
  int rv;
  clib_error_t *error;
} bond_detach_slave_args_t;

/**
 * BOND interface details struct.
 * Element type of the vector that bond_dump_ifs () takes by address.
 */
typedef struct
{
  u32 sw_if_index;
  u32 id;
  u8 interface_name[64];
  u8 mode;
  u8 lb;
  /* presumably a count of active slaves, not a vector -- confirm */
  u32 active_slaves;
  /* presumably a count of configured slaves -- confirm */
  u32 slaves;
} bond_interface_details_t;

/**
 * slave interface details struct.
 * Element type of the vector that bond_dump_slave_ifs () takes by address.
 */
typedef struct
{
  u32 sw_if_index;
  u8 interface_name[64];
  u8 is_passive;
  u8 is_long_timeout;
  u32 active_slaves;
} slave_interface_details_t;

/*
 * LACP port information, declared through CLIB_PACKED. The same layout is
 * used for both the actor and the partner side in bond_if_t and slave_if_t.
 * NOTE(review): the field order presumably mirrors the LACPDU actor/partner
 * information layout -- confirm before reordering or resizing anything.
 */
typedef CLIB_PACKED (struct
		     {
		     u16 system_priority;
		     u8 system[6];
		     u16 key; u16 port_priority; u16 port_number;
		     u8 state;
		     }) lacp_port_info_t;

/*
 * Per-port queue of buffer indices with room for VLIB_FRAME_SIZE entries.
 * The struct begins with a CLIB_CACHE_LINE_ALIGN_MARK.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  u32 buffers[VLIB_FRAME_SIZE];
  /* presumably the number of valid entries in buffers -- confirm */
  u32 n_buffers;
} bond_per_port_queue_t;

/*
 * Per-thread bonding data; bond_main_t keeps these in per_thread_data.
 */
typedef struct
{
  /* NOTE(review): presumably a vector with one queue per port -- confirm */
  bond_per_port_queue_t *per_port_queue;
} bond_per_thread_data_t;

/*
 * State of one bonding interface. Instances are the elements of the
 * bond_main_t "interfaces" pool.
 */
typedef struct
{
  u8 admin_up;
  /* presumably a bond_mode_t value -- confirm */
  u8 mode;
  /* presumably a bond_load_balance_t value -- confirm */
  u8 lb;

  /* the last slave index for the rr lb */
  u32 lb_rr_last_index;

  /* Real device instance in interface vector */
  u32 dev_instance;

  /* Interface ID being shown to user */
  u32 id;

  u32 hw_if_index;
  u32 sw_if_index;

  /* Configured slaves */
  u32 *slaves;

  /* Slaves that are in DISTRIBUTING state */
  u32 *active_slaves;

  /* rapidly find an active slave */
  uword *active_slave_by_sw_if_index;

  /* LACP partner and actor information kept for the bond */
  lacp_port_info_t partner;
  lacp_port_info_t actor;
  u8 individual_aggregator;

  u32 group;
  uword *port_number_bitmap;
  u8 use_custom_mac;
  u8 hw_address[6];

  /* NOTE(review): which fields this lock protects is not visible here */
  clib_spinlock_t lockp;
} bond_if_t;

/*
 * State of one slave (member) interface. Instances are the elements of the
 * bond_main_t "neighbors" pool. Besides interface bookkeeping it holds the
 * LACP variables, timers, state-machine states and PDU counters for the
 * port.
 */
typedef struct
{
  u8 persistent_hw_address[6];

  /* neighbor's vlib software interface index */
  u32 sw_if_index;

  /* Neighbor time-to-live (usually 3s) */
  f32 ttl_in_seconds;

  /*
   * 1 = interface is configured with long timeout
   * NOTE(review): this comment previously said "(60s)", but
   * LACP_LONG_TIMOUT_TIME in this header evaluates to 90.0 -- confirm which
   * value is intended.
   */
  u8 is_long_timeout;

  /* 1 = debug is on; 0 = debug is off */
  u8 debug;

  /* tx packet template id for this neighbor */
  u8 packet_template_index;

  /* Info we actually keep about each neighbor */

  /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
  u8 last_packet_signature_valid;
  uword last_packet_signature;

  /* last received lacp packet, for the J-hash optimization */
  u8 *last_rx_pkt;

  /* last marker packet */
  u8 *last_marker_pkt;

  /* neighbor vlib hw_if_index */
  u32 hw_if_index;

  /* actor does not initiate the protocol exchange */
  u8 is_passive;

  /* Partner port information */
  lacp_port_info_t partner;
  lacp_port_info_t partner_admin;

  /* Actor port information */
  lacp_port_info_t actor;
  lacp_port_info_t actor_admin;

  /* Need To Transmit flag */
  u8 ntt;

  /* Link has been established and Aggregate Port is operable */
  u8 port_enabled;

  /* Initialization or reinitialization of the lacp protocol entity */
  u8 begin;

  /* Aggregation Port is operating the lacp */
  u8 lacp_enabled;

  /* MUX to indicate to the Selection Logic wait_while_timer expired */
  u8 ready_n;

  /* Selection Logic indicates all Aggregation Ports attached */
  u8 ready;

  /* Selection Logic selected an Aggregator */
  int selected;

  /* RX machine indicates an Aggregation Port in PORT_DISABLED state */
  u8 port_moved;

  /* timer used to detect whether received protocol information has expired */
  f64 current_while_timer;

  /* timer used to detect actor churn states */
  f64 actor_churn_timer;

  /* time last lacpdu was sent */
  f64 last_lacpdu_sent_time;

  /* time last lacpdu was received */
  f64 last_lacpdu_recd_time;

  /* time last marker pdu was sent */
  f64 last_marker_pdu_sent_time;

  /* time last marker pdu was received */
  f64 last_marker_pdu_recd_time;

  /* timer used to generate periodic transmission */
  f64 periodic_timer;

  /* timer used to detect partner churn states */
  f64 partner_churn_timer;

  /* provides hysteresis before performing an aggregation change */
  f64 wait_while_timer;

  /* Implementation variables, not in the spec */
  int rx_state;
  int tx_state;
  int mux_state;
  int ptx_state;

  /* actor admin key */
  u32 group;

  u32 marker_tx_id;

  /* NOTE(review): presumably dev_instance of the owning bond_if_t -- confirm */
  u32 bif_dev_instance;

  u8 loopback_port;

  /* bond mode */
  u8 mode;

  /* good lacp pdu received */
  u64 pdu_received;

  /* bad lacp pdu received */
  u64 bad_pdu_received;

  /* pdu sent */
  u64 pdu_sent;

  /* good marker pdu received */
  u64 marker_pdu_received;

  /* bad marker pdu received */
  u64 marker_bad_pdu_received;

  /* marker pdu sent */
  u64 marker_pdu_sent;
} slave_if_t;

/*
 * Callback type stored in bond_main_t.lacp_enable_disable and installed
 * through bond_register_callback (). It receives the vlib main, the bond
 * interface, the slave interface and an enable flag.
 * NOTE(review): presumably enable != 0 turns LACP on for sif -- confirm
 * against the LACP plugin implementation.
 */
typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
					  slave_if_t * sif, u8 enable);

/*
 * Global bonding state. The single instance is the extern "bond_main"
 * declared in this header and read by the inline lookup helpers here.
 */
typedef struct
{
  /* pool of bonding interfaces */
  bond_if_t *interfaces;

  /* record used interface IDs */
  uword *id_used;

  /* pool of slave interfaces */
  slave_if_t *neighbors;

  /* rapidly find a bond by vlib software interface index */
  uword *bond_by_sw_if_index;

  /* convenience variables */
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;

  /* lacp plugin is loaded */
  u8 lacp_plugin_loaded;

  /* callback installed by bond_register_callback () */
  lacp_enable_disable_func lacp_enable_disable;

  /*
   * Vector indexed by sw_if_index. An entry of 0 means "no slave";
   * otherwise bond_get_slave_by_sw_if_index () uses (entry >> 1) as the
   * index into the neighbors pool.
   */
  uword *slave_by_sw_if_index;

  bond_per_thread_data_t *per_thread_data;
} bond_main_t;

/*
 * bond packet trace capture: a copy of the ethernet header plus two
 * software interface indices.
 * NOTE(review): presumably sw_if_index is the slave and bond_sw_if_index the
 * bond interface the packet belongs to -- confirm in the node that fills
 * this record.
 */
typedef struct
{
  ethernet_header_t ethernet;
  u32 sw_if_index;
  u32 bond_sw_if_index;
} bond_packet_trace_t;

/*
 * Signature of a load-balance function: given the bond interface, one
 * buffer and the slave count, it returns a u32.
 * NOTE(review): the return value is presumably an index into the bond's
 * active slave vector -- confirm in the implementations.
 */
typedef u32 (*load_balance_func) (vlib_main_t * vm,
				  vlib_node_runtime_t * node, bond_if_t * bif,
				  vlib_buffer_t * b0, uword slave_count);

/*
 * Wrapper holding a single load_balance_func pointer.
 * NOTE(review): presumably used as the element type of a table indexed by
 * bond_load_balance_t -- confirm where the table is defined.
 */
typedef struct
{
  load_balance_func load_balance;
} bond_load_balance_func_t;

/*
 * Objects defined elsewhere in the bonding driver. bond_main is the global
 * state accessed by the static inline helpers in this header.
 */
extern vlib_node_registration_t bond_input_node;
extern vlib_node_registration_t bond_process_node;
extern vnet_device_class_t bond_dev_class;
extern bond_main_t bond_main;

/*
 * Functions implemented outside this header; their bodies are not visible
 * here, so the notes below describe only what the signatures show.
 */

/* Collecting/distributing control for one slave interface. */
void bond_disable_collecting_distributing (vlib_main_t * vm,
					   slave_if_t * sif);
void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif);
/* format ()-style formatter (u8 * s, va_list * args). */
u8 *format_bond_interface_name (u8 * s, va_list * args);

/*
 * Management entry points. The *_args_t variants return void and report
 * their outcome through the rv / error fields of the argument block.
 */
void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args);
void bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args);
/*
 * Dump helpers taking the address of a details vector.
 * NOTE(review): presumably they append to the vector and return an error
 * code -- confirm in the implementation.
 */
int bond_dump_ifs (bond_interface_details_t ** out_bondids);
int bond_dump_slave_ifs (slave_interface_details_t ** out_slaveids,
			 u32 bond_sw_if_index);

/*
 * unformat helper: parse a bond mode name.
 *
 * Expects one va_list argument, a u8 * that receives the matching
 * BOND_MODE_* value. The names from foreach_bond_mode are tried in list
 * order and the first match wins.
 *
 * @return 1 when a mode name matched (result stored), 0 otherwise (result
 *         left untouched).
 */
static inline uword
unformat_bond_mode (unformat_input_t * input, va_list * args)
{
  u8 *mode = va_arg (*args, u8 *);

  /* Each list entry expands to one "try this name, return on match" test. */
#define _(v, f, str)                    \
  if (unformat (input, str))            \
    {                                   \
      *mode = BOND_MODE_##f;            \
      return 1;                         \
    }
  foreach_bond_mode
#undef _

  return 0;
}

/*
 * format helper: append the name of a bond mode to s.
 *
 * Expects one va_list argument, a u32 holding a BOND_MODE_* value. Values
 * not listed in foreach_bond_mode are printed as "unknown".
 *
 * @return the result of format () on s.
 */
static inline u8 *
format_bond_mode (u8 * s, va_list * args)
{
  u32 mode = va_arg (*args, u32);

  /* The macro parameter is called str so it does not shadow s. */
#define _(v, f, str)                            \
  if (mode == BOND_MODE_##f)                    \
    return format (s, "%s", (u8 *) str);
  foreach_bond_mode
#undef _

  return format (s, "unknown");
}

/*
 * unformat helper: parse a load-balance name.
 *
 * Expects one va_list argument, a u8 * that receives the matching BOND_LB_*
 * value. Only the names in foreach_bond_lb (the configurable subset) are
 * accepted; they are tried in list order and the first match wins.
 *
 * @return 1 when a name matched (result stored), 0 otherwise (result left
 *         untouched).
 */
static inline uword
unformat_bond_load_balance (unformat_input_t * input, va_list * args)
{
  u8 *lb = va_arg (*args, u8 *);

  /* Each list entry expands to one "try this name, return on match" test. */
#define _(v, f, str, p)                 \
  if (unformat (input, str))            \
    {                                   \
      *lb = BOND_LB_##f;                \
      return 1;                         \
    }
  foreach_bond_lb
#undef _

  return 0;
}

/*
 * format helper: append the name of a load-balance algorithm to s.
 *
 * Expects one va_list argument, a u32 holding a BOND_LB_* value. Every
 * algorithm in foreach_bond_lb_algo is recognised; other values are printed
 * as "unknown".
 *
 * @return the result of format () on s.
 */
static inline u8 *
format_bond_load_balance (u8 * s, va_list * args)
{
  u32 lb = va_arg (*args, u32);

  /* The macro parameter is called str so it does not shadow s. */
#define _(v, f, str, p)                         \
  if (lb == BOND_LB_##f)                        \
    return format (s, "%s", (u8 *) str);
  foreach_bond_lb_algo
#undef _

  return format (s, "unknown");
}

/*
 * Record that the LACP plugin is loaded and install its enable/disable
 * callback in the global bond_main.
 *
 * @param func callback stored in bond_main.lacp_enable_disable
 */
static inline void
bond_register_callback (lacp_enable_disable_func func)
{
  bond_main.lacp_plugin_loaded = 1;
  bond_main.lacp_enable_disable = func;
}

/*
 * Look up a bond interface by software interface index.
 *
 * @param sw_if_index key into bond_main.bond_by_sw_if_index
 * @return the matching element of the bond_main.interfaces pool, or 0 when
 *         the hash has no entry for sw_if_index
 */
static inline bond_if_t *
bond_get_master_by_sw_if_index (u32 sw_if_index)
{
  uword *slot = hash_get (bond_main.bond_by_sw_if_index, sw_if_index);

  /* The hash value is the pool index of the bond interface. */
  return slot ? pool_elt_at_index (bond_main.interfaces, slot[0]) : 0;
}

/*
 * Return the bond interface stored at pool index dev_instance.
 *
 * @param dev_instance index into the bond_main.interfaces pool; no range
 *                     check is done here beyond what pool_elt_at_index does
 * @return pointer to the pool element
 */
static inline bond_if_t *
bond_get_master_by_dev_instance (u32 dev_instance)
{
  return pool_elt_at_index (bond_main.interfaces, dev_instance);
}

/*
 * Look up a slave interface by software interface index.
 *
 * @param sw_if_index index into the bond_main.slave_by_sw_if_index vector
 * @return the matching element of the bond_main.neighbors pool, or 0 when
 *         sw_if_index is beyond the vector or its entry is 0
 */
static inline slave_if_t *
bond_get_slave_by_sw_if_index (u32 sw_if_index)
{
  bond_main_t *bm = &bond_main;
  uword entry;

  if (sw_if_index >= vec_len (bm->slave_by_sw_if_index))
    return 0;

  entry = bm->slave_by_sw_if_index[sw_if_index];
  if (entry == 0)
    return 0;

  /*
   * The pool index is kept in the bits above bit 0.
   * NOTE(review): bit 0 is presumably a marker set by the writer so that a
   * valid entry for pool index 0 is non-zero -- confirm where it is stored.
   */
  return pool_elt_at_index (bm->neighbors, entry >> 1);
}

#endif /* __included_vnet_bonding_node_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */