diff options
Diffstat (limited to 'src/plugins/lb/lb.h')
-rw-r--r-- | src/plugins/lb/lb.h | 211 |
1 files changed, 198 insertions, 13 deletions
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h index 61d17d713a5..1526298b0fa 100644 --- a/src/plugins/lb/lb.h +++ b/src/plugins/lb/lb.h @@ -38,17 +38,65 @@ #include <vnet/dpo/dpo.h> #include <vnet/fib/fib_table.h> #include <vppinfra/hash.h> - +#include <vppinfra/bihash_8_8.h> +#include <vppinfra/bihash_24_8.h> #include <lb/lbhash.h> #define LB_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10 #define LB_DEFAULT_FLOW_TIMEOUT 40 +#define LB_MAPPING_BUCKETS 1024 +#define LB_MAPPING_MEMORY_SIZE 64<<20 typedef enum { LB_NEXT_DROP, LB_N_NEXT, } lb_next_t; +typedef enum { + LB_NAT4_IN2OUT_NEXT_DROP, + LB_NAT4_IN2OUT_NEXT_LOOKUP, + LB_NAT4_IN2OUT_N_NEXT, +} LB_nat4_in2out_next_t; + +typedef enum { + LB_NAT6_IN2OUT_NEXT_DROP, + LB_NAT6_IN2OUT_NEXT_LOOKUP, + LB_NAT6_IN2OUT_N_NEXT, +} LB_nat6_in2out_next_t; + +#define foreach_lb_nat_in2out_error \ +_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ +_(IN2OUT_PACKETS, "Good in2out packets processed") \ +_(NO_TRANSLATION, "No translation") + +typedef enum { +#define _(sym,str) LB_NAT_IN2OUT_ERROR_##sym, + foreach_lb_nat_in2out_error +#undef _ + LB_NAT_IN2OUT_N_ERROR, +} lb_nat_in2out_error_t; + +/** + * lb for kube-proxy supports three types of service + */ +typedef enum { + LB_SRV_TYPE_CLUSTERIP, + LB_SRV_TYPE_NODEPORT, + LB_SRV_N_TYPES, +} lb_svr_type_t; + +typedef enum { + LB4_NODEPORT_NEXT_IP4_NAT4, + LB4_NODEPORT_NEXT_DROP, + LB4_NODEPORT_N_NEXT, +} lb4_nodeport_next_t; + +typedef enum { + LB6_NODEPORT_NEXT_IP6_NAT6, + LB6_NODEPORT_NEXT_DROP, + LB6_NODEPORT_N_NEXT, +} lb6_nodeport_next_t; + /** * Each VIP is configured with a set of * application server. @@ -133,12 +181,14 @@ typedef enum { LB_ENCAP_TYPE_GRE4, LB_ENCAP_TYPE_GRE6, LB_ENCAP_TYPE_L3DSR, + LB_ENCAP_TYPE_NAT4, + LB_ENCAP_TYPE_NAT6, LB_ENCAP_N_TYPES, } lb_encap_type_t; /** * The load balancer supports IPv4 and IPv6 traffic - * and GRE4, GRE6 and L3DSR encap. + * and GRE4, GRE6, L3DSR and NAT4, NAT6 encap. */ typedef enum { LB_VIP_TYPE_IP6_GRE6, @@ -146,13 +196,39 @@ typedef enum { LB_VIP_TYPE_IP4_GRE6, LB_VIP_TYPE_IP4_GRE4, LB_VIP_TYPE_IP4_L3DSR, + LB_VIP_TYPE_IP4_NAT4, + LB_VIP_TYPE_IP6_NAT6, LB_VIP_N_TYPES, } lb_vip_type_t; - format_function_t format_lb_vip_type; unformat_function_t unformat_lb_vip_type; + +/* args for different vip encap types */ +typedef struct { + union + { + struct + { + /* Service type. clusterip or nodeport */ + u8 srv_type; + + /* Service port. network byte order */ + u16 port; + + /* Pod's port corresponding to specific service. network byte order */ + u16 target_port; + + /* Node's port, can access service via NodeIP:node_port. network byte order */ + u16 node_port; + }; + /* DSCP bits for L3DSR */ + u8 dscp; + u64 as_u64; + }; +} lb_vip_encap_args_t; + /** * Load balancing service is provided per VIP. * In this data model, a VIP can be a whole prefix. @@ -205,10 +281,8 @@ typedef struct { */ lb_vip_type_t type; - /** - * DSCP bits for L3DSR - */ - u8 dscp; + /* args for different vip encap types */ + lb_vip_encap_args_t encap_args; /** * Flags related to this VIP. @@ -229,21 +303,100 @@ typedef struct { #define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE4 \ - || (vip)->type == LB_VIP_TYPE_IP4_L3DSR ) + || (vip)->type == LB_VIP_TYPE_IP4_L3DSR \ + || (vip)->type == LB_VIP_TYPE_IP4_NAT4 ) #define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE4) + #define lb_vip_is_gre6(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE6 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE6) -#define lb_vip_is_l3dsr(vip) (vip)->type == LB_VIP_TYPE_IP4_L3DSR + +#define lb_vip_is_l3dsr(vip) ((vip)->type == LB_VIP_TYPE_IP4_L3DSR) + +#define lb_vip_is_nat4(vip) ((vip)->type == LB_VIP_TYPE_IP4_NAT4) + +#define lb_vip_is_nat6(vip) ((vip)->type == LB_VIP_TYPE_IP6_NAT6) #define lb_encap_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE4 \ - || (vip)->type == LB_VIP_TYPE_IP4_L3DSR) + || (vip)->type == LB_VIP_TYPE_IP4_L3DSR \ + || (vip)->type == LB_VIP_TYPE_IP4_NAT4 ) format_function_t format_lb_vip; format_function_t format_lb_vip_detailed; +#define foreach_lb_nat_protocol \ + _(UDP, 0, udp, "udp") \ + _(TCP, 1, tcp, "tcp") + +typedef enum { +#define _(N, i, n, s) LB_NAT_PROTOCOL_##N = i, + foreach_lb_nat_protocol +#undef _ +} lb_nat_protocol_t; + +always_inline u32 +lb_ip_proto_to_nat_proto (u8 ip_proto) +{ + u32 nat_proto = ~0; + + nat_proto = (ip_proto == IP_PROTOCOL_UDP) ? LB_NAT_PROTOCOL_UDP : nat_proto; + nat_proto = (ip_proto == IP_PROTOCOL_TCP) ? LB_NAT_PROTOCOL_TCP : nat_proto; + + return nat_proto; +} + +/* Key for Pod's egress SNAT */ +typedef struct { + union + { + struct + { + ip4_address_t addr; + u16 port; + u16 protocol:3, + fib_index:13; + }; + u64 as_u64; + }; +} lb_snat4_key_t; + +typedef struct +{ + union + { + struct + { + ip6_address_t addr; + u16 port; + u16 protocol; + u32 fib_index; + }; + u64 as_u64[3]; + }; +} lb_snat6_key_t; + +typedef struct { + /** + * for vip + port case, src_ip = vip; + * for node ip + node_port, src_ip = node_ip + */ + ip46_address_t src_ip; + ip46_address_t as_ip; + u8 src_ip_is_ipv6; + u8 as_ip_is_ipv6; + /** + * Network byte order + * for vip + port case, src_port = port; + * for node ip + node_port, src_port = node_port + */ + u16 src_port; + u16 target_port; /* Network byte order */ + u32 vrf_id; + u32 fib_index; +} lb_snat_mapping_t; + typedef struct { /** * Each CPU has its own sticky flow hash table. @@ -273,6 +426,9 @@ typedef struct { */ vlib_refcount_t as_refcount; + /* hash lookup vip_index by key: {u16: nodeport} */ + uword * vip_index_by_nodeport; + /** * Some global data is per-cpu */ @@ -314,23 +470,49 @@ typedef struct { dpo_type_t dpo_gre4_type; dpo_type_t dpo_gre6_type; dpo_type_t dpo_l3dsr_type; + dpo_type_t dpo_nat4_type; + dpo_type_t dpo_nat6_type; /** * Node type for registering to fib changes. */ fib_node_type_t fib_node_type; + /* Find a static mapping by AS IP : target_port */ + clib_bihash_8_8_t mapping_by_as4; + clib_bihash_24_8_t mapping_by_as6; + + /* Static mapping pool */ + lb_snat_mapping_t * snat_mappings; + /** * API dynamically registered base ID. */ u16 msg_id_base; volatile u32 *writer_lock; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; } lb_main_t; +/* args for different vip encap types */ +typedef struct { + ip46_address_t prefix; + u8 plen; + lb_vip_type_t type; + u32 new_length; + lb_vip_encap_args_t encap_args; +} lb_vip_add_args_t; + extern lb_main_t lb_main; -extern vlib_node_registration_t lb6_node; extern vlib_node_registration_t lb4_node; +extern vlib_node_registration_t lb6_node; +extern vlib_node_registration_t lb4_nodeport_node; +extern vlib_node_registration_t lb6_nodeport_node; +extern vlib_node_registration_t lb_nat4_in2out_node; +extern vlib_node_registration_t lb_nat6_in2out_node; /** * Fix global load-balancer parameters. @@ -341,8 +523,8 @@ extern vlib_node_registration_t lb4_node; int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, u32 sticky_buckets, u32 flow_timeout); -int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u8 dscp, - u32 new_length, u32 *vip_index); +int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index); + int lb_vip_del(u32 vip_index); int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index); @@ -356,6 +538,9 @@ u32 lb_hash_time_now(vlib_main_t * vm); void lb_garbage_collection(); +int lb_nat4_interface_add_del (u32 sw_if_index, int is_del); +int lb_nat6_interface_add_del (u32 sw_if_index, int is_del); + format_function_t format_lb_main; #endif /* LB_PLUGIN_LB_LB_H_ */ |