summaryrefslogtreecommitdiffstats
path: root/src/plugins/kubeproxy/kp.h
diff options
context:
space:
mode:
authorHongjun Ni <hongjun.ni@intel.com>2018-02-06 23:00:22 +0800
committerDamjan Marion <dmarion.lists@gmail.com>2018-05-18 07:45:31 +0000
commitd92a0b553fd2872b4fcda25994aaa8852d254824 (patch)
tree5e73673341ab952008f5ecb175ca97139ecdf356 /src/plugins/kubeproxy/kp.h
parentafe56de947822bb981bd30242f4e3c2c469f9ecc (diff)
Rework kube-proxy into LB plugin
Add support of NAT66 Change-Id: Ie6aa79078a3835f989829b9a597c448dfd2f9ea3 Signed-off-by: Hongjun Ni <hongjun.ni@intel.com>
Diffstat (limited to 'src/plugins/kubeproxy/kp.h')
-rw-r--r--src/plugins/kubeproxy/kp.h473
1 files changed, 0 insertions, 473 deletions
diff --git a/src/plugins/kubeproxy/kp.h b/src/plugins/kubeproxy/kp.h
deleted file mode 100644
index 243c002833f..00000000000
--- a/src/plugins/kubeproxy/kp.h
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * Copyright (c) 2017 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * kp-plugin implements a MagLev-like load balancer.
- * http://research.google.com/pubs/pub44824.html
- *
- * It hasn't been tested for interoperability with the original MagLev
- * but intends to provide similar functionality.
- * The kube-proxy receives traffic destined to VIP (Virtual IP)
- * addresses from one or multiple (ECMP) routers.
- * The kube-proxy tunnels the traffic toward many application servers
- * while ensuring session stickiness (i.e. that a single session is tunneled
- * towards a single application server).
- *
- */
-
-#ifndef KP_PLUGIN_KP_KP_H_
-#define KP_PLUGIN_KP_KP_H_
-
-#include <vnet/util/refcount.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/fib/fib_table.h>
-#include <vppinfra/bihash_8_8.h>
-
-#include <kubeproxy/kphash.h>
-
-#define KP_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10 /* NOTE(review): expansion is not parenthesized; an operator binding tighter than << at the use site (e.g. X - 1, X * 2) will regroup it */
-#define KP_DEFAULT_FLOW_TIMEOUT 40 /* presumably seconds, since kp_main_t.flow_timeout is documented as seconds -- confirm */
-#define KP_MAPPING_BUCKETS 1024
-#define KP_MAPPING_MEMORY_SIZE 64<<20 /* NOTE(review): unparenthesized shift, same precedence hazard as KP_DEFAULT_PER_CPU_STICKY_BUCKETS */
-
-typedef enum {
- KP_NEXT_DROP,
- KP_N_NEXT, /* evaluates to 1, the number of enumerators above */
-} kp_next_t; /* presumably the next-node indices of the kp4/kp6 graph nodes -- confirm against the node registration */
-
-typedef enum {
- KP_NAT4_IN2OUT_NEXT_DROP,
- KP_NAT4_IN2OUT_NEXT_LOOKUP,
- KP_NAT4_IN2OUT_N_NEXT, /* evaluates to 2, the number of enumerators above */
-} kp_nat4_in2out_next_t; /* presumably the next-node indices of kp_nat4_in2out_node -- confirm against the node registration */
-
-#define foreach_kp_nat_in2out_error \
-_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
-_(IN2OUT_PACKETS, "Good in2out packets processed") \
-_(NO_TRANSLATION, "No translation")
-
-typedef enum {
-#define _(sym,str) KP_NAT_IN2OUT_ERROR_##sym, /* keep the symbol, ignore the description string */
- foreach_kp_nat_in2out_error /* expands to one KP_NAT_IN2OUT_ERROR_<sym> enumerator per list entry, in list order */
-#undef _
- KP_NAT_IN2OUT_N_ERROR, /* evaluates to 3, the number of entries in the list */
-} kp_nat_in2out_error_t;
-
-/**
- * kube-proxy supports three types of service.
- * KP_SVR_N_TYPES is not a service type itself: it evaluates to the
- * number of types listed before it (3).
- */
-typedef enum {
- KP_SVR_TYPE_VIP_PORT,
- KP_SVR_TYPE_NODEIP_PORT,
- KP_SVR_TYPE_EXT_LB,
- KP_SVR_N_TYPES,
-} kp_svr_type_t;
-
-typedef enum {
- KP_NODEPORT_NEXT_IP4_NAT4,
- KP_NODEPORT_NEXT_IP4_NAT6,
- KP_NODEPORT_NEXT_IP6_NAT4,
- KP_NODEPORT_NEXT_IP6_NAT6,
- KP_NODEPORT_NEXT_DROP,
- KP_NODEPORT_N_NEXT, /* evaluates to 5, the number of enumerators above */
-} kp_nodeport_next_t; /* presumably the next-node indices of kp4_nodeport_node / kp6_nodeport_node -- confirm against the node registration */
-
-/**
- * Each VIP is configured with a set of PODs.
- * This structure describes one POD as seen by one VIP.
- */
-typedef struct {
- /**
- * Registration to FIB events.
- */
- fib_node_t fib_node;
-
- /**
- * Destination address used to transfer traffic towards that POD.
- * The address is also used as the POD ID and as the pseudo-random
- * seed for the load-balancing process.
- */
- ip46_address_t address;
-
- /**
- * PODs are indexed by address and VIP index.
- * This means there will be duplicate entries if the same server
- * address is used for multiple VIPs.
- */
- u32 vip_index;
-
- /**
- * Some per-POD flags.
- * For now only KP_POD_FLAGS_USED is defined.
- */
- u8 flags;
-
-#define KP_POD_FLAGS_USED 0x1
-
- /**
- * Rotating timestamp of when the KP_POD_FLAGS_USED flag was last set.
- *
- * POD removal is based on garbage collection and reference counting.
- * When a POD is removed, there is a race between the configuration core
- * and the worker cores, which may still add a reference while the POD
- * should no longer be used. This timestamp is used to avoid removing the
- * POD while such a race condition may happen.
- */
- u32 last_used;
-
- /**
- * The FIB entry index for the next-hop.
- */
- fib_node_index_t next_hop_fib_entry_index;
-
- /**
- * The child index on the FIB entry.
- */
- u32 next_hop_child_index;
-
- /**
- * The next DPO in the graph to follow.
- */
- dpo_id_t dpo;
-
-} kp_pod_t;
-
-format_function_t format_kp_pod;
-
-typedef struct {
- u32 pod_index; /* POD index stored per new-flow slot; presumably an index into the kp_main_t.pods pool -- confirm */
-} kp_new_flow_entry_t; /* element type of kp_vip_t.new_flow_table */
-
-#define kp_foreach_vip_counter \
- _(NEXT_PACKET, "packet from existing sessions", 0) \
- _(FIRST_PACKET, "first session packet", 1) \
- _(UNTRACKED_PACKET, "untracked packet", 2) \
- _(NO_SERVER, "no server configured", 3)
-
-typedef enum {
-#define _(a,b,c) KP_VIP_COUNTER_##a = c, /* enumerator takes the explicit value c; description b is ignored here */
- kp_foreach_vip_counter
-#undef _
- KP_N_VIP_COUNTERS /* follows NO_SERVER = 3, so evaluates to 4; sizes kp_main_t.vip_counters[] */
-} kp_vip_counter_t;
-
-/**
- * kube-proxy supports IPv4 and IPv6 traffic
- * and NAT4 and NAT6.
- *
- * The kp_vip_is_ip4() macro in this header treats the two IP4_* types as
- * IPv4 VIPs, and kp_vip_is_nat4() treats IP6_NAT64 and IP4_NAT44 as NAT4.
- * KP_VIP_N_TYPES is not a VIP type: it evaluates to the number of types
- * listed before it (4) and sizes kp_main_t.ip_lookup_next_index[].
- */
-typedef enum {
- KP_VIP_TYPE_IP4_NAT44,
- KP_VIP_TYPE_IP4_NAT46,
- KP_VIP_TYPE_IP6_NAT64,
- KP_VIP_TYPE_IP6_NAT66,
- KP_VIP_N_TYPES,
-} kp_vip_type_t;
-
-format_function_t format_kp_vip_type;
-unformat_function_t unformat_kp_vip_type;
-
-/**
- * Load balancing service is provided per VIP.
- * In this data model, a VIP can be a whole prefix.
- * But load balancing only
- * occurs on a per-source-address/port basis. This means that if a given
- * source reuses the same port for multiple destinations within the same VIP,
- * they will be considered as a single flow.
- */
-typedef struct {
-
- //Runtime
-
- /**
- * Vector mapping (flow-hash & new_flow_table_mask) to POD index.
- * This is used for new flows.
- */
- kp_new_flow_entry_t *new_flow_table;
-
- /**
- * New flows table length - 1
- * (length MUST be a power of 2)
- */
- u32 new_flow_table_mask;
-
- /**
- * Last time garbage collection was run to free the PODs.
- */
- u32 last_garbage_collection;
-
- //Not runtime
-
- /**
- * A Virtual IP represents a given service delivered
- * by a set of PODs. It can be a single
- * address or a prefix.
- * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address
- * (i.e. ::/96 prefix).
- */
- ip46_address_t prefix;
-
- /**
- * The VIP prefix length.
- * In case of IPv4, plen = 96 + ip4_plen.
- */
- u8 plen;
-
- /**
- * Service port. Network byte order.
- */
- u16 port;
-
- /**
- * Pod's port corresponding to the specific service. Network byte order.
- */
- u16 target_port;
-
- /**
- * Node's port; the service can be accessed via NodeIP:node_port.
- * Network byte order.
- */
- u16 node_port;
-
-
- /**
- * The type of traffic for this VIP.
- * KP_TYPE_UNDEFINED if unknown.
- * NOTE(review): kp_vip_type_t as declared in this header has no
- * KP_TYPE_UNDEFINED enumerator -- this sentence looks stale; confirm.
- */
- kp_vip_type_t type;
-
- /**
- * Flags related to this VIP.
- * KP_VIP_FLAGS_USED means the VIP is active.
- * When it is not set, the VIP is in the process of being removed.
- * We cannot immediately remove a VIP because the VIP index still may be stored
- * in the adjacency index.
- */
- u8 flags;
-#define KP_VIP_FLAGS_USED 0x1
-
- /**
- * Pool of POD indexes used for this VIP.
- * This also includes PODs that have been removed (but are still referenced).
- */
- u32 *pod_indexes;
-
-} kp_vip_t;
-
-/*
- * Mapping from nodeport to vip_index.
- * The nodeport itself is not stored here; kp_main_t.nodeport_by_key is
- * documented as the hash that looks up vip_index by a u16 nodeport key.
- */
-typedef struct {
-
- u32 vip_index;
-
-} kp_nodeport_t;
-
-#define kp_vip_is_ip4(vip) ((vip)->type == KP_VIP_TYPE_IP4_NAT44 \
- || (vip)->type == KP_VIP_TYPE_IP4_NAT46) /* non-zero for the two IP4_* VIP types; evaluates vip up to twice */
-#define kp_vip_is_nat4(vip) ((vip)->type == KP_VIP_TYPE_IP6_NAT64 \
- || (vip)->type == KP_VIP_TYPE_IP4_NAT44) /* non-zero for IP6_NAT64 and IP4_NAT44; evaluates vip up to twice */
-format_function_t format_kp_vip;
-format_function_t format_kp_vip_detailed;
-
-#define foreach_kp_nat_protocol \
- _(UDP, 0, udp, "udp") \
- _(TCP, 1, tcp, "tcp")
-
-typedef enum {
-#define _(N, i, n, s) KP_NAT_PROTOCOL_##N = i, /* only the upper-case name N and the value i are used here */
- foreach_kp_nat_protocol /* yields KP_NAT_PROTOCOL_UDP = 0 and KP_NAT_PROTOCOL_TCP = 1 */
-#undef _
-} kp_nat_protocol_t;
-
-always_inline u32
-kp_ip_proto_to_nat_proto (u8 ip_proto) /* maps an IP protocol number to a kp_nat_protocol_t value, or ~0 if it is neither UDP nor TCP */
-{
- u32 nat_proto = ~0; /* returned unchanged when neither comparison below matches */
-
- nat_proto = (ip_proto == IP_PROTOCOL_UDP) ? KP_NAT_PROTOCOL_UDP : nat_proto; /* conditional assignment rather than if/else */
- nat_proto = (ip_proto == IP_PROTOCOL_TCP) ? KP_NAT_PROTOCOL_TCP : nat_proto;
-
- return nat_proto;
-}
-
-/* Key for Pod's egress SNAT.
- * The address, port, protocol and fib_index fields are overlaid with as_u64
- * through the anonymous union, so the whole key can be read or written as a
- * single 64-bit value.
- * NOTE(review): presumably this is the 8-byte key used with
- * kp_main_t.mapping_by_pod (a clib_bihash_8_8_t) -- confirm. */
-typedef struct {
- union
- {
- struct
- {
- ip4_address_t addr;
- u16 port;
- u16 protocol:3, /* 3-bit field; presumably holds a kp_nat_protocol_t value -- confirm */
- fib_index:13; /* 13-bit field, so only FIB indexes below 8192 fit */
- };
- u64 as_u64;
- };
-} kp_snat4_key_t;
-
-typedef struct
-{
- ip6_address_t prefix;
- u8 plen; /* presumably the prefix length of the prefix field above -- confirm */
- u32 vrf_id;
- u32 fib_index;
-} kp_snat6_key_t; /* NOTE(review): undocumented in the original; by name the IPv6 counterpart of kp_snat4_key_t -- confirm how it is used */
-
-typedef struct {
- kp_svr_type_t svr_type;
- ip46_address_t vip;
- ip46_address_t node_ip;
- ip46_address_t pod_ip;
- u8 vip_is_ipv6; /* the three *_is_ipv6 flags presumably give the address family of the matching ip46_address_t above -- confirm */
- u8 node_ip_is_ipv6;
- u8 pod_ip_is_ipv6;
- u16 port; /* Network byte order */
- u16 node_port; /* Network byte order */
- u16 target_port; /* Network byte order */
- u32 vrf_id;
- u32 fib_index;
-} kp_snat_mapping_t; /* element type of the kp_main_t.snat_mappings pool */
-
-typedef struct {
- /**
- * Each CPU has its own sticky flow hash table.
- * One single table is used for all VIPs.
- */
- kp_hash_t *sticky_ht;
-
-} kp_per_cpu_t; /* element type of the kp_main_t.per_cpu array */
-
-typedef struct {
- /**
- * Pool of all Virtual IPs.
- */
- kp_vip_t *vips;
-
- /**
- * Pool of PODs.
- * PODs are referenced by address and VIP index.
- * The first element (index 0) is special and used only to fill
- * new_flow_tables when no POD has been configured.
- */
- kp_pod_t *pods;
-
- /**
- * Each POD has an associated reference counter.
- * As pods[0] has a special meaning, its associated counter
- * starts at 0 and is decremented instead, i.e. do not use it.
- */
- vlib_refcount_t pod_refcount;
-
- /* hash lookup vip_index by key: {u16: nodeport} */
- uword * nodeport_by_key;
-
-
- /**
- * Some global data is per-cpu.
- */
- kp_per_cpu_t *per_cpu;
-
- /**
- * Node next index for IP adjacencies, for each of the traffic types
- * (one slot per kp_vip_type_t value).
- */
- u32 ip_lookup_next_index[KP_VIP_N_TYPES];
-
- /**
- * Number of buckets in the per-cpu sticky hash table.
- */
- u32 per_cpu_sticky_buckets;
-
- /**
- * Flow timeout in seconds.
- */
- u32 flow_timeout;
-
- /**
- * Per-VIP counters, one counter set per kp_vip_counter_t value.
- */
- vlib_simple_counter_main_t vip_counters[KP_N_VIP_COUNTERS];
-
- /**
- * DPO types used to send packets from the IP4/6 lookup to the KP node;
- * going by the field names, one for NAT4 and one for NAT6.
- */
- dpo_type_t dpo_nat4_type;
- dpo_type_t dpo_nat6_type;
-
- /**
- * Node type for registering to FIB changes.
- */
- fib_node_type_t fib_node_type;
-
- /* Find a static mapping by pod IP : target_port */
- clib_bihash_8_8_t mapping_by_pod;
-
- /* Static mapping pool */
- kp_snat_mapping_t * snat_mappings;
-
- /**
- * API dynamically registered base ID.
- */
- u16 msg_id_base;
-
- volatile u32 *writer_lock; /* NOTE(review): undocumented in the original; by name a lock word serialising writers -- confirm how it is taken and released */
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} kp_main_t;
-
-#define ip46_address_type(ip46) (ip46_address_is_ip4(ip46)?IP46_TYPE_IP4:IP46_TYPE_IP6) /* IP46_TYPE_IP4 when ip46_address_is_ip4() holds, else IP46_TYPE_IP6 */
-#define ip46_prefix_is_ip4(ip46, len) ((len) >= 96 && ip46_address_is_ip4(ip46)) /* the >= 96 test matches the plen = 96 + ip4_plen encoding documented on kp_vip_t.plen */
-#define ip46_prefix_type(ip46, len) (ip46_prefix_is_ip4(ip46, len)?IP46_TYPE_IP4:IP46_TYPE_IP6) /* same selection as ip46_address_type, but using the prefix test */
-
-void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen); /* NOTE(review): presumably clears the bits of *prefix beyond plen in place -- confirm against the definition */
-uword unformat_ip46_prefix (unformat_input_t * input, va_list * args);
-u8 *format_ip46_prefix (u8 * s, va_list * args);
-
-
-extern kp_main_t kp_main; /* the plugin's global state; the kp_vip_get_by_index() macro in this header reads kp_main.vips */
-extern vlib_node_registration_t kp4_node; /* graph node registrations, declared here and defined elsewhere in the plugin */
-extern vlib_node_registration_t kp6_node;
-extern vlib_node_registration_t kp4_nodeport_node;
-extern vlib_node_registration_t kp6_nodeport_node;
-extern vlib_node_registration_t kp_nat4_in2out_node;
-
-/**
- * Fix (i.e. set) the global kube-proxy parameters.
- * @param sticky_buckets presumably the new value for
- *        kp_main_t.per_cpu_sticky_buckets -- confirm against the definition.
- * @param flow_timeout presumably the new value for kp_main_t.flow_timeout,
- *        which is documented as being in seconds -- confirm.
- * @return 0 on success. VNET_KP_ERR_XXX on error
- */
-int kp_conf(u32 sticky_buckets, u32 flow_timeout);
-
-int kp_vip_add(ip46_address_t *prefix, u8 plen, kp_vip_type_t type,
- u32 new_length, u32 *vip_index,
- u16 port, u16 target_port, u16 node_port); /* NOTE(review): new_length presumably sizes kp_vip_t.new_flow_table, whose length must be a power of 2; *vip_index presumably receives the new index -- confirm */
-int kp_vip_del(u32 vip_index);
-
-int kp_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
-
-#define kp_vip_get_by_index(index) (pool_is_free_index(kp_main.vips, index)?NULL:pool_elt_at_index(kp_main.vips, index)) /* NULL when index is a free slot of kp_main.vips, else the element; NOTE(review): index is expanded twice, so avoid arguments with side effects */
-
-int kp_vip_add_pods(u32 vip_index, ip46_address_t *addresses, u32 n); /* n is presumably the number of entries in addresses -- confirm */
-int kp_vip_del_pods(u32 vip_index, ip46_address_t *addresses, u32 n);
-
-u32 kp_hash_time_now(vlib_main_t * vm);
-
-void kp_garbage_collection(); /* NOTE(review): empty parentheses leave the parameter list unspecified before C23; (void) would make this a real prototype */
-
-int kp_nat4_interface_add_del (u32 sw_if_index, int is_del);
-
-format_function_t format_kp_main;
-
-#endif /* KP_PLUGIN_KP_KP_H_ */