summaryrefslogtreecommitdiffstats
path: root/src/plugins/lb/lb.c
blob: d5dc3054fb42a8172c7cc76cf0d41418b7b6e806 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314

@media only all and (prefers-color-scheme: dark) {
.highlight .hll { background-color: #49483e }
.highlight .c { color: #75715e } /* Comment */
.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
.highlight .k { color: #66d9ef } /* Keyword */
.highlight .l { color: #ae81ff } /* Literal */
.highlight .n { color: #f8f8f2 } /* Name */
.highlight .o { color: #f92672 } /* Operator */
.highlight .p { color: #f8f8f2 } /* Punctuation */
.highlight .ch { color: #75715e } /* Comment.Hashbang */
.highlight .cm { color: #75715e } /* Comment.Multiline */
.highlight .cp { color: #75715e } /* Comment.Preproc */
.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
.highlight .c1 { color: #75715e } /* Comment.Single */
.highlight .cs { color: #75715e } /* Comment.Special */
.highlight .gd { color: #f92672 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gi { color: #a6e22e } /* Generic.Inserted */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #75715e } /* Generic.Subheading */
.highlight .kc { color: #66d9ef } /* Keyword.Constant */
.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
.highlight .kn { color: #f92672 } /* Keyword.Namespace */
.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
.highlight .kt { color: #66d9ef } /* Keyword.Type */
.highlight .ld { color: #e6db74 } /* Literal.Date */
.highlight .m { color: #ae81ff } /* Literal.Number */
.highlight .s { color: #e6db74 } /* Literal.String */
.highlight .na { color: #a6e22e } /* Name.Attribute */
.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
.highlight .nc { color: #a6e22e } /* Name.Class */
.highlight .no { color: #66d9ef } /* Name.Constant */
.highlight .nd { color: #a6e22e } /* Name.Decorator */
.highlight .ni { color: #f8f8f2 } /* Name.Entity */
.highlight .ne { color: #a6e22e } /* Name.Exception */
.highlight .nf { color: #a6e22e } /* Name.Function */
.highlight .nl { color: #f8f8f2 } /* Name.Label */
.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
.highlight .nx { color: #a6e22e } /* Name.Other */
.highlight .py { color: #f8f8f2 } /* Name.Property */
.highlight .nt { color: #f92672 } /* Name.Tag */
.highlight .nv { color: #f8f8f2 } /* Name.Variable */
.highlight .ow { color: #f92672 } /* Operator.Word */
.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
.highlight .sc { color: #e6db74 } /* Literal.String.Char */
.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
.highlight .se { color: #ae81ff } /* Literal.String.Escape */
.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
.highlight .sx { color: #e6db74 } /* Literal.String.Other */
.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
}
@media (prefers-color-scheme: light) {
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
}
#!/bin/bash
# Copyright (c) 2016 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -x

# Space separated list of available testbeds, described by topology files
TOPOLOGIES="topologies/available/lf_testbed1.yaml \
            topologies/available/lf_testbed2.yaml \
            topologies/available/lf_testbed3.yaml"

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Reservation dir
RESERVATION_DIR="/tmp/reservation_dir"

PYBOT_ARGS="-W 150"

ARCHIVE_ARTIFACTS=(log.html output.xml report.html output_perf_data.xml)

# we will download the DPDK in the robot

WORKING_TOPOLOGY=""
export PYTHONPATH=${SCRIPT_DIR}

sudo apt-get -y update
sudo apt-get -y install libpython2.7-dev python-virtualenv

virtualenv --system-site-packages env
. env/bin/activate

echo pip install
pip install -r requirements.txt

# We iterate over available topologies and wait until we reserve topology
while :; do
    for TOPOLOGY in ${TOPOLOGIES};
    do
        python ${SCRIPT_DIR}/resources/tools/topo_reservation.py -t ${TOPOLOGY}
        if [ $? -eq 0 ]; then
            WORKING_TOPOLOGY=${TOPOLOGY}
            echo "Reserved: ${WORKING_TOPOLOGY}"
            break
        fi
    done

    if [ ! -z "${WORKING_TOPOLOGY}" ]; then
        # Exit the infinite while loop if we made a reservation
        break
    fi

    # Wait ~3minutes before next try
    SLEEP_TIME=$[ ( $RANDOM % 20 ) + 180 ]s
    echo "Sleeping ${SLEEP_TIME}"
    sleep ${SLEEP_TIME}
done

#for DPDK test, we don't need to install the VPP deb
function cancel_all {
    python ${SCRIPT_DIR}/resources/tools/topo_reservation.py -c -t $1
}

# On script exit we cancel the reservation
trap "cancel_all ${WORKING_TOPOLOGY}" EXIT

case "$TEST_TAG" in
    # run specific performance tests based on jenkins job type variable
    PERFTEST_LONG )
        pybot ${PYBOT_ARGS} \
              -L TRACE \
              -v TOPOLOGY_PATH:${WORKING_TOPOLOGY} \
              -v DPDK_TEST:True \
              -s "dpdk-tests.perf" \
              --exclude SKIP_PATCH \
              -i NDRPDRDISC \
              dpdk-tests/
        RETURN_STATUS=$(echo $?)
        ;;
    PERFTEST_SHORT )
        pybot ${PYBOT_ARGS} \
              -L TRACE \
              -v TOPOLOGY_PATH:${WORKING_TOPOLOGY} \
              -v DPDK_TEST:True \
              -s "dpdk-tests.perf" \
              -i NDRCHK \
              dpdk-tests/
        RETURN_STATUS=$(echo $?)
        ;;
   PERFTEST_NIGHTLY )
        #run all available tests
        pybot ${PYBOT_ARGS} \
              -L TRACE \
              -v TOPOLOGY_PATH:${WORKING_TOPOLOGY} \
              -v DPDK_TEST:True \
              -s "dpdk-tests.perf" \
              dpdk-tests/
        RETURN_STATUS=$(echo $?)
        ;;
    * )
        # run full performance test suite and exit on fail
        pybot ${PYBOT_ARGS} \
              -L TRACE \
              -v TOPOLOGY_PATH:${WORKING_TOPOLOGY} \
              -v DPDK_TEST:True \
              -s "dpdk-tests.perf" \
              dpdk-tests/
        RETURN_STATUS=$(echo $?)
esac

# Pybot output post-processing
echo Post-processing test data...

python ${SCRIPT_DIR}/resources/tools/robot_output_parser.py \
       -i ${SCRIPT_DIR}/output.xml \
       -o ${SCRIPT_DIR}/output_perf_data.xml
if [ ! $? -eq 0 ]; then
    echo "Parsing ${SCRIPT_DIR}/output.xml failed"
fi

# Archive artifacts
mkdir archive
for i in ${ARCHIVE_ARTIFACTS[@]}; do
    cp $( readlink -f ${i} | tr '\n' ' ' ) archive/
done

echo Post-processing finished.

exit ${RETURN_STATUS}
id='n926' href='#n926'>926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <lb/lb.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <vnet/api_errno.h>
#include <vnet/udp/udp.h>

//GC runs at most once every so many seconds
#define LB_GARBAGE_RUN 60

//After so many seconds. It is assumed that inter-core race condition will not occur.
#define LB_CONCURRENCY_TIMEOUT 10

lb_main_t lb_main;

#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1))
#define lb_put_writer_lock() lb_main.writer_lock[0] = 0

static void lb_as_stack (lb_as_t *as);


const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
    {
        [DPO_PROTO_IP4]  = lb_dpo_gre4_ip4,
        [DPO_PROTO_IP6]  = lb_dpo_gre4_ip6,
    };

const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
    {
        [DPO_PROTO_IP4]  = lb_dpo_gre6_ip4,
        [DPO_PROTO_IP6]  = lb_dpo_gre6_ip6,
    };

const static char * const lb_dpo_l3dsr_ip4[] = { "lb4-l3dsr" , NULL };
const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
    {
        [DPO_PROTO_IP4]  = lb_dpo_l3dsr_ip4,
    };

const static char * const lb_dpo_nat4_ip4[] = { "lb4-nat4" , NULL };
const static char* const * const lb_dpo_nat4_nodes[DPO_PROTO_NUM] =
    {
        [DPO_PROTO_IP4]  = lb_dpo_nat4_ip4,
    };

const static char * const lb_dpo_nat6_ip6[] = { "lb6-nat6" , NULL };
const static char* const * const lb_dpo_nat6_nodes[DPO_PROTO_NUM] =
    {
        [DPO_PROTO_IP6]  = lb_dpo_nat6_ip6,
    };

u32 lb_hash_time_now(vlib_main_t * vm)
{
  return (u32) (vlib_time_now(vm) + 10000);
}

u8 *format_lb_main (u8 * s, va_list * args)
{
  vlib_thread_main_t *tm = vlib_get_thread_main();
  lb_main_t *lbm = &lb_main;
  s = format(s, "lb_main");
  s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
  s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
  s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
  s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);

  u32 thread_index;
  for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
    lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
    if (h) {
      s = format(s, "core %d\n", thread_index);
      s = format(s, "  timeout: %ds\n", h->timeout);
      s = format(s, "  usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())),  lb_hash_size(h));
    }
  }

  return s;
}

static char *lb_vip_type_strings[] = {
    [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
    [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
    [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
    [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
    [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr",
    [LB_VIP_TYPE_IP4_NAT4] = "ip4-nat4",
    [LB_VIP_TYPE_IP6_NAT6] = "ip6-nat6",
};

u8 *format_lb_vip_type (u8 * s, va_list * args)
{
  lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
  u32 i;
  for (i=0; i<LB_VIP_N_TYPES; i++)
    if (vipt == i)
      return format(s, lb_vip_type_strings[i]);
  return format(s, "_WRONG_TYPE_");
}

uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
{
  lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
  u32 i;
  for (i=0; i<LB_VIP_N_TYPES; i++)
    if (unformat(input, lb_vip_type_strings[i])) {
      *vipt = i;
      return 1;
    }
  return 0;
}

u8 *format_lb_vip (u8 * s, va_list * args)
{
  lb_vip_t *vip = va_arg (*args, lb_vip_t *);
  s = format(s, "%U %U new_size:%u #as:%u%s",
             format_lb_vip_type, vip->type,
             format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
             vip->new_flow_table_mask + 1,
             pool_elts(vip->as_indexes),
             (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");

  if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
    {
      s = format(s, "  dscp:%u", vip->encap_args.dscp);
    }
  else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
          || (vip->type == LB_VIP_TYPE_IP6_NAT6))
    {
      if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
        s = format (s, "  type:clusterip port:%u target_port:%u",
                    ntohs (vip->encap_args.port),
                    ntohs (vip->encap_args.target_port));
      else
        s = format (s, "  type:nodeport node_port:%u target_port:%u",
                    ntohs (vip->encap_args.node_port),
                    ntohs (vip->encap_args.target_port));
    }

  return s;
}

u8 *format_lb_as (u8 * s, va_list * args)
{
  lb_as_t *as = va_arg (*args, lb_as_t *);
  return format(s, "%U %s", format_ip46_address,
                &as->address, IP46_TYPE_ANY,
                (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
}

u8 *format_lb_vip_detailed (u8 * s, va_list * args)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = va_arg (*args, lb_vip_t *);
  u32 indent = format_get_indent (s);

  s = format(s, "%U %U [%lu] %U%s\n"
                   "%U  new_size:%u\n",
                  format_white_space, indent,
                  format_lb_vip_type, vip->type,
                  vip - lbm->vips,
                  format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
                  (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
                  format_white_space, indent,
                  vip->new_flow_table_mask + 1);

  if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
    {
      s = format(s, "%U  dscp:%u\n",
                    format_white_space, indent,
                    vip->encap_args.dscp);
    }
  else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
          || (vip->type == LB_VIP_TYPE_IP6_NAT6))
    {
      if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
        s = format (s, "%U  type:clusterip port:%u target_port:%u",
                    format_white_space, indent, ntohs (vip->encap_args.port),
                    ntohs (vip->encap_args.target_port));
      else
        s = format (s, "%U  type:nodeport node_port:%u target_port:%u",
                    format_white_space, indent,
                    ntohs (vip->encap_args.node_port),
                    ntohs (vip->encap_args.target_port));
    }

  //Print counters
  s = format(s, "%U  counters:\n",
             format_white_space, indent);
  u32 i;
  for (i=0; i<LB_N_VIP_COUNTERS; i++)
    s = format(s, "%U    %s: %d\n",
               format_white_space, indent,
               lbm->vip_counters[i].name,
               vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));


  s = format(s, "%U  #as:%u\n",
             format_white_space, indent,
             pool_elts(vip->as_indexes));

  //Let's count the buckets for each AS
  u32 *count = 0;
  vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
  lb_new_flow_entry_t *nfe;
  vec_foreach(nfe, vip->new_flow_table)
    count[nfe->as_index]++;

  lb_as_t *as;
  u32 *as_index;
  pool_foreach(as_index, vip->as_indexes, {
      as = &lbm->ass[*as_index];
      s = format(s, "%U    %U %d buckets   %d flows  dpo:%u %s\n",
                   format_white_space, indent,
                   format_ip46_address, &as->address, IP46_TYPE_ANY,
                   count[as - lbm->ass],
                   vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
                   as->dpo.dpoi_index,
                   (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
  });

  vec_free(count);

  /*
  s = format(s, "%U  new flows table:\n", format_white_space, indent);
  lb_new_flow_entry_t *nfe;
  vec_foreach(nfe, vip->new_flow_table) {
    s = format(s, "%U    %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index);
  }
  */
  return s;
}

typedef struct {
  u32 as_index;
  u32 last;
  u32 skip;
} lb_pseudorand_t;

static int lb_pseudorand_compare(void *a, void *b)
{
  lb_as_t *asa, *asb;
  lb_main_t *lbm = &lb_main;
  asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
  asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
  return memcmp(&asa->address, &asb->address, sizeof(asb->address));
}

static void lb_vip_garbage_collection(lb_vip_t *vip)
{
  lb_main_t *lbm = &lb_main;
  lb_snat4_key_t m_key4;
  clib_bihash_kv_8_8_t kv4, value4;
  lb_snat6_key_t m_key6;
  clib_bihash_kv_24_8_t kv6, value6;
  lb_snat_mapping_t *m = 0;
  ASSERT (lbm->writer_lock[0]);

  u32 now = (u32) vlib_time_now(vlib_get_main());
  if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
    return;

  vip->last_garbage_collection = now;
  lb_as_t *as;
  u32 *as_index;
  pool_foreach(as_index, vip->as_indexes, {
      as = &lbm->ass[*as_index];
      if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
          clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
          (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
        { //Not referenced

          if (lb_vip_is_nat4(vip)) {
              m_key4.addr = as->address.ip4;
              m_key4.port = vip->encap_args.target_port;
              m_key4.protocol = 0;
              m_key4.fib_index = 0;

              kv4.key = m_key4.as_u64;
              if(!clib_bihash_search_8_8(&lbm->mapping_by_as4, &kv4, &value4))
                m = pool_elt_at_index (lbm->snat_mappings, value4.value);
              ASSERT (m);

              kv4.value = m - lbm->snat_mappings;
              clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0);
             pool_put (lbm->snat_mappings, m);
          } else if (lb_vip_is_nat6(vip)) {
              m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
              m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
              m_key6.port = vip->encap_args.target_port;
              m_key6.protocol = 0;
              m_key6.fib_index = 0;

              kv6.key[0] = m_key6.as_u64[0];
              kv6.key[1] = m_key6.as_u64[1];
              kv6.key[2] = m_key6.as_u64[2];

              if (!clib_bihash_search_24_8 (&lbm->mapping_by_as6, &kv6, &value6))
                m = pool_elt_at_index (lbm->snat_mappings, value6.value);
              ASSERT (m);

              kv6.value = m - lbm->snat_mappings;
              clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 0);
              pool_put (lbm->snat_mappings, m);
          }
          fib_entry_child_remove(as->next_hop_fib_entry_index,
                                as->next_hop_child_index);
          fib_table_entry_delete_index(as->next_hop_fib_entry_index,
                                       FIB_SOURCE_RR);
          as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;

          pool_put(vip->as_indexes, as_index);
          pool_put(lbm->ass, as);
        }
  });
}

void lb_garbage_collection()
{
  lb_main_t *lbm = &lb_main;
  lb_get_writer_lock();
  lb_vip_t *vip;
  u32 *to_be_removed_vips = 0, *i;
  pool_foreach(vip, lbm->vips, {
      lb_vip_garbage_collection(vip);

      if (!(vip->flags & LB_VIP_FLAGS_USED) &&
          (pool_elts(vip->as_indexes) == 0)) {
        vec_add1(to_be_removed_vips, vip - lbm->vips);
      }
  });

  vec_foreach(i, to_be_removed_vips) {
    vip = &lbm->vips[*i];
    pool_put(lbm->vips, vip);
    pool_free(vip->as_indexes);
  }

  vec_free(to_be_removed_vips);
  lb_put_writer_lock();
}

static void lb_vip_update_new_flow_table(lb_vip_t *vip)
{
  lb_main_t *lbm = &lb_main;
  lb_new_flow_entry_t *old_table;
  u32 i, *as_index;
  lb_new_flow_entry_t *new_flow_table = 0;
  lb_as_t *as;
  lb_pseudorand_t *pr, *sort_arr = 0;
  u32 count;

  ASSERT (lbm->writer_lock[0]); //We must have the lock

  //Check if some AS is configured or not
  i = 0;
  pool_foreach(as_index, vip->as_indexes, {
      as = &lbm->ass[*as_index];
      if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
        i = 1;
        goto out; //Not sure 'break' works in this macro-loop
      }
  });

out:
  if (i == 0) {
    //Only the default. i.e. no AS
    vec_validate(new_flow_table, vip->new_flow_table_mask);
    for (i=0; i<vec_len(new_flow_table); i++)
      new_flow_table[i].as_index = 0;

    goto finished;
  }

  //First, let's sort the ASs
  sort_arr = 0;
  vec_alloc(sort_arr, pool_elts(vip->as_indexes));

  i = 0;
  pool_foreach(as_index, vip->as_indexes, {
      as = &lbm->ass[*as_index];
      if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
        continue;

      sort_arr[i].as_index = as - lbm->ass;
      i++;
  });
  _vec_len(sort_arr) = i;

  vec_sort_with_function(sort_arr, lb_pseudorand_compare);

  //Now let's pseudo-randomly generate permutations
  vec_foreach(pr, sort_arr) {
    lb_as_t *as = &lbm->ass[pr->as_index];

    u64 seed = clib_xxhash(as->address.as_u64[0] ^
                           as->address.as_u64[1]);
    /* We have 2^n buckets.
     * skip must be prime with 2^n.
     * So skip must be odd.
     * MagLev actually state that M should be prime,
     * but this has a big computation cost (% operation).
     * Using 2^n is more better (& operation).
     */
    pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
    pr->last = (seed >> 32) & vip->new_flow_table_mask;
  }

  //Let's create a new flow table
  vec_validate(new_flow_table, vip->new_flow_table_mask);
  for (i=0; i<vec_len(new_flow_table); i++)
    new_flow_table[i].as_index = ~0;

  u32 done = 0;
  while (1) {
    vec_foreach(pr, sort_arr) {
      while (1) {
        u32 last = pr->last;
        pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
        if (new_flow_table[last].as_index == ~0) {
          new_flow_table[last].as_index = pr->as_index;
          break;
        }
      }
      done++;
      if (done == vec_len(new_flow_table))
        goto finished;
    }
  }

  vec_free(sort_arr);

finished:

//Count number of changed entries
  count = 0;
  for (i=0; i<vec_len(new_flow_table); i++)
    if (vip->new_flow_table == 0 ||
        new_flow_table[i].as_index != vip->new_flow_table[i].as_index)
      count++;

  old_table = vip->new_flow_table;
  vip->new_flow_table = new_flow_table;
  vec_free(old_table);
}

int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
           u32 per_cpu_sticky_buckets, u32 flow_timeout)
{
  lb_main_t *lbm = &lb_main;

  if (!is_pow2(per_cpu_sticky_buckets))
    return VNET_API_ERROR_INVALID_MEMORY_SIZE;

  lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
  lbm->ip4_src_address = *ip4_address;
  lbm->ip6_src_address = *ip6_address;
  lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
  lbm->flow_timeout = flow_timeout;
  lb_put_writer_lock();
  return 0;
}

static
int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip;
  ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
  ip46_prefix_normalize(prefix, plen);
  pool_foreach(vip, lbm->vips, {
      if ((vip->flags & LB_AS_FLAGS_USED) &&
          vip->plen == plen &&
          vip->prefix.as_u64[0] == prefix->as_u64[0] &&
          vip->prefix.as_u64[1] == prefix->as_u64[1]) {
        *vip_index = vip - lbm->vips;
        return 0;
      }
  });
  return VNET_API_ERROR_NO_SUCH_ENTRY;
}

int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
{
  int ret;
  lb_get_writer_lock();
  ret = lb_vip_find_index_with_lock(prefix, plen, vip_index);
  lb_put_writer_lock();
  return ret;
}

static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
{
  lb_main_t *lbm = &lb_main;
  ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
  lb_as_t *as;
  u32 *asi;
  pool_foreach(asi, vip->as_indexes, {
      as = &lbm->ass[*asi];
      if (as->vip_index == (vip - lbm->vips) &&
          as->address.as_u64[0] == address->as_u64[0] &&
          as->address.as_u64[1] == address->as_u64[1]) {
        *as_index = as - lbm->ass;
        return 0;
      }
  });
  return -1;
}

int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
{
  lb_main_t *lbm = &lb_main;
  lb_get_writer_lock();
  lb_vip_t *vip;
  if (!(vip = lb_vip_get_by_index(vip_index))) {
    lb_put_writer_lock();
    return VNET_API_ERROR_NO_SUCH_ENTRY;
  }

  ip46_type_t type = lb_encap_is_ip4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
  u32 *to_be_added = 0;
  u32 *to_be_updated = 0;
  u32 i;
  u32 *ip;
  lb_snat_mapping_t *m;

  //Sanity check
  while (n--) {

    if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
      if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
        vec_free(to_be_added);
        vec_free(to_be_updated);
        lb_put_writer_lock();
        return VNET_API_ERROR_VALUE_EXIST;
      }
      vec_add1(to_be_updated, i);
      goto next;
    }

    if (ip46_address_type(&addresses[n]) != type) {
      vec_free(to_be_added);
      vec_free(to_be_updated);
      lb_put_writer_lock();
      return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
    }

    if (n) {
      u32 n2 = n;
      while(n2--) //Check for duplicates
        if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
            addresses[n2].as_u64[1] == addresses[n].as_u64[1])
          goto next;
    }

    vec_add1(to_be_added, n);

next:
    continue;
  }

  //Update reused ASs
  vec_foreach(ip, to_be_updated) {
    lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
  }
  vec_free(to_be_updated);

  //Create those who have to be created
  vec_foreach(ip, to_be_added) {
    lb_as_t *as;
    u32 *as_index;
    pool_get(lbm->ass, as);
    as->address = addresses[*ip];
    as->flags = LB_AS_FLAGS_USED;
    as->vip_index = vip_index;
    pool_get(vip->as_indexes, as_index);
    *as_index = as - lbm->ass;

    /*
     * become a child of the FIB entry
     * so we are informed when its forwarding changes
     */
    fib_prefix_t nh = {};
    if (lb_encap_is_ip4(vip)) {
        nh.fp_addr.ip4 = as->address.ip4;
        nh.fp_len = 32;
        nh.fp_proto = FIB_PROTOCOL_IP4;
    } else {
        nh.fp_addr.ip6 = as->address.ip6;
        nh.fp_len = 128;
        nh.fp_proto = FIB_PROTOCOL_IP6;
    }

    as->next_hop_fib_entry_index =
	fib_table_entry_special_add(0,
                                    &nh,
                                    FIB_SOURCE_RR,
                                    FIB_ENTRY_FLAG_NONE);
    as->next_hop_child_index =
	fib_entry_child_add(as->next_hop_fib_entry_index,
                            lbm->fib_node_type,
                            as - lbm->ass);

    lb_as_stack(as);

    if ( lb_vip_is_nat4(vip) || lb_vip_is_nat6(vip) )
      {
        /* Add SNAT static mapping */
        pool_get (lbm->snat_mappings, m);
        memset (m, 0, sizeof (*m));
        if (lb_vip_is_nat4(vip)) {
            lb_snat4_key_t m_key4;
            clib_bihash_kv_8_8_t kv4;
            m_key4.addr = as->address.ip4;
            m_key4.port = vip->encap_args.target_port;
            m_key4.protocol = 0;
            m_key4.fib_index = 0;

            if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
              {
                m->src_ip.ip4 = vip->prefix.ip4;
                m->src_port = vip->encap_args.port;
              }
            else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
              {
                m->src_ip.ip4 = lbm->ip4_src_address;
                m->src_port = vip->encap_args.node_port;
              }
            m->src_ip_is_ipv6 = 0;
            m->as_ip.ip4 = as->address.ip4;
            m->as_ip_is_ipv6 = 0;;
            m->target_port = vip->encap_args.target_port;
            m->vrf_id = 0;
            m->fib_index = 0;

            kv4.key = m_key4.as_u64;
            kv4.value = m - lbm->snat_mappings;
            clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 1);
        } else {
            lb_snat6_key_t m_key6;
            clib_bihash_kv_24_8_t kv6;
            m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
            m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
            m_key6.port = vip->encap_args.target_port;
            m_key6.protocol = 0;
            m_key6.fib_index = 0;

            if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
              {
                m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0];
                m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1];
                m->src_port = vip->encap_args.port;
              }
            else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
              {
                m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0];
                m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1];
                m->src_port = vip->encap_args.node_port;
              }
            m->src_ip_is_ipv6 = 1;
            m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0];
            m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1];
            m->as_ip_is_ipv6 = 1;
            m->target_port = vip->encap_args.target_port;
            m->vrf_id = 0;
            m->fib_index = 0;

            kv6.key[0] = m_key6.as_u64[0];
            kv6.key[1] = m_key6.as_u64[1];
            kv6.key[2] = m_key6.as_u64[2];
            kv6.value = m - lbm->snat_mappings;
            clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 1);
        }
      }
  }
  vec_free(to_be_added);

  //Recompute flows
  lb_vip_update_new_flow_table(vip);

  //Garbage collection maybe
  lb_vip_garbage_collection(vip);

  lb_put_writer_lock();
  return 0;
}

int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
{
  lb_main_t *lbm = &lb_main;
  u32 now = (u32) vlib_time_now(vlib_get_main());
  u32 *ip = 0;

  lb_vip_t *vip;
  if (!(vip = lb_vip_get_by_index(vip_index))) {
    return VNET_API_ERROR_NO_SUCH_ENTRY;
  }

  u32 *indexes = NULL;
  while (n--) {
    u32 i;
    if (lb_as_find_index_vip(vip, &addresses[n], &i)) {
      vec_free(indexes);
      return VNET_API_ERROR_NO_SUCH_ENTRY;
    }

    if (n) { //Check for duplicates
      u32 n2 = n - 1;
      while(n2--) {
        if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
            addresses[n2].as_u64[1] == addresses[n].as_u64[1])
          goto next;
      }
    }

    vec_add1(indexes, i);
next:
  continue;
  }

  //Garbage collection maybe
  lb_vip_garbage_collection(vip);

  if (indexes != NULL) {
    vec_foreach(ip, indexes) {
      lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
      lbm->ass[*ip].last_used = now;
    }

    //Recompute flows
    lb_vip_update_new_flow_table(vip);
  }

  vec_free(indexes);
  return 0;
}

int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
{
  lb_get_writer_lock();
  int ret = lb_vip_del_ass_withlock(vip_index, addresses, n);
  lb_put_writer_lock();
  return ret;
}

/**
 * Add the VIP adjacency to the ip4 or ip6 fib
 */
static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
{
  dpo_proto_t proto = 0;
  dpo_type_t dpo_type = 0;

  dpo_id_t dpo = DPO_INVALID;
  fib_prefix_t pfx = {};
  if (lb_vip_is_ip4(vip)) {
      pfx.fp_addr.ip4 = vip->prefix.ip4;
      pfx.fp_len = vip->plen - 96;
      pfx.fp_proto = FIB_PROTOCOL_IP4;
      proto = DPO_PROTO_IP4;
  } else {
      pfx.fp_addr.ip6 = vip->prefix.ip6;
      pfx.fp_len = vip->plen;
      pfx.fp_proto = FIB_PROTOCOL_IP6;
      proto = DPO_PROTO_IP6;
  }

  if (lb_vip_is_gre4(vip))
    dpo_type = lbm->dpo_gre4_type;
  else if (lb_vip_is_gre6(vip))
    dpo_type = lbm->dpo_gre6_type;
  else if (lb_vip_is_l3dsr(vip))
    dpo_type = lbm->dpo_l3dsr_type;
  else if(lb_vip_is_nat4(vip))
    dpo_type = lbm->dpo_nat4_type;
  else if (lb_vip_is_nat6(vip))
    dpo_type = lbm->dpo_nat6_type;

  dpo_set(&dpo, dpo_type, proto, vip - lbm->vips);
  fib_table_entry_special_dpo_add(0,
                                  &pfx,
                                  FIB_SOURCE_PLUGIN_HI,
                                  FIB_ENTRY_FLAG_EXCLUSIVE,
                                  &dpo);
  dpo_reset(&dpo);
}

/**
 * Deletes the adjacency associated with the VIP
 */
static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
{
  fib_prefix_t pfx = {};
  if (lb_vip_is_ip4(vip)) {
      pfx.fp_addr.ip4 = vip->prefix.ip4;
      pfx.fp_len = vip->plen - 96;
      pfx.fp_proto = FIB_PROTOCOL_IP4;
  } else {
      pfx.fp_addr.ip6 = vip->prefix.ip6;
      pfx.fp_len = vip->plen;
      pfx.fp_proto = FIB_PROTOCOL_IP6;
  }
  fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI);
}

int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
{
  lb_main_t *lbm = &lb_main;
  vlib_main_t *vm = vlib_get_main();
  lb_vip_t *vip;
  lb_vip_type_t type = args.type;
  u16 node_port = args.encap_args.node_port;

  lb_get_writer_lock();
  ip46_prefix_normalize(&(args.prefix), args.plen);

  if (!lb_vip_find_index_with_lock(&(args.prefix), args.plen, vip_index)) {
    lb_put_writer_lock();
    return VNET_API_ERROR_VALUE_EXIST;
  }

  if (!is_pow2(args.new_length)) {
    lb_put_writer_lock();
    return VNET_API_ERROR_INVALID_MEMORY_SIZE;
  }

  if (ip46_prefix_is_ip4(&(args.prefix), args.plen) &&
      (type != LB_VIP_TYPE_IP4_GRE4) &&
      (type != LB_VIP_TYPE_IP4_GRE6) &&
      (type != LB_VIP_TYPE_IP4_L3DSR) &&
      (type != LB_VIP_TYPE_IP4_NAT4)) {
    lb_put_writer_lock();
    return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
  }

  if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) &&
      (type != LB_VIP_TYPE_IP6_GRE4) &&
      (type != LB_VIP_TYPE_IP6_GRE6) &&
      (type != LB_VIP_TYPE_IP6_NAT6)) {
    lb_put_writer_lock();
    return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
  }

  if ((type == LB_VIP_TYPE_IP4_L3DSR) && (args.encap_args.dscp >= 64 ) )
    {
      lb_put_writer_lock();
      return VNET_API_ERROR_VALUE_EXIST;
    }

  //Allocate
  pool_get(lbm->vips, vip);

  //Init
  memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix));
  vip->plen = args.plen;
  vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
  vip->type = args.type;

  if (args.type == LB_VIP_TYPE_IP4_L3DSR) {
      vip->encap_args.dscp = args.encap_args.dscp;
    }
  else if ((args.type == LB_VIP_TYPE_IP4_NAT4)
	    ||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
      vip->encap_args.srv_type = args.encap_args.srv_type;
      vip->encap_args.port = clib_host_to_net_u16(args.encap_args.port);
      vip->encap_args.target_port =
          clib_host_to_net_u16(args.encap_args.target_port);
      vip->encap_args.node_port = clib_host_to_net_u16(node_port);
    }

  vip->flags = LB_VIP_FLAGS_USED;
  vip->as_indexes = 0;

  //Validate counters
  u32 i;
  for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
    vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
    vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
  }

  //Configure new flow table
  vip->new_flow_table_mask = args.new_length - 1;
  vip->new_flow_table = 0;

  //Create a new flow hash table full of the default entry
  lb_vip_update_new_flow_table(vip);

  //Create adjacency to direct traffic
  lb_vip_add_adjacency(lbm, vip);

  if ( (lb_vip_is_nat4(vip) || lb_vip_is_nat6(vip))
      && (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) )
    {
      u32 key;
      uword * entry;

      //Create maping from nodeport to vip_index
      key = clib_host_to_net_u16(node_port);
      entry = hash_get_mem (lbm->vip_index_by_nodeport, &key);
      if (entry) {
        lb_put_writer_lock();
        return VNET_API_ERROR_VALUE_EXIST;
      }

      hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips);

      /* receive packets destined to NodeIP:NodePort */
      udp_register_dst_port (vm, node_port, lb4_nodeport_node.index, 1);
      udp_register_dst_port (vm, node_port, lb6_nodeport_node.index, 0);
    }

  //Return result
  *vip_index = vip - lbm->vips;

  lb_put_writer_lock();
  return 0;
}

int lb_vip_del(u32 vip_index)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip;
  lb_get_writer_lock();
  if (!(vip = lb_vip_get_by_index(vip_index))) {
    lb_put_writer_lock();
    return VNET_API_ERROR_NO_SUCH_ENTRY;
  }

  //FIXME: This operation is actually not working
  //We will need to remove state before performing this.

  {
    //Remove all ASs
    ip46_address_t *ass = 0;
    lb_as_t *as;
    u32 *as_index;
    pool_foreach(as_index, vip->as_indexes, {
        as = &lbm->ass[*as_index];
        vec_add1(ass, as->address);
    });
    if (vec_len(ass))
      lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass));
    vec_free(ass);
  }

  //Delete adjacency
  lb_vip_del_adjacency(lbm, vip);

  //Set the VIP as unused
  vip->flags &= ~LB_VIP_FLAGS_USED;

  lb_put_writer_lock();
  return 0;
}

/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Load Balancer",
};
/* *INDENT-ON* */

u8 *format_lb_dpo (u8 * s, va_list * va)
{
  index_t index = va_arg (*va, index_t);
  CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
  return format (s, "%U", format_lb_vip, vip);
}

static void lb_dpo_lock (dpo_id_t *dpo) {}
static void lb_dpo_unlock (dpo_id_t *dpo) {}

static fib_node_t *
lb_fib_node_get_node (fib_node_index_t index)
{
  lb_main_t *lbm = &lb_main;
  lb_as_t *as = pool_elt_at_index (lbm->ass, index);
  return (&as->fib_node);
}

static void
lb_fib_node_last_lock_gone (fib_node_t *node)
{
}

static lb_as_t *
lb_as_from_fib_node (fib_node_t *node)
{
  return ((lb_as_t*)(((char*)node) -
      STRUCT_OFFSET_OF(lb_as_t, fib_node)));
}

static void
lb_as_stack (lb_as_t *as)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = &lbm->vips[as->vip_index];
  dpo_type_t dpo_type = 0;

  if (lb_vip_is_gre4(vip))
    dpo_type = lbm->dpo_gre4_type;
  else if (lb_vip_is_gre6(vip))
    dpo_type = lbm->dpo_gre6_type;
  else if (lb_vip_is_l3dsr(vip))
    dpo_type = lbm->dpo_l3dsr_type;
  else if(lb_vip_is_nat4(vip))
    dpo_type = lbm->dpo_nat4_type;
  else if (lb_vip_is_nat6(vip))
    dpo_type = lbm->dpo_nat6_type;

  dpo_stack(dpo_type,
            lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6,
            &as->dpo,
            fib_entry_contribute_ip_forwarding(
                as->next_hop_fib_entry_index));
}

static fib_node_back_walk_rc_t
lb_fib_node_back_walk_notify (fib_node_t *node,
			       fib_node_back_walk_ctx_t *ctx)
{
    lb_as_stack(lb_as_from_fib_node(node));
    return (FIB_NODE_BACK_WALK_CONTINUE);
}

int lb_nat4_interface_add_del (u32 sw_if_index, int is_del)
{
  if (is_del)
    {
      vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
                                   sw_if_index, 0, 0, 0);
    }
  else
    {
      vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
                                   sw_if_index, 1, 0, 0);
    }

  return 0;
}

int lb_nat6_interface_add_del (u32 sw_if_index, int is_del)
{
  if (is_del)
    {
      vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
                                   sw_if_index, 0, 0, 0);
    }
  else
    {
      vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
                                   sw_if_index, 1, 0, 0);
    }

  return 0;
}

clib_error_t *
lb_init (vlib_main_t * vm)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  lb_main_t *lbm = &lb_main;
  lbm->vnet_main = vnet_get_main ();
  lbm->vlib_main = vm;

  lb_as_t *default_as;
  fib_node_vft_t lb_fib_node_vft = {
      .fnv_get = lb_fib_node_get_node,
      .fnv_last_lock = lb_fib_node_last_lock_gone,
      .fnv_back_walk = lb_fib_node_back_walk_notify,
  };
  dpo_vft_t lb_vft = {
      .dv_lock = lb_dpo_lock,
      .dv_unlock = lb_dpo_unlock,
      .dv_format = format_lb_dpo,
  };

  lbm->vips = 0;
  lbm->per_cpu = 0;
  vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
  lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,  CLIB_CACHE_LINE_BYTES);
  lbm->writer_lock[0] = 0;
  lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
  lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
  lbm->ip4_src_address.as_u32 = 0xffffffff;
  lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
  lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
  lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
  lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
  lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft, lb_dpo_l3dsr_nodes);
  lbm->dpo_nat4_type = dpo_register_new_type(&lb_vft, lb_dpo_nat4_nodes);
  lbm->dpo_nat6_type = dpo_register_new_type(&lb_vft, lb_dpo_nat6_nodes);
  lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);

  //Init AS reference counters
  vlib_refcount_init(&lbm->as_refcount);

  //Allocate and init default AS.
  lbm->ass = 0;
  pool_get(lbm->ass, default_as);
  default_as->flags = 0;
  default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
  default_as->vip_index = ~0;
  default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;

  lbm->vip_index_by_nodeport
    = hash_create_mem (0, sizeof(u16), sizeof (uword));

  clib_bihash_init_8_8 (&lbm->mapping_by_as4,
                        "mapping_by_as4", LB_MAPPING_BUCKETS,
                        LB_MAPPING_MEMORY_SIZE);

  clib_bihash_init_24_8 (&lbm->mapping_by_as6,
                        "mapping_by_as6", LB_MAPPING_BUCKETS,
                        LB_MAPPING_MEMORY_SIZE);

#define _(a,b,c) lbm->vip_counters[c].name = b;
  lb_foreach_vip_counter
#undef _
  return NULL;
}

VLIB_INIT_FUNCTION (lb_init);