/*
* Copyright (c) 2015 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* ip/ip4_fib.h: ip4 mtrie fib
*
* Copyright (c) 2012 Eliot Dresselhaus
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef included_ip_ip4_fib_h
#define included_ip_ip4_fib_h
#include <vppinfra/cache.h>
#include <vppinfra/vector.h>
#include <vnet/ip/lookup.h>
#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */
/* ip4 fib leaves: a 16-8-8 stride mtrie (one 16-bit root ply plus two 8-bit plies).
   1 + 2*adj_index for terminal leaves.
   0 + 2*next_ply_index for non-terminals, i.e. PLYs.
   1 => empty (adjacency index of zero is the special miss adjacency). */
typedef u32 ip4_fib_mtrie_leaf_t;
#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
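/*
 * Worked example (illustrative): a terminal leaf for load-balance/adjacency
 * index 5 is encoded as 1 + 2*5 = 11; a non-terminal leaf pointing at
 * ply-pool index 5 is encoded as 0 + 2*5 = 10. Bit 0 therefore
 * distinguishes terminal (1) from non-terminal (0) leaves.
 */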
/**
* @brief the 16 way stride that is the top PLY of the mtrie
* We do not maintain the count of 'real' leaves in this PLY, since
* it is never removed. The FIB will destroy the mtrie and the ply once
* the FIB is destroyed.
*/
#define PLY_16_SIZE (1<<16)
typedef struct ip4_fib_mtrie_16_ply_t_
{
/**
   * The leaves/slots/buckets to be filled with leaves
*/
union
{
ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
#ifdef CLIB_HAVE_VEC128
u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
#endif
};
/**
* Prefix length for terminal leaves.
*/
u8 dst_address_bits_of_leaves[PLY_16_SIZE];
} ip4_fib_mtrie_16_ply_t;
/**
 * @brief One 8-bit stride ply of the mtrie FIB.
*/
typedef struct ip4_fib_mtrie_8_ply_t_
{
/**
   * The leaves/slots/buckets to be filled with leaves
*/
union
{
ip4_fib_mtrie_leaf_t leaves[256];
#ifdef CLIB_HAVE_VEC128
u32x4 leaves_as_u32x4[256 / 4];
#endif
};
/**
* Prefix length for leaves/ply.
*/
u8 dst_address_bits_of_leaves[256];
/**
   * Number of non-empty leaves (whether terminal or not).
*/
i32 n_non_empty_leafs;
/**
   * The length of the ply's covering prefix. Also a measure of its depth.
   * If a leaf in a slot has a mask length longer than this then it is
   * 'non-empty'. Otherwise it is the value of the cover.
*/
i32 dst_address_bits_base;
/* Pad to cache line boundary. */
u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
}
ip4_fib_mtrie_8_ply_t;
STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
"IP4 Mtrie ply cache line");
/**
 * @brief The multiway-TRIE.
* There is no data associated with the mtrie apart from the top PLY
*/
typedef struct
{
/**
   * Embed the PLY with the mtrie struct. This means that the data-plane
   * 'get me the mtrie' returns the first ply, and not an indirect 'pointer'
   * to it; therefore there are no cache-line misses in the data-path.
*/
ip4_fib_mtrie_16_ply_t root_ply;
} ip4_fib_mtrie_t;
/**
* @brief Initialise an mtrie
*/
void ip4_mtrie_init (ip4_fib_mtrie_t * m);
/**
 * @brief Free an mtrie. It must be empty when freed.
*/
void ip4_mtrie_free (ip4_fib_mtrie_t * m);
/**
 * @brief Add a route/entry to the mtrie
*/
void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
const ip4_address_t * dst_address,
u32 dst_address_length, u32 adj_index);
/**
 * @brief Remove a route/entry from the mtrie
*/
void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
const ip4_address_t * dst_address,
u32 dst_address_length,
u32 adj_index,
u32 cover_address_length, u32 cover_adj_index);
/**
* @brief Format/display the contents of the mtrie
*/
format_function_t format_ip4_fib_mtrie;
/**
* @brief A global pool of 8bit stride plys
*/
extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
/**
 * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)?
*/
always_inline u32
ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
{
return n & 1;
}
/**
* From the stored slot value extract the LB index value
*/
always_inline u32
ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
{
ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
return n >> 1;
}
/**
 * @brief Lookup step. Processes 1 byte of the 4-byte ip4 address.
*/
always_inline ip4_fib_mtrie_leaf_t
ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
ip4_fib_mtrie_leaf_t current_leaf,
const ip4_address_t * dst_address,
u32 dst_address_byte_index)
{
ip4_fib_mtrie_8_ply_t *ply;
uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
if (!current_is_terminal)
{
ply = ip4_ply_pool + (current_leaf >> 1);
return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
}
return current_leaf;
}
/**
 * @brief Lookup step number 1. Processes the first 2 bytes of the 4-byte ip4 address.
*/
always_inline ip4_fib_mtrie_leaf_t
ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
const ip4_address_t * dst_address)
{
ip4_fib_mtrie_leaf_t next_leaf;
next_leaf = m->root_ply.leaves[dst_address->as_u16[0]];
return next_leaf;
}
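/**
 * @brief Illustrative sketch (not part of the original API): a complete
 * lookup composed from the steps above, in the order the data-path
 * performs them. Assumes 'm' was initialised with ip4_mtrie_init() and
 * populated via ip4_fib_mtrie_route_add().
 */
always_inline u32
ip4_fib_mtrie_lookup_example (const ip4_fib_mtrie_t * m,
			      const ip4_address_t * dst)
{
  ip4_fib_mtrie_leaf_t leaf;

  /* Step one consumes address bytes 0 and 1 via the 16-bit root ply. */
  leaf = ip4_fib_mtrie_lookup_step_one (m, dst);
  /* Two further 8-bit steps consume bytes 2 and 3. */
  leaf = ip4_fib_mtrie_lookup_step (m, leaf, dst, 2);
  leaf = ip4_fib_mtrie_lookup_step (m, leaf, dst, 3);

  /* After the final step the leaf must be terminal (bit 0 set). */
  ASSERT (ip4_fib_mtrie_leaf_is_terminal (leaf));
  return ip4_fib_mtrie_leaf_get_adj_index (leaf);
}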
#endif /* included_ip_ip4_fib_h */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/
/*
* Copyright (c) 2017 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file
* @brief NAT64 implementation
*/
#include <nat/nat64.h>
#include <nat/nat64_db.h>
#include <nat/nat_inlines.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/crc32.h>
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
nat64_main_t nat64_main;
/* *INDENT-OFF* */
/* Hook up input features */
VNET_FEATURE_INIT (nat64_in2out, static) = {
.arc_name = "ip6-unicast",
.node_name = "nat64-in2out",
.runs_before = VNET_FEATURES ("ip6-lookup"),
.runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat64_out2in, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat64-out2in",
.runs_before = VNET_FEATURES ("ip4-lookup"),
.runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat64_in2out_handoff, static) = {
.arc_name = "ip6-unicast",
.node_name = "nat64-in2out-handoff",
.runs_before = VNET_FEATURES ("ip6-lookup"),
.runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat64_out2in_handoff, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat64-out2in-handoff",
.runs_before = VNET_FEATURES ("ip4-lookup"),
.runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
};
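/* The RFC 6052 well-known prefix 64:ff9b::/96, used for address synthesis
   when no custom NAT64 prefix is configured. */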
static u8 well_known_prefix[] = {
0x00, 0x64, 0xff, 0x9b,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
};
/* *INDENT-ON* */
static void
nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque,
u32 sw_if_index,
ip4_address_t * address,
u32 address_length,
u32 if_address_index, u32 is_delete)
{
nat64_main_t *nm = &nat64_main;
int i, j;
for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
{
if (sw_if_index == nm->auto_add_sw_if_indices[i])
{
if (!is_delete)
{
/* Don't trip over lease renewal, static config */
for (j = 0; j < vec_len (nm->addr_pool); j++)
if (nm->addr_pool[j].addr.as_u32 == address->as_u32)
return;
(void) nat64_add_del_pool_addr (vlib_get_thread_index (),
address, ~0, 1);
return;
}
else
{
(void) nat64_add_del_pool_addr (vlib_get_thread_index (),
address, ~0, 0);
return;
}
}
}
}
u32
nat64_get_worker_in2out (ip6_address_t * addr)
{
nat64_main_t *nm = &nat64_main;
snat_main_t *sm = nm->sm;
u32 next_worker_index = nm->sm->first_worker_index;
u32 hash;
#ifdef clib_crc32c_uses_intrinsics
hash = clib_crc32c ((u8 *) addr->as_u32, 16);
#else
u64 tmp = addr->as_u64[0] ^ addr->as_u64[1];
hash = clib_xxhash (tmp);
#endif
if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
else
next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
return next_worker_index;
}
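/*
 * Note (illustrative): the hash indexes the sm->workers map; when the map
 * length is a power of two the cheaper bit-mask (hash & (len - 1)) replaces
 * the modulo in the worker selection above.
 */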
u32
nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
{
nat64_main_t *nm = &nat64_main;
snat_main_t *sm = nm->sm;
udp_header_t *udp;
u16 port;
u32 proto;
proto = ip_proto_to_snat_proto (ip->protocol);
udp = ip4_next_header (ip);
port = udp->dst_port;
/* unknown protocol */
if (PREDICT_FALSE (proto == ~0))
{
nat64_db_t *db;
ip46_address_t daddr;
nat64_db_bib_entry_t *bibe;
clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip->dst_address.as_u32;
/* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
if (bibe)
return (u32) (db - nm->db);
}
/* *INDENT-ON* */
return vlib_get_thread_index ();
}
/* ICMP */
if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
{
icmp46_header_t *icmp = (icmp46_header_t *) udp;
icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
if (!icmp_type_is_error_message
(vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
port = vnet_buffer (b)->ip.reass.l4_src_port;
else
{
	  /* if it is an error message, it is not fragmented and we can access the inner packet */
ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
proto = ip_proto_to_snat_proto (inner_ip->protocol);
void *l4_header = ip4_next_header (inner_ip);
switch (proto)
{
case SNAT_PROTOCOL_ICMP:
icmp = (icmp46_header_t *) l4_header;
echo = (icmp_echo_header_t *) (icmp + 1);
port = echo->identifier;
break;
case SNAT_PROTOCOL_UDP:
case SNAT_PROTOCOL_TCP:
port = ((tcp_udp_header_t *) l4_header)->src_port;
break;
default:
return vlib_get_thread_index ();
}
}
}
/* worker by outside port (TCP/UDP) */
port = clib_net_to_host_u16 (port);
if (port > 1024)
return nm->sm->first_worker_index + ((port - 1024) / sm->port_per_thread);
return vlib_get_thread_index ();
}
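/*
 * Worked example (illustrative values): with first_worker_index = 1 and
 * port_per_thread = 100, outside port 1150 maps to worker
 * 1 + (1150 - 1024) / 100 = 2.
 */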
clib_error_t *
nat64_init (vlib_main_t * vm)
{
nat64_main_t *nm = &nat64_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
ip4_add_del_interface_address_callback_t cb4;
ip4_main_t *im = &ip4_main;
nm->sm = &snat_main;
vlib_node_t *node;
vec_validate (nm->db, tm->n_vlib_mains - 1);
nm->fq_in2out_index = ~0;
nm->fq_out2in_index = ~0;
node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
nm->error_node_index = node->index;
node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out");
nm->in2out_node_index = node->index;
node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath");
nm->in2out_slowpath_node_index = node->index;
node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in");
nm->out2in_node_index = node->index;
/* set session timeouts to default values */
nm->udp_timeout = SNAT_UDP_TIMEOUT;
nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
nm->total_enabled_count = 0;
/* Set up the interface address add/del callback */
cb4.function = nat64_ip4_add_del_interface_address_cb;
cb4.function_opaque = 0;
vec_add1 (im->add_del_interface_address_callbacks, cb4);
nm->ip4_main = im;
/* Init counters */
nm->total_bibs.name = "total-bibs";
nm->total_bibs.stat_segment_name = "/nat64/total-bibs";
vlib_validate_simple_counter (&nm->total_bibs, 0);
vlib_zero_simple_counter (&nm->total_bibs, 0);
nm->total_sessions.name = "total-sessions";
nm->total_sessions.stat_segment_name = "/nat64/total-sessions";
vlib_validate_simple_counter (&nm->total_sessions, 0);
vlib_zero_simple_counter (&nm->total_sessions, 0);
return 0;
}
static void nat64_free_out_addr_and_port (struct nat64_db_s *db,
ip4_address_t * addr, u16 port,
u8 protocol);
void
nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets,
u32 st_memory_size)
{
nat64_main_t *nm = &nat64_main;
nat64_db_t *db;
nm->bib_buckets = bib_buckets;
nm->bib_memory_size = bib_memory_size;
nm->st_buckets = st_buckets;
nm->st_memory_size = st_memory_size;
/* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets,
st_memory_size, nat64_free_out_addr_and_port))
nat_elog_err ("NAT64 DB init failed");
}
/* *INDENT-ON* */
}
int
nat64_add_del_pool_addr (u32 thread_index,
ip4_address_t * addr, u32 vrf_id, u8 is_add)
{
nat64_main_t *nm = &nat64_main;
snat_address_t *a = 0;
snat_interface_t *interface;
int i;
nat64_db_t *db;
vlib_thread_main_t *tm = vlib_get_thread_main ();
/* Check if address already exists */
for (i = 0; i < vec_len (nm->addr_pool); i++)
{
if (nm->addr_pool[i].addr.as_u32 == addr->as_u32)
{
a = nm->addr_pool + i;
break;
}
}
if (is_add)
{
if (a)
return VNET_API_ERROR_VALUE_EXIST;
vec_add2 (nm->addr_pool, a, 1);
a->addr = *addr;
a->fib_index = ~0;
if (vrf_id != ~0)
a->fib_index =
fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
nat_fib_src_hi);
#define _(N, id, n, s) \
clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \
a->busy_##n##_ports = 0; \
vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
foreach_snat_protocol
#undef _
}
else
{
if (!a)
return VNET_API_ERROR_NO_SUCH_ENTRY;
if (a->fib_index != ~0)
fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nat_fib_src_hi);
/* Delete sessions using address */
/* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
nat64_db_free_out_addr (thread_index, db, &a->addr);
vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0,
db->bib.bib_entries_num);
vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
db->st.st_entries_num);
}
#define _(N, id, n, s) \
clib_bitmap_free (a->busy_##n##_port_bitmap);
foreach_snat_protocol
#undef _
/* *INDENT-ON* */
vec_del1 (nm->addr_pool, i);
}
/* Add/del external address to FIB */
/* *INDENT-OFF* */
pool_foreach (interface, nm->interfaces,
({
if (nat_interface_is_inside(interface))
continue;
snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
break;
}));
/* *INDENT-ON* */
return 0;
}
void
nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
{
nat64_main_t *nm = &nat64_main;
snat_address_t *a = 0;
/* *INDENT-OFF* */
vec_foreach (a, nm->addr_pool)
{
if (fn (a, ctx))
break;
};
/* *INDENT-ON* */
}
int
nat64_add_interface_address (u32 sw_if_index, int is_add)
{
nat64_main_t *nm = &nat64_main;
ip4_main_t *ip4_main = nm->ip4_main;
ip4_address_t *first_int_addr;
int i;
first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
{
if (nm->auto_add_sw_if_indices[i] == sw_if_index)
{
if (is_add)
return VNET_API_ERROR_VALUE_EXIST;
else
{
	      /* if the interface has an address, remove it */
if (first_int_addr)
(void) nat64_add_del_pool_addr (vlib_get_thread_index (),
first_int_addr, ~0, 0);
vec_del1 (nm->auto_add_sw_if_indices, i);
return 0;
}
}
}
if (!is_add)
return VNET_API_ERROR_NO_SUCH_ENTRY;
/* add to the auto-address list */
vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
/* If the address is already bound - or static - add it now */
if (first_int_addr)
(void) nat64_add_del_pool_addr (vlib_get_thread_index (),
first_int_addr, ~0, 1);
return 0;
}
int
nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
{
nat64_main_t *nm = &nat64_main;
snat_interface_t *interface = 0, *i;
snat_address_t *ap;
const char *feature_name, *arc_name;
/* Check if interface already exists */
/* *INDENT-OFF* */
pool_foreach (i, nm->interfaces,
({
if (i->sw_if_index == sw_if_index)
{
interface = i;
break;
}
}));
/* *INDENT-ON* */
if (is_add)
{
if (interface)
goto set_flags;
pool_get (nm->interfaces, interface);
interface->sw_if_index = sw_if_index;
interface->flags = 0;
set_flags:
if (is_inside)
interface->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
else
interface->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
nm->total_enabled_count++;
vlib_process_signal_event (nm->sm->vlib_main,
nm->nat64_expire_walk_node_index,
NAT64_CLEANER_RESCHEDULE, 0);
}
else
{
if (!interface)
return VNET_API_ERROR_NO_SUCH_ENTRY;
if ((nat_interface_is_inside (interface)
&& nat_interface_is_outside (interface)))
interface->flags &=
is_inside ? ~NAT_INTERFACE_FLAG_IS_INSIDE :
~NAT_INTERFACE_FLAG_IS_OUTSIDE;
else
pool_put (nm->interfaces, interface);
nm->total_enabled_count--;
}
if (!is_inside)
{
/* *INDENT-OFF* */
vec_foreach (ap, nm->addr_pool)
snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, is_add);
/* *INDENT-ON* */
}
if (nm->sm->num_workers > 1)
{
feature_name =
is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff";
if (nm->fq_in2out_index == ~0)
nm->fq_in2out_index =
vlib_frame_queue_main_init (nat64_in2out_node.index, 0);
if (nm->fq_out2in_index == ~0)
nm->fq_out2in_index =
vlib_frame_queue_main_init (nat64_out2in_node.index, 0);
}
else
feature_name = is_inside ? "nat64-in2out" : "nat64-out2in";
arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";
if (is_inside)
{
int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
if (rv)
return rv;
}
else
{
int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
if (rv)
return rv;
}
return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
is_add, 0, 0);
}
void
nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
{
nat64_main_t *nm = &nat64_main;
snat_interface_t *i = 0;
/* *INDENT-OFF* */
pool_foreach (i, nm->interfaces,
({
if (fn (i, ctx))
break;
}));
/* *INDENT-ON* */
}
int
nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
ip4_address_t * addr, u16 * port,
u32 thread_index)
{
nat64_main_t *nm = &nat64_main;
snat_main_t *sm = nm->sm;
snat_session_key_t k;
u32 worker_index = 0;
int rv;
k.protocol = proto;
if (sm->num_workers > 1)
worker_index = thread_index - sm->first_worker_index;
rv =
sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k,
sm->port_per_thread, worker_index);
if (!rv)
{
*port = k.port;
addr->as_u32 = k.addr.as_u32;
}
return rv;
}
static void
nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr,
u16 port, u8 protocol)
{
nat64_main_t *nm = &nat64_main;
int i;
snat_address_t *a;
u32 thread_index = db - nm->db;
snat_protocol_t proto = ip_proto_to_snat_proto (protocol);
u16 port_host_byte_order = clib_net_to_host_u16 (port);
for (i = 0; i < vec_len (nm->addr_pool); i++)
{
a = nm->addr_pool + i;
if (addr->as_u32 != a->addr.as_u32)
continue;
switch (proto)
{
#define _(N, j, n, s) \
case SNAT_PROTOCOL_##N: \
ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
port_host_byte_order) == 1); \
clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port_host_byte_order, 0); \
a->busy_##n##_ports--; \
a->busy_##n##_ports_per_thread[thread_index]--; \
break;
foreach_snat_protocol
#undef _
default:
nat_elog_notice ("unknown protocol");
return;
}
break;
}
}
/**
 * @brief Add/delete a static BIB entry in a worker thread.
*/
static uword
nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_frame_t * f)
{
nat64_main_t *nm = &nat64_main;
u32 thread_index = vm->thread_index;
nat64_db_t *db = &nm->db[thread_index];
nat64_static_bib_to_update_t *static_bib;
nat64_db_bib_entry_t *bibe;
ip46_address_t addr;
/* *INDENT-OFF* */
pool_foreach (static_bib, nm->static_bibs,
({
if ((static_bib->thread_index != thread_index) || (static_bib->done))
continue;
if (static_bib->is_add)
{
(void) nat64_db_bib_entry_create (thread_index, db,
&static_bib->in_addr,
&static_bib->out_addr,
static_bib->in_port,
static_bib->out_port,
static_bib->fib_index,
static_bib->proto, 1);
vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
db->bib.bib_entries_num);
}
else
{
addr.as_u64[0] = static_bib->in_addr.as_u64[0];
addr.as_u64[1] = static_bib->in_addr.as_u64[1];
bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port,
static_bib->proto,
static_bib->fib_index, 1);
if (bibe)
{
nat64_db_bib_entry_free (thread_index, db, bibe);
vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
db->bib.bib_entries_num);
vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
db->st.st_entries_num);
}
}
static_bib->done = 1;
}));
/* *INDENT-ON* */
return 0;
}
static vlib_node_registration_t nat64_static_bib_worker_node;
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
.function = nat64_static_bib_worker_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat64-static-bib-worker",
};
/* *INDENT-ON* */
int
nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
ip4_address_t * out_addr, u16 in_port,
u16 out_port, u8 proto, u32 vrf_id, u8 is_add)
{
nat64_main_t *nm = &nat64_main;
nat64_db_bib_entry_t *bibe;
u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
nat_fib_src_hi);
snat_protocol_t p = ip_proto_to_snat_proto (proto);
ip46_address_t addr;
int i;
snat_address_t *a;
u32 thread_index = 0;
nat64_db_t *db;
nat64_static_bib_to_update_t *static_bib;
vlib_main_t *worker_vm;
u32 *to_be_free = 0, *index;
if (nm->sm->num_workers > 1)
{
thread_index = nat64_get_worker_in2out (in_addr);
db = &nm->db[thread_index];
}
else
db = &nm->db[nm->sm->num_workers];
addr.as_u64[0] = in_addr->as_u64[0];
addr.as_u64[1] = in_addr->as_u64[1];
bibe =
nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port),
proto, fib_index, 1);
if (is_add)
{
if (bibe)
return VNET_API_ERROR_VALUE_EXIST;
      /* outside port must be assigned to the same thread as the internal address */
if ((out_port > 1024) && (nm->sm->num_workers > 1))
{
if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread))
return VNET_API_ERROR_INVALID_VALUE_2;
}
for (i = 0; i < vec_len (nm->addr_pool); i++)
{
a = nm->addr_pool + i;
if (out_addr->as_u32 != a->addr.as_u32)
continue;
switch (p)
{
#define _(N, j, n, s) \
case SNAT_PROTOCOL_##N: \
if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
out_port)) \
return VNET_API_ERROR_INVALID_VALUE; \
clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
out_port, 1); \
if (out_port > 1024) \
{ \
a->busy_##n##_ports++; \
a->busy_##n##_ports_per_thread[thread_index]++; \
} \
break;
foreach_snat_protocol
#undef _
default:
clib_memset (&addr, 0, sizeof (addr));
addr.ip4.as_u32 = out_addr->as_u32;
if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0))
return VNET_API_ERROR_INVALID_VALUE;
}
break;
}
if (!nm->sm->num_workers)
{
bibe =
nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr,
clib_host_to_net_u16 (in_port),
clib_host_to_net_u16 (out_port),
fib_index, proto, 1);
if (!bibe)
return VNET_API_ERROR_UNSPECIFIED;
vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
db->bib.bib_entries_num);
}
}
else
{
if (!bibe)
return VNET_API_ERROR_NO_SUCH_ENTRY;
if (!nm->sm->num_workers)
{
nat64_db_bib_entry_free (thread_index, db, bibe);
vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
db->bib.bib_entries_num);
}
}
if (nm->sm->num_workers)
{
/* *INDENT-OFF* */
pool_foreach (static_bib, nm->static_bibs,
({
if (static_bib->done)
vec_add1 (to_be_free, static_bib - nm->static_bibs);
}));
vec_foreach (index, to_be_free)
pool_put_index (nm->static_bibs, index[0]);
/* *INDENT-ON* */
vec_free (to_be_free);
pool_get (nm->static_bibs, static_bib);
static_bib->in_addr.as_u64[0] = in_addr->as_u64[0];
static_bib->in_addr.as_u64[1] = in_addr->as_u64[1];
static_bib->in_port = clib_host_to_net_u16 (in_port);
static_bib->out_addr.as_u32 = out_addr->as_u32;
static_bib->out_port = clib_host_to_net_u16 (out_port);
static_bib->fib_index = fib_index;
static_bib->proto = proto;
static_bib->is_add = is_add;
static_bib->thread_index = thread_index;
static_bib->done = 0;
worker_vm = vlib_mains[thread_index];
if (worker_vm)
vlib_node_set_interrupt_pending (worker_vm,
nat64_static_bib_worker_node.index);
else
return VNET_API_ERROR_UNSPECIFIED;
}
return 0;
}
int
nat64_set_udp_timeout (u32 timeout)
{
nat64_main_t *nm = &nat64_main;
if (timeout == 0)
nm->udp_timeout = SNAT_UDP_TIMEOUT;
else
nm->udp_timeout = timeout;
return 0;
}
u32
nat64_get_udp_timeout (void)
{
nat64_main_t *nm = &nat64_main;
return nm->udp_timeout;
}
int
nat64_set_icmp_timeout (u32 timeout)
{
nat64_main_t *nm = &nat64_main;
if (timeout == 0)
nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
else
nm->icmp_timeout = timeout;
return 0;
}
u32
nat64_get_icmp_timeout (void)
{
nat64_main_t *nm = &nat64_main;
return nm->icmp_timeout;
}
int
nat64_set_tcp_timeouts (u32 trans, u32 est)
{
nat64_main_t *nm = &nat64_main;
if (trans == 0)
nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
else
nm->tcp_trans_timeout = trans;
if (est == 0)
nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
else
nm->tcp_est_timeout = est;
return 0;
}
u32
nat64_get_tcp_trans_timeout (void)
{
nat64_main_t *nm = &nat64_main;
return nm->tcp_trans_timeout;
}
u32
nat64_get_tcp_est_timeout (void)
{
nat64_main_t *nm = &nat64_main;
return nm->tcp_est_timeout;
}
void
nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm)
{
nat64_main_t *nm = &nat64_main;
u32 now = (u32) vlib_time_now (vm);
switch (ip_proto_to_snat_proto (ste->proto))
{
case SNAT_PROTOCOL_ICMP:
ste->expire = now + nm->icmp_timeout;
return;
case SNAT_PROTOCOL_TCP:
{
switch (ste->tcp_state)
{
case NAT64_TCP_STATE_V4_INIT:
case NAT64_TCP_STATE_V6_INIT:
case NAT64_TCP_STATE_V4_FIN_RCV:
case NAT64_TCP_STATE_V6_FIN_RCV:
case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV:
case NAT64_TCP_STATE_TRANS:
ste->expire = now + nm->tcp_trans_timeout;
return;
case NAT64_TCP_STATE_ESTABLISHED:
ste->expire = now + nm->tcp_est_timeout;
return;
default:
return;
}
}
case SNAT_PROTOCOL_UDP:
ste->expire = now + nm->udp_timeout;
return;
default:
ste->expire = now + nm->udp_timeout;
return;
}
}
void
nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp,
u8 is_ip6)
{
switch (ste->tcp_state)
{
case NAT64_TCP_STATE_CLOSED:
{
if (tcp->flags & TCP_FLAG_SYN)
{
if (is_ip6)
ste->tcp_state = NAT64_TCP_STATE_V6_INIT;
else
ste->tcp_state = NAT64_TCP_STATE_V4_INIT;
}
return;
}
case NAT64_TCP_STATE_V4_INIT:
{
if (is_ip6 && (tcp->flags & TCP_FLAG_SYN))
ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
return;
}
case NAT64_TCP_STATE_V6_INIT:
{
if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN))
ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
return;
}
case NAT64_TCP_STATE_ESTABLISHED:
{
if (tcp->flags & TCP_FLAG_FIN)
{
if (is_ip6)
ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV;
else
ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV;
}
else if (tcp->flags & TCP_FLAG_RST)
{
ste->tcp_state = NAT64_TCP_STATE_TRANS;
}
return;
}
case NAT64_TCP_STATE_V4_FIN_RCV:
{
if (is_ip6 && (tcp->flags & TCP_FLAG_FIN))
ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
return;
}
case NAT64_TCP_STATE_V6_FIN_RCV:
{
if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN))
ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
return;
}
case NAT64_TCP_STATE_TRANS:
{
if (!(tcp->flags & TCP_FLAG_RST))
ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
return;
}
default:
return;
}
}
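/*
 * Illustrative trace of the state machine above: a v6-initiated connection
 * moves CLOSED -> V6_INIT on the v6 SYN, then -> ESTABLISHED on the
 * answering v4 SYN. A FIN seen from each direction moves it through
 * V6_FIN_RCV (or V4_FIN_RCV) to V6_FIN_V4_FIN_RCV. An RST in ESTABLISHED
 * moves it to TRANS, from where any non-RST segment restores ESTABLISHED.
 */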
int
nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add)
{
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p = 0;
int i;
  /* Verify prefix length (RFC 6052 allows /32, /40, /48, /56, /64 and /96) */
if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64
&& plen != 96)
return VNET_API_ERROR_INVALID_VALUE;
  /* Check if the tenant already has a prefix */
for (i = 0; i < vec_len (nm->pref64); i++)
{
if (nm->pref64[i].vrf_id == vrf_id)
{
p = nm->pref64 + i;
break;
}
}
if (is_add)
{
if (!p)
{
vec_add2 (nm->pref64, p, 1);
p->fib_index =
fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
nat_fib_src_hi);
p->vrf_id = vrf_id;
}
p->prefix.as_u64[0] = prefix->as_u64[0];
p->prefix.as_u64[1] = prefix->as_u64[1];
p->plen = plen;
}
else
{
if (!p)
return VNET_API_ERROR_NO_SUCH_ENTRY;
vec_del1 (nm->pref64, i);
}
return 0;
}
void
nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
{
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p = 0;
/* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (fn (p, ctx))
break;
};
/* *INDENT-ON* */
}
void
nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
{
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p, *gp = 0, *prefix = 0;
/* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (p->fib_index == fib_index)
{
prefix = p;
break;
}
      if (p->vrf_id == 0)
gp = p;
};
/* *INDENT-ON* */
if (!prefix)
prefix = gp;
if (prefix)
{
      clib_memcpy_fast (ip6, &prefix->prefix, sizeof (ip6_address_t));
      switch (prefix->plen)
{
case 32:
ip6->as_u32[1] = ip4->as_u32;
break;
case 40:
ip6->as_u8[5] = ip4->as_u8[0];
ip6->as_u8[6] = ip4->as_u8[1];
ip6->as_u8[7] = ip4->as_u8[2];
ip6->as_u8[9] = ip4->as_u8[3];
break;
case 48:
ip6->as_u8[6] = ip4->as_u8[0];
ip6->as_u8[7] = ip4->as_u8[1];
ip6->as_u8[9] = ip4->as_u8[2];
ip6->as_u8[10] = ip4->as_u8[3];
break;
case 56:
ip6->as_u8[7] = ip4->as_u8[0];
ip6->as_u8[9] = ip4->as_u8[1];
ip6->as_u8[10] = ip4->as_u8[2];
ip6->as_u8[11] = ip4->as_u8[3];
break;
case 64:
ip6->as_u8[9] = ip4->as_u8[0];
ip6->as_u8[10] = ip4->as_u8[1];
ip6->as_u8[11] = ip4->as_u8[2];
ip6->as_u8[12] = ip4->as_u8[3];
break;
case 96:
ip6->as_u32[3] = ip4->as_u32;
break;
default:
nat_elog_notice ("invalid prefix length");
break;
}
}
else
{
clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t));
ip6->as_u32[3] = ip4->as_u32;
}
}
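/*
 * Worked example (RFC 6052): with the well-known prefix 64:ff9b::/96,
 * IPv4 address 192.0.2.33 composes to 64:ff9b::c000:221
 * (192 = 0xc0, 0 = 0x00, 2 = 0x02, 33 = 0x21 in the low 32 bits).
 */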
void
nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
{
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p, *gp = 0;
u8 plen = 0;
/* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (p->fib_index == fib_index)
{
plen = p->plen;
break;
}
if (p->vrf_id == 0)
gp = p;
};
/* *INDENT-ON* */
if (!plen)
{
if (gp)
plen = gp->plen;
else
plen = 96;
}
switch (plen)
{
case 32:
ip4->as_u32 = ip6->as_u32[1];
break;
case 40:
ip4->as_u8[0] = ip6->as_u8[5];
ip4->as_u8[1] = ip6->as_u8[6];
ip4->as_u8[2] = ip6->as_u8[7];
ip4->as_u8[3] = ip6->as_u8[9];
break;
case 48:
ip4->as_u8[0] = ip6->as_u8[6];
ip4->as_u8[1] = ip6->as_u8[7];
ip4->as_u8[2] = ip6->as_u8[9];
ip4->as_u8[3] = ip6->as_u8[10];
break;
case 56:
ip4->as_u8[0] = ip6->as_u8[7];
ip4->as_u8[1] = ip6->as_u8[9];
ip4->as_u8[2] = ip6->as_u8[10];
ip4->as_u8[3] = ip6->as_u8[11];
break;
case 64:
ip4->as_u8[0] = ip6->as_u8[9];
ip4->as_u8[1] = ip6->as_u8[10];
ip4->as_u8[2] = ip6->as_u8[11];
ip4->as_u8[3] = ip6->as_u8[12];
break;
case 96:
ip4->as_u32 = ip6->as_u32[3];
break;
default:
nat_elog_notice ("invalid prefix length");
break;
}
}
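/*
 * nat64_extract_ip4 is the inverse of nat64_compose_ip6: given
 * 64:ff9b::c000:221 and a /96 prefix it recovers 192.0.2.33.
 */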
/**
 * @brief Per-worker process that checks expire times for NAT64 sessions.
*/
static uword
nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_frame_t * f)
{
nat64_main_t *nm = &nat64_main;
u32 thread_index = vm->thread_index;
nat64_db_t *db = &nm->db[thread_index];
u32 now = (u32) vlib_time_now (vm);
nad64_db_st_free_expired (thread_index, db, now);
vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
db->bib.bib_entries_num);
vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
db->st.st_entries_num);
return 0;
}
static vlib_node_registration_t nat64_expire_worker_walk_node;
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = {
.function = nat64_expire_worker_walk_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat64-expire-worker-walk",
};
/* *INDENT-ON* */
static vlib_node_registration_t nat64_expire_walk_node;
/**
 * @brief Centralized process to drive the per-worker expire walks.
*/
static uword
nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_frame_t * f)
{
nat64_main_t *nm = &nat64_main;
vlib_main_t **worker_vms = 0, *worker_vm;
int i;
uword event_type, *event_data = 0;
nm->nat64_expire_walk_node_index = nat64_expire_walk_node.index;
if (vec_len (vlib_mains) == 0)
vec_add1 (worker_vms, vm);
else
{
for (i = 0; i < vec_len (vlib_mains); i++)
{
worker_vm = vlib_mains[i];
if (worker_vm)
vec_add1 (worker_vms, worker_vm);
}
}
while (1)
{
if (nm->total_enabled_count)
{
vlib_process_wait_for_event_or_clock (vm, 10.0);
event_type = vlib_process_get_events (vm, &event_data);
}
else
{
vlib_process_wait_for_event (vm);
event_type = vlib_process_get_events (vm, &event_data);
}
switch (event_type)
{
case ~0:
break;
case NAT64_CLEANER_RESCHEDULE:
break;
default:
nat_elog_notice_X1 ("unknown event %d", "i4", event_type);
break;
}
for (i = 0; i < vec_len (worker_vms); i++)
{
worker_vm = worker_vms[i];
vlib_node_set_interrupt_pending (worker_vm,
nat64_expire_worker_walk_node.index);
}
}
return 0;
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_expire_walk_node, static) = {
.function = nat64_expire_walk_fn,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "nat64-expire-walk",
};
/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/