aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/map/lpm.c
blob: 4abeefca06d72432ef449150cce946966b055a6c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "lpm.h"
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <arpa/inet.h>
#include <vnet/ip/format.h>

/*
 * Clear the host part of an IPv4 address.
 *
 * addr is in network byte order and len is the prefix length in bits.
 * The result is addr, still in network byte order, with every bit after
 * the first len bits set to zero.  A len of 32 or more leaves addr
 * untouched.
 */
static uint32_t
masked_address32 (uint32_t addr, uint8_t len)
{
  uint32_t host_order;

  /* Shifting a 32-bit value by 32 or more is undefined, so bail out early. */
  if (len >= 32)
    return addr;

  /* Mask in host order so that "first len bits" means the leading bits. */
  host_order = ntohl (addr);
  return htonl (host_order & ~(~0u >> len));
}
/*
 * Clear the trailing bits of one 64-bit half of an IPv6 address.
 *
 * addr holds eight address bytes exactly as they sit in memory (network
 * order) and len is the number of leading bits to keep.  The masking is
 * done byte by byte in memory order, so the result is the same on
 * little- and big-endian hosts.  A len of 64 or more leaves addr untouched.
 */
static uint64_t
masked_address64 (uint64_t addr, uint8_t len)
{
  union {
    uint64_t as_u64;
    uint8_t as_u8[sizeof (uint64_t)];
  } a;
  unsigned int idx = len / 8;	/* first byte that is not fully kept */
  unsigned int rem = len % 8;	/* bits kept in that byte */

  /* Also avoids an out-of-range shift/index for len > 64. */
  if (len >= 64)
    return addr;

  a.as_u64 = addr;
  if (rem != 0) {
    /* Keep only the top rem bits of the partially covered byte. */
    a.as_u8[idx] &= (uint8_t) (0xff << (8 - rem));
    idx++;
  }
  /* Everything after the prefix is zeroed. */
  for (; idx < sizeof (a.as_u8); idx++)
    a.as_u8[idx] = 0;

  return a.as_u64;
}

/*
 * Insert an IPv4 prefix into the per-prefix-length hash tables.
 *
 * addr_v points at an ip4_address_t, pfxlen selects which table in
 * lpm->hash[] is used and value is what gets stored under the masked
 * address.  If the masked address is already present a warning is logged;
 * the add still goes ahead and hash_set is called with the new value.
 */
static void
lpm_32_add (lpm_t *lpm, void *addr_v, u8 pfxlen,
	    u32 value)
{
  uword * hash, * result;
  u32 key;
  ip4_address_t *addr = addr_v;
  key = masked_address32(addr->data_u32, pfxlen);
  hash = lpm->hash[pfxlen];
  result = hash_get (hash, key);
  /*
   * result[0] is a uword; cast it so the argument width matches the %d
   * conversion in the format string.
   */
  if (result) /* Entry exists */
    clib_warning("%U/%d already exists in table for domain %d",
		 format_ip4_address, addr, pfxlen, (int) result[0]);

  /*
   * adding a new entry; the table for this prefix length is created on
   * first use
   */
  if (hash == NULL) {
    hash = hash_create (32 /* elts */, sizeof (uword));
    hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
  }
  hash = hash_set(hash, key, value);
  /* hash_set may hand back a different vector, so store it again. */
  lpm->hash[pfxlen] = hash;
}

/*
 * Remove an IPv4 prefix from the table for its prefix length.
 *
 * addr_v points at an ip4_address_t.  The address is masked to pfxlen
 * bits and, if that key is present in lpm->hash[pfxlen], it is unset.
 * Nothing happens when the key is absent.
 */
static void
lpm_32_delete (lpm_t *lpm, void *addr_v, u8 pfxlen)
{
  ip4_address_t *ip4 = addr_v;
  uword *table = lpm->hash[pfxlen];
  u32 masked = masked_address32 (ip4->data_u32, pfxlen);
  uword *entry = hash_get (table, masked);

  if (entry != NULL)
    hash_unset (table, masked);

  /* Write the table pointer back in case hash_unset replaced it. */
  lpm->hash[pfxlen] = table;
}

/*
 * Longest-prefix-match lookup for an IPv4 address.
 *
 * Starting at pfxlen and working down to 0, the address is masked to each
 * length and looked up in the table for that length (tables that were
 * never created are skipped).  The first hit wins.
 *
 * Returns the stored value, or ~0 when no prefix matches.
 */
static u32
lpm_32_lookup (lpm_t *lpm, void *addr_v, u8 pfxlen)
{
  ip4_address_t *ip4 = addr_v;
  int len = pfxlen;

  while (len >= 0) {
    uword *table = lpm->hash[len];

    if (table != NULL) {
      u32 masked = masked_address32 (ip4->data_u32, len);
      uword *hit = hash_get (table, masked);

      if (hit)
	return hit[0];
    }
    len--;
  }

  return ~0;
}

/*
 * Exact-match lookup of one IPv6 prefix of a given length.
 *
 * The bihash key is the address masked to pfxlen bits (split across the
 * two 64-bit halves) followed by pfxlen itself.
 *
 * Returns 0 and stores the entry's value in *value on a hit; returns -1
 * and leaves *value alone on a miss.
 */
static int
lpm_128_lookup_core (lpm_t *lpm, ip6_address_t *addr, u8 pfxlen, u32 *value)
{
  BVT(clib_bihash_kv) search, found;
  /* Bits of the prefix that fall in the first / second 64-bit half. */
  u8 upper_bits = pfxlen > 64 ? 64 : pfxlen;
  u8 lower_bits = pfxlen > 64 ? pfxlen - 64 : 0;

  search.key[0] = masked_address64 (addr->as_u64[0], upper_bits);
  search.key[1] = masked_address64 (addr->as_u64[1], lower_bits);
  search.key[2] = pfxlen;

  if (BV(clib_bihash_search_inline_2) (&lpm->bihash, &search, &found) != 0)
    return -1;

  *value = found.value;
  return 0;
}

/*
 * Longest-prefix-match lookup for an IPv6 address.
 *
 * Walks the set bits of lpm->prefix_lengths_bitmap and tries an exact
 * lookup for each prefix length that is in use; the first hit is
 * returned.  lpm_128_add/lpm_128_delete record a prefix length at bit
 * index (128 - pfxlen), so the index has to be converted back before it
 * is used as a length.  NOTE(review): this relies on clib_bitmap_foreach
 * visiting set bits in ascending order, which would try the longest
 * prefixes first - confirm against bitmap.h.
 *
 * The pfxlen argument is not consulted.
 *
 * Returns the stored value, or ~0 when nothing matches.
 */
static u32
lpm_128_lookup (lpm_t *lpm, void *addr_v, u8 pfxlen)
{
  ip6_address_t *addr = addr_v;
  int i = 0, rv;
  u32 value;
  clib_bitmap_foreach (i, lpm->prefix_lengths_bitmap,
    ({
      /* bit index -> prefix length */
      rv = lpm_128_lookup_core(lpm, addr, 128 - i, &value);
      if (rv == 0)
	return value;
    }));
  return ~0;
}

/*
 * Insert an IPv6 prefix into the bihash.
 *
 * addr_v points at an ip6_address_t.  The key is the address masked to
 * pfxlen bits plus pfxlen itself; value is stored with it.  Every call
 * bumps the reference count for pfxlen and sets bit (128 - pfxlen) in
 * the bitmap of prefix lengths in use.
 */
static void
lpm_128_add (lpm_t *lpm, void *addr_v, u8 pfxlen, u32 value)
{
  ip6_address_t *ip6 = addr_v;
  /* Bits of the prefix that fall in the first / second 64-bit half. */
  u8 upper_bits = pfxlen > 64 ? 64 : pfxlen;
  u8 lower_bits = pfxlen > 64 ? pfxlen - 64 : 0;
  BVT(clib_bihash_kv) entry;

  entry.key[0] = masked_address64 (ip6->as_u64[0], upper_bits);
  entry.key[1] = masked_address64 (ip6->as_u64[1], lower_bits);
  entry.key[2] = pfxlen;
  entry.value = value;
  BV(clib_bihash_add_del) (&lpm->bihash, &entry, 1);

  /* prefix length accounting */
  lpm->prefix_length_refcount[pfxlen] += 1;
  lpm->prefix_lengths_bitmap =
    clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 1);
}

/*
 * Remove an IPv6 prefix from the bihash.
 *
 * addr_v points at an ip6_address_t.  The key is built the same way as
 * in lpm_128_add.  Every call drops the reference count for pfxlen by
 * one; when it reaches zero, bit (128 - pfxlen) is cleared in the bitmap
 * of prefix lengths in use.
 */
static void
lpm_128_delete (lpm_t *lpm, void *addr_v, u8 pfxlen)
{
  ip6_address_t *ip6 = addr_v;
  /* Bits of the prefix that fall in the first / second 64-bit half. */
  u8 upper_bits = pfxlen > 64 ? 64 : pfxlen;
  u8 lower_bits = pfxlen > 64 ? pfxlen - 64 : 0;
  BVT(clib_bihash_kv) entry;

  entry.key[0] = masked_address64 (ip6->as_u64[0], upper_bits);
  entry.key[1] = masked_address64 (ip6->as_u64[1], lower_bits);
  entry.key[2] = pfxlen;
  BV(clib_bihash_add_del) (&lpm->bihash, &entry, 0);

  /* prefix length accounting */
  ASSERT (lpm->prefix_length_refcount[pfxlen] > 0);
  lpm->prefix_length_refcount[pfxlen] -= 1;
  if (lpm->prefix_length_refcount[pfxlen] != 0)
    return;

  /* Last user of this prefix length is gone. */
  lpm->prefix_lengths_bitmap =
    clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 0);
}

/*
 * Allocate a zeroed LPM table and wire up the add/delete/lookup
 * callbacks for the requested key type.
 *
 * For LPM_TYPE_KEY128 the backing bihash is initialised as well.  Any
 * other type value trips ASSERT(0); the zeroed table is still returned
 * afterwards.
 */
lpm_t *
lpm_table_init (enum lpm_type_e lpm_type)
{
  lpm_t *table = clib_mem_alloc (sizeof (*table));

  memset (table, 0, sizeof (*table));

  if (lpm_type == LPM_TYPE_KEY32) {
    table->add = lpm_32_add;
    table->delete = lpm_32_delete;
    table->lookup = lpm_32_lookup;
  } else if (lpm_type == LPM_TYPE_KEY128) {
    table->add = lpm_128_add;
    table->delete = lpm_128_delete;
    table->lookup = lpm_128_lookup;
    /* TODO: make the bihash sizes configurable */
    BV (clib_bihash_init) (&(table->bihash),
			   "LPM 128", 64*1024, 32<<20);
  } else {
    ASSERT(0);
  }

  return table;
}
ass="n">time_val = (struct timeval *) optval; if ((time_val->tv_usec < 0) || (time_val->tv_usec > USEC_TO_SEC)) { return EDOM; } else { if (time_val->tv_sec < 0) { *timeout = 0; } else { *timeout = MAX_WAIT_TIMEOUT; if ((time_val->tv_sec != 0) || (time_val->tv_usec != 0)) { if (time_val->tv_sec < ((MAX_WAIT_TIMEOUT / 1000) - 1)) { *timeout = time_val->tv_sec * 1000 + time_val->tv_usec / 1000; } } } } return 0; } /***************************************************************************** * Prototype : sbr_setsockopt_sol_socket * Description : set sol socket * Input : sbr_socket_t * sk * int optname * const void * optval * socklen_t optlen * netconn_type_t type * Output : None * Return Value : int * Calls : * Called By : * *****************************************************************************/ int sbr_setsockopt_sol_socket (sbr_socket_t * sk, int optname, const void *optval, socklen_t optlen, spl_netconn_type_t type) { int err = 0; switch (optname) { case SO_REUSEADDR: case SO_BROADCAST: case SO_KEEPALIVE: case SO_RCVBUF: case SO_SNDBUF: if (optlen < sizeof (int)) { err = EINVAL; } break; case SO_RCVTIMEO: err = sbr_pick_timeout (optval, optlen, &sbr_get_fd_share (sk)->recv_timeout); break; case SO_SNDTIMEO: err = sbr_pick_timeout (optval, optlen, &sbr_get_fd_share (sk)->send_timeout); break; case SO_LINGER: if (optlen < sizeof (struct linger)) { err = EINVAL; } break; default: err = ENOPROTOOPT; break; } return err; } /***************************************************************************** * Prototype : sbr_setsockopt_ipproto_ip * Description : set ipproto ip * Input : int optname * const void * optval * socklen_t optlen * netconn_type_t type * Output : None * Return Value : int * Calls : * Called By : * *****************************************************************************/ int sbr_setsockopt_ipproto_ip (int optname, const void *optval, socklen_t optlen, spl_netconn_type_t type) { int err = 0; switch (optname) { case IP_TOS: if (optlen < 
sizeof (u8)) { err = EINVAL; } break; case IP_MULTICAST_TTL: if (optlen < sizeof (u8)) { err = EINVAL; break; } if (type != SPL_NETCONN_UDP) { err = EAFNOSUPPORT; break; } break; case IP_MULTICAST_IF: if (optlen < sizeof (struct in_addr)) { err = EINVAL; break; } if (type != SPL_NETCONN_UDP) { err = EAFNOSUPPORT; break; } break; case IP_MULTICAST_LOOP: if (optlen < sizeof (u8)) { err = EINVAL; break; } if (type != SPL_NETCONN_UDP) { err = EAFNOSUPPORT; break; } break; default: err = ENOPROTOOPT; break; } return err; } /***************************************************************************** * Prototype : sbr_dequeue_buf * Description : dequeue buf * Input : sbr_socket_t * sk * void **buf * i32 timeout * u8 use_l4_ring * Output : None * Return Value : int * Calls : * Called By : * *****************************************************************************/ int sbr_dequeue_buf (sbr_socket_t * sk, void **buf, i32 timeout) { mring_handle ring = ss_get_recv_ring (sbr_get_conn (sk)); struct timespec start, end; long timediff; long timediff_sec; long timeout_sec = (long) (timeout / 1000); unsigned int retry_count = 0; if (timeout > 0) { if (unlikely (0 != clock_gettime (CLOCK_MONOTONIC, &start))) { NSSBR_LOGERR ("Failed to get time, errno = %d", errno); } } if (!ss_recv_ring_valid (sbr_get_conn (sk))) { NSSBR_LOGDBG ("ring is invalid]fd=%d", sk->fd); sbr_set_sk_io_errno (sk, ENOTCONN); return -1; } int dequeue_ret = 0; pid_t pid = get_sys_pid (); while (1) { if (ss_is_shut_rd (sbr_get_conn (sk))) { NSSBR_LOGDBG ("is shut rd]fd=%d", sk->fd); sbr_set_sk_io_errno (sk, EINVAL); return -1; } dequeue_ret = nsfw_mem_ring_dequeue (ring, buf); if (1 == dequeue_ret) { pbuf_set_recycle_flg ((struct spl_pbuf *) *buf, pid); /*release buf hold by app on abnormal exit */ return 0; } else if (0 == dequeue_ret) { /*If the peer reset connect, try to receive data only once */ if (ss_can_not_recv (sbr_get_conn (sk))) { NS_LOG_CTRL (LOG_CTRL_RECV_QUEUE_FULL, LOGSBR, "NSSBR", NSLOG_WAR, 
"try to fetch one more time]fd=%d", sk->fd); /** * l4_ring will not be processed here as can_not_recv flag is * set by TCP only. */ if (1 == nsfw_mem_ring_dequeue (ring, buf)) { pbuf_set_recycle_flg ((struct spl_pbuf *) *buf, pid); return 0; } sbr_set_sk_io_errno (sk, ENOTCONN); return -1; } int err = ss_get_last_errno (sbr_get_conn (sk)); if (SPL_ERR_IS_FATAL (err) || err == ERR_TIMEOUT) /* have to handle ERR_TIMEOUT here, when TCP keepalive timeout. */ { NS_LOG_CTRL (LOG_CTRL_RECV_QUEUE_FULL, LOGSBR, "NSSBR", NSLOG_ERR, "connection fatal error!err=%d", err); /* l4_ring need to be handled in the future */ if (1 == nsfw_mem_ring_dequeue (ring, buf)) { pbuf_set_recycle_flg ((struct spl_pbuf *) *buf, pid); return 0; } sbr_set_sk_io_errno (sk, sbr_spl_err_to_errno (err)); return -1; } if (0 > timeout) { sbr_set_sk_io_errno (sk, EWOULDBLOCK); return -1; } if (retry_count < FAST_RETRY_COUNT) { sys_sleep_ns (0, FAST_SLEEP_TIME); retry_count++; } else { sys_sleep_ns (0, sbr_get_fd_share (sk)->block_polling_time); } if (timeout > 0) { if (unlikely (0 != clock_gettime (CLOCK_MONOTONIC, &end))) { NSSBR_LOGERR ("Failed to get time, errno = %d", errno); } timediff_sec = end.tv_sec - start.tv_sec; if (timediff_sec >= timeout_sec) { timediff = end.tv_nsec > start.tv_nsec ? 
(timediff_sec * 1000) + (end.tv_nsec - start.tv_nsec) / USEC_TO_SEC : (timediff_sec * 1000) - ((start.tv_nsec - end.tv_nsec) / USEC_TO_SEC); if (timediff > timeout) { NSSBR_LOGDBG ("recv timeout]fd=%d", sk->fd); sbr_set_sk_io_errno (sk, EWOULDBLOCK); return -1; } } } } else { NSSBR_LOGERR ("dequeue failed]fd=%d", sk->fd); sbr_set_sk_io_errno (sk, EINVAL); return -1; } } } int sbr_com_peak (sbr_socket_t * sk) { NSSBR_LOGERR ("not implement]fd=%d", sk->fd); return -1; } /***************************************************************************** * Prototype : sbr_com_try_lock_recv * Description : try lock recv * Input : sbr_socket_t * sk * Output : None * Return Value : int * Calls : * Called By : * *****************************************************************************/ int sbr_com_try_lock_recv (sbr_socket_t * sk) { #ifdef SBR_USE_LOCK return common_spinlock_try_lock_with_pid (&sbr_get_fd_share (sk)->recv_lock, get_sys_pid ()); #else return 1; #endif } /***************************************************************************** * Prototype : sbr_com_lock_common * Description : lock common * Input : sbr_socket_t * sk * Output : None * Return Value : void * Calls : * Called By : * *****************************************************************************/ void sbr_com_lock_common (sbr_socket_t * sk) { #ifdef SBR_USE_LOCK while (!common_spinlock_try_lock_with_pid (&sbr_get_fd_share (sk)->common_lock, get_sys_pid ())) { sys_sleep_ns (0, 0); } #endif } void sbr_com_fork_parent (sbr_socket_t * sk, pid_t p) { i32 ref = ss_inc_fork_ref (sbr_get_conn (sk)); NSSBR_LOGINF ("inc fork ref] fd=%d, p=%d, ref=%d, conn=%p, private_data=%p", sk->fd, p, ref, sbr_get_conn (sk), sbr_get_conn (sk)->private_data); } void sbr_com_fork_child (sbr_socket_t * sk, pid_t p, pid_t c) { if (ss_add_pid (sbr_get_conn (sk), c) != 0) { NSSBR_LOGERR ("add pid failed] fd=%d, p=%d, c=%d, ref=%d, conn=%p, private_data=%p", sk->fd, p, c, ss_get_fork_ref (sbr_get_conn (sk)), sbr_get_conn 
(sk), sbr_get_conn (sk)->private_data); } else { NSSBR_LOGINF ("add pid ok] fd=%d, p=%d, c=%d, ref=%d, conn=%p, private_data=%p", sk->fd, p, c, ss_get_fork_ref (sbr_get_conn (sk)), sbr_get_conn (sk), sbr_get_conn (sk)->private_data); } } /***************************************************************************** * Prototype : sbr_com_unlock_common * Description : unlock common * Input : sbr_socket_t * sk * Output : None * Return Value : void * Calls : * Called By : * *****************************************************************************/ void sbr_com_unlock_common (sbr_socket_t * sk) { #ifdef SBR_USE_LOCK common_spinlock_unlock (&sbr_get_fd_share (sk)->common_lock); #endif } /***************************************************************************** * Prototype : sbr_com_free_recv_buf * Description : free recv buf,can't free buf in app * Input : sbr_socket_t * sk * struct spl_pbuf *p * Output : None * Return Value : void * Calls : * Called By : * *****************************************************************************/ void sbr_com_free_recv_buf (sbr_socket_t * sk, struct spl_pbuf *p) { struct spl_pbuf *p_orig = p; if (p) { p->freeNext = NULL; p = (struct spl_pbuf *) ADDR_LTOSH (p); if (sbr_get_fd_share (sk)->recoder.totalLen > 0) { ((struct spl_pbuf *) ADDR_SHTOL (sbr_get_fd_share (sk)->recoder.tail))->freeNext = p; sbr_get_fd_share (sk)->recoder.tail = p; } else { sbr_get_fd_share (sk)->recoder.head = p; sbr_get_fd_share (sk)->recoder.tail = p; } sbr_get_fd_share (sk)->recoder.totalLen++; } /* send MSG only if it's a big packet or number of packets larger than 32 */ if ((p_orig && p_orig->tot_len > MAX_RECV_FREE_LEN) || (sbr_get_fd_share (sk)->recoder.totalLen >= MAX_RECV_FREE_BUF)) { sbr_handle_free_recv_buf (sk); } } /***************************************************************************** * Prototype : sbr_get_sockaddr_and_len * Description : get addr and len * Input : u16 port * spl_ip_addr_t * ipaddr * struct sockaddr * addr * 
socklen_t * addrlen * Output : None * Return Value : int * Calls : * Called By : * *****************************************************************************/ int sbr_get_sockaddr_and_len (u16 port, spl_ip_addr_t * ipaddr, struct sockaddr *addr, socklen_t * addrlen) { int ret; struct sockaddr_in sin; ret = MEMSET_S (&sin, sizeof (sin), 0, sizeof (sin)); if (0 != ret) { NSSBR_LOGERR ("MEMSET_S failed]ret=%d.", ret); return -1; } sin.sin_family = AF_INET; sin.sin_port = htons (port); inet_addr_from_ipaddr (&sin.sin_addr, ipaddr); if (*addrlen > sizeof (struct sockaddr)) { *addrlen = sizeof (struct sockaddr); } if (*addrlen > 0) { ret = MEMCPY_S (addr, sizeof (struct sockaddr), &sin, *addrlen); if (0 != ret) { NSSBR_LOGERR ("MEMCPY_S failed]ret=%d", ret); return -1; } } return 0; } /***************************************************************************** * Prototype : sbr_com_set_app_info * Description : set app info to netconn * Input : sbr_socket_t * sk * void* appinfo * Output : None * Return Value : void * Calls : * Called By : * *****************************************************************************/ void sbr_com_set_app_info (sbr_socket_t * sk, void *appinfo) { return; }