/* *------------------------------------------------------------------ * svm.c - shared VM allocation, mmap(...MAP_FIXED...) * library * * Copyright (c) 2009 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *------------------------------------------------------------------ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "svm.h" static svm_region_t *root_rp; static int root_rp_refcount; #define MAXLOCK 2 static pthread_mutex_t *mutexes_held[MAXLOCK]; static int nheld; svm_region_t * svm_get_root_rp (void) { return root_rp; } #define MUTEX_DEBUG u64 svm_get_global_region_base_va () { #if __aarch64__ /* On AArch64 VA space can have different size, from 36 to 48 bits. 
Here we are trying to detect VA bits by parsing /proc/self/maps address ranges */ int fd; unformat_input_t input; u64 start, end = 0; u8 bits = 0; if ((fd = open ("/proc/self/maps", 0)) < 0) clib_unix_error ("open '/proc/self/maps'"); unformat_init_clib_file (&input, fd); while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) { if (unformat (&input, "%llx-%llx", &start, &end)) end--; unformat_skip_line (&input); } unformat_free (&input); close (fd); bits = count_leading_zeros (end); bits = 64 - bits; if (bits >= 36 && bits <= 48) return ((1ul << bits) / 4) - (2 * SVM_GLOBAL_REGION_SIZE); else clib_unix_error ("unexpected va bits '%u'", bits); #endif #ifdef CLIB_SANITIZE_ADDR return 0x200000000000; #endif /* default value */ return 0x130000000ULL; } static void region_lock (svm_region_t * rp, int tag) { pthread_mutex_lock (&rp->mutex); #ifdef MUTEX_DEBUG rp->mutex_owner_pid = getpid (); rp->mutex_owner_tag = tag; #endif ASSERT (nheld < MAXLOCK); //NOSONAR /* * Keep score of held mutexes so we can try to exit * cleanly if the world comes to an end at the worst possible * moment */ mutexes_held[nheld++] = &rp->mutex; } static void region_unlock (svm_region_t * rp) { int i, j; #ifdef MUTEX_DEBUG rp->mutex_owner_pid = 0; rp->mutex_owner_tag = 0; #endif for (i = nheld - 1; i >= 0; i--) { if (mutexes_held[i] == &rp->mutex) { for (j = i; j < MAXLOCK - 1; j++) mutexes_held[j] = mutexes_held[j + 1]; nheld--; goto found; } } ASSERT (0); found: CLIB_MEMORY_BARRIER (); pthread_mutex_unlock (&rp->mutex); } static u8 * format_svm_flags (u8 * s, va_list * args) { uword f = va_arg (*args, uword); if (f & SVM_FLAGS_MHEAP) s = format (s, "MHEAP "); if (f & SVM_FLAGS_FILE) s = format (s, "FILE "); if (f & SVM_FLAGS_NODATA) s = format (s, "NODATA "); if (f & SVM_FLAGS_NEED_DATA_INIT) s = format (s, "INIT "); return (s); } static u8 * format_svm_size (u8 * s, va_list * args) { uword size = va_arg (*args, uword); if (size >= (1 << 20)) { s = format (s, "(%d mb)", size >> 20); } 
else if (size >= (1 << 10)) { s = format (s, "(%d kb)", size >> 10); } else { s = format (s, "(%d bytes)", size); } return (s); } u8 * format_svm_region (u8 * s, va_list * args) { svm_region_t *rp = va_arg (*args, svm_region_t *); int verbose = va_arg (*args, int); int i; uword lo, hi; s = format (s, "%s: base va 0x%x size 0x%x %U\n", rp->region_name, rp->virtual_base, rp->virtual_size, format_svm_size, rp->virtual_size); s = format (s, " user_ctx 0x%x, bitmap_size %d\n", rp->user_ctx, rp->bitmap_size); if (verbose) { s = format (s, " flags: 0x%x %U\n", rp->flags, format_svm_flags, rp->flags); s = format (s, " region_heap 0x%x data_base 0x%x data_heap 0x%x\n", rp->region_heap, rp->data_base, rp->data_heap); } s = format (s, " %d clients, pids: ", vec_len (rp->client_pids)); for (i = 0; i < vec_len (rp->client_pids); i++) s = format (s, "%d ", rp->client_pids[i]); s = format (s, "\n"); if (verbose) { lo = hi = ~0; s = format (s, " VM in use: "); for (i = 0; i < rp->bitmap_size; i++) { if (clib_bitmap_get_no_check (rp->bitmap, i) != 0) { if (lo == ~0) { hi = lo = rp->virtual_base + i * MMAP_PAGESIZE; } else { hi = rp->virtual_base + i * MMAP_PAGESIZE; } } else { if (lo != ~0) { hi = rp->virtual_base + i * MMAP_PAGESIZE - 1; s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi, (hi - lo) >> 10); lo = hi = ~0; } } } } return (s); } /* * rnd_pagesize * Round to a pagesize multiple, presumably 4k works */ static u64 rnd_pagesize (u64 size) { u64 rv; rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1); return (rv); } /* * svm_data_region_setup */ static int svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp) { int fd; u8 junk = 0; uword map_size; map_size = rp->virtual_size - (MMAP_PAGESIZE + (a->pvt_heap_size ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)); if (a->flags & SVM_FLAGS_FILE) { struct stat statb; fd = open (a->backing_file, O_RDWR | O_CREAT, 0777); if (fd < 0) { clib_unix_warning ("open"); return -1; } if (fstat (fd, &statb) < 0) { clib_unix_warning ("fstat"); close (fd); return -2; } if (statb.st_mode & S_IFREG) { if (statb.st_size == 0) { if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1) { clib_unix_warning ("seek region size"); close (fd); return -3; } if (write (fd, &junk, 1) != 1) { clib_unix_warning ("set region size"); close (fd); return -3; } } else { map_size = rnd_pagesize (statb.st_size); } } else { map_size = a->backing_mmap_size; } ASSERT (map_size <= rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE)); if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) { clib_unix_warning ("mmap"); close (fd); return -3; } close (fd); CLIB_MEM_UNPOISON (rp->data_base, map_size); rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0); rp->flags |= SVM_FLAGS_FILE; } if (a->flags & SVM_FLAGS_MHEAP) { rp->data_heap = clib_mem_create_heap (rp->data_base, map_size, 1 /* locked */ , "svm data"); rp->flags |= SVM_FLAGS_MHEAP; } return 0; } static int svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp) { int fd; u8 junk = 0; uword map_size; struct stat statb; map_size = rp->virtual_size - (MMAP_PAGESIZE + (a->pvt_heap_size ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)); if (a->flags & SVM_FLAGS_FILE) { fd = open (a->backing_file, O_RDWR, 0777); if (fd < 0) { clib_unix_warning ("open"); return -1; } if (fstat (fd, &statb) < 0) { clib_unix_warning ("fstat"); close (fd); return -2; } if (statb.st_mode & S_IFREG) { if (statb.st_size == 0) { if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1) { clib_unix_warning ("seek region size"); close (fd); return -3; } if (write (fd, &junk, 1) != 1) { clib_unix_warning ("set region size"); close (fd); return -3; } } else { map_size = rnd_pagesize (statb.st_size); } } else { map_size = a->backing_mmap_size; } ASSERT (map_size <= rp->virtual_size - (MMAP_PAGESIZE + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE))); if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) { clib_unix_warning ("mmap"); close (fd); return -3; } close (fd); CLIB_MEM_UNPOISON (rp->data_base, map_size); } return 0; } u8 * shm_name_from_svm_map_region_args (svm_map_region_args_t * a) { u8 *shm_name; int root_path_offset = 0; int name_offset = 0; if (a->root_path) { /* Tolerate present or absent slashes */ if (a->root_path[0] == '/') root_path_offset++; if (a->name[0] == '/') name_offset = 1; shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset], &a->name[name_offset], 0); } else shm_name = format (0, "%s%c", a->name, 0); return (shm_name); } void svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp) { pthread_mutexattr_t attr; pthread_condattr_t cattr; int nbits, words, bit; int overhead_space; void *oldheap; uword data_base; ASSERT (rp); int rv; clib_memset (rp, 0, sizeof (*rp)); if (pthread_mutexattr_init (&attr)) clib_unix_warning ("mutexattr_init"); if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) clib_unix_warning ("mutexattr_setpshared"); if (pthread_mutex_init (&rp->mutex, &attr)) clib_unix_warning ("mutex_init"); if (pthread_mutexattr_destroy (&attr)) 
clib_unix_warning ("mutexattr_destroy"); if (pthread_condattr_init (&cattr)) clib_unix_warning ("condattr_init"); if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) clib_unix_warning ("condattr_setpshared"); if (pthread_cond_init (&rp->condvar, &cattr)) clib_unix_warning ("cond_init"); if (pthread_condattr_destroy (&cattr)) clib_unix_warning ("condattr_destroy"); region_lock (rp, 1); rp->virtual_base = a->baseva; rp->virtual_size = a->size; rp->region_heap = clib_mem_create_heap (uword_to_pointer (a->baseva + MMAP_PAGESIZE, void *), (a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, 1 /* locked */ , "svm region"); oldheap = svm_push_pvt_heap (rp); rp->region_name = (char *) format (0, "%s%c", a->name, 0); vec_add1 (rp->client_pids, getpid ()); nbits = rp->virtual_size / MMAP_PAGESIZE; ASSERT (nbits > 0); rp->bitmap_size = nbits; words = (nbits + BITS (uword) - 1) / BITS (uword); vec_validate (rp->bitmap, words - 1); overhead_space = MMAP_PAGESIZE /* header */ + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); bit = 0; data_base = (uword) rp->virtual_base; if (a->flags & SVM_FLAGS_NODATA) rp->flags |= SVM_FLAGS_NEED_DATA_INIT; do { clib_bitmap_set_no_check (rp->bitmap, bit, 1); bit++; overhead_space -= MMAP_PAGESIZE; data_base += MMAP_PAGESIZE; } while (overhead_space > 0); rp->data_base = (void *) data_base; /* * Note: although the POSIX spec guarantees that only one * proces
/*
 * Copyright (c) 2018-2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/vnet.h>
#include <vlibmemory/api.h>

#include <vnet/udp/udp_encap.h>
#include <vnet/fib/fib_table.h>
#include <vnet/ip/ip_types_api.h>

#include <vnet/vnet_msg_enum.h>

#define vl_typedefs		/* define message structures */
#include <vnet/vnet_all_api_h.h>
#undef vl_typedefs

#define vl_endianfun		/* define endian functions */
#include <vnet/vnet_all_api_h.h>
#undef vl_endianfun

/* instantiate all the print functions we know about */
#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include <vnet/vnet_all_api_h.h>
#undef vl_printfun

#include <vlibapi/api_helper_macros.h>


/*
 * List of the UDP API messages in this file, one _(ID, name) entry per
 * message: ID is the upper-case message identifier and name is its
 * lower-case counterpart. The list expands to nothing useful by itself;
 * a user defines _(N, n) before invoking foreach_udp_api_msg.
 * The users of this list are outside the visible part of the file.
 */
#define foreach_udp_api_msg            \
_(UDP_ENCAP_DEL, udp_encap_del)        \
_(UDP_ENCAP_ADD, udp_encap_add)        \
_(UDP_ENCAP_DUMP, udp_encap_dump)

static void
send_udp_encap_details (const udp_encap_t * ue, vl_api_registration_t * reg,
			u32 context)
{
  vl_api_udp_encap_details_t *mp;

  mp = vl_msg_api_alloc (sizeof (*mp));
  clib_memset (mp, 0, sizeof (*mp));
  mp->_vl_msg_id = ntohs (VL_API_UDP_ENCAP_DETAILS);
  mp->context = context;

  if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto)
    {
      clib_memcpy (&mp->udp_encap.src_ip.un.ip4,
		   &ue->ue_hdrs.ip4.ue_ip4.src_address, 4);
      clib_memcpy (&mp->udp_encap.dst_ip.un.ip4,
		   &ue->ue_hdrs.ip4.ue_ip4.dst_address, 4);
      mp->udp_encap.dst_ip.af = ip_address_family_encode (AF_IP4);
      mp->udp_encap.src_ip.af = ip_address_family_encode (AF_IP4);

      /* ports aren't byte swapped because they are stored in network
       * byte order */
      mp->udp_encap.src_port = ue->ue_hdrs.ip4.ue_udp.src_port;
      mp->udp_encap.dst_port = ue->ue_hdrs.ip4.ue_udp.dst_port;
    }
  else
    {
      clib_memcpy (&mp->udp_encap.src_ip.un.ip6,
		   &ue->ue_hdrs.ip6.ue_ip6.src_address, 16);
      clib_memcpy (&mp->udp_encap.dst_ip.un.ip6,
		   &ue</