summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/linux/mem.c
blob: 253ae87845b3f216e71bba87357e867c687efd77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

@media only all and (prefers-color-scheme: dark) {
.highlight .hll { background-color: #49483e }
.highlight .c { color: #75715e } /* Comment */
.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
.highlight .k { color: #66d9ef } /* Keyword */
.highlight .l { color: #ae81ff } /* Literal */
.highlight .n { color: #f8f8f2 } /* Name */
.highlight .o { color: #f92672 } /* Operator */
.highlight .p { color: #f8f8f2 } /* Punctuation */
.highlight .ch { color: #75715e } /* Comment.Hashbang */
.highlight .cm { color: #75715e } /* Comment.Multiline */
.highlight .cp { color: #75715e } /* Comment.Preproc */
.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
.highlight .c1 { color: #75715e } /* Comment.Single */
.highlight .cs { color: #75715e } /* Comment.Special */
.highlight .gd { color: #f92672 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gi { color: #a6e22e } /* Generic.Inserted */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #75715e } /* Generic.Subheading */
.highlight .kc { color: #66d9ef } /* Keyword.Constant */
.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
.highlight .kn { color: #f92672 } /* Keyword.Namespace */
.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
.highlight .kt { color: #66d9ef } /* Keyword.Type */
.highlight .ld { color: #e6db74 } /* Literal.Date */
.highlight .m { color: #ae81ff } /* Literal.Number */
.highlight .s { color: #e6db74 } /* Literal.String */
.highlight .na { color: #a6e22e } /* Name.Attribute */
.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
.highlight .nc { color: #a6e22e } /* Name.Class */
.highlight .no { color: #66d9ef } /* Name.Constant */
.highlight .nd { color: #a6e22e } /* Name.Decorator */
.highlight .ni { color: #f8f8f2 } /* Name.Entity */
.highlight .ne { color: #a6e22e } /* Name.Exception */
.highlight .nf { color: #a6e22e } /* Name.Function */
.highlight .nl { color: #f8f8f2 } /* Name.Label */
.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
.highlight .nx { color: #a6e22e } /* Name.Other */
.highlight .py { color: #f8f8f2 } /* Name.Property */
.highlight .nt { color: #f92672 } /* Name.Tag */
.highlight .nv { color: #f8f8f2 } /* Name.Variable */
.highlight .ow { color: #f92672 } /* Operator.Word */
.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
.highlight .sc { color: #e6db74 } /* Literal.String.Char */
.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
.highlight .se { color: #ae81ff } /* Literal.String.Escape */
.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
.highlight .sx { color: #e6db74 } /* Literal.String.Other */
.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
}
@media (prefers-color-scheme: light) {
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
}
loop create
set int ip address loop0 192.168.1.1/8
set int state loop0 up

packet-generator new {						\
  name udp							\
  limit 512							\
  rate 1e4							\
  node ip4-input						\
  size 100-100							\
  interface loop0						\
  data {							\
   UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1		\
   UDP: 4321 -> 1234						\
    length 72							\
    incrementing 100						\
  }								\
}
/a> 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <linux/mempolicy.h>
#include <linux/memfd.h>

#include <vppinfra/clib.h>
#include <vppinfra/mem.h>
#include <vppinfra/time.h>
#include <vppinfra/format.h>
#include <vppinfra/clib_error.h>
#include <vppinfra/linux/syscall.h>
#include <vppinfra/linux/sysfs.h>

/*
 * Fallback definitions for the file-sealing fcntl () commands and seal
 * flags. Each group is only defined when the included system headers did
 * not already provide it.
 * NOTE(review): the numeric values are expected to mirror the kernel's
 * <linux/fcntl.h> - confirm before changing any of them.
 */
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif

/* the F_SEAL_* flags are assumed to be missing whenever F_ADD_SEALS is */
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)

#define F_SEAL_SEAL     0x0001	/* prevent further seals from being set */
#define F_SEAL_SHRINK   0x0002	/* prevent file from shrinking */
#define F_SEAL_GROW     0x0004	/* prevent file from growing */
#define F_SEAL_WRITE    0x0008	/* prevent writes */
#endif

/*
 * Report the page size associated with an open file descriptor.
 *
 * The value is taken from the st_blksize field that fstat () fills in
 * for fd.
 *
 * @param fd  open file descriptor to query
 * @return    st_blksize of fd, or 0 when fstat () fails
 */
u64
clib_mem_vm_get_page_size (int fd)
{
  struct stat info = { 0 };

  return (fstat (fd, &info) != -1) ? info.st_blksize : 0;
}

int
clib_mem_vm_get_log2_page_size (int fd)
{
  return min_log2 (clib_mem_vm_get_page_size (fd));
}

/*
 * Shift a requested virtual address forward by a small, time-derived
 * number of pages.
 *
 * The number of pages is the low bits of clib_cpu_time_now ():
 *   log2_page_size <= 12        -> 0..15 pages
 *   12 < log2_page_size <= 16   -> 0..3 pages
 *   log2_page_size > 16         -> no shift
 *
 * @param requested_va    address to adjust, updated in place
 * @param log2_page_size  log2 of the page size used as the shift unit
 */
void
clib_mem_vm_randomize_va (uword * requested_va, u32 log2_page_size)
{
  const u8 slot_mask =
    (log2_page_size <= 12) ? 15 : (log2_page_size <= 16) ? 3 : 0;
  u64 slot = clib_cpu_time_now () & slot_mask;

  *requested_va += slot * (1ull << log2_page_size);
}

/*
 * Fallback for the memfd_create () hugetlb flag when the included headers
 * do not define it.
 * NOTE(review): value is expected to match <linux/memfd.h> - confirm.
 */
#ifndef MFD_HUGETLB
#define MFD_HUGETLB 0x0004U
#endif

/*
 * Create a file descriptor backed by huge pages.
 *
 * memfd_create () with MFD_HUGETLB is tried first. If that fails, a
 * hugetlbfs instance is mounted on a temporary directory, a file named
 * 'name' is created in it, and the mount and directory are dropped again
 * right away; only the open descriptor is kept.
 *
 * @param name  name for the memfd / file; must not be NULL
 * @param fdp   receives the new descriptor on success, untouched on failure
 * @return      0 on success, otherwise an error object
 */
clib_error_t *
clib_mem_create_hugetlb_fd (char *name, int *fdp)
{
  clib_error_t *err = 0;
  int fd = -1;
  int open_errno;
  static int memfd_hugetlb_supported = 1;
  char *mount_dir;
  char template[] = "/tmp/hugepage_mount.XXXXXX";
  u8 *filename;

  ASSERT (name);

  if (memfd_hugetlb_supported)
    {
      if ((fd = memfd_create (name, MFD_HUGETLB)) != -1)
	goto done;

      /* avoid further tries if memfd MFD_HUGETLB is not supported */
      if (errno == EINVAL && strnlen (name, 256) <= 249)
	memfd_hugetlb_supported = 0;
    }

  mount_dir = mkdtemp (template);
  if (mount_dir == 0)
    return clib_error_return_unix (0, "mkdtemp \'%s\'", template);

  if (mount ("none", mount_dir, "hugetlbfs", 0, NULL))
    {
      /*
       * Build the error before rmdir () gets a chance to overwrite errno
       * (clib_error_return_unix presumably reports errno), then stop:
       * nothing below can work without the mount.
       */
      err = clib_error_return_unix (0, "mount hugetlb directory '%s'",
				    mount_dir);
      rmdir (mount_dir);
      return err;
    }

  filename = format (0, "%s/%s%c", mount_dir, name, 0);
  fd = open ((char *) filename, O_CREAT | O_RDWR, 0755);
  /* remember why open () failed; the cleanup calls may change errno */
  open_errno = errno;

  /* the descriptor keeps the file alive, the mount point is not needed */
  umount2 (mount_dir, MNT_DETACH);
  rmdir (mount_dir);
  vec_free (filename);

  if (fd == -1)
    {
      errno = open_errno;
      err = clib_error_return_unix (0, "open");
    }

done:
  if (fd != -1)
    fdp[0] = fd;
  return err;
}

/*
 * Map a region of virtual memory according to the request in 'a'.
 *
 * Inputs read from 'a': flags, name, size, numa_node, requested_va.
 * Outputs written on success: log2_page_size, n_pages, addr, fd
 * (fd stays -1 for non-shared mappings).
 *
 * Flag handling:
 *  - CLIB_MEM_VM_F_SHARED: MAP_SHARED mapping backed by a hugetlb fd
 *    (with CLIB_MEM_VM_F_HUGETLB) or by a memfd sealed against shrinking;
 *    the page size is taken from the fd. A non-zero requested_va is
 *    randomized in place and mapped with MAP_FIXED.
 *  - otherwise: private anonymous mapping; with CLIB_MEM_VM_F_HUGETLB the
 *    page size is taken to be 2^21, else the system page size.
 *  - CLIB_MEM_VM_F_LOCKED adds MAP_LOCKED.
 *  - CLIB_MEM_VM_F_HUGETLB_PREALLOC pre-allocates n_pages huge pages
 *    through clib_sysfs_prealloc_hugepages ().
 *  - CLIB_MEM_VM_F_NUMA_PREFER / _FORCE: the current memory policy is
 *    saved, MPOL_BIND to numa_node is applied around mmap (), and the
 *    saved policy is re-applied afterwards.
 *
 * On failure nothing is left behind: the fd is closed, a mapping that was
 * already established is unmapped, and a memory policy that was switched
 * is put back (best effort).
 *
 * @return 0 on success, otherwise an error object
 */
clib_error_t *
clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a)
{
  int fd = -1;
  clib_error_t *err = 0;
  void *addr = MAP_FAILED;
  int mmap_flags = 0;
  int log2_page_size;
  int n_pages;
  int old_mpol = -1;
  int mpol_switched = 0;	/* set while MPOL_BIND is in effect */
  long unsigned int old_mask[16] = { 0 };

  /* save old numa mem policy if needed */
  if (a->flags & (CLIB_MEM_VM_F_NUMA_PREFER | CLIB_MEM_VM_F_NUMA_FORCE))
    {
      int rv;
      rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1,
			  0, 0);

      if (rv == -1)
	{
	  if (a->numa_node != 0 && (a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
	    {
	      err = clib_error_return_unix (0, "get_mempolicy");
	      goto error;
	    }
	  else
	    old_mpol = -1;
	}
    }

  if (a->flags & CLIB_MEM_VM_F_LOCKED)
    mmap_flags |= MAP_LOCKED;

  /* if we are creating shared segment, we need file descriptor */
  if (a->flags & CLIB_MEM_VM_F_SHARED)
    {
      mmap_flags |= MAP_SHARED;
      /* if hugepages are needed we need to create mount point */
      if (a->flags & CLIB_MEM_VM_F_HUGETLB)
	{
	  if ((err = clib_mem_create_hugetlb_fd (a->name, &fd)))
	    goto error;

	  mmap_flags |= MAP_LOCKED;
	}
      else
	{
	  if ((fd = memfd_create (a->name, MFD_ALLOW_SEALING)) == -1)
	    {
	      err = clib_error_return_unix (0, "memfd_create");
	      goto error;
	    }

	  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
	    {
	      err = clib_error_return_unix (0, "fcntl (F_ADD_SEALS)");
	      goto error;
	    }
	}

      log2_page_size = clib_mem_vm_get_log2_page_size (fd);
      if (log2_page_size == 0)
	{
	  err = clib_error_return_unix (0, "cannot determine page size");
	  goto error;
	}

      if (a->requested_va)
	{
	  clib_mem_vm_randomize_va (&a->requested_va, log2_page_size);
	  mmap_flags |= MAP_FIXED;
	}
    }
  else				/* not CLIB_MEM_VM_F_SHARED */
    {
      mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
      if (a->flags & CLIB_MEM_VM_F_HUGETLB)
	{
	  mmap_flags |= MAP_HUGETLB;
	  log2_page_size = 21;
	}
      else
	{
	  log2_page_size = min_log2 (sysconf (_SC_PAGESIZE));
	}
    }

  /* round the requested size up to whole pages */
  n_pages = ((a->size - 1) >> log2_page_size) + 1;

  if (a->flags & CLIB_MEM_VM_F_HUGETLB_PREALLOC)
    {
      /* second argument is the page size expressed in KiB */
      err = clib_sysfs_prealloc_hugepages (a->numa_node,
					   1 << (log2_page_size - 10),
					   n_pages);
      if (err)
	goto error;
    }

  /* shift in 64 bits so page sizes of 2^31 and above do not overflow */
  if (fd != -1)
    if ((ftruncate (fd, (u64) n_pages << log2_page_size)) == -1)
      {
	err = clib_error_return_unix (0, "ftruncate");
	goto error;
      }

  if (old_mpol != -1)
    {
      int rv;
      long unsigned int mask[16] = { 0 };
      /* unsigned long shift: an int shift overflows for node >= 31 */
      mask[0] = 1UL << a->numa_node;
      rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
      if (rv == -1 && a->numa_node != 0 &&
	  (a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
	{
	  err = clib_error_return_unix (0, "set_mempolicy");
	  goto error;
	}
      mpol_switched = (rv != -1);
    }

  addr = mmap (uword_to_pointer (a->requested_va, void *), a->size,
	       (PROT_READ | PROT_WRITE), mmap_flags, fd, 0);
  if (addr == MAP_FAILED)
    {
      err = clib_error_return_unix (0, "mmap");
      goto error;
    }

  /* re-apply old numa memory policy */
  if (old_mpol != -1)
    {
      /* single restore attempt; the error path must not try again */
      mpol_switched = 0;
      if (set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1)
	{
	  err = clib_error_return_unix (0, "set_mempolicy");
	  goto error;
	}
    }

  a->log2_page_size = log2_page_size;
  a->n_pages = n_pages;
  a->addr = addr;
  a->fd = fd;
  return 0;

error:
  /* err is already built, so errno may be overwritten from here on */
  if (mpol_switched)
    set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1);
  if (addr != MAP_FAILED)
    munmap (addr, a->size);
  if (fd != -1)
    close (fd);
  return err;
}

/*
 * Release a region obtained through clib_mem_vm_ext_alloc ().
 *
 * The whole region (n_pages pages of 2^log2_page_size bytes) is handed to
 * clib_mem_vm_free (), and the backing file descriptor is closed when there
 * is one. A NULL 'a' is ignored.
 * NOTE(review): clib_mem_vm_free (addr, size) is presumed to unmap 'size'
 * bytes starting at 'addr' - confirm against vppinfra/mem.h.
 *
 * @param a  allocation descriptor; addr, log2_page_size, n_pages and fd
 *           are read
 */
void
clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a)
{
  if (a != 0)
    {
      /* fall back to the historical one-page length if n_pages is unset */
      u64 n_pages = a->n_pages > 0 ? a->n_pages : 1;

      clib_mem_vm_free (a->addr, n_pages << a->log2_page_size);
      if (a->fd != -1)
	close (a->fd);
    }
}

u64 *
clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages)
{
  int pagesize = sysconf (_SC_PAGESIZE);
  int fd;
  int i;
  u64 *r = 0;

  if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
    return 0;

  for (i = 0; i < n_pages; i++)
    {
      u64 seek, pagemap = 0;
      uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
      seek = ((u64) vaddr / pagesize) * sizeof (u64);
      if (lseek (fd, seek, SEEK_SET) != seek)
	goto done;

      if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
	goto done;

      if ((pagemap & (1ULL << 63)) == 0)
	goto done;

      pagemap &= pow2_mask (55);
      vec_add1 (r, pagemap * pagesize);
    }

done:
  close (fd);
  if (vec_len (r) != n_pages)
    {
      vec_free (r);
      return 0;
    }
  return r;
}

/*
 * Map an existing file descriptor as shared, read/write memory.
 *
 * a->size bytes of a->fd are mapped from offset 0. When a->requested_va is
 * non-zero the mapping is placed exactly there (MAP_FIXED); otherwise the
 * kernel picks the address.
 *
 * @param a  mapping request; fd, size and requested_va are read, addr is
 *           written on success
 * @return   0 on success, otherwise an error object for the failed mmap ()
 */
clib_error_t *
clib_mem_vm_ext_map (clib_mem_vm_map_t * a)
{
  void *hint = uword_to_pointer (a->requested_va, void *);
  int flags = a->requested_va ? (MAP_SHARED | MAP_FIXED) : MAP_SHARED;
  void *mapped;

  mapped = mmap (hint, a->size, PROT_READ | PROT_WRITE, flags, a->fd, 0);
  if (mapped == MAP_FAILED)
    return clib_error_return_unix (0, "mmap");

  a->addr = mapped;
  return 0;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */