/*
 *------------------------------------------------------------------
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#define _GNU_SOURCE
#include <stdint.h>
#include <net/if.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/eventfd.h>
#include <inttypes.h>
#include <limits.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/plugin/plugin.h>
#include <vnet/ethernet/ethernet.h>
#include <vpp/app/version.h>

#include <memif/memif.h>
#include <memif/private.h>

void
memif_socket_close (clib_socket_t ** s)
{
  memif_file_del_by_index ((*s)->private_data);
  clib_mem_free (*s);
  *s = 0;
}

static u8 *
memif_str2vec (uint8_t * str, int len)
{
  u8 *s = 0;
  int i;

  if (str[0] == 0)
    return s;

  for (i = 0; i < len; i++)
    {
      vec_add1 (s, str[i]);
      if (str[i] == 0)
	return s;
    }
  vec_add1 (s, 0);

  return s;
}

static void
memif_msg_enq_ack (memif_if_t * mif)
{
  memif_msg_fifo_elt_t *e;
  clib_fifo_add2 (mif->msg_queue, e);

  e->msg.type = MEMIF_MSG_TYPE_ACK;
  e->fd = -1;
}

static clib_error_t *
memif_msg_enq_hello (clib_socket_t * sock)
{
  u8 *s;
  memif_msg_t msg = { 0 };
  memif_msg_hello_t *h = &msg.hello;
  msg.type = MEMIF_MSG_TYPE_HELLO;
  h->min_version = MEMIF_VERSION;
  h->max_version = MEMIF_VERSION;
  h->max_m2s_ring = MEMIF_MAX_M2S_RING;
  h->max_s2m_ring = MEMIF_MAX_S2M_RING;
  h->max_region = MEMIF_MAX_REGION;
  h->max_log2_ring_size = MEMIF_MAX_LOG2_RING_SIZE;
  s = format (0, "VPP %s%c", VPP_BUILD_VER, 0);
  strncpy ((char *) h->name, (char *) s, sizeof (h->name) - 1);
  vec_free (s);
  return clib_socket_sendmsg (sock, &msg, sizeof (memif_msg_t), 0, 0);
}

static void
memif_msg_enq_init (memif_if_t * mif)
{
  u8 *s;
  memif_msg_fifo_elt_t *e;
  clib_fifo_add2 (mif->msg_queue, e);
  memif_msg_init_t *i = &e->msg.init;

  e->msg.type = MEMIF_MSG_TYPE_INIT;
  e->fd = -1;
  i->version = MEMIF_VERSION;
  i->id = mif->id;
  i->mode = mif->mode;
  s = format (0, "VPP %s%c", VPP_BUILD_VER, 0);
  strncpy ((char *) i->name, (char *) s, sizeof (i->name) - 1);
  if (mif->secret)
    strncpy ((char *) i->secret, (char *) mif->secret,
	     sizeof (i->secret) - 1);
  vec_free (s);
}

static void
memif_msg_enq_add_region (memif_if_t * mif, u8 region)
{
  memif_msg_fifo_elt_t *e;
  clib_fifo_add2 (mif->msg_queue, e);
  memif_msg_add_region_t *ar = &e->msg.add_region;

  e->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
  e->fd = mif->regions[region].fd;
  ar->index = region;
  ar->size = mif->regions[region].region_size;
}

static void
memif_msg_enq_add_ring (memif_if_t * mif, u8 index, u8 direction)
{
  memif_msg_fifo_elt_t *e;
  clib_fifo_add2 (mif->msg_queue, e);
  memif_msg_add_ring_t *ar = &e->msg.add_ring;
  memif_queue_t *mq;

  ASSERT ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0);

  e->msg.type = MEMIF_MSG_TYPE_ADD_RING;

  if (direction == MEMIF_RING_M2S)
    mq = vec_elt_at_index (mif->rx_queues, index);
  else
    mq = vec_elt_at_index (mif->tx_queues, index);

  e->fd = mq->int_fd;
  ar->index = index;
  ar->region = mq->region;
  ar->offset = mq->offset;
  ar->log2_ring_size = mq->log2_ring_size;
  ar->flags = (direction == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0;
  ar->private_hdr_size = 0;
}

static void
memif_msg_enq_connect (memif_if_t * mif)
{
  memif_msg_fifo_elt_t *e;
  clib_fifo_add2 (mif->msg_queue, e);
  memif_msg_connect_t *c = &e->msg.connect;
  u8 *s;

  e->msg.type = MEMIF_MSG_TYPE_CONNECT;
  e->fd = -1;
  s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0);
  strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1);
  vec_free (s);
}

static void
memif_msg_enq_connected (memif_if_t * mif)
{
  memif_msg_fifo_elt_t *e;
  clib_fifo_add2 (mif->msg_queue, e);
  memif_msg_connected_t *c = &e->msg.connected;
  u8 *s;

  e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
  e->fd = -1;
  s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0);
  strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1);
  vec_free (s);
}

clib_error_t *
memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err)
{
  memif_msg_t msg = { 0 };
  msg.type = MEMIF_MSG_TYPE_DISCONNECT;
  memif_msg_disconnect_t *d = &msg.disconnect;

  d->code = err->code;
  strncpy ((char *) d->string, (char *) err->what, sizeof (d->string) - 1);

  return clib_socket_sendmsg (mif->sock, &msg, sizeof (memif_msg_t), 0, 0);
}

static clib_error_t *
memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg)
{
  memif_msg_hello_t *h = &msg->hello;

  if (msg->hello.min_version > MEMIF_VERSION ||
      msg->hello.max_version < MEMIF_VERSION)
    return clib_error_return (0, "incompatible protocol version");

  mif->run.num_s2m_rings = clib_min (h->max_s2m_ring + 1,
				     mif->cfg.num_s2m_rings);
  mif->run.num_m2s_rings = clib_min (h->max_m2s_ring + 1,
				     mif->cfg.num_m2s_rings);
  mif->run.log2_ring_size = clib_min (h->max_log2_ring_size,
				      mif->cfg.log2_ring_size);
  mif->run.buffer_size = mif->cfg.buffer_size;

  mif->remote_name = memif_str2vec (h->name, sizeof (h->name));

  return 0;
}

static clib_error_t *
memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg,
			clib_socket_t * sock, uword socket_file_index)
{
  memif_main_t *mm = &memif_main;
  memif_socket_file_t *msf =
    vec_elt_at_index (mm->socket_files, socket_file_index);
  memif_msg_init_t *i = &msg->init;
  memif_if_t *mif, tmp;
  clib_error_t *err;
  uword *p;

  if (i->version != MEMIF_VERSION)
    {
      memif_file_del_by_index (sock->private_data);
      return clib_error_return (0, "unsupported version");
    }

  p = mhash_get (&msf->dev_instance_by_id, &i->id);

  if (!p)
    {
      err = clib_error_return (0, "unmatched interface id");
      goto error;
    }

  mif = vec_elt_at_index (mm->interfaces, p[0]);

  if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
    {
      err = clib_error_return (0, "cannot connect to slave");
      goto error;
    }

  if (mif->sock)
    {
      err = clib_error_return (0, "already connected");
      goto error;
    }

  if (i->mode != mif->mode)
    {
      err = clib_error_return (0, "mode mismatch");
      goto error;
    }

  mif->sock = sock;
  hash_set (msf->dev_instance_by_fd, mif->sock->fd, mif->dev_instance);
  mif->remote_name = memif_str2vec (i->name, sizeof (i->name));
  *mifp = mif;

  if (mif->secret)
    {
      u8 *s;
      int r;
      s = memif_str2vec (i->secret, sizeof (i->secret));
      if (s == 0)
	return clib_error_return (0, "secret required");

      r = vec_cmp (s, mif->secret);
      vec_free (s);

      if (r)
	return clib_error_return (0, "incorrect secret");
    }

  return 0;

error:
  tmp.sock = sock;
  memif_msg_send_disconnect (&tmp, err);
  memif_socket_close (&sock);
  return err;
}

static clib_error_t *
memif_msg_receive_add_region (memif_if_t * mif, memif_msg_t * msg, int fd)
{
  memif_msg_add_region_t *ar = &msg->add_region;
  memif_region_t *mr;
  if (fd < 0)
    return clib_error_return (0, "missing memory region fd");

  if (ar->index != vec_len (mif->regions))
    return clib_error_return (0, "unexpected region index");

  if (ar->index > MEMIF_MAX_REGION)
    return clib_error_return (0, "too many regions");

  vec_validate_aligned (mif->regions, ar->index, CLIB_CACHE_LINE_BYTES);
  mr = vec_elt_at_index (mif->regions, ar->index);
  mr->fd = fd;
  mr->region_size = ar->size;

  return 0;
}

static clib_error_t *
memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd)
{
  memif_msg_add_ring_t *ar = &msg->add_ring;
  memif_queue_t *mq;

  if (fd < 0)
    return clib_error_return (0, "missing ring interrupt fd");

  if (ar->private_hdr_size != 0)
    return clib_error_return (0, "private headers not supported");

  if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M)
    {
      if (ar->index != vec_len (mif->rx_queues))
	return clib_error_return (0, "unexpected ring index");

      if (ar->index > MEMIF_MAX_S2M_RING)
	return clib_error_return (0, "too many rings");

      vec_validate_aligned (mif->rx_queues, ar->index, CLIB_CACHE_LINE_BYTES);
      mq = vec_elt_at_index (mif->rx_queues, ar->index);
      mif->run.num_s2m_rings = vec_len (mif->rx_queues);
    }
  else
    {
      if (ar->index != vec_len (mif->tx_queues))
	return clib_error_return (0, "unexpected ring index");

      if (ar->index > MEMIF_MAX_M2S_RING)
	return clib_error_return (0, "too many rings");

      vec_validate_aligned (mif->tx_queues, ar->index, CLIB_CACHE_LINE_BYTES);
      mq = vec_elt_at_index (mif->tx_queues, ar->index);
      mif->run.num_m2s_rings = vec_len (mif->tx_queues);
    }

  // clear previous cache data if interface reconncected
  memset (mq, 0, sizeof (memif_queue_t));
  mq->int_fd = fd;
  mq->int_clib_file_index = ~0;
  mq->log2_ring_size = ar->log2_ring_size;
  mq->region = ar->region;
  mq->offset = ar->offset;
  mq->type =
    (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) ? MEMIF_RING_S2M :
    MEMIF_RING_M2S;

  return 0;
}

static clib_error_t *
memif_msg_receive_connect (memif_if_t * mif, memif_msg_t * msg)
{
  clib_error_t *err;
  memif_msg_connect_t *c = &msg->connect;

  if ((err = memif_connect (mif)))
    return err;

  mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name));

  return 0;
}

static clib_error_t *
memif_msg_receive_connected (memif_if_t * mif, memif_msg_t * msg)
{
  clib_error_t *err;
  memif_msg_connected_t *c = &msg->connected;

  if ((err = memif_connect (mif)))
    return err;

  mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name));
  return 0;
}

static clib_error_t *
memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg)
{
  memif_msg_disconnect_t *d = &msg->disconnect;

  mif->remote_disc_string = memif_str2vec (d->string, sizeof (d->string));
  return clib_error_return (0, "disconnect received");
}

static clib_error_t *
memif_msg_receive (memif_if_t ** mifp, clib_socket_t * sock, clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_msg_t msg = { 0 };
  clib_error_t *err = 0;
  int fd = -1;
  int i;
  memif_if_t *mif = *mifp;

  err = clib_socket_recvmsg (sock, &msg, sizeof (memif_msg_t), &fd, 1);
  if (err)
    goto error;

  if (mif == 0 && msg.type != MEMIF_MSG_TYPE_INIT)
    {
      memif_socket_close (&sock);
      err = clib_error_return (0, "unexpected message received");
      goto error;
    }

  vlib_log_debug (mm->log_class, "Message type %u received", msg.type);
  /* process the message based on its type */
  switch (msg.type)
    {
    case MEMIF_MSG_TYPE_ACK:
      break;

    case MEMIF_MSG_TYPE_HELLO:
      if ((err = memif_msg_receive_hello (mif, &msg)))
	goto error;
      if ((err = memif_init_regions_and_queues (mif)))
	goto error;
      memif_msg_enq_init (mif);
      /* *INDENT-OFF* */
      vec_foreach_index (i, mif->regions)
	memif_msg_enq_add_region (mif, i);
      vec_foreach_index (i, mif->tx_queues)
	memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M);
      vec_foreach_index (i, mif->rx_queues)
	memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S);
      /* *INDENT-ON* */
      memif_msg_enq_connect (mif);
      break;

    case MEMIF_MSG_TYPE_INIT:
      if ((err = memif_msg_receive_init (mifp, &msg, sock, uf->private_data)))
	goto error;
      mif = *mifp;
      vec_reset_length (uf->description);
      uf->description = format (uf->description, "%U ctl",
				format_memif_device_name, mif->dev_instance);
      memif_msg_enq_ack (mif);
      break;

    case MEMIF_MSG_TYPE_ADD_REGION:
      if ((err = memif_msg_receive_add_region (mif, &msg, fd)))
	goto error;
      memif_msg_enq_ack (mif);
      break;

    case MEMIF_MSG_TYPE_ADD_RING:
      if ((err = memif_msg_receive_add_ring (mif, &msg, fd)))
	goto error;
      memif_msg_enq_ack (mif);
      break;

    case MEMIF_MSG_TYPE_CONNECT:
      if ((err = memif_msg_receive_connect (mif, &msg)))
	goto error;
      memif_msg_enq_connected (mif);
      break;

    case MEMIF_MSG_TYPE_CONNECTED:
      if ((err = memif_msg_receive_connected (mif, &msg)))
	goto error;
      break;

    case MEMIF_MSG_TYPE_DISCONNECT:
      if ((err = memif_msg_receive_disconnect (mif, &msg)))
	goto error;
      break;

    default:
      err = clib_error_return (0, "unknown message type (0x%x)", msg.type);
      goto error;
    }

  if (clib_fifo_elts (mif->msg_queue))
    clib_file_set_data_available_to_write (&file_main,
					   mif->sock->private_data, 1);
  return 0;

error:
  vlib_log_err (mm->log_class, "%U", format_clib_error, err);
  return err;
}

clib_error_t *
memif_master_conn_fd_read_ready (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_socket_file_t *msf =
    pool_elt_at_index (mm->socket_files, uf->private_data);
  uword *p;
  memif_if_t *mif = 0;
  clib_socket_t *sock = 0;
  clib_error_t *err = 0;

  p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor);
  if (p)
    {
      mif = vec_elt_at_index (mm->interfaces, p[0]);
      sock = mif->sock;
    }
  else
    {
      /* This is new connection, remove index from pending vector */
      int i;
      vec_foreach_index (i, msf->pending_clients)
	if (msf->pending_clients[i]->fd == uf->file_descriptor)
	{
	  sock = msf->pending_clients[i];
	  vec_del1 (msf->pending_clients, i);
	  break;
	}
      ASSERT (sock != 0);
    }
  err = memif_msg_receive (&mif, sock, uf);
  if (err)
    {
      memif_disconnect (mif, err);
      clib_error_free (err);
    }
  return 0;
}

clib_error_t *
memif_slave_conn_fd_read_ready (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  clib_error_t *err;
  memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data);
  err = memif_msg_receive (&mif, mif->sock, uf);
  if (err)
    {
      memif_disconnect (mif, err);
      clib_error_free (err);
    }
  return 0;
}

static clib_error_t *
memif_conn_fd_write_ready (clib_file_t * uf, memif_if_t * mif)
{
  memif_msg_fifo_elt_t *e;
  clib_fifo_sub2 (mif->msg_queue, e);
  clib_file_set_data_available_to_write (&file_main,
					 mif->sock->private_data, 0);
  return clib_socket_sendmsg (mif->sock, &e->msg, sizeof (memif_msg_t),
			      &e->fd, e->fd > -1 ? 1 : 0);
}

clib_error_t *
memif_master_conn_fd_write_ready (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_socket_file_t *msf =
    pool_elt_at_index (mm->socket_files, uf->private_data);
  uword *p;
  memif_if_t *mif;

  p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor);
  if (!p)
    return 0;

  mif = vec_elt_at_index (mm->interfaces, p[0]);
  return memif_conn_fd_write_ready (uf, mif);
}

clib_error_t *
memif_slave_conn_fd_write_ready (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data);
  return memif_conn_fd_write_ready (uf, mif);
}

clib_error_t *
memif_slave_conn_fd_error (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data);
  clib_error_t *err;

  err = clib_error_return (0, "connection fd error");
  memif_disconnect (mif, err);
  clib_error_free (err);

  return 0;
}

clib_error_t *
memif_master_conn_fd_error (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_socket_file_t *msf =
    pool_elt_at_index (mm->socket_files, uf->private_data);
  uword *p;


  p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor);
  if (p)
    {
      memif_if_t *mif;
      clib_error_t *err;
      mif = vec_elt_at_index (mm->interfaces, p[0]);
      err = clib_error_return (0, "connection fd error");
      memif_disconnect (mif, err);
      clib_error_free (err);
    }
  else
    {
      int i;
      vec_foreach_index (i, msf->pending_clients)
	if (msf->pending_clients[i]->fd == uf->file_descriptor)
	{
	  clib_socket_t *s = msf->pending_clients[i];
	  memif_socket_close (&s);
	  vec_del1 (msf->pending_clients, i);
	  return 0;
	}
    }

  vlib_log_warn (mm->log_class, "Error on unknown file descriptor %d",
		 uf->file_descriptor);
  memif_file_del (uf);
  return 0;
}


clib_error_t *
memif_conn_fd_accept_ready (clib_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  memif_socket_file_t *msf =
    pool_elt_at_index (mm->socket_files, uf->private_data);
  clib_file_t template = { 0 };
  clib_error_t *err;
  clib_socket_t *client;

  client = clib_mem_alloc (sizeof (clib_socket_t));
  memset (client, 0, sizeof (clib_socket_t));
  err = clib_socket_accept (msf->sock, client);
  if (err)
    goto error;

  template.read_function = memif_master_conn_fd_read_ready;
  template.write_function = memif_master_conn_fd_write_ready;
  template.error_function = memif_master_conn_fd_error;
  template.file_descriptor = client->fd;
  template.private_data = uf->private_data;
  template.description = format (0, "memif in conn on %s", msf->filename);

  memif_file_add (&client->private_data, &template);

  err = memif_msg_enq_hello (client);
  if (err)
    {
      clib_socket_close (client);
      goto error;
    }

  vec_add1 (msf->pending_clients, client);

  return 0;

error:
  vlib_log_err (mm->log_class, "%U", format_clib_error, err);
  clib_mem_free (client);
  return err;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */