summaryrefslogtreecommitdiffstats
path: root/src/vnet/ip/ip.h
blob: 02a1a9636ee17d8d98b7aaff7c26426417dda2e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * ip/ip.h: ip generic (4 or 6) main
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef included_ip_main_h
#define included_ip_main_h

#include <vppinfra/hash.h>
#include <vppinfra/heap.h>	/* adjacency heap */
#include <vppinfra/ptclosure.h>

#include <vnet/vnet.h>

#include <vnet/ip/format.h>
#include <vnet/ip/ip_packet.h>
#include <vnet/ip/lookup.h>

#include <vnet/ip/tcp_packet.h>
#include <vnet/ip/udp_packet.h>
#include <vnet/ip/icmp46_packet.h>

#include <vnet/ip/ip4.h>
#include <vnet/ip/ip4_error.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/icmp4.h>

#include <vnet/ip/ip6.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/ip6_error.h>
#include <vnet/ip/icmp6.h>
#include <vnet/classify/vnet_classify.h>

/* Per protocol info: one descriptor per IP protocol, carrying the
   protocol's name and number together with the format/parse callbacks
   associated with it. */
typedef struct
{
  /* Protocol name (also used as hash key). */
  u8 *name;

  /* Protocol number. */
  ip_protocol_t protocol;

  /* Format function for this IP protocol's header. */
  format_function_t *format_header;

  /* Parser (unformat function) for this protocol's header. */
  unformat_function_t *unformat_header;

  /* Parser (unformat function) for per-protocol matches. */
  unformat_function_t *unformat_match;

  /* Parser for packet generator edits for this protocol. */
  unformat_function_t *unformat_pg_edit;
} ip_protocol_info_t;

/* Per TCP/UDP port info: one descriptor per named port, carrying the
   port's name and number together with the callbacks associated with it. */
typedef struct
{
  /* Port name (used as hash key). */
  u8 *name;

  /* UDP/TCP port number in network byte order. */
  u16 port;

  /* Port specific format function. */
  format_function_t *format_header;

  /* Parser for packet generator edits for this protocol. */
  unformat_function_t *unformat_pg_edit;
} tcp_udp_port_info_t;

/* Registry of IP protocol and TCP/UDP port descriptors, together with the
   hash tables used to find a descriptor by number or by name. */
typedef struct
{
  /* Per IP protocol info. */
  ip_protocol_info_t *protocol_infos;

  /* Protocol info index hashed by 8 bit IP protocol.
     ip_get_protocol_info() uses the stored value as an index into
     protocol_infos. */
  uword *protocol_info_by_protocol;

  /* Hash table mapping IP protocol name (see protocols.def)
     to protocol number.
     NOTE(review): the field name suggests the stored value is a
     protocol_infos index rather than a protocol number -- confirm
     against the code that populates this table. */
  uword *protocol_info_by_name;

  /* Per TCP/UDP port info. */
  tcp_udp_port_info_t *port_infos;

  /* Hash table from network-byte-order port to port info index.
     ip_get_tcp_udp_port_info() uses the stored value as an index into
     port_infos. */
  uword *port_info_by_port;

  /* Hash table mapping TCP/UDP name to port info index. */
  uword *port_info_by_name;
} ip_main_t;

/* The global ip_main_t instance; only declared here, defined elsewhere. */
extern ip_main_t ip_main;

/* Init entry point taking the vlib main and returning a clib_error_t
   pointer (presumably 0 on success -- confirm against the definition). */
clib_error_t *ip_main_init (vlib_main_t * vm);

/* Find the descriptor registered for an IP protocol number.
 *
 * im       - IP main holding the descriptor vector and lookup hash.
 * protocol - IP protocol number used as the hash key.
 *
 * Returns a pointer to the matching element of im->protocol_infos, or 0
 * when the protocol has no entry in im->protocol_info_by_protocol. */
static inline ip_protocol_info_t *
ip_get_protocol_info (ip_main_t * im, u32 protocol)
{
  uword *slot = hash_get (im->protocol_info_by_protocol, protocol);

  /* Unknown protocol: nothing registered under this key. */
  if (!slot)
    return 0;

  /* The hash value is the descriptor's index in the vector. */
  return vec_elt_at_index (im->protocol_infos, slot[0]);
}

/* Find the descriptor registered for a TCP/UDP port.
 *
 * im   - IP main holding the descriptor vector and lookup hash.
 * port - port used as the hash key; the port_info_by_port table is
 *        documented as keyed by network-byte-order port.
 *
 * Returns a pointer to the matching element of im->port_infos, or 0
 * when the port has no entry in im->port_info_by_port. */
static inline tcp_udp_port_info_t *
ip_get_tcp_udp_port_info (ip_main_t * im, u32 port)
{
  uword *slot = hash_get (im->port_info_by_port, port);

  /* Unknown port: nothing registered under this key. */
  if (!slot)
    return 0;

  /* The hash value is the descriptor's index in the vector. */
  return vec_elt_at_index (im->port_infos, slot[0]);
}

/* Fold the bytes of a (possibly chained) buffer into a running IP checksum.
 *
 * vm                  - vlib main, used to resolve next_buffer indices.
 * first_buffer        - first buffer of the chain.
 * first_buffer_offset - number of bytes to skip past the current-data
 *                       pointer of first_buffer; must not exceed
 *                       first_buffer->current_length (ASSERTed).
 * n_bytes_to_checksum - total number of bytes to fold in, counted from
 *                       the offset position and continuing through the
 *                       chained buffers.
 * sum                 - checksum accumulated so far.
 *
 * Returns the updated checksum.  If the chain holds fewer than
 * n_bytes_to_checksum bytes, the walk stops at the last buffer and only
 * the available bytes are summed. */
always_inline ip_csum_t
ip_incremental_checksum_buffer (vlib_main_t * vm,
				vlib_buffer_t * first_buffer,
				u32 first_buffer_offset,
				u32 n_bytes_to_checksum, ip_csum_t sum)
{
  vlib_buffer_t *b = first_buffer;
  u32 n_bytes_left = n_bytes_to_checksum;
  void *h;
  u32 n;

  ASSERT (b->current_length >= first_buffer_offset);

  /* Only the bytes after the offset are available in the first buffer;
     bounding by the full current_length would read past its data and
     under-count what is left for the rest of the chain. */
  n = clib_min (n_bytes_left, b->current_length - first_buffer_offset);
  h = (u8 *) vlib_buffer_get_current (b) + first_buffer_offset;
  sum = ip_incremental_checksum (sum, h, n);
  n_bytes_left -= n;

  /* Continue through the chain.  next_buffer is only followed while the
     current buffer advertises a successor, so an over-long byte count
     cannot walk off the end of the chain. */
  while (PREDICT_FALSE (n_bytes_left != 0
			&& (b->flags & VLIB_BUFFER_NEXT_PRESENT)))
    {
      b = vlib_get_buffer (vm, b->next_buffer);
      n = clib_min (n_bytes_left, b->current_length);
      h = vlib_buffer_get_current (b);
      sum = ip_incremental_checksum (sum, h, n);
      n_bytes_left -= n;
    }

  return sum;
}

/* NOTE(review): judging by the name, removes every IP address configured
   on the software interface sw_if_index -- confirm against the definition,
   which is not visible here. */
void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);

/* Graph node registrations declared here and defined elsewhere
   (presumably the IPv4 and IPv6 input ACL nodes -- confirm). */
extern vlib_node_registration_t ip4_inacl_node;
extern vlib_node_registration_t ip6_inacl_node;

#endif /* included_ip_main_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
an>, mq_conn_idx); } int vcl_mq_epoll_add_evfd (vcl_worker_t * wrk, svm_msg_q_t * mq) { struct epoll_event e = { 0 }; vcl_mq_evt_conn_t *mqc; u32 mqc_index; int mq_fd; mq_fd = svm_msg_q_get_consumer_eventfd (mq); if (wrk->mqs_epfd < 0 || mq_fd == -1) return -1; mqc = vcl_mq_evt_conn_alloc (wrk); mqc_index = vcl_mq_evt_conn_index (wrk, mqc); mqc->mq_fd = mq_fd; mqc->mq = mq; e.events = EPOLLIN; e.data.u32 = mqc_index; if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, mq_fd, &e) < 0) { VDBG (0, "failed to add mq eventfd to mq epoll fd"); return -1; } return mqc_index; } int vcl_mq_epoll_del_evfd (vcl_worker_t * wrk, u32 mqc_index) { vcl_mq_evt_conn_t *mqc; if (wrk->mqs_epfd || mqc_index == ~0) return -1; mqc = vcl_mq_evt_conn_get (wrk, mqc_index); if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_DEL, mqc->mq_fd, 0) < 0) { VDBG (0, "failed to del mq eventfd to mq epoll fd"); return -1; } return 0; } static vcl_worker_t * vcl_worker_alloc (void) { vcl_worker_t *wrk; pool_get (vcm->workers, wrk); memset (wrk, 0, sizeof (*wrk)); wrk->wrk_index = wrk - vcm->workers; wrk->forked_child = ~0; return wrk; } static void vcl_worker_free (vcl_worker_t * wrk) { pool_put (vcm->workers, wrk); } void vcl_worker_cleanup (vcl_worker_t * wrk, u8 notify_vpp) { clib_spinlock_lock (&vcm->workers_lock); if (notify_vpp) { /* Notify vpp that the worker is going away */ if (wrk->wrk_index == vcl_get_worker_index ()) vcl_send_app_worker_add_del (0 /* is_add */ ); else vcl_send_child_worker_del (wrk); /* Disconnect the binary api */ if (vec_len (vcm->workers) == 1) vppcom_disconnect_from_vpp (); else vl_client_send_disconnect (1 /* vpp should cleanup */ ); } if (wrk->mqs_epfd > 0) close (wrk->mqs_epfd); hash_free (wrk->session_index_by_vpp_handles); vec_free (wrk->mq_events); vec_free (wrk->mq_msg_vector); vcl_worker_free (wrk); clib_spinlock_unlock (&vcm->workers_lock); } static void vcl_worker_cleanup_cb (void *arg) { vcl_worker_t *wrk = vcl_worker_get_current (); u32 wrk_index = wrk->wrk_index; 
vcl_worker_cleanup (wrk, 1 /* notify vpp */ ); vcl_set_worker_index (~0); VDBG (0, "cleaned up worker %u", wrk_index); } vcl_worker_t * vcl_worker_alloc_and_init () { vcl_worker_t *wrk; /* This was initialized already */ if (vcl_get_worker_index () != ~0) return 0; /* Use separate heap map entry for worker */ clib_mem_set_thread_index (); if (pool_elts (vcm->workers) == vcm->cfg.max_workers) { VDBG (0, "max-workers %u limit reached", vcm->cfg.max_workers); return 0; } clib_spinlock_lock (&vcm->workers_lock); wrk = vcl_worker_alloc (); vcl_set_worker_index (wrk->wrk_index); wrk->thread_id = pthread_self (); wrk->current_pid = getpid (); wrk->mqs_epfd = -1; if (vcm->cfg.use_mq_eventfd) { wrk->mqs_epfd = epoll_create (1); if (wrk->mqs_epfd < 0) { clib_unix_warning ("epoll_create() returned"); goto done; } } wrk->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); clib_time_init (&wrk->clib_time); vec_validate (wrk->mq_events, 64); vec_validate (wrk->mq_msg_vector, 128); vec_reset_length (wrk->mq_msg_vector); vec_validate (wrk->unhandled_evts_vector, 128); vec_reset_length (wrk->unhandled_evts_vector); clib_spinlock_unlock (&vcm->workers_lock); done: return wrk; } int vcl_worker_register_with_vpp (void) { vcl_worker_t *wrk = vcl_worker_get_current (); clib_spinlock_lock (&vcm->workers_lock); vcm->app_state = STATE_APP_ADDING_WORKER; vcl_send_app_worker_add_del (1 /* is_add */ ); if (vcl_wait_for_app_state_change (STATE_APP_READY)) { VDBG (0, "failed to add worker to vpp"); return -1; } if (pthread_key_create (&vcl_worker_stop_key, vcl_worker_cleanup_cb)) VDBG (0, "failed to add pthread cleanup function"); if (pthread_setspecific (vcl_worker_stop_key, &wrk->thread_id)) VDBG (0, "failed to setup key value"); clib_spinlock_unlock (&vcm->workers_lock); VDBG (0, "added worker %u", wrk->wrk_index); return 0; } int vcl_worker_set_bapi (void) { vcl_worker_t *wrk = vcl_worker_get_current (); int i; /* Find the first worker with the same pid */ for (i = 0; i < 
vec_len (vcm->workers); i++) { if (i == wrk->wrk_index) continue; if (vcm->workers[i].current_pid == wrk->current_pid) { wrk->vl_input_queue = vcm->workers[i].vl_input_queue; wrk->my_client_index = vcm->workers[i].my_client_index; return 0; } } return -1; } svm_msg_q_t * vcl_worker_ctrl_mq (vcl_worker_t * wrk) { return wrk->ctrl_mq; } void vcl_cleanup_bapi (void) { socket_client_main_t *scm = &socket_client_main; api_main_t *am = vlibapi_get_main (); am->my_client_index = ~0; am->my_registration = 0; am->vl_input_queue = 0; am->msg_index_by_name_and_crc = 0; scm->socket_fd = 0; vl_client_api_unmap (); } int vcl_session_read_ready (vcl_session_t * session) { u32 max_deq; /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ if (PREDICT_FALSE (session->is_vep)) { VDBG (0, "ERROR: session %u: cannot read from an epoll session!", session->session_index); return VPPCOM_EBADFD; } if (PREDICT_FALSE (!(session->session_state & (STATE_OPEN | STATE_LISTEN)))) { vcl_session_state_t state = session->session_state; int rv; rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); VDBG (1, "session %u [0x%llx]: not open! 
state 0x%x (%s), ret %d (%s)", session->session_index, session->vpp_handle, state, vppcom_session_state_str (state), rv, vppcom_retval_str (rv)); return rv; } if (session->session_state & STATE_LISTEN) return clib_fifo_elts (session->accept_evts_fifo); if (vcl_session_is_ct (session)) return svm_fifo_max_dequeue_cons (session->ct_rx_fifo); max_deq = svm_fifo_max_dequeue_cons (session->rx_fifo); if (session->is_dgram) { session_dgram_pre_hdr_t ph; if (max_deq <= SESSION_CONN_HDR_LEN) return 0; if (svm_fifo_peek (session->rx_fifo, 0, sizeof (ph), (u8 *) & ph) < 0) return 0; if (ph.data_length + SESSION_CONN_HDR_LEN > max_deq) return 0; return ph.data_length; } return max_deq; } int vcl_session_write_ready (vcl_session_t * session) { /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ if (PREDICT_FALSE (session->is_vep)) { VDBG (0, "session %u [0x%llx]: cannot write to an epoll session!", session->session_index, session->vpp_handle); return VPPCOM_EBADFD; } if (PREDICT_FALSE (session->session_state & STATE_LISTEN)) { if (session->tx_fifo) return svm_fifo_max_enqueue_prod (session->tx_fifo); else return VPPCOM_EBADFD; } if (PREDICT_FALSE (!(session->session_state & STATE_OPEN))) { vcl_session_state_t state = session->session_state; int rv; rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); VDBG (0, "session %u [0x%llx]: not open! 
state 0x%x (%s), ret %d (%s)", session->session_index, session->vpp_handle, state, vppcom_session_state_str (state), rv, vppcom_retval_str (rv)); return rv; } if (vcl_session_is_ct (session)) return svm_fifo_max_enqueue_prod (session->ct_tx_fifo); if (session->is_dgram) { u32 max_enq = svm_fifo_max_enqueue_prod (session->tx_fifo); if (max_enq <= sizeof (session_dgram_hdr_t)) return 0; return max_enq - sizeof (session_dgram_hdr_t); } return svm_fifo_max_enqueue_prod (session->tx_fifo); } int vcl_segment_attach (u64 segment_handle, char *name, ssvm_segment_type_t type, int fd) { fifo_segment_create_args_t _a, *a = &_a; int rv; memset (a, 0, sizeof (*a)); a->segment_name = name; a->segment_type = type; if (type == SSVM_SEGMENT_MEMFD) a->memfd_fd = fd; clib_rwlock_writer_lock (&vcm->segment_table_lock); if ((rv = fifo_segment_attach (&vcm->segment_main, a))) { clib_warning ("svm_fifo_segment_attach ('%s') failed", name); return rv; } hash_set (vcm->segment_table, segment_handle, a->new_segment_indices[0]); clib_rwlock_writer_unlock (&vcm->segment_table_lock); vec_reset_length (a->new_segment_indices); return 0; } u32 vcl_segment_table_lookup (u64 segment_handle) { uword *seg_indexp; clib_rwlock_reader_lock (&vcm->segment_table_lock); seg_indexp = hash_get (vcm->segment_table, segment_handle); clib_rwlock_reader_unlock (&vcm->segment_table_lock); if (!seg_indexp) return VCL_INVALID_SEGMENT_INDEX; return ((u32) * seg_indexp); } void vcl_segment_detach (u64 segment_handle) { fifo_segment_main_t *sm = &vcm->segment_main; fifo_segment_t *segment; u32 segment_index; segment_index = vcl_segment_table_lookup (segment_handle); if (segment_index == (u32) ~ 0) return; clib_rwlock_writer_lock (&vcm->segment_table_lock); segment = fifo_segment_get_segment (sm, segment_index); fifo_segment_delete (sm, segment); hash_unset (vcm->segment_table, segment_handle); clib_rwlock_writer_unlock (&vcm->segment_table_lock); VDBG (0, "detached segment %u handle %u", segment_index, 
segment_handle); } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */