summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/unix-formats.c
blob: 7059686555b798e98e8635b2cda106346ac1d39c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
3
# IPFIX support {#ipfix_doc}

VPP includes a high-performance IPFIX record exporter. This note
explains how to use the internal APIs to export IPFIX data, and how to
configure and send the required IPFIX templates.

As you'll see, a bit of typing is required. 

## First: create an ipfix "report"

Include the flow report header file, fill out a @ref
vnet_flow_report_add_del_args_t structure, and call vnet_flow_report_add_del.

```{.c}
   #include <vnet/ipfix-export/flow_report.h>
   /* Defined in flow_report.h, of interest when constructing reports */

   /* ipfix field definitions for a particular report */
   typedef struct
   {
     u32 info_element;
     u32 size;
   } ipfix_report_element_t;

   /* Report add/del argument structure */
   typedef struct
   {
     /* Callback to flush current ipfix packet / frame */
     vnet_flow_data_callback_t *flow_data_callback;

     /* Callback to build the template packet rewrite string */
     vnet_flow_rewrite_callback_t *rewrite_callback;

     /* List of ipfix elements in the report */
     ipfix_report_element_t *report_elements;
     u32 n_report_elements;
     /* Kept in flow report, used e.g. by flow classifier */
     opaque_t opaque;
     /* Add / delete a report */
     int is_add;
     /* Ipfix "domain-ID", see RFC, set as desired */
     u32 domain_id;
     /* ipfix packet source port, often set to UDP_DST_PORT_ipfix */
     u16 src_port;
     /* Set by ipfix infra, needed to send data packets */
     u32 *stream_indexp;
   } vnet_flow_report_add_del_args_t;

   /* Private header file contents */

   /* Report ipfix element definition */
   #define foreach_simple_report_ipfix_element     \
   _(sourceIPv4Address, 4)                         \
   _(destinationIPv4Address, 4)                    \
   _(sourceTransportPort, 2)                       \
   _(destinationTransportPort, 2)                  \
   _(protocolIdentifier, 1)                        \
   _(flowStartMicroseconds, 8)                     \
   _(flowEndMicroseconds, 8)

   static ipfix_report_element_t simple_report_elements[] = {
   #define _(a,b) {a,b},
     foreach_simple_report_ipfix_element
   #undef _
   };

   typedef struct
   {
     /** Buffers and frames, per thread */
     vlib_buffer_t **buffers_by_thread;
     vlib_frame_t **frames_by_thread;
     u32 *next_record_offset_by_thread;

     /** Template ID's */
     u16 *template_ids;

     /** Time reference pair */
     u64 usec_time_0;
     f64 vlib_time_0;

     /** Stream index */
     u32 stream_index;

     /* Convenience */
     flow_report_main_t *flow_report_main;
     vlib_main_t *vlib_main;
     vnet_main_t *vnet_main;
   } my_logging_main_t;
   
   extern my_logging_main_t my_logging_main;

   ...

   /* Recitations */
   flow_report_main_t *frm = &flow_report_main;
   my_logging_main_t *mlm = &my_logging_main;
   vnet_flow_report_add_del_args_t a;
   int rv;
   u16 template_id;

   ... 

   /* Init function: set up time reference pair */
   mlm->vlib_time_0 = vlib_time_now (vm);
   mlm->milisecond_time_0 = unix_time_now_nsec () * 1e-6;

   ...

   /* Create a report */
   memset (&a, 0, sizeof (a));
   a.is_add = 1 /* to enable the report */;
   a.domain_id = 1 /* pick a domain ID */;
   a.src_port = UDP_DST_PORT_ipfix /* src port for reports */;

   /* Use the generic template packet rewrite string generator */
   a.rewrite_callback = vnet_flow_rewrite_generic_callback;

   /* Supply a list of ipfix report elements */
   a.report_elements = simple_report_elements;
   a.n_report_elements = ARRAY_LEN (simple_report_elements);

   /* Pointer to the ipfix stream index, set by the report infra */
   a.stream_indexp = &mlm->stream_index;
   a.flow_data_callback = my_flow_data_callback;

   /* Create the report */
   rv = vnet_flow_report_add_del (frm, &a, &template_id);
   if (rv) 
     oops...

   /* Save the template-ID for later use */
   mlm->template_id = template_id;

```

Several things are worth describing in more detail.

### vnet_flow_rewrite_generic_callback programming

This generic callback helps build ipfix template packets.  When
registering an ipfix report, pass an (array, count)
of ipfix elements as shown above. 

### my_flow_data_callback

The ipfix flow export infrastructure calls this callback to flush the
current ipfix packet, which ensures that ipfix data is not retained for
an unreasonably long period of time.

We typically code it as shown below, to call an application-specific
function with (uninteresting arguments), and "do_flush = 1":


```{.c}

      vlib_frame_t *my_flow_data_callback
                   (flow_report_main_t * frm,
	           flow_report_t * fr,
		   vlib_frame_t * f,
		   u32 * to_next, u32 node_index)
      { 

         my_buffer_flow_record (0, ... , 0, 1 /* do_flush */);
         return f;
      }
```

### my_flow_report_header

This function creates the packet header for an ipfix data packet:

```{.c}

   static inline void
   my_flow_report_header (flow_report_main_t * frm,
			  vlib_buffer_t * b0, u32 * offset)
   {
      my_logging_main_t *mlm = &my_logging_main;
      flow_report_stream_t *stream;
      ip4_ipfix_template_packet_t *tp;
      ipfix_message_header_t *h = 0;


      ipfix_set_header_t *s = 0;
      ip4_header_t *ip;
      udp_header_t *udp;

      stream = &frm->streams[mlm->stream_index];

      b0->current_data = 0;
      b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
        sizeof (*s);
      b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
      vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
      tp = vlib_buffer_get_current (b0);
      ip = (ip4_header_t *) & tp->ip4;
      udp = (udp_header_t *) (ip + 1);
      h = (ipfix_message_header_t *) (udp + 1);
      s = (ipfix_set_header_t *) (h + 1);

      ip->ip_version_and_header_length = 0x45;
      ip->ttl = 254;
      ip->protocol = IP_PROTOCOL_UDP;
      ip->flags_and_fragment_offset = 0;
      ip->src_address.as_u32 = frm->src_address.as_u32;
      ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
      udp->src_port = clib_host_to_net_u16 (stream->src_port);
      udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
      udp->checksum = 0;

      h->export_time = clib_host_to_net_u32 ((u32)
            				 (((f64) frm->unix_time_0) +
               				  (vlib_time_now (frm->vlib_main) -
               				   frm->vlib_time_0)));
         h->sequence_number = clib_host_to_net_u32 (stream->sequence_number++);
         h->domain_id = clib_host_to_net_u32 (stream->domain_id);

         *offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
   }
   ```

   ### fixup and transmit a flow record

   ```{.c}
      
      static inline void
      my_send_ipfix_pkt (flow_report_main_t * frm,
           		 vlib_frame_t * f, vlib_buffer_t * b0, u16 template_id)
      {
        ip4_ipfix_template_packet_t *tp;
        ipfix_message_header_t *h = 0;
        ipfix_set_header_t *s = 0;
        ip4_header_t *ip;
        udp_header_t *udp;
        vlib_main_t *vm = frm->vlib_main;

        tp = vlib_buffer_get_current (b0);
        ip = (ip4_header_t *) & tp->ip4;
        udp = (udp_header_t *) (ip + 1);
        h = (ipfix_message_header_t *) (udp + 1);
        s = (ipfix_set_header_t *) (h + 1);

        s->set_id_length = ipfix_set_id_length (template_id,
      					  b0->current_length -
      					  (sizeof (*ip) + sizeof (*udp) +
      					   sizeof (*h)));
        h->version_length = version_length (b0->current_length -
      				      (sizeof (*ip) + sizeof (*udp)));

        ip->length = clib_host_to_net_u16 (b0->current_length);
        ip->checksum = ip4_header_checksum (ip);
        udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));

        if (frm->udp_checksum)
          {
            udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
            if (udp->checksum == 0)
      	udp->checksum = 0xffff;
          }

        ASSERT (ip->checksum == ip4_header_checksum (ip));

        vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
      }  
   ```

   ### my_buffer_flow_record

   This is the key routine which paints individual flow records into
   an ipfix packet under construction. It's pretty straightforward
   (albeit stateful) vpp data-plane code. The code shown below is
   thread-safe by construction.

   ```{.c}
   static inline void
   my_buffer_flow_record_internal (my_flow_record_t * rp, int do_flush,
                                       u32 thread_index)
   {
     vlib_main_t *vm = vlib_mains[thread_index];
     my_logging_main_t *mlm = &jvp_ipfix_main;
     flow_report_main_t *frm = &flow_report_main;
     vlib_frame_t *f;
     vlib_buffer_t *b0 = 0;
     u32 bi0 = ~0;
     u32 offset;

     b0 = mlm->buffers_by_thread[thread_index];

     if (PREDICT_FALSE (b0 == 0))
       {
         if (do_flush)
   	return;

         if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
   	{
   	  clib_warning ("can't allocate ipfix data buffer");
   	  return;
   	}

         b0 = vlib_get_buffer (vm, bi0);
         VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
         offset = 0;
         mlm->buffers_by_thread[thread_index] = b0;
       }
     else
       {
         bi0 = vlib_get_buffer_index (vm, b0);
         offset = mlm->next_record_offset_by_thread[thread_index];
       }

     f = mlm->frames_by_thread[thread_index];
     if (PREDICT_FALSE (f == 0))
       {
         u32 *to_next;
         f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
         mlm->frames_by_thread[thread_index] = f;
         to_next = vlib_frame_vector_args (f);
         to_next[0] = bi0;
         f->n_vectors = 1;
         mlm->frames_by_thread[thread_index] = f;
       }

     if (PREDICT_FALSE (offset == 0))
       my_flow_report_header (frm, b0, &offset);

     if (PREDICT_TRUE (do_flush == 0))
       {
         /* Paint the new ipfix data record into the buffer */
         clib_memcpy (b0->data + offset, rp, sizeof (*rp));
         offset += sizeof (*rp);
         b0->current_length += sizeof (*rp);
       }

     if (PREDICT_FALSE (do_flush || (offset + sizeof (*rp)) > frm->path_mtu))
       {
         /* Nothing to send? */
         if (offset == 0)
   	return;

         send_ipfix_pkt (frm, f, b0, mlm->template_ids[0]);
         mlm->buffers_by_thread[thread_index] = 0;
         mlm->frames_by_thread[thread_index] = 0;
         offset = 0;
       }
     mlm->next_record_offset_by_thread[thread_index] = offset;
   }  

   static void
   my_buffer_flow_record (my_flow_record_t * rp, int do_flush)
   {
     u32 thread_index = vlib_get_thread_index();
     my_buffer_flow_record_internal (rp, do_flush, thread_index);
   }  

```
href='#n1000'>1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifdef __KERNEL__

#if __linux__
# include <linux/unistd.h>
# include <linux/signal.h>
#endif

#else /* ! __KERNEL__ */

#ifdef __APPLE__
#define _XOPEN_SOURCE
#endif

#define _GNU_SOURCE		/* to get REG_* in ucontext.h */
#include <ucontext.h>
#undef _GNU_SOURCE
#undef __USE_GNU

#include <unistd.h>
#include <signal.h>
#include <grp.h>

#include <time.h>
#include <sys/socket.h>
#include <netdb.h>
#include <math.h>

#include <vppinfra/time.h>
#if __linux__
#include <vppinfra/linux/syscall.h>

#ifdef AF_NETLINK
#include <linux/types.h>
#include <linux/netlink.h>
#endif
#endif

#endif /* ! __KERNEL__ */


#ifdef __KERNEL__
# include <linux/socket.h>
# include <linux/in.h>
# include <linux/ip.h>
# include <linux/tcp.h>
# include <linux/udp.h>
# include <linux/icmp.h>
# include <linux/if_ether.h>
# include <linux/if_arp.h>
#else
# include <net/if.h>            /* struct ifnet may live here */
# include <netinet/in.h>
# include <netinet/ip.h>
# include <netinet/tcp.h>
# include <netinet/udp.h>
# include <netinet/ip_icmp.h>
# include <netinet/if_ether.h>
#endif /* __KERNEL__ */

#include <vppinfra/bitops.h> /* foreach_set_bit */
#include <vppinfra/format.h>
#include <vppinfra/error.h>

/* Format a unix protocol/address family: takes one uword argument and
   appends the matching PF_ constant name without its prefix (e.g. "INET")
   to s.  Values with no case below are rendered as "UNKNOWN". */
u8 * format_address_family (u8 * s, va_list * va)
{
  u8 * name = (u8 *) "UNKNOWN";
  uword af = va_arg (*va, uword);

  switch (af)
    {
      /* One case per family; optional families are only compiled in when
         the platform headers define them. */
#define _(x) case PF_##x: name = (u8 *) #x; break
      _ (UNSPEC);
      _ (UNIX);                 /* unix domain sockets */
      _ (INET);                 /* internet IP protocol */
#ifdef PF_AX25
      _ (AX25);                 /* amateur radio AX.25 */
#endif
#ifdef PF_IPX
      _ (IPX);                  /* Novell IPX */
#endif
#ifdef PF_APPLETALK
      _ (APPLETALK);            /* AppleTalk DDP */
#endif
#ifdef PF_NETROM
      _ (NETROM);               /* amateur radio NET/ROM */
#endif
#ifdef PF_BRIDGE
      _ (BRIDGE);               /* multiprotocol bridge */
#endif
#ifdef PF_ATMPVC
      _ (ATMPVC);               /* ATM PVCs */
#endif
#ifdef PF_X25
      _ (X25);                  /* reserved for X.25 project */
#endif
#ifdef PF_INET6
      _ (INET6);                /* IP version 6 */
#endif
#ifdef PF_ROSE
      _ (ROSE);                 /* amateur radio X.25 PLP */
#endif
#ifdef PF_DECnet
      _ (DECnet);               /* reserved for DECnet project */
#endif
#ifdef PF_NETBEUI
      _ (NETBEUI);              /* reserved for 802.2LLC project */
#endif
#ifdef PF_SECURITY
      _ (SECURITY);             /* security callback pseudo AF */
#endif
#ifdef PF_KEY
      _ (KEY);                  /* PF_KEY key management API */
#endif
#ifdef PF_NETLINK
      _ (NETLINK);
#endif
#ifdef PF_PACKET
      _ (PACKET);               /* packet family */
#endif
#ifdef PF_ASH
      _ (ASH);                  /* Ash */
#endif
#ifdef PF_ECONET
      _ (ECONET);               /* Acorn Econet */
#endif
#ifdef PF_ATMSVC
      _ (ATMSVC);               /* ATM SVCs */
#endif
#ifdef PF_SNA
      _ (SNA);                  /* Linux SNA project */
#endif
#ifdef PF_IRDA
      _ (IRDA);                 /* IRDA sockets */
#endif
#undef _
    default:
      /* keep the "UNKNOWN" placeholder */
      break;
    }

  vec_add (s, name, strlen ((char *) name));
  return s;
}

/* Format a network protocol: takes two uword arguments, address family
   and protocol number.  In user space the protocol is looked up with
   getprotobynumber() and printed by name when found, otherwise as a
   decimal number; kernel builds print "family/protocol" numerically. */
u8 * format_network_protocol (u8 * s, va_list * args)
{
  uword family = va_arg (*args, uword);
  uword protocol = va_arg (*args, uword);

#ifdef __KERNEL__
  return format (s, "%d/%d", family, protocol);
#else
  struct protoent * entry = getprotobynumber (protocol);

  /* Only IPv4 protocol numbers are expected here. */
  ASSERT (family == AF_INET);

  return entry
    ? format (s, "%s", entry->p_name)
    : format (s, "%d", protocol);
#endif
}

/* Format a transport port: takes two uword arguments, the IP protocol
   (IPPROTO_UDP selects "udp", anything else is treated as "tcp") and the
   port number in HOST byte order (callers in this file pass ntohs (...)).

   In user space the services database is consulted and the service name is
   printed when one exists, otherwise the decimal port number.  Kernel
   builds print "proto/port". */
u8 * format_network_port (u8 * s, va_list * args)
{
  uword proto = va_arg (*args, uword);
  uword port = va_arg (*args, uword);
  const char * proto_name = (proto == IPPROTO_UDP) ? "udp" : "tcp";

#ifndef __KERNEL__
  /* getservbyport() wants the port in network byte order; handing it the
     host-order value looks up the byte-swapped port on little-endian
     machines. */
  struct servent * p = getservbyport (htons (port), proto_name);

  if (p)
    return format (s, "%s", p->s_name);
  else
    return format (s, "%d", port);
#else
  return format (s, "%s/%d", proto_name, port);
#endif
}

/* Format a generic network address: takes two arguments, a uword family
   and a u8 pointer to the address bytes (network byte order).
     AF_INET   - 4 bytes, printed as a dotted quad;
     AF_UNSPEC - 6 bytes, printed as an ethernet address.
   Any other family is reported through clib_error. */
u8 * format_network_address (u8 * s, va_list * args)
{
  uword family = va_arg (*args, uword);
  u8 * a = va_arg (*args, u8 *);

  if (family == AF_INET)
    return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);

  /* We use AF_UNSPEC for ethernet addresses. */
  if (family == AF_UNSPEC)
    return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
                   a[0], a[1], a[2], a[3], a[4], a[5]);

  clib_error ("unsupported address family %d", family);
  return s;
}

/* Format a socket address: takes one argument, a pointer to a struct
   sockaddr (or one of its family-specific variants).  The output depends
   on sa_family:
     AF_INET    - "address:port", port formatted as a TCP port;
     AF_LOCAL   - "local:N" where N is a counter bumped on every call;
     AF_NETLINK - "KERNEL-NETLINK" plus the group mask when non-zero
                  (user space builds with netlink support only);
     otherwise  - "sockaddr family N". */
u8 * format_sockaddr (u8 * s, va_list * args)
{
  static u32 local_counter;
  void * raw = va_arg (*args, void *);
  struct sockaddr * generic = raw;

  switch (generic->sa_family)
    {
    case AF_INET:
      {
        struct sockaddr_in * in4 = raw;
        return format (s, "%U:%U",
                       format_network_address, AF_INET, &in4->sin_addr.s_addr,
                       format_network_port, IPPROTO_TCP, ntohs (in4->sin_port));
      }

    case AF_LOCAL:
      /*
       * Nothing in the address itself is worth printing.  The unix cli
       * code builds a node name from this output, so each call has to
       * yield a distinct string.
       */
      return format (s, "local:%u", local_counter++);

#ifndef __KERNEL__
#ifdef AF_NETLINK
    case AF_NETLINK:
      {
        struct sockaddr_nl * nl = raw;
        s = format (s, "KERNEL-NETLINK");
        if (nl->nl_groups)
          s = format (s, " (groups 0x%x)", nl->nl_groups);
        return s;
      }
#endif
#endif

    default:
      return format (s, "sockaddr family %d", generic->sa_family);
    }
}

#ifndef __APPLE__
/* Format a TCP-over-IPv4 packet: takes one argument, a u8 pointer to the
   start of the IPv4 header.  Prints source/destination address and port,
   sequence and acknowledgement numbers, the flags that are set, and the
   window / urgent pointer when relevant.

   All multi-byte header fields are read in network byte order and
   converted to host order before printing. */
u8 * format_tcp4_packet (u8 * s, va_list * args)
{
  u8 * p = va_arg (*args, u8 *);
  struct iphdr * ip = (void *) p;
  /* NOTE: the TCP header is taken to start right after a fixed-size
     struct iphdr, i.e. IP options are not accounted for. */
  struct tcphdr * tcp = (void *) (ip + 1);

  s = format (s, "tcp %U:%U -> %U:%U",
	      format_network_address, AF_INET,  &ip->saddr,
	      format_network_port, IPPROTO_TCP, ntohs (tcp->source),
	      format_network_address, AF_INET,  &ip->daddr,
	      format_network_port, IPPROTO_TCP, ntohs (tcp->dest));

  /* seq/ack_seq are 32 bit big-endian values on the wire; without the
     conversion they print byte-swapped on little-endian hosts. */
  s = format (s, ", seq 0x%08x -> 0x%08x",
	      ntohl (tcp->seq), ntohl (tcp->ack_seq));
#define _(f) if (tcp->f) s = format (s, ", " #f);
  _ (syn); _ (ack); _ (fin); _ (rst); _ (psh); _ (urg);
#undef _

  /* A zero test is byte-order neutral; only the printed value needs
     converting. */
  if (tcp->window)
    s = format (s, ", window 0x%04x", ntohs (tcp->window));
  if (tcp->urg)
    s = format (s, ", urg 0x%04x", ntohs (tcp->urg_ptr));

  return s;
}

/* Format a UDP-over-IPv4 packet: takes one argument, a u8 pointer to the
   start of the IPv4 header.  Prints "udp src:port -> dst:port"; the UDP
   header is read from directly behind a fixed-size struct iphdr. */
u8 * format_udp4_packet (u8 * s, va_list * args)
{
  u8 * pkt = va_arg (*args, u8 *);
  struct iphdr * ip4 = (void *) pkt;
  struct udphdr * uh = (void *) (ip4 + 1);

  return format (s, "udp %U:%U -> %U:%U",
                 format_network_address, AF_INET,  &ip4->saddr,
                 format_network_port, IPPROTO_UDP, ntohs (uh->source),
                 format_network_address, AF_INET,  &ip4->daddr,
                 format_network_port, IPPROTO_UDP, ntohs (uh->dest));
}

/* Format an ICMPv4 type and code: takes two uword arguments, type then
   code.  The type is always printed (or "unknown type 0x.."); for
   destination-unreachable, redirect and time-exceeded messages the code is
   appended after a single space (or " unknown code 0x.."). */
u8 * format_icmp4_type_and_code (u8 * s, va_list * args)
{
  uword icmp_type = va_arg (*args, uword);
  uword icmp_code = va_arg (*args, uword);

  switch (icmp_type)
    {
#define _(f,str) case ICMP_##f: s = format (s, str); break;
      _ (ECHOREPLY, "echo reply");
      _ (DEST_UNREACH, "unreachable");
      _ (SOURCE_QUENCH, "source quench");
      _ (REDIRECT, "redirect");
      _ (ECHO, "echo request");
      _ (TIME_EXCEEDED, "time exceeded");
      _ (PARAMETERPROB, "parameter problem");
      _ (TIMESTAMP, "timestamp request");
      _ (TIMESTAMPREPLY, "timestamp reply");
      _ (INFO_REQUEST, "information request");
      _ (INFO_REPLY, "information reply");
      _ (ADDRESS, "address mask request");
      _ (ADDRESSREPLY, "address mask reply");
#undef _
    default:
      s = format (s, "unknown type 0x%x", icmp_type);
      break;
    }

  /* Code formatter shared by the three switches below.  The description is
     already a string literal, so it is pasted next to the separating space
     by plain literal concatenation; stringifying it with '#' would embed
     the double quotes in the output. */
#define _(f,str) case ICMP_##f: s = format (s, " " str); break;

  if (icmp_type == ICMP_DEST_UNREACH)
    {
      switch (icmp_code)
	{
	  _ (NET_UNREACH, "network");
	  _ (HOST_UNREACH, "host");
	  _ (PROT_UNREACH, "protocol");
	  _ (PORT_UNREACH, "port");
	  _ (FRAG_NEEDED, ": fragmentation needed/DF set");
	  _ (SR_FAILED, "source route failed");
	  _ (NET_UNKNOWN, "network unknown");
	  _ (HOST_UNKNOWN, "host unknown");
	  _ (HOST_ISOLATED, "host isolated");
	  _ (NET_ANO, "network: admin. prohibited");
	  _ (HOST_ANO, "host: admin. prohibited");
	  _ (NET_UNR_TOS, "network for type-of-service");
	  _ (HOST_UNR_TOS, "host for type-of-service");
	  _ (PKT_FILTERED, ": packet filtered");
	  _ (PREC_VIOLATION, "precedence violation");
	  _ (PREC_CUTOFF, "precedence cut off");
	default:
	  /* leading space keeps the code apart from the type text */
	  s = format (s, " unknown code 0x%x", icmp_code);
	  break;
	}
    }
  else if (icmp_type == ICMP_REDIRECT)
    {
      switch (icmp_code)
	{
	  _ (REDIR_NET, "network");
	  _ (REDIR_HOST, "host");
	  _ (REDIR_NETTOS, "network for type-of-service");
	  _ (REDIR_HOSTTOS, "host for type-of-service");
	default:
	  s = format (s, " unknown code 0x%x", icmp_code);
	  break;
	}
    }
  else if (icmp_type == ICMP_TIME_EXCEEDED)
    {
      switch (icmp_code)
	{
	  _ (EXC_TTL, "time-to-live zero in transit");
	  _ (EXC_FRAGTIME, "time-to-live zero during reassembly");
	default:
	  s = format (s, " unknown code 0x%x", icmp_code);
	  break;
	}
    }

#undef _

  return s;
}

/* Leading fields of an ICMPv4 header.  format_icmp4_packet overlays this
   on the bytes that follow the IP header and reads type and code. */
typedef struct {
  u8 type;
  u8 code;
  u16 checksum;
} icmp4_t;

/* Format an ICMP-over-IPv4 packet: takes one argument, a u8 pointer to
   the start of the IPv4 header.  Prints "icmp <type/code> src -> dst"; the
   ICMP header is read from directly behind a fixed-size struct iphdr. */
u8 * format_icmp4_packet (u8 * s, va_list * args)
{
  u8 * pkt = va_arg (*args, u8 *);
  struct iphdr * ip4 = (void *) pkt;
  icmp4_t * hdr = (void *) (ip4 + 1);

  return format (s, "icmp %U %U -> %U",
                 format_icmp4_type_and_code, hdr->type, hdr->code,
                 format_network_address, AF_INET,  &ip4->saddr,
                 format_network_address, AF_INET,  &ip4->daddr);
}

/* Format an IPv4 type-of-service byte: takes one uword argument.  Each
   IPTOS_ service flag that is set is printed followed by ", ", then the
   precedence selected by IPTOS_PREC () is printed by name. */
u8 * format_ip4_tos_byte (u8 * s, va_list * args)
{
  uword tos_byte = va_arg (*args, uword);

  /* Service flags, in fixed output order. */
  if (tos_byte & IPTOS_LOWDELAY)
    s = format (s, "minimize-delay, ");

  if (tos_byte & IPTOS_MINCOST)
    s = format (s, "minimize-cost, ");

  if (tos_byte & IPTOS_THROUGHPUT)
    s = format (s, "maximize-throughput, ");

  if (tos_byte & IPTOS_RELIABILITY)
    s = format (s, "maximize-reliability, ");

  /* Precedence field. */
  switch (IPTOS_PREC (tos_byte))
    {
#define _(x,y) case IPTOS_PREC_##x: s = format (s, y); break
      _ (NETCONTROL, "network");
      _ (INTERNETCONTROL, "internet");
      _ (CRITIC_ECP, "critical");
      _ (FLASH, "flash");
      _ (FLASHOVERRIDE, "flash-override");
      _ (IMMEDIATE, "immediate");
      _ (PRIORITY, "priority");
      _ (ROUTINE, "routine");
#undef _
    }

  return s;
}

/* Format an IPv4 packet: takes one argument, a u8 pointer to the start of
   the IPv4 header.  TCP, UDP and ICMP packets are handed to their dedicated
   formatters; every other protocol is printed as
   "<protocol>: src -> dst". */
u8 * format_ip4_packet (u8 * s, va_list * args)
{
  u8 * pkt = va_arg (*args, u8 *);
  struct iphdr * ip4 = (void *) pkt;

  /* Protocols with a dedicated formatter. */
  switch (ip4->protocol)
    {
    case IPPROTO_TCP:
      return format (s, "%U", format_tcp4_packet, pkt);

    case IPPROTO_UDP:
      return format (s, "%U", format_udp4_packet, pkt);

    case IPPROTO_ICMP:
      return format (s, "%U", format_icmp4_packet, pkt);

    default:
      break;
    }

  /* Generic fallback. */
  return format (s, "%U: %U -> %U",
                 format_network_protocol, AF_INET, ip4->protocol,
                 format_network_address, AF_INET,  &ip4->saddr,
                 format_network_address, AF_INET,  &ip4->daddr);
}

/* X-macro table of ARP hardware types: each entry is _ (NAME, number).
   format_unix_arphrd expands it into one `case ARPHRD_<NAME>` per entry and
   uses only the name; the number is not referenced by that expansion. */
#define foreach_unix_arphrd_type		\
  _ (NETROM, 0)					\
  _ (ETHER, 1)					\
  _ (EETHER, 2)					\
  _ (AX25, 3)					\
  _ (PRONET, 4)					\
  _ (CHAOS, 5)					\
  _ (IEEE802, 6)				\
  _ (ARCNET, 7)					\
  _ (APPLETLK, 8)				\
  _ (DLCI, 15)					\
  _ (ATM, 19)					\
  _ (METRICOM, 23)				\
  _ (IEEE1394, 24)				\
  _ (EUI64, 27)					\
  _ (INFINIBAND, 32)				\
  _ (SLIP, 256)					\
  _ (CSLIP, 257)				\
  _ (SLIP6, 258)				\
  _ (CSLIP6, 259)				\
  _ (RSRVD, 260)				\
  _ (ADAPT, 264)				\
  _ (ROSE, 270)					\
  _ (X25, 271)					\
  _ (HWX25, 272)				\
  _ (PPP, 512)					\
  _ (HDLC, 513)					\
  _ (LAPB, 516)					\
  _ (DDCMP, 517)				\
  _ (RAWHDLC, 518)				\
  _ (TUNNEL, 768)				\
  _ (TUNNEL6, 769)				\
  _ (FRAD, 770)					\
  _ (SKIP, 771)					\
  _ (LOOPBACK, 772)				\
  _ (LOCALTLK, 773)				\
  _ (FDDI, 774)					\
  _ (BIF, 775)					\
  _ (SIT, 776)					\
  _ (IPDDP, 777)				\
  _ (IPGRE, 778)				\
  _ (PIMREG, 779)				\
  _ (HIPPI, 780)				\
  _ (ASH, 781)					\
  _ (ECONET, 782)				\
  _ (IRDA, 783)					\
  _ (FCPP, 784)					\
  _ (FCAL, 785)					\
  _ (FCPL, 786)					\
  _ (FCFABRIC, 787)				\
  _ (IEEE802_TR, 800)				\
  _ (IEEE80211, 801)				\
  _ (IEEE80211_PRISM, 802)			\
  _ (IEEE80211_RADIOTAP, 803)			\
  _ (VOID, 0xFFFF)				\
  _ (NONE, 0xFFFE)

/* Format an ARP hardware type: takes one u32 argument and prints the
   matching ARPHRD_ constant name without its prefix, or "unknown 0x.." when
   no entry of foreach_unix_arphrd_type matches.  Under Coverity the body is
   compiled out and s is returned untouched. */
u8 * format_unix_arphrd (u8 * s, va_list * args)
{
#ifndef __COVERITY__ /* doesn't understand this at all... */
  u32 hw_type = va_arg (*args, u32);
  char * name = 0;

  switch (hw_type)
    {
#define _(f,n) case ARPHRD_##f: name = #f; break;
      foreach_unix_arphrd_type
#undef _
    default:
      break;
    }

  s = name
    ? format (s, "%s", name)
    : format (s, "unknown 0x%x", hw_type);
#endif
  return s;
}

/* X-macro list of interface flag names.  It is expanded into
   unix_interface_flag_names, which format_unix_interface_flags indexes by
   bit number, so the position of an entry in this list is the bit it
   names. */
#define foreach_unix_interface_flag		\
  _ (up)					\
  _ (broadcast)					\
  _ (debug)					\
  _ (loopback)					\
  _ (pointopoint)				\
  _ (notrailers)				\
  _ (running)					\
  _ (noarp)					\
  _ (promisc)					\
  _ (allmulti)					\
  _ (master)					\
  _ (slave)					\
  _ (multicast)					\
  _ (portsel)					\
  _ (automedia)					\
  _ (dynamic)					\
  _ (lower_up)					\
  _ (dormant)					\
  _ (echo)

/* Flag name strings generated from foreach_unix_interface_flag; element i
   is the stringified i-th entry of that list. */
static char * unix_interface_flag_names[] = {
#define _(f) #f,
  foreach_unix_interface_flag
#undef _
};

/* Format a unix interface flag word: takes one u32 argument.  Prints
   "none" for zero; otherwise the name of every set bit, lowest-numbered
   names taken from unix_interface_flag_names and "unknown N" for bits
   beyond that table, separated by ", ". */
u8 * format_unix_interface_flags (u8 * s, va_list * args)
{
  u32 x = va_arg (*args, u32);
  u32 i;

  if (x == 0)
    return format (s, "none");

  foreach_set_bit (i, x, ({
    if (i < ARRAY_LEN (unix_interface_flag_names))
      s = format (s, "%s", unix_interface_flag_names[i]);
    else
      s = format (s, "unknown %d", i);
    /* Emit a separator only when a higher bit is still set.  Bit 31 is the
       top bit of a u32, so nothing can follow it; testing i first also
       avoids shifting a 32 bit value by 32, which C leaves undefined. */
    if (i < 31 && (x >> (i + 1)))
      s = format (s, ", ");
  }));
  return s;
}

/* ARP packet layout for ethernet hardware addresses (6 bytes) and IPv4
   protocol addresses (4 bytes); format_arp_packet reads its argument
   through this structure. */
typedef struct {
  u16 ar_hrd;			/* format of hardware address	*/
  u16 ar_pro;			/* format of protocol address	*/
  u8  ar_hln;			/* length of hardware address	*/
  u8  ar_pln;			/* length of protocol address	*/
  u16 ar_op;			/* ARP opcode (command)		*/
  u8  ar_sha[6];		/* sender hardware address	*/
  u8  ar_spa[4];		/* sender IP address		*/
  u8  ar_tha[6];		/* target hardware address	*/
  u8  ar_tpa[4];		/* target IP address		*/
} arp_ether_ip4_t;

/* Format an ethernet/IPv4 ARP packet: takes one argument, a pointer to the
   ARP header (arp_ether_ip4_t, fields in network byte order as received).
   Prints "<op> sender-ip sender-mac -> target-ip target-mac".  Packets
   that are not IPv4-over-ethernet ARP leave s unchanged. */
u8 * format_arp_packet (u8 * s, va_list * args)
{
  arp_ether_ip4_t * a = va_arg (*args, arp_ether_ip4_t *);
  char * op = "unknown";

  /* The 16 bit header fields are big-endian on the wire while ETH_P_*,
     ARPHRD_* and ARPOP_* are host-order constants, so convert before
     comparing; otherwise nothing matches on little-endian hosts. */
  if (ntohs (a->ar_pro) != ETH_P_IP ||
      ntohs (a->ar_hrd) != ARPHRD_ETHER)
    return s;

  switch (ntohs (a->ar_op))
    {
#define _(f) case ARPOP_##f: op = #f; break;
      _ (REQUEST);
      _ (REPLY);
      _ (RREQUEST);
      _ (RREPLY);
#undef _
    default:
      /* keep "unknown" */
      break;
    }

  s = format (s, "%s %U %U -> %U %U",
	      op,
	      format_network_address, AF_INET,   a->ar_spa,
	      format_network_address, AF_UNSPEC, a->ar_sha,
	      format_network_address, AF_INET,   a->ar_tpa,
	      format_network_address, AF_UNSPEC, a->ar_tha);
  return s;
}

/* Format an ethernet protocol (ether-type): takes one uword argument.
   Zero prints as "BPDU", known values print as the ETH_P_ constant name
   without its prefix, anything else as "ether-type 0x..". */
u8 * format_ethernet_proto (u8 * s, va_list * args)
{
  uword type = va_arg (*args, uword);
  char * t = 0;

  switch (type)
    {
    case 0: t = "BPDU"; break;
#define _(f) case ETH_P_##f: t = #f; break;
      _ (LOOP);
      _ (PUP);
#ifdef ETH_P_PUPAT
      _ (PUPAT);
#endif
      _ (IP);
      _ (X25);
      _ (ARP);
      _ (BPQ);
      /* Each optional constant is guarded by its own macro, so a platform
         that defines only some of them still compiles. */
#ifdef ETH_P_IEEEPUP
      _ (IEEEPUP);
#endif
#ifdef ETH_P_IEEEPUPAT
      _ (IEEEPUPAT);
#endif
      _ (DEC);
      _ (DNA_DL);
      _ (DNA_RC);
      _ (DNA_RT);
      _ (LAT);
      _ (DIAG);
      _ (CUST);
      _ (SCA);
      _ (RARP);
      _ (ATALK);
      _ (AARP);
      _ (IPX);
      _ (IPV6);
#ifdef ETH_P_PPP_DISC
      _ (PPP_DISC);
      _ (PPP_SES);
#endif
#ifdef ETH_P_ATMMPOA
      _ (ATMMPOA);
      _ (ATMFATE);
#endif
      _ (802_3);
      _ (AX25);
      _ (ALL);
      _ (802_2);
      _ (SNAP);
      _ (DDCMP);
      _ (WAN_PPP);
      _ (PPP_MP);
      _ (LOCALTALK);
      _ (PPPTALK);
      _ (TR_802_2);
      _ (MOBITEX);
      _ (CONTROL);
      _ (IRDA);
#ifdef ETH_P_ECONET
      _ (ECONET);
#endif
#undef _
    default:
      /* unknown type: t stays 0 and the numeric form is printed */
      break;
    }

  if (t)
    vec_add (s, t, strlen (t));
  else
    s = format (s, "ether-type 0x%x", type);
  return s;
}

/* Format an ethernet frame: takes one argument, a pointer to the ethernet
   header (struct ethhdr, as received).  Prints
   "<protocol>: source-mac -> dest-mac"; ARP payloads are additionally
   formatted on a following line at the current indent. */
u8 * format_ethernet_packet (u8 * s, va_list * args)
{
  struct ethhdr * h = va_arg (*args, struct ethhdr *);
  /* h_proto is big-endian on the wire; convert it so that it can be
     compared with the host-order ETH_* constants below. */
  uword proto = ntohs (h->h_proto);
  u8 * payload = (void *) (h + 1);
  u32 indent;

  /* Check for 802.2/802.3 encapsulation. */
  if (proto < ETH_DATA_LEN)
    {
      typedef struct {
	u8 dsap, ssap, control;
	u8 orig_code[3];
	u16 proto;
      } ethhdr_802_t;
      ethhdr_802_t * h1 = (void *) (h + 1);
      /* The encapsulated protocol field is in network byte order too. */
      proto = ntohs (h1->proto);
      payload = (void *) (h1 + 1);
    }

  indent = format_get_indent (s);

  s = format (s, "%U: %U -> %U",
	      format_ethernet_proto, proto,
	      format_network_address, AF_UNSPEC, h->h_source,
	      format_network_address, AF_UNSPEC, h->h_dest);

  switch (proto)
    {
    case ETH_P_ARP:
      s = format (s, "\n%U%U",
		  format_white_space, indent,
		  format_arp_packet, payload);
      break;

    default:
      /* no payload formatter for other protocols */
      break;
    }

  return s;
}

#ifndef __KERNEL__
/* Format the local host name as reported by gethostname(); takes no
   arguments.  Prints "noname" when the call fails. */
u8 * format_hostname (u8 * s, va_list * args)
{
  char buffer[1024];
  char * b = buffer;

  if (gethostname (b, sizeof (buffer)) < 0)
    b = "noname";
  else
    /* A truncated name is not guaranteed to be NUL terminated; make sure
       the "%s" below cannot read past the buffer. */
    buffer[sizeof (buffer) - 1] = 0;

  return format (s, "%s", b);
}
#endif

#ifndef __KERNEL__
/* Format a struct timeval as local time: takes two arguments, a char *
   template and a struct timeval *.

   A null template selects "y/m/d H:M:S:F"; a null timeval selects the
   current time.  Template characters y, m, d, H, M, S and F are replaced by
   year, month, day of month, hour, minute, second and millisecond; every
   other character is copied through unchanged.

   The caller's timeval is only read, never modified. */
u8 * format_timeval (u8 * s, va_list * args)
{
  char * fmt = va_arg (*args, char *);
  struct timeval * tv = va_arg (*args, struct timeval *);
  struct timeval now;
  struct tm * tm;
  time_t sec;
  word msec;
  char * f, c;

  if (! fmt)
    fmt = "y/m/d H:M:S:F";

  if (! tv)
    {
      gettimeofday (&now, 0);
      tv = &now;
    }

  /* Work on a private copy of the seconds: rounding the microseconds up to
     a full second must carry into the value we print, but must not leak
     into the timeval owned by the caller. */
  sec = tv->tv_sec;
  msec = flt_round_nearest (1e-3 * tv->tv_usec);
  if (msec >= 1000)
    { msec = 0; sec++; }

  tm = localtime (&sec);

  for (f = fmt; *f; f++)
    {
      uword what;
      char * what_fmt = "%d";

      switch (c = *f)
	{
	default:
	  /* literal character */
	  vec_add1 (s, c);
	  continue;

	case 'y':
	  what = 1900 + tm->tm_year;
	  what_fmt = "%4d";
	  break;
	case 'm':
	  what = tm->tm_mon + 1;
	  what_fmt = "%02d";
	  break;
	case 'd':
	  what = tm->tm_mday;
	  what_fmt = "%02d";
	  break;
	case 'H':
	  what = tm->tm_hour;
	  what_fmt = "%02d";
	  break;
	case 'M':
	  what = tm->tm_min;
	  what_fmt = "%02d";
	  break;
	case 'S':
	  what = tm->tm_sec;
	  what_fmt = "%02d";
	  break;
	case 'F':
	  what = msec;
	  what_fmt = "%03d";
	  break;
	}

      s = format (s, what_fmt, what);
    }

  return s;
}
#endif

/* Format function for a time expressed as floating-point seconds.

   Arguments taken from args, in order:
     u8 * fmt  - layout string forwarded unchanged to format_timeval.
     f64 t     - time in seconds; a value <= 0 is replaced by
                 unix_time_now ().

   The value is split into whole seconds and microseconds and printed
   through format_timeval.  Returns the vector s with text appended. */
u8 * format_time_float (u8 * s, va_list * args)
{
  u8 * timeval_fmt = va_arg (*args, u8 *);
  f64 seconds = va_arg (*args, f64);
  struct timeval when;

  if (seconds <= 0)
    seconds = unix_time_now ();

  /* Integral part first; the fraction is derived from what was stored. */
  when.tv_sec = seconds;
  when.tv_usec = 1e6 * (seconds - when.tv_sec);

  s = format (s, "%U", format_timeval, timeval_fmt, &when);
  return s;
}

/* Format function: appends the symbolic name of a signal number.

   Argument taken from args: uword signum.
   Known signals are printed as their macro name (e.g. "SIGSEGV");
   anything else is printed as "unknown <number>".
   Returns the vector s with the text appended. */
u8 * format_signal (u8 * s, va_list * args)
{
  uword signum = va_arg (*args, uword);
  char * t = 0;
  switch (signum)
    {
      /* Each entry maps the signal constant to its own spelling. */
#define _(x) case x: t = #x; break;
      _ (SIGHUP);
      _ (SIGINT);
      _ (SIGQUIT);
      _ (SIGILL);
      _ (SIGTRAP);
      _ (SIGABRT);
      _ (SIGBUS);
      _ (SIGFPE);
      _ (SIGKILL);
      _ (SIGUSR1);
      _ (SIGSEGV);
      _ (SIGUSR2);
      _ (SIGPIPE);
      _ (SIGALRM);
      _ (SIGTERM);
      /* Signals that are not defined on every platform are guarded. */
#ifdef SIGSTKFLT
      _ (SIGSTKFLT);
#endif
      _ (SIGCHLD);
      _ (SIGCONT);
      _ (SIGSTOP);
      _ (SIGTSTP);
      _ (SIGTTIN);
      _ (SIGTTOU);
      _ (SIGURG);
      _ (SIGXCPU);
      _ (SIGXFSZ);
      _ (SIGVTALRM);
      _ (SIGPROF);
      _ (SIGWINCH);
      _ (SIGIO);
#ifdef SIGPWR
      _ (SIGPWR);
#endif
#ifdef SIGSYS
      _ (SIGSYS);
#endif
#undef _
    default:
      /* Pass an int so the argument matches the %d conversion. */
      return format (s, "unknown %d", (int) signum);
    }

  vec_add (s, t, strlen (t));
  return s;
}

/* Format function: appends the program counter stored in a ucontext_t.

   Argument taken from args: ucontext_t * uc.
   The register array and the index of the program-counter register are
   chosen per architecture at compile time.  On architectures without a
   branch below, the text "unsupported" is appended instead.
   Returns the vector s with the text appended. */
u8 * format_ucontext_pc (u8 * s, va_list * args)
{
  /* Only referenced inside the architecture branches below. */
  ucontext_t * ctx __attribute__((unused));
  unsigned long * gregs = 0;
  uword pc_index = 0;

  ctx = va_arg (*args, ucontext_t *);

#if defined (powerpc)
  gregs = &ctx->uc_mcontext.uc_regs->gregs[0];
  pc_index = PT_NIP;
#elif defined (powerpc64)
  gregs = &ctx->uc_mcontext.uc_regs->gp_regs[0];
  pc_index = PT_NIP;
#elif defined (i386)
  gregs = (void *) &ctx->uc_mcontext.gregs[0];
  pc_index = REG_EIP;
#elif defined (__x86_64__)
  gregs = (void *) &ctx->uc_mcontext.gregs[0];
  pc_index = REG_RIP;
#endif

  /* gregs is still null when no branch above matched. */
  if (gregs)
    return format (s, "%p", gregs[pc_index]);

  return format (s, "unsupported");
}

/* Unformat function: parses a Unix group given either as a numeric id
   or as a group name.

   Argument taken from args: gid_t * gid, written only on success.
   A leading number is looked up with getgrgid(); otherwise a string is
   looked up with getgrnam().  Returns 1 when the group exists in the
   group database, 0 otherwise (including when nothing could be
   parsed). */
uword
unformat_unix_gid (unformat_input_t * input, va_list * args)
{
  gid_t *result = va_arg (*args, gid_t *);
  struct group *entry = 0;
  u8 *name;
  int id;

  if (unformat (input, "%d", &id))
    entry = getgrgid (id);
  else if (unformat (input, "%s", &name))
    {
      entry = getgrnam ((char *) name);
      /* The parsed name is only needed for the lookup. */
      vec_free (name);
    }

  if (entry == 0)
    return 0;

  *result = entry->gr_gid;
  return 1;
}

/* Highest NUMA node count tracked individually by format_page_map. */
#define MAX_NUMNODES 16

/* Format function: summarises where the pages of a virtual memory
   region reside.

   Arguments taken from args, in order:
     uword va    - start address of the region.
     uword size  - length of the region in bytes.

   Prints a header line, then (when move_pages() succeeds) one line per
   NUMA node that holds pages, a "not mapped" line for pages reported as
   -EFAULT, and an "unknown" line for any other status.  When
   move_pages() fails a single "page information not available" line is
   printed.  Regions shorter than one page only get the header line.
   Returns the vector s with the text appended. */
u8 *
format_page_map (u8 * s, va_list * args)
{
  uword va = va_arg (*args, uword);
  uword size = va_arg (*args, uword);
  uword page_size = clib_mem_get_page_size ();
  u32 indent = format_get_indent (s);
  uword n_pages = size / page_size;
  uword pages_per_numa[MAX_NUMNODES] = { 0 };
  uword pages_not_mapped = 0;
  uword pages_unknown = 0;
  int *status = 0;
  void **ptr = 0;
  uword i;
  int node;

  s = format (s, "virtual memory start 0x%llx, size %lluk, %u pages, "
	      "page size %uk", va, size / 1024, n_pages, page_size / 1024);

  /* Nothing to query for a region shorter than one page.  Without this
     guard n_pages - 1 wraps around to the largest uword value and is
     passed to vec_validate as the index to make valid. */
  if (n_pages == 0)
    return s;

  vec_validate (status, n_pages - 1);
  vec_validate (ptr, n_pages - 1);

  /* One query address per page of the region. */
  for (i = 0; i < n_pages; i++)
    ptr[i] = uword_to_pointer (va + i * page_size, void *);

  /* A null nodes argument asks only for the current status of each page. */
  if (move_pages (0, n_pages, ptr, 0, status, 0) != 0)
    {
      s = format (s, "\n%Upage information not available (errno %u)",
		  format_white_space, indent + 2, errno);
      goto done;
    }

  /* Non-negative status values are treated as node numbers; -EFAULT is
     counted as not mapped; everything else is counted as unknown. */
  for (i = 0; i < n_pages; i++)
    {
      if (status[i] >= 0 && status[i] < MAX_NUMNODES)
	pages_per_numa[status[i]]++;
      else if (status[i] == -EFAULT)
	pages_not_mapped++;
      else
	pages_unknown++;
    }

  for (node = 0; node < MAX_NUMNODES; node++)
    if (pages_per_numa[node])
      s = format (s, "\n%Unuma %u: %d pages, %luk", format_white_space,
		  indent + 2, node, pages_per_numa[node],
		  pages_per_numa[node] * page_size / 1024);

  s = format (s, "\n%Unot mapped: %u pages, %luk", format_white_space,
	      indent + 2, pages_not_mapped, pages_not_mapped *
	      page_size / 1024);

  if (pages_unknown)
    s = format (s, "\n%Uunknown: %u pages, %luk", format_white_space,
		indent + 2, pages_unknown, pages_unknown * page_size / 1024);

done:
  vec_free (status);
  vec_free (ptr);
  return s;
}

#endif /* __KERNEL__ */