summaryrefslogtreecommitdiffstats
path: root/src/vnet/lisp-gpe/rfc.txt
blob: 5e3da150c7085c4dd7e77fbc4bd64fd081d776d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
Network Working Group                                           D. Lewis
Internet-Draft                                       Cisco Systems, Inc.
Intended status: Informational                                P. Agarwal
Expires: January 5, 2015                                        Broadcom
                                                              L. Kreeger
                                                                F. Maino
                                                                P. Quinn
                                                                M. Smith
                                                                N. Yadav
                                                     Cisco Systems, Inc.
                                                            July 4, 2014


                    LISP Generic Protocol Extension
                      draft-lewis-lisp-gpe-02.txt

Abstract

   This draft describes extending the Locator/ID Separation Protocol
   (LISP) [RFC6830], via changes to the LISP header, with three new
   capabilities: support for multi-protocol encapsulation, operations,
   administration and management (OAM) signaling, and explicit
   versioning.

Status of this Memo

   This Internet-Draft is submitted in full conformance with the
   provisions of BCP 78 and BCP 79.

   Internet-Drafts are working documents of the Internet Engineering
   Task Force (IETF).  Note that other groups may also distribute
   working documents as Internet-Drafts.  The list of current Internet-
   Drafts is at http://datatracker.ietf.org/drafts/current/.

   Internet-Drafts are draft documents valid for a maximum of six months
   and may be updated, replaced, or obsoleted by other documents at any
   time.  It is inappropriate to use Internet-Drafts as reference
   material or to cite them other than as "work in progress."

   This Internet-Draft will expire on January 5, 2015.

Copyright Notice

   Copyright (c) 2014 IETF Trust and the persons identified as the
   document authors.  All rights reserved.

   This document is subject to BCP 78 and the IETF Trust's Legal
   Provisions Relating to IETF Documents



Lewis, et al.            Expires January 5, 2015                [Page 1]

Internet-Draft       LISP Generic Protocol Extension           July 2014


   (http://trustee.ietf.org/license-info) in effect on the date of
   publication of this document.  Please review these documents
   carefully, as they describe your rights and restrictions with respect
   to this document.  Code Components extracted from this document must
   include Simplified BSD License text as described in Section 4.e of
   the Trust Legal Provisions and are provided without warranty as
   described in the Simplified BSD License.


Table of Contents

   1.  Introduction . . . . . . . . . . . . . . . . . . . . . . . . .  3
   2.  LISP Header Without Protocol Extensions  . . . . . . . . . . .  4
   3.  Generic Protocol Extension for LISP (LISP-gpe) . . . . . . . .  5
     3.1.  Multi Protocol Support . . . . . . . . . . . . . . . . . .  5
     3.2.  OAM Support  . . . . . . . . . . . . . . . . . . . . . . .  6
     3.3.  Version Bits . . . . . . . . . . . . . . . . . . . . . . .  6
   4.  Backward Compatibility . . . . . . . . . . . . . . . . . . . .  8
     4.1.  LISP-gpe Routers to (legacy) LISP Routers  . . . . . . . .  8
     4.2.  (legacy) LISP Routers to LISP-gpe Routers  . . . . . . . .  8
     4.3.  Type of Service  . . . . . . . . . . . . . . . . . . . . .  8
     4.4.  VLAN Identifier (VID)  . . . . . . . . . . . . . . . . . .  8
   5.  LISP-gpe Examples  . . . . . . . . . . . . . . . . . . . . . .  9
   6.  Security Considerations  . . . . . . . . . . . . . . . . . . . 11
   7.  Acknowledgments  . . . . . . . . . . . . . . . . . . . . . . . 12
   8.  IANA Considerations  . . . . . . . . . . . . . . . . . . . . . 13
   9.  References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
     9.1.  Normative References . . . . . . . . . . . . . . . . . . . 14
     9.2.  Informative References . . . . . . . . . . . . . . . . . . 14
   Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15





















Lewis, et al.            Expires January 5, 2015                [Page 2]

Internet-Draft       LISP Generic Protocol Extension           July 2014


1.  Introduction

   LISP [RFC6830] defines an encapsulation format that carries IPv4 or
   IPv6 (henceforth referred to as IP) packets in a LISP header and
   outer UDP/IP transport.

   The LISP header does not specify the protocol being encapsulated and
   therefore is currently limited to encapsulating only IP packet
   payloads.  Other protocols, most notably VXLAN [VXLAN] (which defines
   a similar header format to LISP), are used to encapsulate L2
   protocols such as Ethernet.  LISP [RFC6830] can be extended to
   indicate the inner protocol, enabling the encapsulation of Ethernet,
   IP or any other desired protocol all the while ensuring compatibility
   with existing LISP [RFC6830] deployments.

   As LISP is deployed, there's also the need to provide increased
   visibility and diagnostic capabilities within the overlay.

   This document describes extending LISP ([RFC6830]) via the following
   changes:

   Next Protocol Bit (P bit):  A reserved flag bit is allocated, and set
      in the LISP-gpe header to indicate that a next protocol field is
      present.

   OAM Flag Bit (O bit):  A reserved flag bit is allocated, and set in
      the LISP-gpe header, to indicate that the packet is an OAM packet.

   Version:  Two reserved bits are allocated, and set in the LISP-gpe
      header, to indicate LISP-gpe protocol version.

   Next protocol:  An 8 bit next protocol field is present in the LISP-
      gpe header.


















Lewis, et al.            Expires January 5, 2015                [Page 3]

Internet-Draft       LISP Generic Protocol Extension           July 2014


2.  LISP Header Without Protocol Extensions

   As described in the introduction, the LISP header has no protocol
   identifier that indicates the type of payload being carried by LISP.
   Because of this, LISP is limited to an IP payload.  Furthermore, the
   LISP header has no mechanism to signal OAM packets.

   The LISP header contains flags (some defined, some reserved), a
   Nonce/Map-version field and an instance ID/Locator-status-bit field.
   The flags provide flexibility to define how the reserved bits can be
   used to change the definition of the LISP header.


   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|flags|            Nonce/Map-Version                  |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+


                           Figure 1: LISP Header






























Lewis, et al.            Expires January 5, 2015                [Page 4]

Internet-Draft       LISP Generic Protocol Extension           July 2014


3.  Generic Protocol Extension for LISP (LISP-gpe)

3.1.  Multi Protocol Support

   This draft defines the following changes to the LISP header in order
   to support multi-protocol encapsulation.

   P Bit:  Flag bit 5 is defined as the Next Protocol bit.  The P bit
      MUST be set to 1 to indicate the presence of the 8 bit next
      protocol field.

      P = 0 indicates that the payload MUST conform to LISP as defined
      in [RFC6830].

      Flag bit 5 was chosen as the P bit because this flag bit is
      currently unallocated in LISP [RFC6830].

   Next Protocol Field:  The lower 8 bits of the first word are used to
      carry a next protocol.  This next protocol field contains the
      protocol of the encapsulated payload packet.

      LISP [RFC6830] uses the lower 16 bits of the first word for either
      a nonce, an echo-nonce ([RFC6830]) or to support map-versioning
      ([RFC6834]).  These are all optional capabilities that are
      indicated by setting the N, E, and the V bit respectively.

      To maintain the desired data plane compatibility, when the P bit
      is set, the N, E, and V bits MUST be set to zero.

   A new protocol registry will be requested from IANA for the Next
   Protocol field.  This draft defines the following Next Protocol
   values:

      0x1 : IPv4

      0x2 : IPv6

      0x3 : Ethernet

      0x4 : Network Service Header











Lewis, et al.            Expires January 5, 2015                [Page 5]

Internet-Draft       LISP Generic Protocol Extension           July 2014


    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|P|R|R|      Reserved                 | Next Protocol |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



                  Figure 2: LISP-gpe Next Protocol (P=1)

3.2.  OAM Support

   Flag bit 7 is defined as the O bit.  When the O bit is set to 1, the
   packet is an OAM packet and OAM processing MUST occur.  The OAM
   protocol details are out of scope for this document.  As with the
   P-bit, bit 7 is currently a reserved flag in [RFC6830].




    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|P|R|O|      Reserved                 | Next Protocol |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



                     Figure 3: LISP-gpe OAM bit (P=1)

3.3.  Version Bits

   LISP-gpe bits 8 and 9 are defined as version bits.  The version field
   is used to ensure backward compatibility going forward with future
   LISP-gpe updates.

   The initial version for LISP-gpe is 0.










Lewis, et al.            Expires January 5, 2015                [Page 6]

Internet-Draft       LISP Generic Protocol Extension           July 2014


    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|P|R|O|Ver|      Reserved             | Next Protocol |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



                   Figure 4: LISP-gpe Version bits (P=1)








































Lewis, et al.            Expires January 5, 2015                [Page 7]

Internet-Draft       LISP Generic Protocol Extension           July 2014


4.  Backward Compatibility

   Undefined (in RFC6830) flag bits 5 and 7, LISP-gpe P and O bits, were
   selected to ensure compatibility with existing LISP [RFC6830]
   deployments.

   Similarly, using P = 0 to indicate that the format of the header and
   payload conforms to [RFC6830] ensures compatibility with existing
   LISP hardware forwarding platforms.

4.1.  LISP-gpe Routers to (legacy) LISP Routers

   A LISP-gpe router MUST NOT encapsulate non-IP packets or OAM packets
   to a LISP router.  A method for determining the capabilities of a
   LISP router (gpe or "legacy") is out of the scope of this draft.

   When encapsulating IP packets to a LISP router the P bit SHOULD be
   set to 1 and the UDP port MUST be set to 4341.  OAM bit MUST be set
   to 0.  The Next Protocol field SHOULD be 0x1 (IPv4) or 0x2 (IPv6).
   The (legacy) LISP router will ignore the P bit and the protocol type
   field.  The (legacy) LISP router will treat the packet as a LISP
   packet and inspect the first nibble of the payload to determine the
   IP version.

   When the P bit is set, the N, E, and V bits MUST be set to zero.  The
   receiving (legacy) LISP router will ignore N, E and V bits, when the
   P bit is set.

4.2.  (legacy) LISP Routers to LISP-gpe Routers

   When a LISP-gpe router receives a packet from a (legacy) LISP router,
   the P bit MUST NOT be set and the UDP port MUST be 4341.  The payload
   MUST be IP, and the LISP-gpe router will inspect the first nibble of
   the payload to determine IP version.

4.3.  Type of Service

   When a LISP-gpe router performs Ethernet encapsulation, the inner
   802.1Q [IEEE8021Q] priority code point (PCP) field MAY be mapped from
   the encapsulated frame to the Type of Service field in the outer IPv4
   header, or in the case of IPv6 the 'Traffic Class' field.

4.4.  VLAN Identifier (VID)

   When a LISP-gpe router performs Ethernet encapsulation, the inner
   header 802.1Q [IEEE8021Q] VLAN Identifier (VID) MAY be mapped to, or
   used to determine the LISP Instance ID field.




Lewis, et al.            Expires January 5, 2015                [Page 8]

Internet-Draft       LISP Generic Protocol Extension           July 2014


5.  LISP-gpe Examples

   This section provides two examples of IP protocols, and one example
   of Ethernet encapsulated LISP-gpe using the generic extension
   described in this document.



    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|1|0|0|0|   Reserved                  |   NP = IPv4   |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |               Original IPv4 Packet                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



                        Figure 5: IPv4 and LISP-gpe




    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|1|0|0|0|   Reserved                  |   NP = IPv6   |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |               Original IPv6 Packet                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



                        Figure 6: IPv6 and LISP-gpe













Lewis, et al.            Expires January 5, 2015                [Page 9]

Internet-Draft       LISP Generic Protocol Extension           July 2014


    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |N|L|E|V|I|1|0|0|0|   Reserved                  | NP = Ethernet |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Instance ID/Locator-Status-Bits               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |               Original Ethernet Frame                         |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



                      Figure 7: Ethernet and LISP-gpe






































Lewis, et al.            Expires January 5, 2015               [Page 10]

Internet-Draft       LISP Generic Protocol Extension           July 2014


6.  Security Considerations

   LISP-gpe security considerations are similar to the LISP security
   considerations documented at length in LISP [RFC6830].  With LISP-
   gpe, issues such as dataplane spoofing, flooding, and traffic
   redirection are dependent on the particular protocol payload
   encapsulated.












































Lewis, et al.            Expires January 5, 2015               [Page 11]

Internet-Draft       LISP Generic Protocol Extension           July 2014


7.  Acknowledgments

   A special thank you goes to Dino Farinacci for his guidance and
   detailed review.















































Lewis, et al.            Expires January 5, 2015               [Page 12]

Internet-Draft       LISP Generic Protocol Extension           July 2014


8.  IANA Considerations

   IANA is requested to set up a registry of "Next Protocol".  These are
   8-bit values.  Next Protocol values 0, 1, 2, 3 and 4 are defined in
   this draft.  New values are assigned via Standards Action [RFC5226].

              +---------------+-------------+---------------+
              | Next Protocol | Description | Reference     |
              +---------------+-------------+---------------+
              | 0             | Reserved    | This document |
              |               |             |               |
              | 1             | IPv4        | This document |
              |               |             |               |
              | 2             | IPv6        | This document |
              |               |             |               |
              | 3             | Ethernet    | This document |
              |               |             |               |
              | 4             | NSH         | This document |
              |               |             |               |
              | 5..253        | Unassigned  |               |
              +---------------+-------------+---------------+

                                  Table 1

   There are ten bits at the beginning of the LISP-gpe header.  New
   bits are assigned via Standards Action [RFC5226].

   Bits 0-3 - Assigned by LISP [RFC6830] 
   Bit 4 - Instance ID (I bit)
   Bit 5 - Next Protocol (P bit)
   Bit 6 - Reserved
   Bit 7 - OAM (O bit)
   Bits 8-9 - Version


















Lewis, et al.            Expires January 5, 2015               [Page 13]

Internet-Draft       LISP Generic Protocol Extension           July 2014


9.  References

9.1.  Normative References

   [RFC0768]  Postel, J., "User Datagram Protocol", STD 6, RFC 768,
              August 1980.

   [RFC0791]  Postel, J., "Internet Protocol", STD 5, RFC 791,
              September 1981.

   [RFC2119]  Bradner, S., "Key words for use in RFCs to Indicate
              Requirement Levels", BCP 14, RFC 2119, March 1997.

   [RFC5226]  Narten, T. and H. Alvestrand, "Guidelines for Writing an
              IANA Considerations Section in RFCs", BCP 26, RFC 5226,
              May 2008.

9.2.  Informative References

   [ETYPES]   The IEEE Registration Authority, "IEEE 802 Numbers", 2012,
              <http://www.iana.org/assignments/ieee-802-numbers/
              ieee-802-numbers.xml>.

   [IEEE8021Q]
              The IEEE Computer Society, "Media Access Control (MAC)
              Bridges and Virtual Bridge Local Area Networks", August
              2012, <http://standards.ieee.org/getieee802/download/
              802.1Q-2011.pdf>.

   [RFC1700]  Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
              October 1994.

   [RFC6830]  Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
              Locator/ID Separation Protocol (LISP)", RFC 6830,
              January 2013.

   [RFC6834]  Iannone, L., Saucez, D., and O. Bonaventure, "Locator/ID
              Separation Protocol (LISP) Map-Versioning", RFC 6834,
              January 2013.

   [VXLAN]    Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
              L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
              Framework for Overlaying Virtualized Layer 2 Networks over
              Layer 3 Networks", 2013.







Lewis, et al.            Expires January 5, 2015               [Page 14]

Internet-Draft       LISP Generic Protocol Extension           July 2014


Authors' Addresses

   Darrel Lewis
   Cisco Systems, Inc.

   Email: darlewis@cisco.com


   Puneet Agarwal
   Broadcom

   Email: pagarwal@broadcom.com


   Larry Kreeger
   Cisco Systems, Inc.

   Email: kreeger@cisco.com


   Fabio Maino
   Cisco Systems, Inc.

   Email: fmaino@cisco.com


   Paul Quinn
   Cisco Systems, Inc.

   Email: paulq@cisco.com


   Michael Smith
   Cisco Systems, Inc.

   Email: michsmit@cisco.com


   Navindra Yadav
   Cisco Systems, Inc.

   Email: nyadav@cisco.com
2672 } /* Name.Tag */ .highlight .nv { color: #f8f8f2 } /* Name.Variable */ .highlight .ow { color: #f92672 } /* Operator.Word */ .highlight .w { color: #f8f8f2 } /* Text.Whitespace */ .highlight .mb { color: #ae81ff } /* Literal.Number.Bin */ .highlight .mf { color: #ae81ff } /* Literal.Number.Float */ .highlight .mh { color: #ae81ff } /* Literal.Number.Hex */ .highlight .mi { color: #ae81ff } /* Literal.Number.Integer */ .highlight .mo { color: #ae81ff } /* Literal.Number.Oct */ .highlight .sa { color: #e6db74 } /* Literal.String.Affix */ .highlight .sb { color: #e6db74 } /* Literal.String.Backtick */ .highlight .sc { color: #e6db74 } /* Literal.String.Char */ .highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */ .highlight .sd { color: #e6db74 } /* Literal.String.Doc */ .highlight .s2 { color: #e6db74 } /* Literal.String.Double */ .highlight .se { color: #ae81ff } /* Literal.String.Escape */ .highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */ .highlight .si { color: #e6db74 } /* Literal.String.Interpol */ .highlight .sx { color: #e6db74 } /* Literal.String.Other */ .highlight .sr { color: #e6db74 } /* Literal.String.Regex */ .highlight .s1 { color: #e6db74 } /* Literal.String.Single */ .highlight .ss { color: #e6db74 } /* Literal.String.Symbol */ .highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #a6e22e } /* Name.Function.Magic */ .highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */ .highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */ .highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */ .highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */ .highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */ } @media (prefers-color-scheme: light) { .highlight .hll { background-color: #ffffcc } .highlight .c { color: #888888 } /* Comment */ .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ .highlight .k { color: #008800; font-weight: bold } /* Keyword */ 
.highlight .ch { color: #888888 } /* Comment.Hashbang */ .highlight .cm { color: #888888 } /* Comment.Multiline */ .highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */ .highlight .cpf { color: #888888 } /* Comment.PreprocFile */ .highlight .c1 { color: #888888 } /* Comment.Single */ .highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gr { color: #aa0000 } /* Generic.Error */ .highlight .gh { color: #333333 } /* Generic.Heading */ .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ .highlight .go { color: #888888 } /* Generic.Output */ .highlight .gp { color: #555555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ 
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight 
.ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief NAT44 endpoint-dependent outside to inside network translation
 */

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/fib/ip4_fib.h>
#include <vnet/udp/udp_local.h>
#include <vppinfra/error.h>
#include <nat/nat.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/nat_inlines.h>
#include <nat/nat44/inlines.h>
#include <nat/lib/nat_syslog.h>
#include <nat/nat_ha.h>
#include <nat/nat44/ed_inlines.h>

/*
 * Table of error description strings for the out2in endpoint-dependent
 * code in this file.
 *
 * The table is produced with the X-macro idiom: `_` is temporarily defined
 * to keep only the `string` member of each (sym, string) pair, the
 * foreach_nat_out2in_ed_error list (defined outside this section) is
 * expanded, and `_` is undefined again so it cannot leak into later code.
 *
 * NOTE(review): presumably the entries line up one-to-one with an error
 * enum generated from the same list -- confirm against its definition.
 */
static char *nat_out2in_ed_error_strings[] = {
#define _(sym,string) string,
  foreach_nat_out2in_ed_error
#undef _
};

/*
 * Trace record rendered by format_nat44_ed_out2in_trace ().
 * NOTE(review): the code that fills this record is outside this section;
 * field meanings below are taken from how the formatter prints them.
 */
typedef struct
{
  /* Printed as "sw_if_index"; presumably the RX interface -- confirm. */
  u32 sw_if_index;
  /* Printed as "next index"; presumably the chosen next node -- confirm. */
  u32 next_index;
  /* Printed as "session"; presumably a session pool index -- confirm. */
  u32 session_index;
  /* Non-zero selects the SLOW_PATH tag in the trace line, zero FAST_PATH. */
  u32 is_slow_path;
} nat44_ed_out2in_trace_t;

/*
 * Trace record for the out2in handoff path.
 * NOTE(review): neither the producer nor the formatter of this record is
 * visible in this section -- confirm usage against the rest of the file.
 */
typedef struct
{
  /* Presumably the next/target value chosen at handoff -- TODO confirm. */
  u16 thread_next;
} nat44_ed_out2in_handoff_trace_t;

/**
 * @brief Format one nat44_ed_out2in_trace_t record.
 *
 * @param s    vector the formatted text is appended to
 * @param args va_list carrying, in order: vlib_main_t *, vlib_node_t *
 *             (both ignored) and the nat44_ed_out2in_trace_t * to print
 *
 * @return the extended vector
 */
static u8 *
format_nat44_ed_out2in_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat44_ed_out2in_trace_t *trace =
    va_arg (*args, nat44_ed_out2in_trace_t *);
  char *path_tag = "NAT44_OUT2IN_ED_FAST_PATH";

  if (trace->is_slow_path)
    path_tag = "NAT44_OUT2IN_ED_SLOW_PATH";

  return format (s, "%s: sw_if_index %d, next index %d, session %d",
		 path_tag, trace->sw_if_index, trace->next_index,
		 trace->session_index);
}

/**
 * @brief Run icmp_out2in() on an ICMP packet and, when it is not dropped
 *        and a session was returned, refresh that session's accounting.
 *
 * @param p_s0 out: session pointer filled in by icmp_out2in()
 *
 * @return the next index produced by icmp_out2in()
 */
static inline u32
icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
			  ip4_header_t * ip0, icmp46_header_t * icmp0,
			  u32 sw_if_index0, u32 rx_fib_index0,
			  vlib_node_runtime_t * node, u32 next0, f64 now,
			  u32 thread_index, snat_session_t ** p_s0)
{
  vlib_main_t *vm = vlib_get_main ();
  snat_session_t *sess;

  next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
		       next0, thread_index, p_s0, 0);
  sess = *p_s0;

  /* nothing to account for when the packet is dropped or has no session */
  if (PREDICT_FALSE (next0 == NAT_NEXT_DROP || !sess))
    return next0;

  /* byte/packet counters and last-heard time */
  nat44_session_update_counters (sess, now,
				 vlib_buffer_length_in_chain (vm, b0),
				 thread_index);
  /* keep the LRU list ordered */
  nat44_session_update_lru (sm, sess, thread_index);

  return next0;
}

#ifndef CLIB_MARCH_VARIANT
/**
 * @brief Stale-entry callback used when adding to the out2in
 *        endpoint-dependent hash (see clib_bihash_add_or_overwrite_stale
 *        callers in this file).
 *
 * Decides whether the session referenced by @p kv has timed out and, if
 * so, tears it down: removes its in2out_ed entry, emits the ipfix/syslog/HA
 * delete notifications, releases the outside (and twice-NAT) address/port
 * and deletes the session.
 *
 * @param kv  hash entry under consideration; its value carries the owning
 *            thread index and the session pool index
 * @param arg nat44_is_idle_session_ctx_t * with the current time and the
 *            calling thread index
 *
 * @return 1 when the session was idle and has been freed, 0 otherwise
 */
int
nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
{
  snat_main_t *sm = &snat_main;
  nat44_is_idle_session_ctx_t *ctx = arg;
  snat_session_t *s;
  u64 sess_timeout_time;
  u8 proto;
  u16 r_port, l_port;
  ip4_address_t *l_addr, *r_addr;
  u32 fib_index;
  clib_bihash_kv_16_8_t ed_kv;
  snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
						       ctx->thread_index);

  /* The out2in_ed table is shared by all threads while sessions live in
   * per-thread pools: an entry owned by another thread must not be looked
   * up in (or freed from) this thread's pool. */
  if (ctx->thread_index != ed_value_get_thread_index (kv))
    return 0;

  /* the value is an encoded (thread, session) pair, not a bare index */
  s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv));
  sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
  if (ctx->now >= sess_timeout_time)
    {
      /* rebuild the in2out key of the session so it can be removed */
      l_addr = &s->in2out.addr;
      r_addr = &s->ext_host_addr;
      fib_index = s->in2out.fib_index;
      if (snat_is_unk_proto_session (s))
	{
	  /* unknown-protocol sessions keep the IP protocol in the port */
	  proto = s->in2out.port;
	  r_port = 0;
	  l_port = 0;
	}
      else
	{
	  proto = nat_proto_to_ip_proto (s->nat_proto);
	  l_port = s->in2out.port;
	  r_port = s->ext_host_port;
	}
      if (is_twice_nat_session (s))
	{
	  r_addr = &s->ext_host_nat_addr;
	  r_port = s->ext_host_nat_port;
	}
      init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
      if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
	nat_elog_warn ("in2out_ed key del failed");

      if (snat_is_unk_proto_session (s))
	goto delete;

      nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
					  s->in2out.addr.as_u32,
					  s->out2in.addr.as_u32,
					  s->nat_proto,
					  s->in2out.port,
					  s->out2in.port,
					  s->in2out.fib_index);

      nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
			     &s->in2out.addr, s->in2out.port,
			     &s->ext_host_nat_addr, s->ext_host_nat_port,
			     &s->out2in.addr, s->out2in.port,
			     &s->ext_host_addr, s->ext_host_port,
			     s->nat_proto, is_twice_nat_session (s));

      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
		   s->ext_host_port, s->nat_proto, s->out2in.fib_index,
		   ctx->thread_index);

      /* give back the address/port taken from the twice-NAT pool when the
       * session was created, otherwise it stays busy forever */
      if (is_twice_nat_session (s))
	snat_free_outside_address_and_port (sm->twice_nat_addresses,
					    ctx->thread_index,
					    &s->ext_host_nat_addr,
					    s->ext_host_nat_port,
					    s->nat_proto);

      /* static-mapping sessions do not own an outside address/port */
      if (snat_is_session_static (s))
	goto delete;

      snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
					  &s->out2in.addr, s->out2in.port,
					  s->nat_proto);
    delete:
      nat_ed_session_delete (sm, s, ctx->thread_index, 1);
      return 1;
    }

  return 0;
}
#endif

/**
 * @brief Allocate a port on one specific address (exact-address preference).
 *
 * Picks a random free port in the slice owned by @p snat_thread_index:
 * port_per_thread * snat_thread_index + [0, port_per_thread - 1] + 1024.
 * A port is free when its refcount is zero; claiming it takes a reference
 * and bumps the per-thread and total busy counters of the address.
 *
 * @param a                 address to allocate from
 * @param thread_index      index into the per-thread busy counters
 * @param proto             NAT protocol selecting the port space
 * @param addr              out: the address of @p a
 * @param port              out: allocated port, network byte order
 * @param port_per_thread   size of each thread's port slice
 * @param snat_thread_index slice selector
 *
 * @return 0 on success, 1 when the protocol is unknown or the thread has
 *         no port left on this address
 */
static_always_inline int
nat_alloc_addr_and_port_exact (snat_address_t * a,
			       u32 thread_index,
			       nat_protocol_t proto,
			       ip4_address_t * addr,
			       u16 * port,
			       u16 port_per_thread, u32 snat_thread_index)
{
  u32 portnum;

  switch (proto)
    {
#define _(N, j, n, s) \
    case NAT_PROTOCOL_##N: \
      if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
        { \
          while (1) \
            { \
              portnum = (port_per_thread * \
                snat_thread_index) + \
                snat_random_port(0, port_per_thread - 1) + 1024; \
              /* already referenced: try another random port */ \
              if (a->busy_##n##_port_refcounts[portnum]) \
                continue; \
              /* take a reference; the count is 0 here, so a decrement \
                 would wrap around instead of marking the port busy */ \
              ++a->busy_##n##_port_refcounts[portnum]; \
              a->busy_##n##_ports_per_thread[thread_index]++; \
              a->busy_##n##_ports++; \
              *addr = a->addr; \
              *port = clib_host_to_net_u16(portnum); \
              return 0; \
            } \
        } \
      break;
      foreach_nat_protocol
#undef _
    default:
      nat_elog_info ("unknown protocol");
      return 1;
    }

  /* Totally out of translations to use... */
  nat_ipfix_logging_addresses_exhausted (thread_index, 0);
  return 1;
}


/**
 * @brief Create an endpoint-dependent session for a packet that matched a
 *        static mapping.
 *
 * Allocates a session, fills in both directions from the supplied
 * in2out/out2in tuples and the packet's source, inserts it into the
 * out2in_ed and per-thread in2out_ed tables and emits the ipfix, syslog
 * and HA "session add" notifications. When twice-NAT applies, an external
 * host address/port is allocated first (from the mapping's exact pool
 * address if one is configured and present in the twice-NAT pool).
 *
 * @return the new session, or 0 with b->error set when the session limit
 *         is hit, allocation fails or no twice-NAT port is available
 */
static snat_session_t *
create_session_for_static_mapping_ed (snat_main_t * sm,
				      vlib_buffer_t * b,
				      ip4_address_t i2o_addr,
				      u16 i2o_port,
				      u32 i2o_fib_index,
				      ip4_address_t o2i_addr,
				      u16 o2i_port,
				      u32 o2i_fib_index,
				      nat_protocol_t nat_proto,
				      vlib_node_runtime_t * node,
				      u32 rx_fib_index,
				      u32 thread_index,
				      twice_nat_type_t twice_nat,
				      lb_nat_type_t lb_nat, f64 now,
				      snat_static_mapping_t * mapping)
{
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  nat44_is_idle_session_ctx_t idle_ctx;
  clib_bihash_kv_16_8_t ed_kv;
  snat_session_t *sess;
  ip4_header_t *ip4;
  udp_header_t *l4;
  int want_twice_nat;

  if (PREDICT_FALSE
      (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
    {
      b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
      nat_elog_notice ("maximum sessions exceeded");
      return 0;
    }

  sess = nat_ed_session_alloc (sm, thread_index, now, nat_proto);
  if (sess == 0)
    {
      b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
      nat_elog_warn ("create NAT session failed");
      return 0;
    }

  ip4 = vlib_buffer_get_current (b);
  l4 = ip4_next_header (ip4);

  /* remote endpoint as seen in the packet; ICMP carries no source port */
  sess->ext_host_addr.as_u32 = ip4->src_address.as_u32;
  if (nat_proto == NAT_PROTOCOL_ICMP)
    sess->ext_host_port = 0;
  else
    sess->ext_host_port = l4->src_port;

  sess->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
  if (lb_nat)
    sess->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
  if (lb_nat == AFFINITY_LB_NAT)
    sess->flags |= SNAT_SESSION_FLAG_AFFINITY;
  sess->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;

  sess->in2out.addr = i2o_addr;
  sess->in2out.port = i2o_port;
  sess->in2out.fib_index = i2o_fib_index;
  sess->out2in.addr = o2i_addr;
  sess->out2in.port = o2i_port;
  sess->out2in.fib_index = o2i_fib_index;
  sess->nat_proto = nat_proto;

  /* out2in direction goes into the lookup table first */
  idle_ctx.now = now;
  idle_ctx.thread_index = thread_index;
  init_ed_kv (&ed_kv, o2i_addr, o2i_port, sess->ext_host_addr,
	      sess->ext_host_port, o2i_fib_index, ip4->protocol,
	      thread_index, sess - tsm->sessions);
  if (clib_bihash_add_or_overwrite_stale_16_8 (&sm->out2in_ed, &ed_kv,
					       nat44_o2i_ed_is_idle_session_cb,
					       &idle_ctx))
    nat_elog_notice ("out2in-ed key add failed");

  /* self-twice-NAT only applies when the sender is the local host itself */
  want_twice_nat = (twice_nat == TWICE_NAT)
    || (twice_nat == TWICE_NAT_SELF
	&& ip4->src_address.as_u32 == i2o_addr.as_u32);

  if (!want_twice_nat)
    {
      init_ed_kv (&ed_kv, i2o_addr, i2o_port, sess->ext_host_addr,
		  sess->ext_host_port, i2o_fib_index, ip4->protocol,
		  thread_index, sess - tsm->sessions);
    }
  else
    {
      snat_address_t *exact = 0;
      snat_address_t *cand;
      int rc = 0;

      /* an exact pool address on the mapping takes precedence, provided
       * it is part of the twice-NAT pool */
      if (is_exact_address (mapping))
	{
	  vec_foreach (cand, sm->twice_nat_addresses)
	  {
	    if (cand->addr.as_u32 == mapping->pool_addr.as_u32)
	      {
		exact = cand;
		break;
	      }
	  }
	}

      if (exact)
	{
	  rc = nat_alloc_addr_and_port_exact (exact, thread_index, nat_proto,
					      &sess->ext_host_nat_addr,
					      &sess->ext_host_nat_port,
					      sm->port_per_thread,
					      tsm->snat_thread_index);
	  sess->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS;
	}
      else
	{
	  rc = snat_alloc_outside_address_and_port (sm->twice_nat_addresses,
						    0, thread_index,
						    nat_proto,
						    &sess->ext_host_nat_addr,
						    &sess->ext_host_nat_port,
						    sm->port_per_thread,
						    tsm->snat_thread_index);
	}

      if (rc)
	{
	  /* undo: drop the session and the out2in entry added above
	   * (ed_kv still holds the out2in key at this point) */
	  b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS];
	  nat_ed_session_delete (sm, sess, thread_index, 1);
	  if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
	    nat_elog_notice ("out2in-ed key del failed");
	  return 0;
	}

      sess->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
      init_ed_kv (&ed_kv, i2o_addr, i2o_port, sess->ext_host_nat_addr,
		  sess->ext_host_nat_port, i2o_fib_index, ip4->protocol,
		  thread_index, sess - tsm->sessions);
    }

  if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &ed_kv,
					       nat44_i2o_ed_is_idle_session_cb,
					       &idle_ctx))
    nat_elog_notice ("in2out-ed key add failed");

  nat_ipfix_logging_nat44_ses_create (thread_index,
				      sess->in2out.addr.as_u32,
				      sess->out2in.addr.as_u32,
				      sess->nat_proto,
				      sess->in2out.port,
				      sess->out2in.port,
				      sess->in2out.fib_index);

  nat_syslog_nat44_sadd (sess->user_index, sess->in2out.fib_index,
			 &sess->in2out.addr, sess->in2out.port,
			 &sess->ext_host_nat_addr, sess->ext_host_nat_port,
			 &sess->out2in.addr, sess->out2in.port,
			 &sess->ext_host_addr, sess->ext_host_port,
			 sess->nat_proto, is_twice_nat_session (sess));

  nat_ha_sadd (&sess->in2out.addr, sess->in2out.port, &sess->out2in.addr,
	       sess->out2in.port, &sess->ext_host_addr, sess->ext_host_port,
	       &sess->ext_host_nat_addr, sess->ext_host_nat_port,
	       sess->nat_proto, sess->in2out.fib_index, sess->flags,
	       thread_index, 0);

  per_vrf_sessions_register_session (sess, thread_index);

  return sess;
}

/**
 * @brief Tell whether the packet matches an in2out session of this thread.
 *
 * The key is (src address, src_port, dst address, dst_port, rx fib,
 * IP protocol), looked up in the per-thread in2out_ed table.
 *
 * @return 1 when an entry exists, 0 otherwise
 */
static int
next_src_nat (snat_main_t * sm, ip4_header_t * ip, u16 src_port,
	      u16 dst_port, u32 thread_index, u32 rx_fib_index)
{
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  clib_bihash_kv_16_8_t key, result;

  init_ed_k (&key, ip->src_address, src_port, ip->dst_address, dst_port,
	     rx_fib_index, ip->protocol);

  /* the search yields 0 on a hit */
  return clib_bihash_search_16_8 (&tsm->in2out_ed, &key, &result) == 0;
}

/**
 * @brief Find or create a forwarding-bypass session for a packet that is
 *        forwarded without translation.
 *
 * Builds an endpoint-dependent key from the packet (via
 * get_icmp_o2i_ed_key() for ICMP, from the L4 ports otherwise) and looks it
 * up in this thread's in2out_ed table. On a miss a new session flagged
 * SNAT_SESSION_FLAG_FWD_BYPASS is allocated with identical in2out and
 * out2in tuples and inserted; ICMP error messages never create a session.
 * For TCP the session state is advanced, then counters (with 0 bytes) and
 * the LRU position are refreshed.
 *
 * Returns silently, without a session, when the ICMP key cannot be built,
 * the session limit is exceeded or allocation fails.
 */
static void
create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
		       u32 rx_fib_index, u32 thread_index)
{
  clib_bihash_kv_16_8_t kv, value;
  udp_header_t *udp;
  snat_session_t *s = 0;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  vlib_main_t *vm = vlib_get_main ();
  f64 now = vlib_time_now (vm);
  u16 l_port, r_port;

  if (ip->protocol == IP_PROTOCOL_ICMP)
    {
      /* helper fills kv, l_port and r_port; non-zero means no usable key */
      if (get_icmp_o2i_ed_key
	  (b, ip, rx_fib_index, ~0, ~0, 0, &l_port, &r_port, &kv))
	return;
    }
  else
    {
      if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
	{
	  /* UDP and TCP both start with source/destination port */
	  udp = ip4_next_header (ip);
	  l_port = udp->dst_port;
	  r_port = udp->src_port;
	}
      else
	{
	  l_port = 0;
	  r_port = 0;
	}
      init_ed_k (&kv, ip->dst_address, l_port, ip->src_address, r_port,
		 rx_fib_index, ip->protocol);
    }

  if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
    {
      /* existing session */
      ASSERT (thread_index == ed_value_get_thread_index (&value));
      s =
	pool_elt_at_index (tsm->sessions,
			   ed_value_get_session_index (&value));
    }
  else if (ip->protocol == IP_PROTOCOL_ICMP &&
	   icmp_type_is_error_message
	   (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
    {
      /* an ICMP error without a matching session creates nothing */
      return;
    }
  else
    {
      u32 proto;

      if (PREDICT_FALSE
	  (nat44_ed_maximum_sessions_exceeded
	   (sm, rx_fib_index, thread_index)))
	return;

      s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
      if (!s)
	{
	  nat_elog_warn ("create NAT session failed");
	  return;
	}

      proto = ip_proto_to_nat_proto (ip->protocol);

      s->ext_host_addr = ip->src_address;
      s->ext_host_port = r_port;
      s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
      s->out2in.addr = ip->dst_address;
      s->out2in.port = l_port;
      s->nat_proto = proto;
      if (proto == NAT_PROTOCOL_OTHER)
	{
	  /* unknown protocols keep the IP protocol number in the port */
	  s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
	  s->out2in.port = ip->protocol;
	}
      s->out2in.fib_index = rx_fib_index;
      /* bypass: nothing is translated, both directions are the same */
      s->in2out.addr = s->out2in.addr;
      s->in2out.port = s->out2in.port;
      s->in2out.fib_index = s->out2in.fib_index;

      /* NOTE(review): the value stored here is the bare pool index, while
       * the lookup above asserts ed_value_get_thread_index (&value) ==
       * thread_index - confirm the value encoding for threads other
       * than 0. */
      kv.value = s - tsm->sessions;
      if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
	nat_elog_notice ("in2out_ed key add failed");

      per_vrf_sessions_register_session (s, thread_index);
    }

  if (ip->protocol == IP_PROTOCOL_TCP)
    {
      tcp_header_t *tcp = ip4_next_header (ip);
      nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
				       tcp->ack_number, tcp->seq_number,
				       thread_index);
    }

  /* Accounting */
  nat44_session_update_counters (s, now, 0, thread_index);
  /* Per-user LRU list maintenance */
  nat44_session_update_lru (sm, s, thread_index);
}

/**
 * @brief Multi-worker wrapper around create_bypass_for_fwd().
 *
 * Asks sm->worker_in2out_cb which thread owns the packet, presenting the
 * packet's destination address as the source of a temporary header. If
 * that is the current thread the bypass session is created here;
 * otherwise the owning thread is recorded in the buffer for handoff.
 *
 * @return 0 when handled locally, 1 when the packet must be handed off
 */
static_always_inline int
create_bypass_for_fwd_worker (snat_main_t * sm,
			      vlib_buffer_t * b, ip4_header_t * ip,
			      u32 rx_fib_index, u32 thread_index)
{
  ip4_header_t probe = {
    .src_address = ip->dst_address,
  };
  u32 owner = sm->worker_in2out_cb (&probe, rx_fib_index, 0);

  if (owner == thread_index)
    {
      create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
      return 0;
    }

  /* another worker owns this flow */
  vnet_buffer2 (b)->nat.thread_next = owner;
  return 1;
}

#ifndef CLIB_MARCH_VARIANT
/**
 * @brief Match an outside-to-inside ICMP packet against the
 *        endpoint-dependent sessions and static mappings.
 *
 * Looks the packet up in out2in_ed. On a miss it tries the static mappings:
 * without a match the packet is dropped, left untranslated (interface
 * address, or forwarding enabled) or redirected to in2out / handoff; with a
 * match a new session is created for echo reply, or for echo request on an
 * address-only mapping. On a hit only echo request/reply and ICMP error
 * messages are accepted.
 *
 * @param addr           out: session in2out address (only when a session
 *                       is found or created)
 * @param port           out: session in2out port (same condition)
 * @param fib_index      out: session in2out fib index (same condition)
 * @param proto          passed to get_icmp_o2i_ed_key() and read back as
 *                       the NAT protocol of a newly created session
 * @param d              when non-NULL, receives the session pointer
 *                       (NULL if there is none)
 * @param e              unused
 * @param dont_translate out: set to 1 when the packet must be passed on
 *                       untranslated, 0 otherwise
 *
 * @return ~0 when the caller's next index should be kept, otherwise the
 *         NAT_NEXT_* index chosen here
 */
u32
icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
		      u32 thread_index, vlib_buffer_t * b,
		      ip4_header_t * ip, ip4_address_t * addr,
		      u16 * port, u32 * fib_index, nat_protocol_t * proto,
		      void *d, void *e, u8 * dont_translate)
{
  u32 next = ~0, sw_if_index, rx_fib_index;
  clib_bihash_kv_16_8_t kv, value;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  snat_session_t *s = 0;
  u8 is_addr_only, identity_nat;
  u16 l_port, r_port;
  vlib_main_t *vm = vlib_get_main ();
  ip4_address_t sm_addr;
  u16 sm_port;
  u32 sm_fib_index;
  *dont_translate = 0;
  snat_static_mapping_t *m;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  if (get_icmp_o2i_ed_key
      (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, &r_port, &kv))
    {
      b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL];
      next = NAT_NEXT_DROP;
      goto out;
    }

  /* non-zero search result: no session for this key */
  if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
    {
      if (snat_static_mapping_match
	  (sm, ip->dst_address, l_port, rx_fib_index,
	   ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port,
	   &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m))
	{
	  // static mapping not matched
	  if (!sm->forwarding_enabled)
	    {
	      /* Don't NAT packet aimed at the intfc address */
	      if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index,
						    ip->dst_address.as_u32)))
		{
		  *dont_translate = 1;
		}
	      else
		{
		  b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
		  next = NAT_NEXT_DROP;
		}
	    }
	  else
	    {
	      /* forwarding enabled: pass the packet on untranslated */
	      *dont_translate = 1;
	      if (next_src_nat (sm, ip, l_port, r_port,
				thread_index, rx_fib_index))
		{
		  /* an in2out session matches: let in2out translate it */
		  next = NAT_NEXT_IN2OUT_ED_FAST_PATH;
		}
	      else
		{
		  if (sm->num_workers > 1)
		    {
		      if (create_bypass_for_fwd_worker (sm, b, ip,
							rx_fib_index,
							thread_index))
			{
			  next = NAT_NEXT_OUT2IN_ED_HANDOFF;
			}
		    }
		  else
		    {
		      create_bypass_for_fwd (sm, b, ip, rx_fib_index,
					     thread_index);
		    }
		}
	    }
	  goto out;
	}

      /* static mapping matched: accept echo reply, and echo request only
       * for address-only mappings */
      if (PREDICT_FALSE
	  (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
	   ICMP4_echo_reply
	   && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
	       ICMP4_echo_request || !is_addr_only)))
	{
	  b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
	  next = NAT_NEXT_DROP;
	  goto out;
	}

      if (PREDICT_FALSE (identity_nat))
	{
	  *dont_translate = 1;
	  goto out;
	}

      /* Create session initiated by host from external network */
      s =
	create_session_for_static_mapping_ed (sm, b, sm_addr, sm_port,
					      sm_fib_index, ip->dst_address,
					      l_port, rx_fib_index, *proto,
					      node, rx_fib_index,
					      thread_index, 0, 0,
					      vlib_time_now (vm), m);
      if (!s)
	next = NAT_NEXT_DROP;
    }
  else
    {
      /* session found: only echo request/reply and error messages pass */
      if (PREDICT_FALSE
	  (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
	   ICMP4_echo_reply
	   && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
	   ICMP4_echo_request
	   && !icmp_type_is_error_message (vnet_buffer (b)->ip.
					   reass.icmp_type_or_tcp_flags)))
	{
	  b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
	  next = NAT_NEXT_DROP;
	  goto out;
	}

      ASSERT (thread_index == ed_value_get_thread_index (&value));
      s =
	pool_elt_at_index (tsm->sessions,
			   ed_value_get_session_index (&value));
    }
out:
  /* translation outputs are only written when a session is available */
  if (s)
    {
      *addr = s->in2out.addr;
      *port = s->in2out.port;
      *fib_index = s->in2out.fib_index;
    }
  if (d)
    *(snat_session_t **) d = s;
  return next;
}
#endif

/**
 * @brief Outside-to-inside translation for protocols other than
 *        TCP/UDP/ICMP.
 *
 * Looks up an existing session keyed on (destination, source, rx fib,
 * IP protocol) with zero ports. On a miss, an address-only static mapping
 * for the destination is required; a new session is then created and
 * inserted into both lookup tables. In either case the destination
 * address and IP header checksum are rewritten, the TX fib is set from the
 * session and the session's counters and LRU position are refreshed.
 *
 * @return the session used, or 0 with b->error set when the session limit
 *         is exceeded, no static mapping exists or allocation fails
 */
static snat_session_t *
nat44_ed_out2in_unknown_proto (snat_main_t * sm,
			       vlib_buffer_t * b,
			       ip4_header_t * ip,
			       u32 rx_fib_index,
			       u32 thread_index,
			       f64 now,
			       vlib_main_t * vm, vlib_node_runtime_t * node)
{
  clib_bihash_kv_8_8_t kv, value;
  clib_bihash_kv_16_8_t s_kv, s_value;
  snat_static_mapping_t *m;
  u32 old_addr, new_addr;
  ip_csum_t sum;
  snat_session_t *s;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];

  old_addr = ip->dst_address.as_u32;

  init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, rx_fib_index,
	     ip->protocol);

  if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
    {
      /* existing session */
      ASSERT (thread_index == ed_value_get_thread_index (&s_value));
      s =
	pool_elt_at_index (tsm->sessions,
			   ed_value_get_session_index (&s_value));
      new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
    }
  else
    {
      if (PREDICT_FALSE
	  (nat44_ed_maximum_sessions_exceeded
	   (sm, rx_fib_index, thread_index)))
	{
	  b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
	  nat_elog_notice ("maximum sessions exceeded");
	  return 0;
	}

      /* only an address-only static mapping can admit the packet */
      init_nat_k (&kv, ip->dst_address, 0, 0, 0);
      if (clib_bihash_search_8_8
	  (&sm->static_mapping_by_external, &kv, &value))
	{
	  b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
	  return 0;
	}

      m = pool_elt_at_index (sm->static_mappings, value.value);

      /* from here on ip->dst_address holds the translated address */
      new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;

      /* Create a new session */
      s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
      if (!s)
	{
	  b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
	  nat_elog_warn ("create NAT session failed");
	  return 0;
	}

      s->ext_host_addr.as_u32 = ip->src_address.as_u32;
      s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
      s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
      s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
      s->out2in.addr.as_u32 = old_addr;
      s->out2in.fib_index = rx_fib_index;
      s->in2out.addr.as_u32 = new_addr;
      s->in2out.fib_index = m->fib_index;
      /* unknown protocols keep the IP protocol number in the port */
      s->in2out.port = s->out2in.port = ip->protocol;

      /* Add to lookup tables */

      /* out2in: same key as the lookup above (original destination), with
       * the owning thread encoded in the value - the lookup path asserts
       * on the thread index, so a bare pool index is not enough */
      init_ed_kv (&s_kv, s->out2in.addr, 0, ip->src_address, 0,
		  rx_fib_index, ip->protocol, thread_index,
		  s - tsm->sessions);
      if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
	nat_elog_notice ("out2in key add failed");

      /* in2out: keyed on the translated destination */
      init_ed_kv (&s_kv, ip->dst_address, 0, ip->src_address, 0, m->fib_index,
		  ip->protocol, thread_index, s - tsm->sessions);
      if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
	nat_elog_notice ("in2out key add failed");

      per_vrf_sessions_register_session (s, thread_index);
    }

  /* Update IP checksum */
  sum = ip->checksum;
  sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
  ip->checksum = ip_csum_fold (sum);

  vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;

  /* Accounting */
  nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
				 thread_index);
  /* Per-user LRU list maintenance */
  nat44_session_update_lru (sm, s, thread_index);

  return s;
}

/**
 * @brief Fast-path body of the NAT44 endpoint-dependent out2in node.
 *
 * Processes every buffer of the frame one at a time. TCP/UDP packets with
 * a live session in out2in_ed get their destination address/port (and,
 * for twice-NAT sessions, source address/port) rewritten together with the
 * affected checksums. Everything that needs more work - ICMP, unknown
 * protocols, missing, closed or expired sessions - is sent to the slow
 * path node; packets with TTL 1 go to the ICMP error node.
 *
 * @param is_multi_worker when non-zero, a session index stashed in the
 *                        buffer by the handoff node is tried before the
 *                        hash lookup
 *
 * @return number of packets in the frame
 */
static inline uword
nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame,
					  int is_multi_worker)
{
  u32 n_left_from, *from;
  snat_main_t *sm = &snat_main;
  f64 now = vlib_time_now (vm);
  u32 thread_index = vm->thread_index;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
  vlib_get_buffers (vm, from, b, n_left_from);

  while (n_left_from > 0)
    {
      vlib_buffer_t *b0;
      u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
      u16 old_port0, new_port0;
      ip4_header_t *ip0;
      udp_header_t *udp0;
      tcp_header_t *tcp0;
      snat_session_t *s0 = 0;
      clib_bihash_kv_16_8_t kv0, value0;
      ip_csum_t sum0;

      b0 = *b;
      b++;

      /* Prefetch next iteration. */
      if (PREDICT_TRUE (n_left_from >= 2))
	{
	  vlib_buffer_t *p2;

	  p2 = *b;

	  vlib_prefetch_buffer_header (p2, LOAD);

	  CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
	}

      /* default: continue along the feature arc */
      next[0] = vnet_buffer2 (b0)->nat.arc_next;

      vnet_buffer (b0)->snat.flags = 0;
      ip0 = vlib_buffer_get_current (b0);

      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      rx_fib_index0 =
	fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);

      if (PREDICT_FALSE (ip0->ttl == 1))
	{
	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
	  icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
				       ICMP4_time_exceeded_ttl_exceeded_in_transit,
				       0);
	  next[0] = NAT_NEXT_ICMP_ERROR;
	  goto trace0;
	}

      /* the same L4 header is viewed both as UDP and as TCP below */
      udp0 = ip4_next_header (ip0);
      tcp0 = (tcp_header_t *) udp0;
      proto0 = ip_proto_to_nat_proto (ip0->protocol);

      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
	{
	  next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
	  goto trace0;
	}

      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
	{
	  next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
	  goto trace0;
	}

      /* ports come from the buffer metadata, not from the L4 header */
      init_ed_k (&kv0, ip0->dst_address,
		 vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
		 vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
		 ip0->protocol);

      /* there is a stashed index in vnet_buffer2 from handoff node,
       * see if we can use it */
      if (is_multi_worker
	  &&
	  PREDICT_TRUE (!pool_is_free_index
			(tsm->sessions,
			 vnet_buffer2 (b0)->nat.ed_out2in_nat_session_index)))
	{
	  s0 = pool_elt_at_index (tsm->sessions,
				  vnet_buffer2 (b0)->
				  nat.ed_out2in_nat_session_index);
	  if (PREDICT_TRUE
	      (s0->out2in.addr.as_u32 == ip0->dst_address.as_u32
	       && s0->out2in.port == vnet_buffer (b0)->ip.reass.l4_dst_port
	       && s0->nat_proto == ip_proto_to_nat_proto (ip0->protocol)
	       && s0->out2in.fib_index == rx_fib_index0
	       && s0->ext_host_addr.as_u32 == ip0->src_address.as_u32
	       && s0->ext_host_port ==
	       vnet_buffer (b0)->ip.reass.l4_src_port))
	    {
	      /* yes, this is the droid we're looking for */
	      goto skip_lookup;
	    }
	}

      // lookup for session
      if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
	{
	  // session does not exist go slow path
	  next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
	  goto trace0;
	}
      ASSERT (thread_index == ed_value_get_thread_index (&value0));
      s0 =
	pool_elt_at_index (tsm->sessions,
			   ed_value_get_session_index (&value0));

    skip_lookup:

      if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
	{
	  // session is closed, go slow path
	  nat_free_session_data (sm, s0, thread_index, 0);
	  nat_ed_session_delete (sm, s0, thread_index, 1);
	  next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
	  goto trace0;
	}

      if (s0->tcp_closed_timestamp)
	{
	  if (now >= s0->tcp_closed_timestamp)
	    {
	      // session is closed, go slow path, freed in slow path
	      next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
	    }
	  else
	    {
	      // session in transitory timeout, drop
	      b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
	      next[0] = NAT_NEXT_DROP;
	    }
	  goto trace0;
	}

      // drop if session expired
      u64 sess_timeout_time;
      sess_timeout_time =
	s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
      if (now >= sess_timeout_time)
	{
	  // session is closed, go slow path
	  nat_free_session_data (sm, s0, thread_index, 0);
	  nat_ed_session_delete (sm, s0, thread_index, 1);
	  next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
	  goto trace0;
	}

      /* live session: rewrite the destination to the inside address */
      old_addr0 = ip0->dst_address.as_u32;
      new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;

      sum0 = ip0->checksum;
      sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
			     dst_address);
      /* the source address itself is rewritten further down, together
       * with the L4 header */
      if (PREDICT_FALSE (is_twice_nat_session (s0)))
	sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
			       s0->ext_host_nat_addr.as_u32, ip4_header_t,
			       src_address);
      ip0->checksum = ip_csum_fold (sum0);

      old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;

      if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
	{
	  /* only the first fragment carries the L4 header */
	  if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
	    {
	      new_port0 = udp0->dst_port = s0->in2out.port;
	      sum0 = tcp0->checksum;
	      sum0 =
		ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
				dst_address);
	      sum0 =
		ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
				length);
	      if (is_twice_nat_session (s0))
		{
		  sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
					 s0->ext_host_nat_addr.as_u32,
					 ip4_header_t, dst_address);
		  sum0 =
		    ip_csum_update (sum0,
				    vnet_buffer (b0)->ip.reass.l4_src_port,
				    s0->ext_host_nat_port, ip4_header_t,
				    length);
		  tcp0->src_port = s0->ext_host_nat_port;
		  ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
		}
	      tcp0->checksum = ip_csum_fold (sum0);
	    }
	  vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp,
					 thread_index, sw_if_index0, 1);
	  /* TCP flags and sequence numbers are taken from buffer metadata */
	  nat44_set_tcp_session_state_o2i (sm, now, s0,
					   vnet_buffer (b0)->ip.
					   reass.icmp_type_or_tcp_flags,
					   vnet_buffer (b0)->ip.
					   reass.tcp_ack_number,
					   vnet_buffer (b0)->ip.
					   reass.tcp_seq_number,
					   thread_index);
	}
      else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
	       && udp0->checksum)
	{
	  /* non-TCP first fragment carrying a non-zero L4 checksum */
	  new_port0 = udp0->dst_port = s0->in2out.port;
	  sum0 = udp0->checksum;
	  sum0 =
	    ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
			    dst_address);
	  sum0 =
	    ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length);
	  if (PREDICT_FALSE (is_twice_nat_session (s0)))
	    {
	      sum0 =
		ip_csum_update (sum0, ip0->src_address.as_u32,
				s0->ext_host_nat_addr.as_u32, ip4_header_t,
				dst_address);
	      sum0 =
		ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_src_port,
				s0->ext_host_nat_port, ip4_header_t, length);
	      udp0->src_port = s0->ext_host_nat_port;
	      ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
	    }
	  udp0->checksum = ip_csum_fold (sum0);
	  vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
					 thread_index, sw_if_index0, 1);
	}
      else
	{
	  /* non-TCP packet that is a non-first fragment or has a zero L4
	   * checksum: ports are rewritten without a checksum update */
	  if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
	    {
	      new_port0 = udp0->dst_port = s0->in2out.port;
	      if (PREDICT_FALSE (is_twice_nat_session (s0)))
		{
		  udp0->src_port = s0->ext_host_nat_port;
		  ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
		}
	    }
	  vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
					 thread_index, sw_if_index0, 1);
	}

      /* Accounting */
      nat44_session_update_counters (s0, now,
				     vlib_buffer_length_in_chain (vm, b0),
				     thread_index);
      /* Per-user LRU list maintenance */
      nat44_session_update_lru (sm, s0, thread_index);

    trace0:
      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat44_ed_out2in_trace_t *t =
	    vlib_add_trace (vm, node, b0, sizeof (*t));
	  t->sw_if_index = sw_if_index0;
	  t->next_index = next[0];
	  t->is_slow_path = 0;

	  /* s0 may point at a session deleted above; it is not
	   * dereferenced, only its pool index is recorded */
	  if (s0)
	    t->session_index = s0 - tsm->sessions;
	  else
	    t->session_index = ~0;
	}

      if (next[0] == NAT_NEXT_DROP)
	{
	  vlib_increment_simple_counter (&sm->counters.fastpath.
					 out2in_ed.drops, thread_index,
					 sw_if_index0, 1);
	}

      n_left_from--;
      next++;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
			       frame->n_vectors);
  return frame->n_vectors;
}

/*
 * Slow-path worker for the endpoint-dependent out2in node.
 *
 * Walks every buffer of the frame one at a time.  For each packet it:
 *   - starts from the next index stored in the buffer (nat.arc_next);
 *   - diverts TTL == 1 packets to the ICMP error next;
 *   - delegates "other" protocols and ICMP to their dedicated helpers;
 *   - for the remaining protocols looks the 6-tuple up in sm->out2in_ed and,
 *     when no usable session exists, tries the static mappings and creates
 *     a session from the matching one;
 *   - rewrites the destination address/port (and the source address/port
 *     for twice-nat sessions), patching IP and L4 checksums incrementally;
 *   - updates session counters and LRU, adds a trace record when tracing
 *     is on, and bumps the slow-path drop counter for dropped packets.
 *
 * All buffers are finally enqueued according to the per-packet next
 * indices.  Returns frame->n_vectors.
 */
static inline uword
nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  u32 n_left_from, *from;
  snat_main_t *sm = &snat_main;
  f64 now = vlib_time_now (vm);
  u32 thread_index = vm->thread_index;
  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
  snat_static_mapping_t *m;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  /* Resolve all buffer indices up front; nexts[] collects one next index
     per packet for the final enqueue */
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
  vlib_get_buffers (vm, from, b, n_left_from);

  while (n_left_from > 0)
    {
      vlib_buffer_t *b0;
      u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
      u16 old_port0, new_port0;
      ip4_header_t *ip0;
      udp_header_t *udp0;
      tcp_header_t *tcp0;
      icmp46_header_t *icmp0;
      snat_session_t *s0 = 0;
      clib_bihash_kv_16_8_t kv0, value0;
      ip_csum_t sum0;
      lb_nat_type_t lb_nat0;
      twice_nat_type_t twice_nat0;
      u8 identity_nat0;
      ip4_address_t sm_addr;
      u16 sm_port;
      u32 sm_fib_index;

      b0 = *b;
      /* Default next: the value stored in the buffer's nat metadata */
      next[0] = vnet_buffer2 (b0)->nat.arc_next;

      vnet_buffer (b0)->snat.flags = 0;
      ip0 = vlib_buffer_get_current (b0);

      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      rx_fib_index0 =
	fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);

      /* TTL of 1: hand the packet to the ICMP error next with a
         "time exceeded in transit" error set on the buffer */
      if (PREDICT_FALSE (ip0->ttl == 1))
	{
	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
	  icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
				       ICMP4_time_exceeded_ttl_exceeded_in_transit,
				       0);
	  next[0] = NAT_NEXT_ICMP_ERROR;
	  goto trace0;
	}

      /* udp0/tcp0/icmp0 are three typed views of the same L4 header */
      udp0 = ip4_next_header (ip0);
      tcp0 = (tcp_header_t *) udp0;
      icmp0 = (icmp46_header_t *) udp0;
      proto0 = ip_proto_to_nat_proto (ip0->protocol);

      /* Protocols other than TCP/UDP/ICMP are handled by a dedicated
         helper; the packet is dropped only when forwarding is disabled
         and the helper returned no session */
      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
	{
	  s0 =
	    nat44_ed_out2in_unknown_proto (sm, b0, ip0, rx_fib_index0,
					   thread_index, now, vm, node);
	  if (!sm->forwarding_enabled)
	    {
	      if (!s0)
		next[0] = NAT_NEXT_DROP;
	    }
	  vlib_increment_simple_counter (&sm->counters.slowpath.
					 out2in_ed.other, thread_index,
					 sw_if_index0, 1);
	  goto trace0;
	}

      /* ICMP: the helper picks the next index and may return a session */
      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
	{
	  next[0] = icmp_out2in_ed_slow_path
	    (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
	     next[0], now, thread_index, &s0);
	  vlib_increment_simple_counter (&sm->counters.slowpath.
					 out2in_ed.icmp, thread_index,
					 sw_if_index0, 1);
	  goto trace0;
	}

      /* Lookup key: destination addr/port, source addr/port, RX FIB and
         IP protocol; ports come from the buffer's reassembly metadata */
      init_ed_k (&kv0, ip0->dst_address,
		 vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
		 vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
		 ip0->protocol);

      s0 = NULL;
      if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
	{
	  ASSERT (thread_index == ed_value_get_thread_index (&value0));
	  s0 =
	    pool_elt_at_index (tsm->sessions,
			       ed_value_get_session_index (&value0));

	  /* A session whose TCP close timestamp has passed is torn down
	     here and the packet is then treated as having no session */
	  if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
	    {
	      nat_free_session_data (sm, s0, thread_index, 0);
	      nat_ed_session_delete (sm, s0, thread_index, 1);
	      s0 = NULL;
	    }
	}

      if (!s0)
	{
	  /* Try to match static mapping by external address and port,
	     destination address and port in packet */

	  /* A non-zero return is handled below as "no translation
	     available" for this packet */
	  if (snat_static_mapping_match
	      (sm, ip0->dst_address,
	       vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
	       proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0,
	       &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m))
	    {
	      /*
	       * Send DHCP packets to the ipv4 stack, or we won't
	       * be able to use dhcp client on the outside interface
	       */
	      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP
				 && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
				     clib_host_to_net_u16
				     (UDP_DST_PORT_dhcp_to_client))))
		{
		  goto trace0;
		}

	      if (!sm->forwarding_enabled)
		{
		  b0->error =
		    node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
		  next[0] = NAT_NEXT_DROP;
		}
	      else
		{
		  /* Forwarding enabled: either send the packet to the
		     in2out fast path, hand it off to another worker, or
		     create a bypass entry on this thread */
		  if (next_src_nat
		      (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
		       vnet_buffer (b0)->ip.reass.l4_dst_port,
		       thread_index, rx_fib_index0))
		    {
		      next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH;
		    }
		  else
		    {
		      if ((sm->num_workers > 1)
			  && create_bypass_for_fwd_worker (sm, b0, ip0,
							   rx_fib_index0,
							   thread_index))
			{
			  next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF;
			}
		      else
			{
			  create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
						 thread_index);
			}
		    }
		}
	      goto trace0;
	    }

	  /* Identity mappings leave the packet untouched */
	  if (PREDICT_FALSE (identity_nat0))
	    goto trace0;

	  /* Without a session, a TCP packet is accepted only when
	     tcp_flags_is_init() approves its flags; otherwise it is
	     dropped with the NON_SYN error */
	  if ((proto0 == NAT_PROTOCOL_TCP)
	      && !tcp_flags_is_init (vnet_buffer (b0)->ip.
				     reass.icmp_type_or_tcp_flags))
	    {
	      b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
	      next[0] = NAT_NEXT_DROP;
	      goto trace0;
	    }

	  /* Create session initiated by host from external network */
	  s0 = create_session_for_static_mapping_ed (sm, b0,
						     sm_addr, sm_port,
						     sm_fib_index,
						     ip0->dst_address,
						     vnet_buffer (b0)->
						     ip.reass.l4_dst_port,
						     rx_fib_index0, proto0,
						     node, rx_fib_index0,
						     thread_index, twice_nat0,
						     lb_nat0, now, m);
	  if (!s0)
	    {
	      next[0] = NAT_NEXT_DROP;
	      goto trace0;
	    }
	}

      /* Translate the destination address to the session's inside address
         and select the inside FIB for transmit */
      old_addr0 = ip0->dst_address.as_u32;
      new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;

      /* IP header checksum: account for the new destination and, for
         twice-nat sessions, the source address that is written later */
      sum0 = ip0->checksum;
      sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
			     dst_address);
      if (PREDICT_FALSE (is_twice_nat_session (s0)))
	sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
			       s0->ext_host_nat_addr.as_u32, ip4_header_t,
			       src_address);
      ip0->checksum = ip_csum_fold (sum0);

      old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;

      /*
       * NOTE(review): the L4 checksum updates below name the fields
       * dst_address / length even when the values being replaced are the
       * source address and the ports.  Presumably ip_csum_update() uses
       * the field only for its size/alignment - confirm against its
       * definition.
       */
      if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
	{
	  /* Ports and TCP checksum are only touched on packets that are
	     not flagged as a non-first fragment */
	  if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
	    {
	      new_port0 = udp0->dst_port = s0->in2out.port;
	      sum0 = tcp0->checksum;
	      sum0 =
		ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
				dst_address);
	      sum0 =
		ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
				length);
	      if (is_twice_nat_session (s0))
		{
		  sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
					 s0->ext_host_nat_addr.as_u32,
					 ip4_header_t, dst_address);
		  sum0 =
		    ip_csum_update (sum0,
				    vnet_buffer (b0)->ip.reass.l4_src_port,
				    s0->ext_host_nat_port, ip4_header_t,
				    length);
		  tcp0->src_port = s0->ext_host_nat_port;
		  ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
		}
	      tcp0->checksum = ip_csum_fold (sum0);
	    }
	  vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp,
					 thread_index, sw_if_index0, 1);
	  /* Session TCP state is updated for every TCP packet, using the
	     flags/ack/seq values stored in the buffer metadata */
	  nat44_set_tcp_session_state_o2i (sm, now, s0,
					   vnet_buffer (b0)->ip.
					   reass.icmp_type_or_tcp_flags,
					   vnet_buffer (b0)->ip.
					   reass.tcp_ack_number,
					   vnet_buffer (b0)->ip.
					   reass.tcp_seq_number,
					   thread_index);
	}
      /* Non-TCP packet carrying a non-zero L4 checksum: rewrite the ports
         and patch the checksum */
      else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
	       && udp0->checksum)
	{
	  new_port0 = udp0->dst_port = s0->in2out.port;
	  sum0 = udp0->checksum;
	  sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
				 dst_address);
	  sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
				 length);
	  if (PREDICT_FALSE (is_twice_nat_session (s0)))
	    {
	      sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
				     s0->ext_host_nat_addr.as_u32,
				     ip4_header_t, dst_address);
	      sum0 =
		ip_csum_update (sum0,
				vnet_buffer (b0)->ip.reass.l4_src_port,
				s0->ext_host_nat_port, ip4_header_t, length);
	      udp0->src_port = s0->ext_host_nat_port;
	      ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
	    }
	  udp0->checksum = ip_csum_fold (sum0);
	  vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
					 thread_index, sw_if_index0, 1);
	}
      /* Remaining non-TCP packets (non-first fragment, or checksum field
         of zero): ports are rewritten only when the packet is not a
         non-first fragment, and the L4 checksum is left alone */
      else
	{
	  if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
	    {
	      new_port0 = udp0->dst_port = s0->in2out.port;
	      if (PREDICT_FALSE (is_twice_nat_session (s0)))
		{
		  udp0->src_port = s0->ext_host_nat_port;
		  ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
		}
	    }
	  vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
					 thread_index, sw_if_index0, 1);
	}

      /* Accounting */
      nat44_session_update_counters (s0, now,
				     vlib_buffer_length_in_chain (vm, b0),
				     thread_index);
      /* Per-user LRU list maintenance */
      nat44_session_update_lru (sm, s0, thread_index);

      /* Common exit for every packet, translated or not */
    trace0:
      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat44_ed_out2in_trace_t *t =
	    vlib_add_trace (vm, node, b0, sizeof (*t));
	  t->sw_if_index = sw_if_index0;
	  t->next_index = next[0];
	  t->is_slow_path = 1;

	  /* Session index within this thread's pool, or ~0 when the packet
	     ended up without a session */
	  if (s0)
	    t->session_index = s0 - tsm->sessions;
	  else
	    t->session_index = ~0;
	}

      if (next[0] == NAT_NEXT_DROP)
	{
	  vlib_increment_simple_counter (&sm->counters.slowpath.
					 out2in_ed.drops, thread_index,
					 sw_if_index0, 1);
	}

      n_left_from--;
      next++;
      b++;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
			       frame->n_vectors);

  return frame->n_vectors;
}

/*
 * Hand a frame of buffers over to other worker threads.
 *
 * The destination thread of each buffer is read from its nat.thread_next
 * metadata.  Buffers are then enqueued to the frame queue identified by
 * fq_index; any that could not be enqueued are accounted under
 * NAT44_HANDOFF_ERROR_CONGESTION_DROP.  Returns frame->n_vectors.
 */
static inline uword
nat_handoff_node_fn_inline (vlib_main_t * vm,
			    vlib_node_runtime_t * node,
			    vlib_frame_t * frame, u32 fq_index)
{
  u32 *buffer_indices = vlib_frame_vector_args (frame);
  u32 n_total = frame->n_vectors;
  u32 n_remaining = n_total;
  u32 n_queued;

  u16 thread_indices[VLIB_FRAME_SIZE];
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
  u16 *dst = thread_indices;
  vlib_buffer_t **cur = bufs;

  vlib_get_buffers (vm, buffer_indices, bufs, n_total);

  /* Four buffers per iteration, prefetching the following four while at
     least eight are still pending */
  for (; n_remaining >= 4; n_remaining -= 4, cur += 4, dst += 4)
    {
      if (PREDICT_TRUE (n_remaining >= 8))
	{
	  vlib_prefetch_buffer_header (cur[4], LOAD);
	  vlib_prefetch_buffer_header (cur[5], LOAD);
	  vlib_prefetch_buffer_header (cur[6], LOAD);
	  vlib_prefetch_buffer_header (cur[7], LOAD);
	  CLIB_PREFETCH (&cur[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (&cur[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (&cur[6]->data, CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (&cur[7]->data, CLIB_CACHE_LINE_BYTES, LOAD);
	}

      dst[0] = vnet_buffer2 (cur[0])->nat.thread_next;
      dst[1] = vnet_buffer2 (cur[1])->nat.thread_next;
      dst[2] = vnet_buffer2 (cur[2])->nat.thread_next;
      dst[3] = vnet_buffer2 (cur[3])->nat.thread_next;
    }

  /* Leftover buffers (fewer than four) */
  for (; n_remaining > 0; n_remaining--, cur++, dst++)
    dst[0] = vnet_buffer2 (cur[0])->nat.thread_next;

  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
    {
      u32 idx;

      /* Trace records are added for the leading run of traced buffers
         only; the walk ends at the first buffer that is not traced */
      for (idx = 0;
	   idx < n_total && (bufs[idx]->flags & VLIB_BUFFER_IS_TRACED);
	   idx++)
	{
	  nat44_ed_out2in_handoff_trace_t *rec =
	    vlib_add_trace (vm, node, bufs[idx], sizeof (*rec));
	  rec->thread_next = thread_indices[idx];
	}
    }

  n_queued = vlib_buffer_enqueue_to_thread (vm, fq_index, buffer_indices,
					    thread_indices, n_total, 1);

  /* Whatever did not fit into the queue is reported as congestion drops */
  if (n_queued < n_total)
    vlib_node_increment_counter (vm, node->node_index,
				 NAT44_HANDOFF_ERROR_CONGESTION_DROP,
				 n_total - n_queued);

  return n_total;
}

/*
 * Entry point of the "nat44-ed-out2in" node: runs the fast-path worker,
 * passing 1 as its last argument when more than one worker is configured
 * and 0 otherwise.  The argument is kept a literal constant at each call
 * site.
 */
VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
				     vlib_node_runtime_t * node,
				     vlib_frame_t * frame)
{
  if (!(snat_main.num_workers > 1))
    return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 0);

  return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 1);
}

/* *INDENT-OFF* */
/* Registration of the out2in fast-path graph node. */
VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
  /* identity */
  .type = VLIB_NODE_TYPE_INTERNAL,
  .name = "nat44-ed-out2in",
  .sibling_of = "nat-default",
  /* per-frame element size and per-node runtime storage */
  .vector_size = sizeof (u32),
  .runtime_data_bytes = sizeof (snat_runtime_t),
  /* error counters */
  .error_strings = nat_out2in_ed_error_strings,
  .n_errors = ARRAY_LEN (nat_out2in_ed_error_strings),
  /* packet trace formatter */
  .format_trace = format_nat44_ed_out2in_trace,
};
/* *INDENT-ON* */

/*
 * Entry point of the "nat44-ed-out2in-slowpath" node: forwards the frame
 * to the slow-path worker and returns its result.
 */
VLIB_NODE_FN (nat44_ed_out2in_slowpath_node) (vlib_main_t * vm,
					      vlib_node_runtime_t * node,
					      vlib_frame_t * frame)
{
  uword n_handled;

  n_handled = nat44_ed_out2in_slow_path_node_fn_inline (vm, node, frame);
  return n_handled;
}

/* *INDENT-OFF* */
/* Registration of the out2in slow-path graph node; it shares the trace
   formatter and error strings with the fast-path node. */
VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
  /* identity */
  .type = VLIB_NODE_TYPE_INTERNAL,
  .name = "nat44-ed-out2in-slowpath",
  .sibling_of = "nat-default",
  /* per-frame element size and per-node runtime storage */
  .vector_size = sizeof (u32),
  .runtime_data_bytes = sizeof (snat_runtime_t),
  /* error counters */
  .error_strings = nat_out2in_ed_error_strings,
  .n_errors = ARRAY_LEN (nat_out2in_ed_error_strings),
  /* packet trace formatter */
  .format_trace = format_nat44_ed_out2in_trace,
};
/* *INDENT-ON* */

/*
 * Trace formatter for the out2in handoff node.
 *
 * Expects, in order, a vlib_main_t *, a vlib_node_t * (both ignored) and
 * the trace record in args.  Appends the record's thread_next value to s
 * and returns the resulting vector.
 */
static u8 *
format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args)
{
  nat44_ed_out2in_handoff_trace_t *rec;

  /* The first two arguments are consumed but not used */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  rec = va_arg (*args, nat44_ed_out2in_handoff_trace_t *);

  s = format (s, "out2in ed handoff thread_next index %d", rec->thread_next);
  return s;
}

/*
 * Entry point of the "nat44-ed-out2in-handoff" node: runs the generic
 * handoff worker on the frame.
 *
 * NOTE(review): the value passed as the frame-queue index comes from a
 * field named ed_out2in_node_index - confirm that it really holds a
 * frame-queue index.
 */
VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm,
					     vlib_node_runtime_t * node,
					     vlib_frame_t * frame)
{
  u32 fq_index = snat_main.ed_out2in_node_index;

  return nat_handoff_node_fn_inline (vm, node, frame, fq_index);
}

/* *INDENT-OFF* */
/* Registration of the out2in handoff graph node; it declares no error
   counters of its own. */
VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = {
  /* identity */
  .type = VLIB_NODE_TYPE_INTERNAL,
  .name = "nat44-ed-out2in-handoff",
  .sibling_of = "nat-default",
  /* per-frame element size */
  .vector_size = sizeof (u32),
  .n_errors = 0,
  /* packet trace formatter */
  .format_trace = format_nat44_ed_out2in_handoff_trace,
};
/* *INDENT-ON* */

/*
 * Trace formatter for the "nat-pre-out2in" node.
 *
 * Expects, in order, a vlib_main_t *, a vlib_node_t * (both ignored) and
 * the trace record in args.  Appends the record's next_index and
 * arc_next_index to s and returns the resulting vector.
 */
static u8 *
format_nat_pre_trace (u8 * s, va_list * args)
{
  nat_pre_trace_t *rec;

  /* The first two arguments are consumed but not used */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  rec = va_arg (*args, nat_pre_trace_t *);

  s = format (s, "out2in next_index %d arc_next_index %d", rec->next_index,
	      rec->arc_next_index);
  return s;
}

/*
 * Entry point of the "nat-pre-out2in" node: runs the shared pre-node
 * worker with NAT_NEXT_OUT2IN_ED_FAST_PATH as its last argument and
 * returns its result.
 */
VLIB_NODE_FN (nat_pre_out2in_node) (vlib_main_t * vm,
				    vlib_node_runtime_t * node,
				    vlib_frame_t * frame)
{
  uword n_handled;

  n_handled = nat_pre_node_fn_inline (vm, node, frame,
				      NAT_NEXT_OUT2IN_ED_FAST_PATH);
  return n_handled;
}

/* *INDENT-OFF* */
/* Registration of the "nat-pre-out2in" graph node; it declares no error
   counters of its own. */
VLIB_REGISTER_NODE (nat_pre_out2in_node) = {
  /* identity */
  .type = VLIB_NODE_TYPE_INTERNAL,
  .name = "nat-pre-out2in",
  .sibling_of = "nat-default",
  /* per-frame element size */
  .vector_size = sizeof (u32),
  .n_errors = 0,
  /* packet trace formatter */
  .format_trace = format_nat_pre_trace,
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */