aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/ioam/ip6/ioam_cache.h
blob: e75deba73f7a98e3f056275e64984e15f12b1cd1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __included_ioam_cache_h__
#define __included_ioam_cache_h__

#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ip/ip_packet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/srv6/sr.h>

#include <vppinfra/pool.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
#include <vppinfra/bihash_8_8.h>
#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
#include <vppinfra/tw_timer_16t_2w_512sl.h>
/*
 * ioam_cache.h
 * This header contains routines for caching of ioam header and
 * buffer:
 * 1 - On application facing node: to cache ioam header recvd
 *     in request and reattach in response to provide round
 *     trip path visibility. Since request response matching
 *     is needed works with TCP and relies on (5 tuples,seq no)
 * 2 - On M-Anycast server node: This node replicates requests
 *    towards multiple anycast service nodes serving anycast
 *    IP6 address. It evaluates response and forwards the best
 *    response towards the client of requesting the service.
 *    Again since request-response matching is needed, works
 *    with TCP  and relies on (5 tuples,seq no) for matching.
 *    To do this it caches SYN-ACK responses for a short time to
 *    evaluate multiple responses received before the selected
 *    SYN-ACK response is forwared and others dropped.
 *
 * M-Anycast server cache:
 *   - There is a pool of cache entries per worker thread.
 *   - Cache entry is created when SYN is received expected
 *     number of responses are marked based on number of
 *     SR tunnels for the anycast destination address
 *   - The pool/thread id and pool index are attached in the
 *    message as an ioam option for quick look up.
 *   - When is received SYN-ACK the ioam option containing
 *     thread id + pool index of the cache entry is used to
 *     look up cache entry.
 *   - Cache synchronization:
 *      - This is achieved by cache entry add/del/update all handled
 *        by the same worker/main thread
 *      - Packets from client to threads - syn packets, can be disctributed
 *        based on incoming interface affinity to the cpu core pinned to
 *        the thread or a simple sequence number based distribution
 *        if thread per interface is not scaling
 *      - Response packets from server towards clients - syn-acks, are
 *        forced to the same thread that created the cache entry
 *        using SR and the destination of SR v6 address assigned
 *        to the core/thread. This adderss is sent as an ioam option
 *        in the syn that can be then used on the other side to
 *        populate v6 dst address in the response
 *      - Timeout: timer wheel per thread is used to track the syn-ack wait
 *        time. The timer wheel tick is updated via an input node per thread.
 *
 * Application facing node/Service side cache:
 *  - Single pool of cache entries.
 *  - Cache entry is created when SYN is received. Caches the ioam
 *    header. Hash table entry is created based on 5 tuple and
 *    TCP seq no to pool index
 *  - Response SYN-ACK processed by looking up pool index in hash table
 *    and cache entry in the pool is used to get the ioam header rewrite
 *    string. Entry is freed from pool and hash table after use.
 *  - Locking/Synchronization: Currently this functionality is deployed
 *    with main/single thread only. Hence no locking is used.
 *  - Deployment: A VPP node per application server servicing anycast
 *    address is expected. Locking/synchronization needed when the server
 *    /application facing node is started with multiple worker threads.
 *
 */

/*
 * Application facing server side caching:
 * Cache entry for ioam header
 * Currently caters to TCP and relies on
 * TCP - 5 tuples + seqno to cache and reinsert
 * ioam header b/n TCP request response
 */
typedef struct
{
  /** Required for pool_get_aligned */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  ip6_address_t src_address;
  ip6_address_t dst_address;
  u16 src_port;
  u16 dst_port;
  u8 protocol;
  u32 seq_no;
  ip6_address_t next_hop;
  u16 my_address_offset;
  u8 *ioam_rewrite_string;
} ioam_cache_entry_t;

/*
 * Cache entry for anycast server selection
 * Works for TCP as 5 tuple + sequence number
 * is required for request response matching
 * max_responses expected is set based on number
 *              of SR tunnels for the dst_address
 * Timeout or all response_received = max_responses
 *            will clear the entry
 * buffer_index index of the response msg vlib buffer
 *           that is currently the best response
 */
typedef struct
{
  /** Required for pool_get_aligned */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  u32 pool_id;
  u32 pool_index;
  ip6_address_t src_address;
  ip6_address_t dst_address;
  u16 src_port;
  u16 dst_port;
  u8 protocol;
  u32 seq_no;
  u32 buffer_index;
  ip6_hop_by_hop_header_t *hbh;	//pointer to hbh header in the buffer
  u64 created_at;
  u8 response_received;
  u8 max_responses;
  u32 stop_timer_handle;
  /** Handle returned from tw_start_timer */
  u32 timer_handle;
  /** entry should expire at this clock tick */
  u32 expected_to_expire;
} ioam_cache_ts_entry_t;

/*
 * Per thread tunnel selection cache stats
 */
typedef struct
{
  u64 inuse;
  u64 add_failed;
} ioam_cache_ts_pool_stats_t;

/* Server side: iOAM header caching */
#define MAX_CACHE_ENTRIES 4096
/* M-Anycast: Cache for SR tunnel selection */
#define MAX_CACHE_TS_ENTRIES 1048576

#define IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS (4 * 1024)
#define IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE (2<<20)

typedef struct
{
  /* API message ID base */
  u16 msg_id_base;

  /* Pool of ioam_cache_buffer_t */
  ioam_cache_entry_t *ioam_rewrite_pool;

  /* For steering packets ioam cache entry is followed by
   * SR header. This is the SR rewrite template */
  u8 *sr_rewrite_template;
  /* The current rewrite string being used */
  u8 *rewrite;
  u8 rewrite_pool_index_offset;
  ip6_address_t sr_localsid_cache;

  u64 lookup_table_nbuckets;
  u64 lookup_table_size;
  clib_bihash_8_8_t ioam_rewrite_cache_table;

  /* M-Anycast: Pool of ioam_cache_ts_entry_t per thread */
  ioam_cache_ts_entry_t **ioam_ts_pool;
  ioam_cache_ts_pool_stats_t *ts_stats;
  /** per thread single-wheel */
  tw_timer_wheel_16t_2w_512sl_t *timer_wheels;

  /*
   * Selection criteria: oneway delay: Server to M-Anycast
   * or RTT
   */
  bool criteria_oneway;
  u8 wait_for_responses;
  ip6_address_t sr_localsid_ts;

  /* convenience */
  vlib_main_t *vlib_main;

  uword cache_hbh_slot;
  uword ts_hbh_slot;
  u32 ip6_hbh_pop_node_index;
  u32 error_node_index;
  u32 cleanup_process_node_index;
} ioam_cache_main_t;

extern ioam_cache_main_t ioam_cache_main;

extern vlib_node_registration_t ioam_cache_node;
extern vlib_node_registration_t ioam_cache_ts_node;

/*  Compute flow hash.  We'll use it to select which Sponge to use for this
 *  flow.  And other things.
 *  ip6_compute_flow_hash in ip6.h doesnt locate tcp/udp when
 *  ext headers are present. While it could be made to it will be a
 *  performance hit for ECMP flows.
 *  HEnce this function here, with L4 information directly input
 *  Useful when tcp/udp headers are already located in presence of
 *  ext headers
 */
always_inline u32
ip6_compute_flow_hash_ext (const ip6_header_t * ip,
			   u8 protocol,
			   u16 src_port,
			   u16 dst_port, flow_hash_config_t flow_hash_config)
{
  u64 a, b, c;
  u64 t1, t2;

  t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
  t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0;

  t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]);
  t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0;

  a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
  b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
  b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? protocol : 0;

  t1 = src_port;
  t2 = dst_port;

  t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
  t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;

  c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
    ((t1 << 16) | t2) : ((t2 << 16) | t1);

  hash_mix64 (a, b, c);
  return (u32) c;
}


/* 2 new ioam E2E options :
 * 1. HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID: IP6 address
 *                of ioam node that inserted ioam header
 * 2. HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID: Pool id and index
 *                   to look up tunnel select cache entry
 */
#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID 30
#define HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID 31

typedef CLIB_PACKED (struct
		     {
		     ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 reserved[5];
		     ip6_address_t id;
		     }) ioam_e2e_id_option_t;

typedef CLIB_PACKED (struct
		     {
		     ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 pool_id;
		     u32 pool_index;
		     }) ioam_e2e_cache_option_t;

#define IOAM_E2E_ID_OPTION_RND ((sizeof(ioam_e2e_id_option_t) + 7) & ~7)
#define IOAM_E2E_ID_HBH_EXT_LEN (IOAM_E2E_ID_OPTION_RND >> 3)
#define IOAM_E2E_CACHE_OPTION_RND ((sizeof(ioam_e2e_cache_option_t) + 7) & ~7)
#define IOAM_E2E_CACHE_HBH_EXT_LEN (IOAM_E2E_CACHE_OPTION_RND >> 3)

static inline void
ioam_e2e_id_rewrite_handler (ioam_e2e_id_option_t * e2e_option,
			     ip6_address_t * address)
{
  e2e_option->id.as_u64[0] = address->as_u64[0];
  e2e_option->id.as_u64[1] = address->as_u64[1];

}

/* Following functions are for the caching of ioam header
 * to enable reattaching it for a complete request-response
 * message exchange */
inline static void
ioam_cache_entry_free (ioam_cache_entry_t * entry)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  if (entry)
    {
      vec_free (entry->ioam_rewrite_string);
      clib_memset (entry, 0, sizeof (*entry));
      pool_put (cm->ioam_rewrite_pool, entry);
    }
}

inline static ioam_cache_entry_t *
ioam_cache_entry_cleanup (u32 pool_index)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_entry_t *entry = 0;

  entry = pool_elt_at_index (cm->ioam_rewrite_pool, pool_index);
  ioam_cache_entry_free (entry);
  return (0);
}

inline static ioam_cache_entry_t *
ioam_cache_lookup (ip6_header_t * ip0, u16 src_port, u16 dst_port, u32 seq_no)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  u32 flow_hash = ip6_compute_flow_hash_ext (ip0, ip0->protocol,
					     src_port, dst_port,
					     IP_FLOW_HASH_DEFAULT |
					     IP_FLOW_HASH_REVERSE_SRC_DST);
  clib_bihash_kv_8_8_t kv, value;

  kv.key = (u64) flow_hash << 32 | seq_no;
  kv.value = 0;
  value.key = 0;
  value.value = 0;

  if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >=
      0)
    {
      ioam_cache_entry_t *entry = 0;

      entry = pool_elt_at_index (cm->ioam_rewrite_pool, value.value);
      /* match */
      if (ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 &&
	  ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 &&
	  entry->src_port == dst_port &&
	  entry->dst_port == src_port && entry->seq_no == seq_no)
	{
	  /* If lookup is successful remove it from the hash */
	  clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 0);
	  return (entry);
	}
      else
	return (0);

    }
  return (0);
}

/*
 * Caches ioam hbh header
 * Extends the hbh header with option to contain IP6 address of the node
 * that caches it
 */
inline static int
ioam_cache_add (vlib_buffer_t * b0,
		ip6_header_t * ip0,
		u16 src_port,
		u16 dst_port, ip6_hop_by_hop_header_t * hbh0, u32 seq_no)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_entry_t *entry = 0;
  u32 rewrite_len = 0, e2e_id_offset = 0;
  u32 pool_index = 0;
  ioam_e2e_id_option_t *e2e = 0;

  pool_get_aligned (cm->ioam_rewrite_pool, entry, CLIB_CACHE_LINE_BYTES);
  clib_memset (entry, 0, sizeof (*entry));
  pool_index = entry - cm->ioam_rewrite_pool;

  clib_memcpy_fast (entry->dst_address.as_u64, ip0->dst_address.as_u64,
		    sizeof (ip6_address_t));
  clib_memcpy_fast (entry->src_address.as_u64, ip0->src_address.as_u64,
		    sizeof (ip6_address_t));
  entry->src_port = src_port;
  entry->dst_port = dst_port;
  entry->seq_no = seq_no;
  rewrite_len = ((hbh0->length + 1) << 3);
  vec_validate (entry->ioam_rewrite_string, rewrite_len - 1);
  e2e = ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
  if (e2e)
    {
      entry->next_hop.as_u64[0] = e2e->id.as_u64[0];
      entry->next_hop.as_u64[1] = e2e->id.as_u64[1];
    }
  else
    {
      return (-1);
    }
  e2e_id_offset = (u8 *) e2e - (u8 *) hbh0;
  /* setup e2e id option to insert v6 address of the node caching it */
  clib_memcpy_fast (entry->ioam_rewrite_string, hbh0, rewrite_len);
  hbh0 = (ip6_hop_by_hop_header_t *) entry->ioam_rewrite_string;

  /* suffix rewrite string with e2e ID option */
  e2e = (ioam_e2e_id_option_t *) (entry->ioam_rewrite_string + e2e_id_offset);
  ioam_e2e_id_rewrite_handler (e2e, &cm->sr_localsid_cache);
  entry->my_address_offset = (u8 *) (&e2e->id) - (u8 *) hbh0;

  /* add it to hash, replacing and freeing any collision for now */
  u32 flow_hash =
    ip6_compute_flow_hash_ext (ip0, hbh0->protocol, src_port, dst_port,
			       IP_FLOW_HASH_DEFAULT);
  clib_bihash_kv_8_8_t kv, value;
  kv.key = (u64) flow_hash << 32 | seq_no;
  kv.value = 0;
  if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >=
      0)
    {
      /* replace */
      ioam_cache_entry_cleanup (value.value);
    }
  kv.value = pool_index;
  clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 1);
  return (0);
}

/* Creates SR rewrite string
 * This is appended with ioam header on the server facing
 * node.
 * This SR header is necessary to attract packets towards
 * selected Anycast server.
 */
inline static void
ioam_cache_sr_rewrite_template_create (void)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ip6_address_t *segments = 0;
  ip6_address_t *this_seg = 0;

  /* This nodes address and the original dest will be
   * filled when the packet is processed */
  vec_add2 (segments, this_seg, 1);
  clib_memset (this_seg, 0xfe, sizeof (ip6_address_t));
  cm->sr_rewrite_template = ip6_sr_compute_rewrite_string_insert (segments);
  vec_free (segments);
}

inline static int
ioam_cache_table_init (vlib_main_t * vm)
{
  ioam_cache_main_t *cm = &ioam_cache_main;

  pool_alloc_aligned (cm->ioam_rewrite_pool,
		      MAX_CACHE_ENTRIES, CLIB_CACHE_LINE_BYTES);
  cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS;
  cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets);
  cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE;

  clib_bihash_init_8_8 (&cm->ioam_rewrite_cache_table,
			"ioam rewrite cache table",
			cm->lookup_table_nbuckets, cm->lookup_table_size);
  /* Create SR rewrite template */
  ioam_cache_sr_rewrite_template_create ();
  return (1);
}

inline static int
ioam_cache_table_destroy (vlib_main_t * vm)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_entry_t *entry = 0;
  /* free pool and hash table */
  clib_bihash_free_8_8 (&cm->ioam_rewrite_cache_table);
  pool_foreach (entry, cm->ioam_rewrite_pool, (
						{
						ioam_cache_entry_free (entry);
						}));
  pool_free (cm->ioam_rewrite_pool);
  cm->ioam_rewrite_pool = 0;
  vec_free (cm->sr_rewrite_template);
  cm->sr_rewrite_template = 0;
  return (0);
}

inline static u8 *
format_ioam_cache_entry (u8 * s, va_list * args)
{
  ioam_cache_entry_t *e = va_arg (*args, ioam_cache_entry_t *);
  ioam_cache_main_t *cm = &ioam_cache_main;
  int rewrite_len = vec_len (e->ioam_rewrite_string);

  s = format (s, "%d: %U:%d to  %U:%d seq_no %lu\n",
	      (e - cm->ioam_rewrite_pool),
	      format_ip6_address, &e->src_address,
	      e->src_port,
	      format_ip6_address, &e->dst_address, e->dst_port, e->seq_no);

  if (rewrite_len)
    {
      s = format (s, "  %U",
		  format_ip6_hop_by_hop_ext_hdr,
		  (ip6_hop_by_hop_header_t *) e->ioam_rewrite_string,
		  rewrite_len - 1);
    }
  return s;
}

void ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable);

#define IOAM_CACHE_TS_TIMEOUT 1.0	//SYN timeout 1 sec
#define IOAM_CACHE_TS_TICK 100e-3
/* Timer delays as multiples of 100ms */
#define IOAM_CACHE_TS_TIMEOUT_TICKS IOAM_CACHE_TS_TICK*9
#define TIMER_HANDLE_INVALID ((u32) ~0)


void expired_cache_ts_timer_callback (u32 * expired_timers);

/*
 * Following functions are to manage M-Anycast server selection
 * cache
 * There is a per worker thread pool to create a cache entry
 * for a TCP SYN received. TCP SYN-ACK contians ioam header
 * with HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID option to point to the
 * entry.
 */
inline static int
ioam_cache_ts_table_init (vlib_main_t * vm)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  int no_of_threads = vec_len (vlib_worker_threads);
  int i;

  vec_validate_aligned (cm->ioam_ts_pool, no_of_threads - 1,
			CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (cm->ts_stats, no_of_threads - 1,
			CLIB_CACHE_LINE_BYTES);
  vec_validate (cm->timer_wheels, no_of_threads - 1);
  cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS;
  cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets);
  cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE;
  for (i = 0; i < no_of_threads; i++)
    {
      pool_alloc_aligned (cm->ioam_ts_pool[i],
			  MAX_CACHE_TS_ENTRIES, CLIB_CACHE_LINE_BYTES);
      clib_memset (&cm->ts_stats[i], 0, sizeof (ioam_cache_ts_pool_stats_t));
      tw_timer_wheel_init_16t_2w_512sl (&cm->timer_wheels[i],
					expired_cache_ts_timer_callback,
					IOAM_CACHE_TS_TICK
					/* timer period 100ms */ ,
					10e4);
      cm->timer_wheels[i].last_run_time = vlib_time_now (vm);
    }
  ioam_cache_ts_timer_node_enable (vm, 1);
  return (1);
}

always_inline void
ioam_cache_ts_timer_set (ioam_cache_main_t * cm,
			 ioam_cache_ts_entry_t * entry, u32 interval)
{
  entry->timer_handle
    = tw_timer_start_16t_2w_512sl (&cm->timer_wheels[entry->pool_id],
				   entry->pool_index, 1, interval);
}

always_inline void
ioam_cache_ts_timer_reset (ioam_cache_main_t * cm,
			   ioam_cache_ts_entry_t * entry)
{
  tw_timer_stop_16t_2w_512sl (&cm->timer_wheels[entry->pool_id],
			      entry->timer_handle);
  entry->timer_handle = TIMER_HANDLE_INVALID;
}

inline static void
ioam_cache_ts_entry_free (u32 thread_id,
			  ioam_cache_ts_entry_t * entry, u32 node_index)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  vlib_main_t *vm = cm->vlib_main;
  vlib_frame_t *nf = 0;
  u32 *to_next;

  if (entry)
    {
      if (entry->hbh != 0)
	{
	  nf = vlib_get_frame_to_node (vm, node_index);
	  nf->n_vectors = 0;
	  to_next = vlib_frame_vector_args (nf);
	  nf->n_vectors = 1;
	  to_next[0] = entry->buffer_index;
	  vlib_put_frame_to_node (vm, node_index, nf);
	}
      pool_put (cm->ioam_ts_pool[thread_id], entry);
      cm->ts_stats[thread_id].inuse--;
      clib_memset (entry, 0, sizeof (*entry));
    }
}

inline static int
ioam_cache_ts_table_destroy (vlib_main_t * vm)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_ts_entry_t *entry = 0;
  int no_of_threads = vec_len (vlib_worker_threads);
  int i;

  /* free pool and hash table */
  for (i = 0; i < no_of_threads; i++)
    {
      pool_foreach (entry, cm->ioam_ts_pool[i], (
						  {
						  ioam_cache_ts_entry_free (i,
									    entry,
									    cm->error_node_index);
						  }
		    ));
      pool_free (cm->ioam_ts_pool[i]);
      cm->ioam_ts_pool = 0;
      tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
    }
  vec_free (cm->ioam_ts_pool);
  return (0);
}

inline static int
ioam_cache_ts_entry_cleanup (u32 thread_id, u32 pool_index)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_ts_entry_t *entry = 0;

  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
  ioam_cache_ts_entry_free (thread_id, entry, cm->error_node_index);
  return (0);
}

/*
 * Caches buffer for ioam SR tunnel select for Anycast service
 */
inline static int
ioam_cache_ts_add (ip6_header_t * ip0,
		   u16 src_port,
		   u16 dst_port,
		   u32 seq_no,
		   u8 max_responses, u64 now, u32 thread_id, u32 * pool_index)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_ts_entry_t *entry = 0;

  if (cm->ts_stats[thread_id].inuse == MAX_CACHE_TS_ENTRIES)
    {
      cm->ts_stats[thread_id].add_failed++;
      return (-1);
    }

  pool_get_aligned (cm->ioam_ts_pool[thread_id], entry,
		    CLIB_CACHE_LINE_BYTES);
  clib_memset (entry, 0, sizeof (*entry));
  *pool_index = entry - cm->ioam_ts_pool[thread_id];

  clib_memcpy_fast (entry->dst_address.as_u64, ip0->dst_address.as_u64,
		    sizeof (ip6_address_t));
  clib_memcpy_fast (entry->src_address.as_u64, ip0->src_address.as_u64,
		    sizeof (ip6_address_t));
  entry->src_port = src_port;
  entry->dst_port = dst_port;
  entry->seq_no = seq_no;
  entry->response_received = 0;
  entry->max_responses = max_responses;
  entry->created_at = now;
  entry->hbh = 0;
  entry->buffer_index = 0;
  entry->pool_id = thread_id;
  entry->pool_index = *pool_index;
  ioam_cache_ts_timer_set (cm, entry, IOAM_CACHE_TS_TIMEOUT);
  cm->ts_stats[thread_id].inuse++;
  return (0);
}

inline static void
ioam_cache_ts_send (u32 thread_id, i32 pool_index)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_ts_entry_t *entry = 0;

  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
  if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry)
    {
      /* send and free pool entry */
      ioam_cache_ts_entry_free (thread_id, entry, cm->ip6_hbh_pop_node_index);
    }
}

inline static void
ioam_cache_ts_check_and_send (u32 thread_id, i32 pool_index)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_ts_entry_t *entry = 0;
  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
  if (entry && entry->hbh)
    {
      if (entry->response_received == entry->max_responses ||
	  entry->created_at + IOAM_CACHE_TS_TIMEOUT <=
	  vlib_time_now (cm->vlib_main))
	{
	  ioam_cache_ts_timer_reset (cm, entry);
	  ioam_cache_ts_send (thread_id, pool_index);
	}
    }
}

inline static int
ioam_cache_ts_update (u32 thread_id,
		      i32 pool_index,
		      u32 buffer_index, ip6_hop_by_hop_header_t * hbh)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_cache_ts_entry_t *entry = 0;
  vlib_main_t *vm = cm->vlib_main;
  vlib_frame_t *nf = 0;
  u32 *to_next;

  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
  if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry)
    {
      /* drop existing buffer */
      if (entry->hbh != 0)
	{
	  nf = vlib_get_frame_to_node (vm, cm->error_node_index);
	  nf->n_vectors = 0;
	  to_next = vlib_frame_vector_args (nf);
	  nf->n_vectors = 1;
	  to_next[0] = entry->buffer_index;
	  vlib_put_frame_to_node (vm, cm->error_node_index, nf);
	}
      /* update */
      entry->buffer_index = buffer_index;
      entry->hbh = hbh;
      /* check and send */
      ioam_cache_ts_check_and_send (thread_id, pool_index);
      return (0);
    }
  return (-1);
}

/*
 * looks up the entry based on the e2e option pool index
 * result = 0 found the entry
 * result < 0 indicates failture to find an entry
 */
inline static int
ioam_cache_ts_lookup (ip6_header_t * ip0,
		      u8 protocol,
		      u16 src_port,
		      u16 dst_port,
		      u32 seq_no,
		      ip6_hop_by_hop_header_t ** hbh,
		      u32 * pool_index, u8 * thread_id, u8 response_seen)
{
  ioam_cache_main_t *cm = &ioam_cache_main;
  ip6_hop_by_hop_header_t *hbh0 = 0;
  ioam_e2e_cache_option_t *e2e = 0;

  hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
  e2e =
    (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset);
  if ((u8 *) e2e < ((u8 *) hbh0 + ((hbh0->length + 1) << 3))
      && e2e->hdr.type == HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID)
    {
      ioam_cache_ts_entry_t *entry = 0;
      *pool_index = e2e->pool_index;
      *thread_id = e2e->pool_id;
      entry = pool_elt_at_index (cm->ioam_ts_pool[*thread_id], *pool_index);
      /* match */
      if (entry &&
	  ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 &&
	  ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 &&
	  entry->src_port == dst_port &&
	  entry->dst_port == src_port && entry->seq_no == seq_no)
	{
	  *hbh = entry->hbh;
	  entry->response_received += response_seen;
	  return (0);
	}
      else if (entry)
	{
	  return (-1);
	}
    }
  return (-1);
}

inline static u8 *
format_ioam_cache_ts_entry (u8 * s, va_list * args)
{
  ioam_cache_ts_entry_t *e = va_arg (*args, ioam_cache_ts_entry_t *);
  u32 thread_id = va_arg (*args, u32);
  ioam_cache_main_t *cm = &ioam_cache_main;
  ioam_e2e_id_option_t *e2e = 0;
  vlib_main_t *vm = cm->vlib_main;
  clib_time_t *ct = &vm->clib_time;

  if (!e)
    goto end;

  if (e->hbh)
    {
      e2e =
	ip6_ioam_find_hbh_option (e->hbh,
				  HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);

      s =
	format (s,
		"%d: %U:%d to  %U:%d seq_no %u buffer %u %U \n\t\tCreated at %U Received %d\n",
		(e - cm->ioam_ts_pool[thread_id]), format_ip6_address,
		&e->src_address, e->src_port, format_ip6_address,
		&e->dst_address, e->dst_port, e->seq_no, e->buffer_index,
		format_ip6_address, e2e ? &e2e->id : 0, format_time_interval,
		"h:m:s:u",
		(e->created_at -
		 vm->cpu_time_main_loop_start) * ct->seconds_per_clock,
		e->response_received);
    }
  else
    {
      s =
	format (s,
		"%d: %U:%d to  %U:%d seq_no %u Buffer %u \n\t\tCreated at %U Received %d\n",
		(e - cm->ioam_ts_pool[thread_id]), format_ip6_address,
		&e->src_address, e->src_port, format_ip6_address,
		&e->dst_address, e->dst_port, e->seq_no, e->buffer_index,
		format_time_interval, "h:m:s:u",
		(e->created_at -
		 vm->cpu_time_main_loop_start) * ct->seconds_per_clock,
		e->response_received);
    }

end:
  return s;
}

/*
 * Get extended rewrite string for iOAM data in v6
 * This makes space for an e2e options to carry cache pool info
 * and manycast server address.
 * It set the rewrite string per configs in ioam ip6 + new option
 * for cache along with offset to the option to populate cache
 * pool id and index
 */
static inline int
ip6_ioam_ts_cache_set_rewrite (void)
{
  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
  ioam_cache_main_t *cm = &ioam_cache_main;
  ip6_hop_by_hop_header_t *hbh;
  u32 rewrite_len = 0;
  ioam_e2e_cache_option_t *e2e = 0;
  ioam_e2e_id_option_t *e2e_id = 0;

  vec_free (cm->rewrite);
  ip6_ioam_set_rewrite (&(cm->rewrite), hm->has_trace_option,
			hm->has_pot_option, hm->has_seqno_option);
  hbh = (ip6_hop_by_hop_header_t *) cm->rewrite;
  rewrite_len = ((hbh->length + 1) << 3);
  vec_validate (cm->rewrite,
		rewrite_len - 1 + IOAM_E2E_CACHE_OPTION_RND +
		IOAM_E2E_ID_OPTION_RND);
  hbh = (ip6_hop_by_hop_header_t *) cm->rewrite;
  /* setup e2e id option to insert pool id and index of the node caching it */
  hbh->length += IOAM_E2E_CACHE_HBH_EXT_LEN + IOAM_E2E_ID_HBH_EXT_LEN;
  cm->rewrite_pool_index_offset = rewrite_len;
  e2e = (ioam_e2e_cache_option_t *) (cm->rewrite + rewrite_len);
  e2e->hdr.type = HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID
    | HBH_OPTION_TYPE_SKIP_UNKNOWN;
  e2e->hdr.length = sizeof (ioam_e2e_cache_option_t) -
    sizeof (ip6_hop_by_hop_option_t);
  e2e->e2e_type = 2;
  e2e_id =
    (ioam_e2e_id_option_t *) ((u8 *) e2e + sizeof (ioam_e2e_cache_option_t));
  e2e_id->hdr.type =
    HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID | HBH_OPTION_TYPE_SKIP_UNKNOWN;
  e2e_id->hdr.length =
    sizeof (ioam_e2e_id_option_t) - sizeof (ip6_hop_by_hop_option_t);
  e2e_id->e2e_type = 1;

  return (0);
}

static inline int
ip6_ioam_ts_cache_cleanup_rewrite (void)
{
  ioam_cache_main_t *cm = &ioam_cache_main;

  vec_free (cm->rewrite);
  cm->rewrite = 0;
  cm->rewrite_pool_index_offset = 0;
  return (0);
}
#endif /* __included_ioam_cache_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */