diff options
author | Damjan Marion <damarion@cisco.com> | 2021-10-28 12:02:15 +0200 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2021-10-28 15:54:25 +0000 |
commit | e6709ff37dc0f3a58ed5ad98aace73fe801f1e9d (patch) | |
tree | d71e12113f9eb31d789d059aa4073b07a44409fb | |
parent | 48c0534c2eafe23fe8efba8c913109f30f6a294c (diff) |
ip: improve csum fold on x86_64
The new code appears to be 1.5 clock cycles faster.
Old code (compiler-generated mov/shift/add sequence):
mov eax,edi
shr rdi,0x20
add rdi,rax
movzx edx,di
shr rdi,0x10
add rdx,rdi
movzx eax,dx
shr rdx,0x10
add rax,rdx
mov rdx,rax
shr rdx,0x10
add eax,edx
New code (ADC-based sequence):
mov rax,rdi
shr rax,0x20
add eax,edi
mov edi,0x10
shrx edi,eax,edi
adc ax,di
adc ax,0x0
Type: improvement
Change-Id: I3c565812c67ff4c3db197a9d4137a6c131b5b66c
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- | src/vnet/ip/ip_packet.h | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h index b0b5f41260c..d862caa3a52 100644 --- a/src/vnet/ip/ip_packet.h +++ b/src/vnet/ip/ip_packet.h @@ -301,6 +301,20 @@ always_inline u16 ip_csum_fold (ip_csum_t c) { /* Reduce to 16 bits. */ +#ifdef __x86_64__ + u64 tmp; + asm volatile( + /* using ADC is much faster than mov, shift, add sequence + * compiler produces */ + "mov %k[sum], %k[tmp] \n\t" + "shr $32, %[sum] \n\t" + "add %k[tmp], %k[sum] \n\t" + "mov $16, %k[tmp] \n\t" + "shrx %k[tmp], %k[sum], %k[tmp] \n\t" + "adc %w[tmp], %w[sum] \n\t" + "adc $0, %w[sum] \n\t" + : [ sum ] "+&r"(c), [ tmp ] "=&r"(tmp)); +#else #if uword_bits == 64 c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32); c = (c & 0xffff) + (c >> 16); @@ -308,7 +322,7 @@ ip_csum_fold (ip_csum_t c) c = (c & 0xffff) + (c >> 16); c = (c & 0xffff) + (c >> 16); - +#endif return c; } |