aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNitin Saxena <nitin.saxena@cavium.com>2018-02-07 11:32:00 +0000
committerDamjan Marion <dmarion.lists@gmail.com>2018-02-07 18:47:00 +0000
commitd3cb7ba2ab042cbfb580ea3f01e41e98d41d0508 (patch)
tree40b783ec13e488f93396817af40bb9eaeef6a2af
parent8cedff2f462ef9deada9aae223868a216c7aa7f2 (diff)
vhost: Added ARMV8 NEON version of function map_guest_mem()
(VPP-1085) The NEON implementation searches particular address in VHOST_MEMORY_MAX_NREGIONS regions. Searching two regions at a time. Change-Id: Icc3c6746bc98e3a1fa71424e51b64f62efbfdc74 Signed-off-by: Nitin Saxena <nitin.saxena@cavium.com>
-rw-r--r--src/vnet/devices/virtio/vhost-user.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 9c93ef4ebfb..be3405488fe 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -256,7 +256,76 @@ map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
return (void *) (vui->region_mmap_addr[i] + addr -
vui->regions[i].guest_phys_addr);
}
+#elif __aarch64__ && __ARM_NEON
+ uint64x2_t al, ah, rl, rh, r;
+ uint32_t u32 = 0;
+ al = vdupq_n_u64 (addr + 1);
+ ah = vdupq_n_u64 (addr);
+
+ /*First Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
+
+ if (u32)
+ {
+ i = __builtin_ctzll (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+ /*Second Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
+
+ if (u32)
+ {
+ i = __builtin_ctzll (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+ /*Third Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 4);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 5);
+
+ if (u32)
+ {
+ i = __builtin_ctzll (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+ /*Fourth Iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[6]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[6]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
+
+ i = __builtin_ctzll (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
+
+vhost_map_guest_mem_done:
+ if (i < vui->nregions)
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
#else
for (i = 0; i < vui->nregions; i++)
{