1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
Description: eal: sPAPR IOMMU support in pci probing for vfio-pci in ppc64le
From: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
Below changes adds pci probing support for vfio-pci devices in power8.
v3 - better validation for kernel not implementing few iocts called
v2 - kernel version checked and doc updated
Signed-off-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
Note: can be dropped >=DPDK 17.05
Forwarded: yes, http://dpdk.org/dev/patchwork/patch/21482/
Original-Author: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
Bug-Ubuntu: https://bugs.launchpad.net/ubuntu/+source/dpdk/+bug/1670689
Author: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Last-Update: 2017-03-07
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -50,12 +50,15 @@
static struct vfio_config vfio_cfg;
static int vfio_type1_dma_map(int);
+static int vfio_spapr_dma_map(int);
static int vfio_noiommu_dma_map(int);
/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
/* x86 IOMMU, otherwise known as type 1 */
{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ /* ppc64 IOMMU, otherwise known as spapr */
+ { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
/* IOMMU-less mode */
{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
};
@@ -537,6 +540,93 @@
}
return 0;
+}
+
+static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ int i, ret;
+
+ struct vfio_iommu_spapr_register_memory reg = {
+ .argsz = sizeof(reg),
+ .flags = 0
+ };
+ struct vfio_iommu_spapr_tce_info info = {
+ .argsz = sizeof(info),
+ };
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
+ };
+
+ /* query spapr iommu info */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot get iommu info, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* remove default DMA of 32 bit window */
+ remove.start_addr = info.dma32_window_start;
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* calculate window size based on number of hugepages configured */
+ create.window_size = rte_eal_get_physmem_size();
+ create.page_shift = __builtin_ctzll(ms->hugepage_sz);
+ create.levels = 2;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot create new DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ struct vfio_iommu_type1_dma_map dma_map;
+
+ if (ms[i].addr == NULL)
+ break;
+
+ reg.vaddr = (uintptr_t) ms[i].addr;
+ reg.size = ms[i].len;
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_REGISTER_MEMORY, ®);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = ms[i].addr_64;
+ dma_map.size = ms[i].len;
+ dma_map.iova = ms[i].phys_addr;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ }
+
+ return 0;
}
static int
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -54,6 +54,31 @@
#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
+#ifndef VFIO_SPAPR_TCE_v2_IOMMU
+#define RTE_VFIO_SPAPR 7
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
+#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
+struct vfio_iommu_spapr_register_memory {
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t vaddr;
+ uint64_t size;
+};
+struct vfio_iommu_spapr_tce_create {
+ uint32_t argsz;
+ uint32_t page_shift;
+ uint64_t window_size;
+ uint32_t levels;
+};
+struct vfio_iommu_spapr_tce_remove {
+ uint32_t argsz;
+ uint64_t start_addr;
+};
+#else
+#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU
+#endif
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
#define RTE_VFIO_NOIOMMU 8
#else
|