diff options
-rw-r--r-- | netlink/librtnl/mapper.c | 4 | ||||
-rw-r--r-- | netlink/librtnl/netns.c | 297 | ||||
-rw-r--r-- | netlink/librtnl/netns.h | 7 | ||||
-rw-r--r-- | netlink/librtnl/rtnl.c | 265 | ||||
-rw-r--r-- | router/README.md | 26 | ||||
-rw-r--r-- | router/router/tap_inject_netlink.c | 180 |
6 files changed, 446 insertions, 333 deletions
diff --git a/netlink/librtnl/mapper.c b/netlink/librtnl/mapper.c index 65cc13a..05b7f1d 100644 --- a/netlink/librtnl/mapper.c +++ b/netlink/librtnl/mapper.c @@ -97,7 +97,7 @@ int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del) FIB_ENTRY_FLAG_NONE, prefix.fp_proto, &nh, map->sw_if_index, ns->v6fib_index, 0 /* weight */, - (mpls_label_t *) MPLS_LABEL_INVALID, + (fib_mpls_label_t *) MPLS_LABEL_INVALID, FIB_ROUTE_PATH_FLAG_NONE); #endif /* FIB_VERSION == 1 */ } else { @@ -128,7 +128,7 @@ int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del) FIB_ENTRY_FLAG_NONE, prefix.fp_proto, &nh, map->sw_if_index, ns->v4fib_index, 0 /* weight */, - (mpls_label_t *) MPLS_LABEL_INVALID, + (fib_mpls_label_t *) MPLS_LABEL_INVALID, FIB_ROUTE_PATH_FLAG_NONE); #endif /* FIB_VERSION == 1 */ } diff --git a/netlink/librtnl/netns.c b/netlink/librtnl/netns.c index 6af2645..1b40227 100644 --- a/netlink/librtnl/netns.c +++ b/netlink/librtnl/netns.c @@ -23,10 +23,10 @@ /* Enable some RTA values debug */ //#define RTNL_CHECK -#define is_nonzero(x) \ - ({ \ - u8 __is_zero_zero[sizeof(x)] = {}; \ - memcmp(__is_zero_zero, &x, sizeof(x)); \ +#define is_nonzero(x) \ + ({ \ + u8 __is_zero_zero[sizeof(x)] = {}; \ + memcmp(__is_zero_zero, &x, sizeof(x)); \ }) typedef struct { @@ -36,22 +36,22 @@ typedef struct { u16 size; //Length of the attribute } rtnl_mapping_t; -#define ns_foreach_ifla \ - _(IFLA_ADDRESS, hwaddr) \ - _(IFLA_BROADCAST, broadcast) \ - _(IFLA_IFNAME, ifname) \ - _(IFLA_MASTER, master) \ - _(IFLA_MTU, mtu) \ +#define ns_foreach_ifla \ + _(IFLA_ADDRESS, hwaddr) \ + _(IFLA_BROADCAST, broadcast) \ + _(IFLA_IFNAME, ifname) \ + _(IFLA_MASTER, master) \ + _(IFLA_MTU, mtu) \ _(IFLA_QDISC, qdisc) static rtnl_mapping_t ns_ifmap[] = { -#define _(t, e) \ -{ \ - .type = t, \ - .offset = offsetof(ns_link_t, e), \ - .size = sizeof(((ns_link_t*)0)->e) \ -}, - ns_foreach_ifla +#define _(t, e) \ + { \ + .type = t, \ + .offset = offsetof(ns_link_t, e), \ + .size = sizeof(((ns_link_t*)0)->e) \ + }, + ns_foreach_ifla #undef _ { .type = 0 } }; @@ -63,25 +63,27 @@ u8 *format_ns_link (u8 *s, va_list *args) return s; } -#define ns_foreach_rta \ - _(RTA_DST, dst, 1) \ - _(RTA_SRC, src, 1) \ - _(RTA_GATEWAY, gateway, 1) \ - _(RTA_IIF, iif, 1) \ - _(RTA_OIF, oif, 1) \ - _(RTA_PREFSRC, prefsrc, 0) \ - _(RTA_TABLE, table, 0) \ - _(RTA_PRIORITY, priority, 0) \ - _(RTA_CACHEINFO, cacheinfo, 0) +#define ns_foreach_rta \ + _(RTA_DST, dst, 1) \ + _(RTA_SRC, src, 1) \ + _(RTA_VIA, via, 1) \ + _(RTA_GATEWAY, gateway, 1) \ + _(RTA_IIF, iif, 1) \ + _(RTA_OIF, oif, 1) \ + _(RTA_PREFSRC, prefsrc, 0) \ + _(RTA_TABLE, table, 0) \ + _(RTA_PRIORITY, priority, 0) \ + _(RTA_CACHEINFO, cacheinfo, 0) \ + _(RTA_ENCAP, encap, 1) static rtnl_mapping_t ns_routemap[] = { -#define _(t, e, u) \ -{ \ - .type = t, .unique = u, \ - .offset = offsetof(ns_route_t, e), \ - .size = sizeof(((ns_route_t*)0)->e) \ -}, - ns_foreach_rta +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_route_t, e), \ + .size = sizeof(((ns_route_t*)0)->e) \ + }, + ns_foreach_rta #undef _ { .type = 0 } }; @@ -108,22 +110,22 @@ u8 *format_ns_route (u8 *s, va_list *args) return s; } -#define ns_foreach_ifaddr \ - _(IFA_ADDRESS, addr, 1) \ - _(IFA_LOCAL, local, 1) \ - _(IFA_LABEL, label, 0) \ - _(IFA_BROADCAST, broadcast, 0) \ - _(IFA_ANYCAST, anycast, 0) \ +#define ns_foreach_ifaddr \ + _(IFA_ADDRESS, addr, 1) \ + _(IFA_LOCAL, local, 1) \ + _(IFA_LABEL, label, 0) \ + _(IFA_BROADCAST, broadcast, 0) \ + _(IFA_ANYCAST, anycast, 0) \ _(IFA_CACHEINFO, cacheinfo, 0) static rtnl_mapping_t ns_addrmap[] = { -#define _(t, e, u) \ -{ \ - .type = t, .unique = u, \ - .offset = offsetof(ns_addr_t, e), \ - .size = sizeof(((ns_addr_t*)0)->e) \ -}, - ns_foreach_ifaddr +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_addr_t, e), \ + .size = sizeof(((ns_addr_t*)0)->e) \ + }, + ns_foreach_ifaddr #undef _ { .type = 0 } }; @@ -140,7 +142,7 @@ u8 *format_ns_addr (u8 *s, va_list *args) if (is_nonzero(a->anycast)) s = format(s, " anycast %U", format_ip, a->anycast); if (is_nonzero(a->local)) - s = format(s, " local %U", format_ip, a->local); + s = format(s, " local %U", format_ip, a->local); return s; } @@ -149,20 +151,20 @@ u8 *format_ns_addr (u8 *s, va_list *args) #define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) #endif -#define ns_foreach_neigh \ - _(NDA_DST, dst, 1) \ - _(NDA_LLADDR, lladdr, 0) \ - _(NDA_PROBES, probes, 0) \ +#define ns_foreach_neigh \ + _(NDA_DST, dst, 1) \ + _(NDA_LLADDR, lladdr, 0) \ + _(NDA_PROBES, probes, 0) \ _(NDA_CACHEINFO, cacheinfo, 0) static rtnl_mapping_t ns_neighmap[] = { -#define _(t, e, u) \ -{ \ - .type = t, .unique = u, \ - .offset = offsetof(ns_neigh_t, e), \ - .size = sizeof(((ns_neigh_t*)0)->e) \ -}, - ns_foreach_neigh +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_neigh_t, e), \ + .size = sizeof(((ns_neigh_t*)0)->e) \ + }, + ns_foreach_neigh #undef _ { .type = 0 } }; @@ -175,7 +177,7 @@ u8 *format_ns_neigh (u8 *s, va_list *args) if (is_nonzero(n->lladdr)) s = format(s, " lladdr %U", format_ethernet_address, n->lladdr); if (n->probes) - s = format(s, " probes %d", n->probes); + s = format(s, " probes %d", n->probes); return s; } @@ -282,14 +284,23 @@ rtnl_entry_set(void *entry, int init) { for (; map->type != 0; map++) { + struct rtattr *rta = rtas[map->type]; - if (rta) { + + if(map->type == RTA_ENCAP && rta) { + /*Data of RTA_ENCAP is a pointer to rta attributes for MPLS*/ + rta = (struct rtattr*)RTA_DATA(rta); if (RTA_PAYLOAD(rta) > map->size) { - clib_warning("rta (type=%d len=%d) too long (max %d)", - rta->rta_type, rta->rta_len, map->size); + clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size); + return -1; + } + memcpy(entry + map->offset, RTA_DATA(rta), map->size); + memset(entry + map->offset + map->size, 0, 0); + } else if (rta) { + if (RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size); return -1; } - memcpy(entry + map->offset, RTA_DATA(rta), RTA_PAYLOAD(rta)); memset(entry + map->offset + RTA_PAYLOAD(rta), 0, map->size - RTA_PAYLOAD(rta)); } else if (init) { @@ -305,9 +316,9 @@ netns_notify(netns_p *ns, void *obj, netns_type_t type, u32 flags) netns_main_t *nm = &netns_main; netns_handle_t *h; pool_foreach(h, nm->handles, { - if (h->netns_index == (ns - nm->netnss) && h->notify) - h->notify(obj, type, flags, h->opaque); - }); + if (h->netns_index == (ns - nm->netnss) && h->notify) + h->notify(obj, type, flags, h->opaque); + }); } static_always_inline int @@ -330,7 +341,7 @@ ns_get_link(netns_p *ns, struct ifinfomsg *ifi, struct rtattr *rtas[]) pool_foreach(link, ns->netns.links, { if(ifi->ifi_index == link->ifi.ifi_index) return link; - }); + }); return NULL; } @@ -343,12 +354,12 @@ ns_rcv_link(netns_p *ns, struct nlmsghdr *hdr) size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); if(datalen < sizeof(*ifi)) - return -1; + return -1; ifi = NLMSG_DATA(hdr); if((datalen > NLMSG_ALIGN(sizeof(*ifi))) && - rtnl_parse_rtattr(rtas, IFLA_MAX, IFLA_RTA(ifi), - IFLA_PAYLOAD(hdr))) { + rtnl_parse_rtattr(rtas, IFLA_MAX, IFLA_RTA(ifi), + IFLA_PAYLOAD(hdr))) { return -1; } #ifdef RTNL_CHECK @@ -385,19 +396,19 @@ ns_get_route(netns_p *ns, struct rtmsg *rtm, struct rtattr *rtas[]) //This describes the values which uniquely identify a route struct rtmsg msg = { - .rtm_family = 0xff, - .rtm_dst_len = 0xff, - .rtm_src_len = 0xff, - .rtm_table = 0xff, - .rtm_protocol = 0xff, - .rtm_type = 0xff + .rtm_family = 0xff, + .rtm_dst_len = 0xff, + .rtm_src_len = 0xff, + .rtm_table = 0xff, + .rtm_protocol = 0xff, + .rtm_type = 0xff }; pool_foreach(route, ns->netns.routes, { if(mask_match(&route->rtm, rtm, &msg, sizeof(struct rtmsg)) && - rtnl_entry_match(route, rtas, ns_routemap)) + rtnl_entry_match(route, rtas, ns_routemap)) return route; - }); + }); return NULL; } @@ -414,8 +425,8 @@ ns_rcv_route(netns_p *ns, struct nlmsghdr *hdr) rtm = NLMSG_DATA(hdr); if((datalen > NLMSG_ALIGN(sizeof(*rtm))) && - rtnl_parse_rtattr(rtas, RTA_MAX, RTM_RTA(rtm), - RTM_PAYLOAD(hdr))) { + rtnl_parse_rtattr(rtas, RTA_MAX, RTM_RTA(rtm), + RTM_PAYLOAD(hdr))) { return -1; } #ifdef RTNL_CHECK @@ -452,15 +463,15 @@ ns_get_addr(netns_p *ns, struct ifaddrmsg *ifaddr, struct rtattr *rtas[]) //This describes the values which uniquely identify a route struct ifaddrmsg msg = { - .ifa_family = 0xff, - .ifa_prefixlen = 0xff, + .ifa_family = 0xff, + .ifa_prefixlen = 0xff, }; pool_foreach(addr, ns->netns.addresses, { if(mask_match(&addr->ifaddr, ifaddr, &msg, sizeof(struct ifaddrmsg)) && - rtnl_entry_match(addr, rtas, ns_addrmap)) + rtnl_entry_match(addr, rtas, ns_addrmap)) return addr; - }); + }); return NULL; } @@ -477,8 +488,8 @@ ns_rcv_addr(netns_p *ns, struct nlmsghdr *hdr) ifaddr = NLMSG_DATA(hdr); if((datalen > NLMSG_ALIGN(sizeof(*ifaddr))) && - rtnl_parse_rtattr(rtas, IFA_MAX, IFA_RTA(ifaddr), - IFA_PAYLOAD(hdr))) { + rtnl_parse_rtattr(rtas, IFA_MAX, IFA_RTA(ifaddr), + IFA_PAYLOAD(hdr))) { return -1; } #ifdef RTNL_CHECK @@ -515,15 +526,15 @@ ns_get_neigh(netns_p *ns, struct ndmsg *nd, struct rtattr *rtas[]) //This describes the values which uniquely identify a route struct ndmsg msg = { - .ndm_family = 0xff, - .ndm_ifindex = 0xff, + .ndm_family = 0xff, + .ndm_ifindex = 0xff, }; pool_foreach(neigh, ns->netns.neighbors, { if(mask_match(&neigh->nd, nd, &msg, sizeof(&msg)) && - rtnl_entry_match(neigh, rtas, ns_neighmap)) + rtnl_entry_match(neigh, rtas, ns_neighmap)) return neigh; - }); + }); return NULL; } @@ -540,8 +551,8 @@ ns_rcv_neigh(netns_p *ns, struct nlmsghdr *hdr) nd = NLMSG_DATA(hdr); if((datalen > NLMSG_ALIGN(sizeof(*nd))) && - rtnl_parse_rtattr(rtas, NDA_MAX, NDA_RTA(nd), - NDA_PAYLOAD(hdr))) { + rtnl_parse_rtattr(rtas, NDA_MAX, NDA_RTA(nd), + NDA_PAYLOAD(hdr))) { return -1; } #ifdef RTNL_CHECK @@ -571,10 +582,10 @@ ns_rcv_neigh(netns_p *ns, struct nlmsghdr *hdr) return 0; } -#define ns_object_foreach \ - _(neighbors, NETNS_TYPE_NEIGH) \ - _(routes, NETNS_TYPE_ROUTE) \ - _(addresses, NETNS_TYPE_ADDR) \ +#define ns_object_foreach \ + _(neighbors, NETNS_TYPE_NEIGH) \ + _(routes, NETNS_TYPE_ROUTE) \ + _(addresses, NETNS_TYPE_ADDR) \ _(links, NETNS_TYPE_LINK) static void @@ -585,20 +596,20 @@ ns_recv_error(rtnl_error_t err, uword o) u32 *indexes = 0; u32 *i = 0; -#define _(pool, type) \ - pool_foreach_index(*i, ns->netns.pool, { \ - vec_add1(indexes, *i); \ - }) \ - vec_foreach(i, indexes) { \ - pool_put_index(ns->netns.pool, *i); \ - netns_notify(ns, &ns->netns.pool[*i], type, NETNS_F_DEL);\ - } \ +#define _(pool, type) \ + pool_foreach_index(*i, ns->netns.pool, { \ + vec_add1(indexes, *i); \ + }) \ + vec_foreach(i, indexes) { \ + pool_put_index(ns->netns.pool, *i); \ + netns_notify(ns, &ns->netns.pool[*i], type, NETNS_F_DEL); \ + } \ vec_reset_length(indexes); ns_object_foreach #undef _ - vec_free(indexes); + vec_free(indexes); } static void @@ -606,25 +617,25 @@ ns_recv_rtnl(struct nlmsghdr *hdr, uword o) { netns_p *ns = &netns_main.netnss[o]; switch (hdr->nlmsg_type) { - case RTM_NEWROUTE: - case RTM_DELROUTE: - ns_rcv_route(ns, hdr); - break; - case RTM_NEWLINK: - case RTM_DELLINK: - ns_rcv_link(ns, hdr); - break; - case RTM_NEWADDR: - case RTM_DELADDR: - ns_rcv_addr(ns, hdr); - break; - case RTM_NEWNEIGH: - case RTM_DELNEIGH: - ns_rcv_neigh(ns, hdr); - break; - default: - clib_warning("unknown rtnl type %d", hdr->nlmsg_type); - break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + ns_rcv_route(ns, hdr); + break; + case RTM_NEWLINK: + case RTM_DELLINK: + ns_rcv_link(ns, hdr); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + ns_rcv_addr(ns, hdr); + break; + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + ns_rcv_neigh(ns, hdr); + break; + default: + clib_warning("unknown rtnl type %d", hdr->nlmsg_type); + break; } } @@ -648,16 +659,16 @@ netns_get(char *name) pool_foreach(ns, nm->netnss, { if (!strcmp(name, ns->netns.name)) return ns; - }); + }); if (strlen(name) > RTNL_NETNS_NAMELEN) return NULL; pool_get(nm->netnss, ns); rtnl_stream_t s = { - .recv_message = ns_recv_rtnl, - .error = ns_recv_error, - .opaque = (uword)(ns - nm->netnss), + .recv_message = ns_recv_rtnl, + .error = ns_recv_error, + .opaque = (uword)(ns - nm->netnss), }; strcpy(s.name, name); @@ -721,30 +732,30 @@ void netns_callme(u32 handle, char del) if (!h->notify) return; -#define _(pool, type) \ - pool_foreach_index(i, ns->netns.pool, { \ - h->notify(&ns->netns.pool[i], type, \ - del?NETNS_F_DEL:NETNS_F_ADD, h->opaque); \ - }); +#define _(pool, type) \ + pool_foreach_index(i, ns->netns.pool, { \ + h->notify(&ns->netns.pool[i], type, \ + del?NETNS_F_DEL:NETNS_F_ADD, h->opaque); \ + }); ns_object_foreach #undef _ -} + } u8 *format_ns_object(u8 *s, va_list *args) { netns_type_t t = va_arg(*args, netns_type_t); void *o = va_arg(*args, void *); switch (t) { - case NETNS_TYPE_ADDR: - return format(s, "addr %U", format_ns_addr, o); - case NETNS_TYPE_ROUTE: - return format(s, "route %U", format_ns_route, o); - case NETNS_TYPE_LINK: - return format(s, "link %U", format_ns_link, o); - case NETNS_TYPE_NEIGH: - return format(s, "neigh %U", format_ns_neigh, o); + case NETNS_TYPE_ADDR: + return format(s, "addr %U", format_ns_addr, o); + case NETNS_TYPE_ROUTE: + return format(s, "route %U", format_ns_route, o); + case NETNS_TYPE_LINK: + return format(s, "link %U", format_ns_link, o); + case NETNS_TYPE_NEIGH: + return format(s, "neigh %U", format_ns_neigh, o); } return s; } diff --git a/netlink/librtnl/netns.h b/netlink/librtnl/netns.h index fb87ac7..23f2dc2 100644 --- a/netlink/librtnl/netns.h +++ b/netlink/librtnl/netns.h @@ -25,6 +25,11 @@ #include <librtnl/rtnl.h> +/*include it for 'struct mpls_label'*/ +#include <linux/mpls.h> +/*so far depth is fixed, looking into ways to be dynamic*/ +#define MPLS_STACK_DEPTH 7 + typedef struct { struct ifinfomsg ifi; u8 hwaddr[IFHWADDRLEN]; @@ -41,6 +46,7 @@ typedef struct { struct rtmsg rtm; u8 dst[16]; u8 src[16]; + u8 via[16]; u8 prefsrc[16]; u32 iif; u32 oif; @@ -48,6 +54,7 @@ typedef struct { u8 gateway[16]; u32 priority; struct rta_cacheinfo cacheinfo; + struct mpls_label encap[MPLS_STACK_DEPTH]; f64 last_updated; } ns_route_t; diff --git a/netlink/librtnl/rtnl.c b/netlink/librtnl/rtnl.c index fa31617..14ea0e8 100644 --- a/netlink/librtnl/rtnl.c +++ b/netlink/librtnl/rtnl.c @@ -79,6 +79,16 @@ static vlib_node_registration_t rtnl_process_node; #define RTNL_BUFFSIZ 16384 #define RTNL_DUMP_TIMEOUT 1 +static inline u32 grpmask(u32 g) +{ + ASSERT (g <= 31); + if (g) { + return 1 << (g - 1); + } else + return 0; +} + + u8 *format_rtnl_nsname2path(u8 *s, va_list *args) { char *nsname = va_arg(*args, char *); @@ -116,21 +126,21 @@ int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; struct nlmsghdr nlh = { - .nlmsg_len = NLMSG_LENGTH(len), - .nlmsg_type = type, - .nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST, - .nlmsg_pid = 0, - .nlmsg_seq = ++ns->rtnl_seq, + .nlmsg_len = NLMSG_LENGTH(len), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST, + .nlmsg_pid = 0, + .nlmsg_seq = ++ns->rtnl_seq, }; struct iovec iov[2] = { - { .iov_base = &nlh, .iov_len = sizeof(nlh) }, - { .iov_base = req, .iov_len = len } + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = req, .iov_len = len } }; struct msghdr msg = { - .msg_name = &nladdr, - .msg_namelen = sizeof(nladdr), - .msg_iov = iov, - .msg_iovlen = 2, + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = 2, }; if(sendmsg(ns->rtnl_socket, &msg, 0) < 0) return -1; @@ -165,10 +175,10 @@ static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, v pthread_t thread; void *thread_ret; struct rtnl_thread_exec ex = { - .fd = fd, - .fn = fn, - .arg = arg, - .ret = ret + .fd = fd, + .fn = fn, + .arg = arg, + .ret = ret }; if(pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex)) return -errno; @@ -246,13 +256,14 @@ static int rtnl_socket_open(rtnl_ns_t *ns) } struct sockaddr_nl addr = { - .nl_family = AF_NETLINK, - .nl_pad = 0, - .nl_pid = 0, - .nl_groups = - RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_IFADDR | - RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_NEIGH | - RTMGRP_NOTIFY, + .nl_family = AF_NETLINK, + .nl_pad = 0, + .nl_pid = 0, + /*add mpls message group*/ + .nl_groups = grpmask(RTNLGRP_LINK)| grpmask(RTNLGRP_IPV6_IFADDR) | + grpmask(RTNLGRP_IPV4_IFADDR) | grpmask(RTNLGRP_IPV4_ROUTE) | + grpmask(RTNLGRP_IPV6_ROUTE) | grpmask(RTNLGRP_NEIGH) | + grpmask(RTNLGRP_NOTIFY) | grpmask(RTNLGRP_MPLS_ROUTE), }; if (bind(ns->rtnl_socket, (struct sockaddr*) &addr, sizeof(addr))) { @@ -298,49 +309,49 @@ rtnl_sync_done(rtnl_ns_t *ns) struct rtmsg rtmsg; struct ndmsg ndmsg; switch (ns->sync_state) { - case RTNL_SS_OPENING: - //Cannot happen here - break; - case RTNL_SS_LINK: - memset(&addrmsg, 0, sizeof(addrmsg)); - addrmsg.ifa_family = AF_UNSPEC; - if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) { - rtnl_sync_reset(ns); - rtnl_schedule_timeout(ns, rm->now + 1); - return; - } - rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); - ns->sync_state = RTNL_SS_ADDR; - break; - case RTNL_SS_ADDR: - case RTNL_SS_ROUTE4: - memset(&rtmsg, 0, sizeof(rtmsg)); - rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6; - rtmsg.rtm_table = RT_TABLE_UNSPEC; - if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) { - rtnl_sync_reset(ns); - rtnl_schedule_timeout(ns, rm->now + 1); - return; - } - rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); - ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6; - break; - case RTNL_SS_ROUTE6: - memset(&ndmsg, 0, sizeof(ndmsg)); - ndmsg.ndm_family = AF_UNSPEC; - if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) { - rtnl_sync_reset(ns); - rtnl_schedule_timeout(ns, rm->now + 1); - return; - } - rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); - ns->sync_state = RTNL_SS_NEIGH; - break; - case RTNL_SS_NEIGH: - ns->state = RTNL_S_READY; - ns->sync_state = 0; - rtnl_cancel_timeout(ns); - break; + case RTNL_SS_OPENING: + //Cannot happen here + break; + case RTNL_SS_LINK: + memset(&addrmsg, 0, sizeof(addrmsg)); + addrmsg.ifa_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_ADDR; + break; + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + memset(&rtmsg, 0, sizeof(rtmsg)); + rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6; + rtmsg.rtm_table = RT_TABLE_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6; + break; + case RTNL_SS_ROUTE6: + memset(&ndmsg, 0, sizeof(ndmsg)); + ndmsg.ndm_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_NEIGH; + break; + case RTNL_SS_NEIGH: + ns->state = RTNL_S_READY; + ns->sync_state = 0; + rtnl_cancel_timeout(ns); + break; } } @@ -350,28 +361,28 @@ rtnl_sync_timeout(rtnl_ns_t *ns) rtnl_main_t *rm = &rtnl_main; struct ifinfomsg imsg = {}; switch (ns->sync_state) { - case RTNL_SS_OPENING: - if (rtnl_socket_open(ns)) { - rtnl_schedule_timeout(ns, rm->now + 10); - return; - } - imsg.ifi_family = AF_UNSPEC; - if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) { - rtnl_sync_reset(ns); - rtnl_schedule_timeout(ns, rm->now + 10); - } - ns->sync_state = RTNL_SS_LINK; - rtnl_schedule_timeout(ns, rm->now + 2); - break; - case RTNL_SS_LINK: - case RTNL_SS_ADDR: - case RTNL_SS_ROUTE4: - case RTNL_SS_ROUTE6: - case RTNL_SS_NEIGH: - //Timeout happened while synchronizing + case RTNL_SS_OPENING: + if (rtnl_socket_open(ns)) { + rtnl_schedule_timeout(ns, rm->now + 10); + return; + } + imsg.ifi_family = AF_UNSPEC; + if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) { rtnl_sync_reset(ns); - rtnl_schedule_timeout(ns, rm->now + 1); - break; + rtnl_schedule_timeout(ns, rm->now + 10); + } + ns->sync_state = RTNL_SS_LINK; + rtnl_schedule_timeout(ns, rm->now + 2); + break; + case RTNL_SS_LINK: + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + case RTNL_SS_ROUTE6: + case RTNL_SS_NEIGH: + //Timeout happened while synchronizing + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + break; } } @@ -383,34 +394,34 @@ rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr) if (ns->state == RTNL_S_SYNC && ((hdr->nlmsg_flags & RTM_F_NOTIFY) || - (hdr->nlmsg_seq != (ns->rtnl_seq)))) { + (hdr->nlmsg_seq != (ns->rtnl_seq)))) { clib_warning("Received notification while in sync. Restart synchronization."); rtnl_sync_reset(ns); rtnl_schedule_timeout(ns, rm->now); } switch (hdr->nlmsg_type) { - case NLMSG_DONE: - rtnl_sync_done(ns); - break; - case NLMSG_ERROR: - if((ret = rtnl_rcv_error(ns, hdr, &error))) - return ret; - break; - case RTM_NEWROUTE: - case RTM_DELROUTE: - case RTM_NEWLINK: - case RTM_DELLINK: - case RTM_NEWADDR: - case RTM_DELADDR: - case RTM_NEWNEIGH: - case RTM_DELNEIGH: - if (ns->stream.recv_message) - ns->stream.recv_message(hdr, ns->stream.opaque); - break; - default: - clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type); - break; + case NLMSG_DONE: + rtnl_sync_done(ns); + break; + case NLMSG_ERROR: + if((ret = rtnl_rcv_error(ns, hdr, &error))) + return ret; + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + if (ns->stream.recv_message) + ns->stream.recv_message(hdr, ns->stream.opaque); + break; + default: + clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type); + break; } return 0; } @@ -457,7 +468,7 @@ rtnl_process_read(rtnl_ns_t *ns) for(hdr = (struct nlmsghdr *) buff; len > 0; len -= NLMSG_ALIGN(hdr->nlmsg_len), - hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) { + hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) { if((sizeof(*hdr) > (size_t)len) || (hdr->nlmsg_len > (size_t)len)) { clib_warning("rtnetlink buffer too small (%d Vs %d)", (int) hdr->nlmsg_len, (int) len); return -1; @@ -473,13 +484,13 @@ static void rtnl_process_timeout(rtnl_ns_t *ns) { switch (ns->state) { - case RTNL_S_SYNC: - rtnl_sync_timeout(ns); - break; - case RTNL_S_INIT: - case RTNL_S_READY: - clib_warning("Should not happen"); - break; + case RTNL_S_SYNC: + rtnl_sync_timeout(ns); + break; + case RTNL_S_INIT: + case RTNL_S_READY: + clib_warning("Should not happen"); + break; } } @@ -503,18 +514,18 @@ rtnl_process (vlib_main_t * vm, if (event_type == ~0) { //Clock event or no event pool_foreach(ns, rm->streams, { - if (ns->timeout < rm->now) { - ns->timeout = DBL_MAX; - rtnl_process_timeout(ns); - } - }); + if (ns->timeout < rm->now) { + ns->timeout = DBL_MAX; + rtnl_process_timeout(ns); + } + }); } else { rtnl_ns_t *ns; uword *d; vec_foreach(d, event_data) { ns = &rm->streams[d[0]]; switch (event_type) - { + { case RTNL_E_CLOSE: rtnl_process_close(ns); break; @@ -524,7 +535,7 @@ rtnl_process (vlib_main_t * vm, case RTNL_E_READ: rtnl_process_read(ns); break; - } + } } } @@ -534,15 +545,15 @@ rtnl_process (vlib_main_t * vm, pool_foreach(ns, rm->streams, { if (ns->timeout < timeout) timeout = ns->timeout; - }); + }); } return frame->n_vectors; } VLIB_REGISTER_NODE(rtnl_process_node, static) = { - .function = rtnl_process, - .name = "rtnl-process", - .type = VLIB_NODE_TYPE_PROCESS, + .function = rtnl_process, + .name = "rtnl-process", + .type = VLIB_NODE_TYPE_PROCESS, }; u32 diff --git a/router/README.md b/router/README.md index 3401c76..b3ec5ae 100644 --- a/router/README.md +++ b/router/README.md @@ -66,6 +66,30 @@ The objective of this project is to continue to build out better integration with host operating system and for providing a basis to enable completely or partially unmodified applications to take advantage of a fast datapath. +### MPLS related enhancement to router plugin + +Some changes made to router plugin enable it to support mpls label encapsulation. + +To test this feature, first install router plugin using instructions +in section "Build/Install", and then run following commands + +$vppctl enable tap-inject +$ifconfig vpp0 <IP> up +$ip route add <IP> encap mpls <LABELS> via <IP> dev vpp0 +$vppctl show ip fib + +You should see labels in ip fib table. + +you can also run following commands + +$vppctl enable tap-inject +$vppctl mpls table add 0 +$vppctl set int mpls <INTERFACE> enable +$ip -f mpls route add 333 dev vpp0 +$vppctl show mpls fib + +You should see dst label and interface in mpls table. + ### Main contributors -Jeff Shaw - LF-ID:jbshaw +Jeff Shaw - LF-ID:jbshaw, Chad Chengwei Wang - LF-ID:flinter (MPLS) diff --git a/router/router/tap_inject_netlink.c b/router/router/tap_inject_netlink.c index 19d5d04..3dca013 100644 --- a/router/router/tap_inject_netlink.c +++ b/router/router/tap_inject_netlink.c @@ -15,7 +15,6 @@ */ #include "tap_inject.h" - #include <librtnl/netns.h> #include <vlibmemory/api.h> #include <vnet/ethernet/arp_packet.h> @@ -30,6 +29,10 @@ #define FIB_VERSION 2 #endif +#include <arpa/inet.h> +#include <linux/mpls.h> +#include <vnet/mpls/packet.h> + static void add_del_addr (ns_addr_t * a, int is_del) { @@ -37,7 +40,7 @@ add_del_addr (ns_addr_t * a, int is_del) u32 sw_if_index; sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index ( - a->ifaddr.ifa_index); + a->ifaddr.ifa_index); if (sw_if_index == ~0) return; @@ -45,12 +48,12 @@ add_del_addr (ns_addr_t * a, int is_del) if (a->ifaddr.ifa_family == AF_INET) { ip4_add_del_interface_address (vm, sw_if_index, - (ip4_address_t *) a->local, a->ifaddr.ifa_prefixlen, is_del); + (ip4_address_t *) a->local, a->ifaddr.ifa_prefixlen, is_del); } else if (a->ifaddr.ifa_family == AF_INET6) { ip6_add_del_interface_address (vm, sw_if_index, - (ip6_address_t *) a->addr, a->ifaddr.ifa_prefixlen, is_del); + (ip6_address_t *) a->addr, a->ifaddr.ifa_prefixlen, is_del); } } @@ -75,7 +78,7 @@ add_del_link (ns_link_t * l, int is_del) u32 sw_if_index; sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index ( - l->ifi.ifi_index); + l->ifi.ifi_index); if (sw_if_index == ~0) return; @@ -104,7 +107,7 @@ add_del_neigh (ns_neigh_t * n, int is_del) u32 sw_if_index; sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index ( - n->nd.ndm_ifindex); + n->nd.ndm_ifindex); if (sw_if_index == ~0) return; @@ -122,40 +125,40 @@ add_del_neigh (ns_neigh_t * n, int is_del) if (n->nd.ndm_state & NUD_REACHABLE) { #if FIB_VERSION == 1 - vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a, 0); + vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a, 0); #else - vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, - &a, 0 /* static */ , - 0 /* no fib entry */); + vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, + &a, 0 /* static */ , + 0 /* no fib entry */); #endif /* FIB_VERSION == 1 */ } else if (n->nd.ndm_state & NUD_FAILED) { #if FIB_VERSION == 1 - vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a); + vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a); #else - vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, &a); + vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, &a); #endif /* FIB_VERSION == 1 */ } } else if (n->nd.ndm_family == AF_INET6) { if (n->nd.ndm_state & NUD_REACHABLE) - { + { #if FIB_VERSION == 1 - vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, - (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, 0); + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, + (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, 0); #else - vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, - (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, - 0 /* static */, - 0 /* no fib entry */); + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, + (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, + 0 /* static */, + 0 /* no fib entry */); #endif /* FIB_VERSION == 1 */ - } + } else vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, - (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN); + (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN); } } @@ -163,67 +166,124 @@ add_del_neigh (ns_neigh_t * n, int is_del) #define TAP_INJECT_HOST_ROUTE_TABLE_MAIN 254 static void +get_mpls_label_stack(struct mpls_label *addr, u32* l) +{ + u32 entry = ntohl(addr[0].entry); + u32 label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + + for(int i = 1; label != 0; i++) { + *l++ = label; + if(entry & MPLS_LS_S_MASK) + return; + entry = ntohl(addr[i].entry); + label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + } +} + +static void add_del_route (ns_route_t * r, int is_del) { u32 sw_if_index; sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (r->oif); - if (sw_if_index == ~0 || r->table != TAP_INJECT_HOST_ROUTE_TABLE_MAIN) + if (sw_if_index == ~0) return; if (r->rtm.rtm_family == AF_INET) { + u32 stack[MPLS_STACK_DEPTH] = {0}; + #if FIB_VERSION == 1 ip4_add_del_route_next_hop (&ip4_main, - is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD, - (ip4_address_t *) r->dst, r->rtm.rtm_dst_len, - (ip4_address_t *) r->gateway, sw_if_index, 0, ~0, 0); + is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD, + (ip4_address_t *) r->dst, r->rtm.rtm_dst_len, + (ip4_address_t *) r->gateway, sw_if_index, 0, ~0, 0); #else - fib_prefix_t prefix; - ip46_address_t nh; - - memset (&prefix, 0, sizeof (prefix)); - prefix.fp_len = r->rtm.rtm_dst_len; - prefix.fp_proto = FIB_PROTOCOL_IP4; - clib_memcpy (&prefix.fp_addr.ip4, r->dst, sizeof (prefix.fp_addr.ip4)); - - memset (&nh, 0, sizeof (nh)); - clib_memcpy (&nh.ip4, r->gateway, sizeof (nh.ip4)); - - fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, - FIB_ENTRY_FLAG_NONE, prefix.fp_proto, - &nh, sw_if_index, 0, - 0 /* weight */, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = r->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&prefix.fp_addr.ip4, r->dst, sizeof (prefix.fp_addr.ip4)); + get_mpls_label_stack(r->encap, stack); + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip4, r->gateway, sizeof (nh.ip4)); + if(*stack == 0) + fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, sw_if_index, 0, + 0 /* weight */, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + else { + fib_route_path_t *rpaths = NULL, rpath; + memset(&rpath, 0, sizeof(rpath)); + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP4; + clib_memcpy(&rpath.frp_addr.ip4, r->gateway, sizeof(rpath.frp_addr.ip4)); + rpath.frp_sw_if_index = sw_if_index; + for(int i = 0; i < MPLS_STACK_DEPTH && stack[i] != 0; i++) { + fib_mpls_label_t fib_label = {stack[i],0,0,0}; + vec_add1(rpath.frp_label_stack, fib_label); + } + vec_add1(rpaths, rpath); + fib_table_entry_path_add2(0, + &prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + } #endif /* FIB_VERSION == 1 */ } else if (r->rtm.rtm_family == AF_INET6) { #if FIB_VERSION == 1 ip6_add_del_route_next_hop (&ip6_main, - is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD, - (ip6_address_t *) r->dst, r->rtm.rtm_dst_len, - (ip6_address_t *) r->gateway, sw_if_index, 0, ~0, 0); + is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD, + (ip6_address_t *) r->dst, r->rtm.rtm_dst_len, + (ip6_address_t *) r->gateway, sw_if_index, 0, ~0, 0); #else - fib_prefix_t prefix; - ip46_address_t nh; - - memset (&prefix, 0, sizeof (prefix)); - prefix.fp_len = r->rtm.rtm_dst_len; - prefix.fp_proto = FIB_PROTOCOL_IP6; - clib_memcpy (&prefix.fp_addr.ip6, r->dst, sizeof (prefix.fp_addr.ip6)); - - memset (&nh, 0, sizeof (nh)); - clib_memcpy (&nh.ip6, r->gateway, sizeof (nh.ip6)); - - fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, - FIB_ENTRY_FLAG_NONE, prefix.fp_proto, - &nh, sw_if_index, 0, - 0 /* weight */, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + fib_prefix_t prefix; + ip46_address_t nh; + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = r->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&prefix.fp_addr.ip6, r->dst, sizeof (prefix.fp_addr.ip6)); + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip6, r->gateway, sizeof (nh.ip6)); + fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, sw_if_index, 0, + 0 /* weight */, NULL, + FIB_ROUTE_PATH_FLAG_NONE); #endif /* FIB_VERSION == 1 */ } + else if (r->rtm.rtm_family == AF_MPLS) + { + u32 dst_label; + get_mpls_label_stack((struct mpls_label*) r->dst, &dst_label); + struct rtvia *via = (struct rtvia*) r->via; + fib_prefix_t prefix; + fib_route_path_t *rpaths = NULL, rpath; + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = 21; + prefix.fp_label = dst_label; + prefix.fp_proto = FIB_PROTOCOL_MPLS; + prefix.fp_payload_proto = DPO_PROTO_IP4; + memset(&rpath, 0, sizeof(rpath)); + clib_memcpy (&rpath.frp_addr.ip4, via->rtvia_addr, sizeof (rpath.frp_addr.ip4)); + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP4; + rpath.frp_fib_index = 0; + rpath.frp_sw_if_index = sw_if_index; + vec_add1(rpaths, rpath); + fib_table_entry_path_add2(0, + &prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + } } |