diff --git a/man/systemd.network.xml b/man/systemd.network.xml
index e7e166a9c7..4cd2520173 100644
--- a/man/systemd.network.xml
+++ b/man/systemd.network.xml
@@ -1378,6 +1378,16 @@
             service type to CS6 (network control) or CS4 (Realtime). Defaults to CS6.
 
 
+
+          MultiPathRoute=address[@name] [weight]
+
+            Configures multipath route. Multipath routing is the technique of using multiple
+            alternative paths through a network. Takes gateway address. Optionally, takes a network
+            interface name or index separated with @, and a weight in 1..256 for
+            this multipath route separated with whitespace. This setting can be specified multiple
+            times. If an empty string is assigned, then all previous assignments are cleared.
+
+
 
 
 
diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c
index 247868221d..39e3c36ad2 100644
--- a/src/libsystemd/sd-netlink/netlink-message.c
+++ b/src/libsystemd/sd-netlink/netlink-message.c
@@ -142,11 +142,9 @@ int sd_netlink_message_is_broadcast(const sd_netlink_message *m) {
 /* If successful the updated message will be correctly aligned, if
    unsuccessful the old message is untouched. */
 static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *data, size_t data_length) {
-        uint32_t rta_length;
-        size_t message_length, padding_length;
+        size_t message_length;
         struct nlmsghdr *new_hdr;
         struct rtattr *rta;
-        char *padding;
         unsigned i;
         int offset;
 
@@ -154,16 +152,10 @@ static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *da
         assert(m->hdr);
         assert(!m->sealed);
         assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
-        assert(!data || data_length);
-
-        /* get offset of the new attribute */
-        offset = m->hdr->nlmsg_len;
-
-        /* get the size of the new rta attribute (with padding at the end) */
-        rta_length = RTA_LENGTH(data_length);
+        assert(!data || data_length > 0);
 
         /* get the new message size (with padding at the end) */
-        message_length = offset + RTA_ALIGN(rta_length);
+        message_length = m->hdr->nlmsg_len + RTA_SPACE(data_length);
 
         /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
         if (message_length > MIN(page_size(), 8192UL))
@@ -176,33 +168,19 @@ static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *da
         m->hdr = new_hdr;
 
         /* get pointer to the attribute we are about to add */
-        rta = (struct rtattr *) ((uint8_t *) m->hdr + offset);
+        rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len);
+
+        rtattr_append_attribute_internal(rta, type, data, data_length);
 
         /* if we are inside containers, extend them */
         for (i = 0; i < m->n_containers; i++)
-                GET_CONTAINER(m, i)->rta_len += message_length - offset;
-
-        /* fill in the attribute */
-        rta->rta_type = type;
-        rta->rta_len = rta_length;
-        if (data)
-                /* we don't deal with the case where the user lies about the type
-                 * and gives us too little data (so don't do that)
-                 */
-                padding = mempcpy(RTA_DATA(rta), data, data_length);
-
-        else
-                /* if no data was passed, make sure we still initialize the padding
-                   note that we can have data_length > 0 (used by some containers) */
-                padding = RTA_DATA(rta);
-
-        /* make sure also the padding at the end of the message is initialized */
-        padding_length = (uint8_t*)m->hdr + message_length - (uint8_t*)padding;
-        memzero(padding, padding_length);
+                GET_CONTAINER(m, i)->rta_len += RTA_SPACE(data_length);
 
         /* update message size */
+        offset = m->hdr->nlmsg_len;
         m->hdr->nlmsg_len = message_length;
 
+        /* return old message size */
         return offset;
 }
 
diff --git a/src/libsystemd/sd-netlink/netlink-util.c b/src/libsystemd/sd-netlink/netlink-util.c
index 80725f6cb4..c35d6ba4a5 100644
--- a/src/libsystemd/sd-netlink/netlink-util.c
+++ b/src/libsystemd/sd-netlink/netlink-util.c
@@ -2,6 +2,7 @@
 
 #include "sd-netlink.h"
 
+#include "memory-util.h"
 #include "netlink-internal.h"
 #include "netlink-util.h"
 #include "strv.h"
@@ -178,3 +179,66 @@ int rtnl_log_parse_error(int r) {
 int rtnl_log_create_error(int r) {
         return log_error_errno(r, "Failed to create netlink message: %m");
 }
+
+/* Writes one attribute at 'rta': header, payload (if 'data' is given) and zeroed remainder up to
+ * RTA_SPACE(data_length). The caller must provide that many writable bytes at 'rta'. */
+void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length) {
+        size_t padding_length;
+        char *padding;
+
+        assert(rta);
+        assert(!data || data_length > 0);
+
+        /* fill in the attribute */
+        rta->rta_type = type;
+        rta->rta_len = RTA_LENGTH(data_length);
+        if (data)
+                /* we don't deal with the case where the user lies about the type
+                 * and gives us too little data (so don't do that)
+                 */
+                padding = mempcpy(RTA_DATA(rta), data, data_length);
+
+        else
+                /* if no data was passed, make sure we still initialize the padding
+                   note that we can have data_length > 0 (used by some containers) */
+                padding = RTA_DATA(rta);
+
+        /* make sure also the padding at the end of the message is initialized */
+        padding_length = (char *) rta + RTA_SPACE(data_length) - padding;
+        memzero(padding, padding_length);
+}
+
+/* Appends a nested attribute to the heap-allocated attribute *rta, growing it with realloc() and
+ * updating its rta_len. *rta must already point to an initialized attribute header. Returns 0 on
+ * success, -ENOBUFS if the result would exceed the size limit, -ENOMEM if realloc() fails. */
+int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length) {
+        struct rtattr *new_rta, *sub_rta;
+        size_t message_length;
+
+        assert(rta);
+        assert(*rta);
+        assert(!data || data_length > 0);
+
+        /* get the new message size (with padding at the end) */
+        message_length = RTA_ALIGN((*rta)->rta_len) + RTA_SPACE(data_length);
+
+        /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
+        if (message_length > MIN(page_size(), 8192UL))
+                return -ENOBUFS;
+
+        /* realloc to fit the new attribute */
+        new_rta = realloc(*rta, message_length);
+        if (!new_rta)
+                return -ENOMEM;
+        *rta = new_rta;
+
+        /* get pointer to the attribute we are about to add */
+        sub_rta = (struct rtattr *) ((uint8_t *) *rta + RTA_ALIGN((*rta)->rta_len));
+
+        rtattr_append_attribute_internal(sub_rta, type, data, data_length);
+
+        /* update rta_len */
+        (*rta)->rta_len = message_length;
+
+        return 0;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-util.h b/src/libsystemd/sd-netlink/netlink-util.h
index 8c43fafec7..1782786841 100644
--- a/src/libsystemd/sd-netlink/netlink-util.h
+++ b/src/libsystemd/sd-netlink/netlink-util.h
@@ -77,3 +77,6 @@ int rtnl_log_create_error(int r);
 
 int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data);
 int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data);
+
+void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length);
+int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length);
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
index 73dde11c0e..23d0ee675b 100644
--- a/src/network/networkd-link.c
+++ b/src/network/networkd-link.c
@@ -1040,7 +1040,7 @@ int link_request_set_routes(Link *link) {
 
         for (phase = 0; phase < _PHASE_MAX; phase++)
                 LIST_FOREACH(routes, rt, link->network->static_routes) {
-                        if (in_addr_is_null(rt->family, &rt->gw) != (phase == PHASE_NON_GATEWAY))
+                        if ((in_addr_is_null(rt->family, &rt->gw) && ordered_set_isempty(rt->multipath_routes)) != (phase == PHASE_NON_GATEWAY))
                                 continue;
 
                         r = route_configure(rt, link, route_handler);
diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf
index 06dca18cb7..ecb82c237f 100644
--- a/src/network/networkd-network-gperf.gperf
+++ b/src/network/networkd-network-gperf.gperf
@@ -147,6 +147,7 @@ Route.InitialAdvertisedReceiveWindow, config_parse_tcp_window,
 Route.QuickAck,                         config_parse_quickack,                           0,                             0
 Route.FastOpenNoCookie,                 config_parse_fast_open_no_cookie,                0,                             0
 Route.TTLPropagate,                     config_parse_route_ttl_propagate,                0,                             0
+Route.MultiPathRoute,                   config_parse_multipath_route,                    0,                             0
 NextHop.Id,                             config_parse_nexthop_id,                         0,                             0
 NextHop.Gateway,                        config_parse_nexthop_gateway,                    0,                             0
 DHCPv4.ClientIdentifier,                config_parse_dhcp_client_identifier,             0,                             offsetof(Network, dhcp_client_identifier)
diff --git a/src/network/networkd-route.c b/src/network/networkd-route.c
index 2a42628e5b..ecc8d219b7 100644
--- a/src/network/networkd-route.c
+++ b/src/network/networkd-route.c
@@ -144,6 +144,8 @@ void route_free(Route *route) {
                 set_remove(route->link->routes_foreign, route);
         }
 
+        ordered_set_free_free(route->multipath_routes);
+
         sd_event_source_unref(route->expire);
 
         free(route);
@@ -516,6 +518,92 @@ int route_expire_handler(sd_event_source *s, uint64_t usec, void *userdata) {
         return 1;
 }
 
+/* Appends one struct rtnexthop, followed by its RTA_GATEWAY or RTA_VIA attribute, at byte
+ * 'offset' of the RTA_MULTIPATH attribute under construction in *rta. */
+static int append_nexthop_one(Route *route, MultipathRoute *m, struct rtattr **rta, size_t offset) {
+        struct rtnexthop *rtnh;
+        struct rtattr *new_rta;
+        int r;
+
+        assert(route);
+        assert(m);
+        assert(rta);
+        assert(*rta);
+
+        new_rta = realloc(*rta, RTA_ALIGN((*rta)->rta_len) + RTA_SPACE(sizeof(struct rtnexthop)));
+        if (!new_rta)
+                return -ENOMEM;
+        *rta = new_rta;
+
+        rtnh = (struct rtnexthop *)((uint8_t *) *rta + offset);
+        *rtnh = (struct rtnexthop) {
+                .rtnh_len = sizeof(*rtnh),
+                .rtnh_ifindex = m->ifindex,
+                .rtnh_hops = m->weight > 0 ? m->weight - 1 : 0,
+        };
+
+        (*rta)->rta_len += sizeof(struct rtnexthop);
+
+        if (route->family == m->gateway.family) {
+                r = rtattr_append_attribute(rta, RTA_GATEWAY, &m->gateway.address, FAMILY_ADDRESS_SIZE(m->gateway.family));
+                if (r < 0)
+                        goto clear;
+                rtnh = (struct rtnexthop *)((uint8_t *) *rta + offset);
+                rtnh->rtnh_len += RTA_SPACE(FAMILY_ADDRESS_SIZE(m->gateway.family));
+        } else {
+                r = rtattr_append_attribute(rta, RTA_VIA, &m->gateway, FAMILY_ADDRESS_SIZE(m->gateway.family) + sizeof(m->gateway.family));
+                if (r < 0)
+                        goto clear;
+                rtnh = (struct rtnexthop *)((uint8_t *) *rta + offset);
+                rtnh->rtnh_len += RTA_SPACE(FAMILY_ADDRESS_SIZE(m->gateway.family) + sizeof(m->gateway.family));
+        }
+
+        return 0;
+
+clear:
+        (*rta)->rta_len -= sizeof(struct rtnexthop);
+        return r;
+}
+
+/* Builds an RTA_MULTIPATH attribute from route->multipath_routes and appends it to req. Does
+ * nothing and returns 0 if no multipath routes are configured. */
+static int append_nexthops(Route *route, sd_netlink_message *req) {
+        _cleanup_free_ struct rtattr *rta = NULL;
+        struct rtnexthop *rtnh;
+        MultipathRoute *m;
+        size_t offset;
+        Iterator i;
+        int r;
+
+        if (ordered_set_isempty(route->multipath_routes))
+                return 0;
+
+        rta = new(struct rtattr, 1);
+        if (!rta)
+                return -ENOMEM;
+
+        *rta = (struct rtattr) {
+                .rta_type = RTA_MULTIPATH,
+                .rta_len = RTA_LENGTH(0),
+        };
+        offset = (uint8_t *) RTA_DATA(rta) - (uint8_t *) rta;
+
+        ORDERED_SET_FOREACH(m, route->multipath_routes, i) {
+                r = append_nexthop_one(route, m, &rta, offset);
+                if (r < 0)
+                        return r;
+
+                rtnh = (struct rtnexthop *)((uint8_t *) rta + offset);
+                offset = (uint8_t *) RTNH_NEXT(rtnh) - (uint8_t *) rta;
+        }
+
+        r = sd_netlink_message_append_data(req, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta));
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
 int route_configure(
                 Route *route,
                 Link *link,
@@ -699,6 +787,10 @@ int route_configure(
         if (r < 0)
                 return log_link_error_errno(link, r, "Could not append RTA_METRICS attribute: %m");
 
+        r = append_nexthops(route, req);
+        if (r < 0)
+                return log_link_error_errno(link, r, "Could not append RTA_MULTIPATH attribute: %m");
+
         r = netlink_call_async(link->manager->rtnl, NULL, req, callback,
                                link_netlink_destroy_callback, link);
         if (r < 0)
@@ -1480,6 +1572,117 @@ int config_parse_route_ttl_propagate(
         return 0;
 }
 
+/* Parses MultiPathRoute=address[@name] [weight] and stores the result in the route of the
+ * current section. An empty value clears the multipath routes collected so far. */
+int config_parse_multipath_route(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+        _cleanup_free_ char *word = NULL, *buf = NULL;
+        _cleanup_free_ MultipathRoute *m = NULL;
+        Network *network = userdata;
+        const char *p, *ip, *dev;
+        union in_addr_union a;
+        int family, r;
+
+        assert(filename);
+        assert(section);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        r = route_new_static(network, filename, section_line, &n);
+        if (r < 0)
+                return r;
+
+        if (isempty(rvalue)) {
+                n->multipath_routes = ordered_set_free_free(n->multipath_routes);
+                /* clearing is a valid assignment: release n from the cleanup handler, as on success */
+                TAKE_PTR(n);
+                return 0;
+        }
+
+        m = new0(MultipathRoute, 1);
+        if (!m)
+                return log_oom();
+
+        p = rvalue;
+        r = extract_first_word(&p, &word, NULL, 0);
+        if (r == -ENOMEM)
+                return log_oom();
+        if (r <= 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r,
+                           "Invalid multipath route option, ignoring assignment: %s", rvalue);
+                return 0;
+        }
+
+        dev = strchr(word, '@');
+        if (dev) {
+                buf = strndup(word, dev - word);
+                if (!buf)
+                        return log_oom();
+                ip = buf;
+                dev++;
+        } else
+                ip = word;
+
+        r = in_addr_from_string_auto(ip, &family, &a);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r,
+                           "Invalid multipath route gateway '%s', ignoring assignment: %m", rvalue);
+                return 0;
+        }
+        m->gateway.address = a;
+        m->gateway.family = family;
+
+        if (dev) {
+                r = parse_ifindex_or_ifname(dev, &m->ifindex);
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r,
+                                   "Invalid interface name or index, ignoring assignment: %s", dev);
+                        return 0;
+                }
+        }
+
+        if (!isempty(p)) {
+                r = safe_atou32(p, &m->weight);
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r,
+                                   "Invalid multipath route weight, ignoring assignment: %s", p);
+                        return 0;
+                }
+                if (m->weight == 0 || m->weight > 256) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0,
+                                   "Invalid multipath route weight, ignoring assignment: %s", p);
+                        return 0;
+                }
+        }
+
+        r = ordered_set_ensure_allocated(&n->multipath_routes, NULL);
+        if (r < 0)
+                return log_oom();
+
+        r = ordered_set_put(n->multipath_routes, m);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r,
+                           "Failed to store multipath route, ignoring assignment: %m");
+                return 0;
+        }
+
+        TAKE_PTR(m);
+        TAKE_PTR(n);
+        return 0;
+}
+
 int route_section_verify(Route *route, Network *network) {
         if (section_is_invalid(route->section))
                 return -EINVAL;
diff --git a/src/network/networkd-route.h b/src/network/networkd-route.h
index 89d54020db..91bba368ee 100644
--- a/src/network/networkd-route.h
+++ b/src/network/networkd-route.h
@@ -10,6 +10,17 @@ typedef struct NetworkConfigSection NetworkConfigSection;
 #include "networkd-network.h"
 #include "networkd-util.h"
 
+typedef struct MultipathRouteVia {
+        uint16_t family;
+        union in_addr_union address;
+} _packed_ MultipathRouteVia;
+
+typedef struct MultipathRoute {
+        MultipathRouteVia gateway;
+        int ifindex;
+        uint32_t weight;
+} MultipathRoute;
+
 struct Route {
         Network *network;
         NetworkConfigSection *section;
@@ -42,6 +53,7 @@ struct Route {
         union in_addr_union dst;
         union in_addr_union src;
         union in_addr_union prefsrc;
+        OrderedSet *multipath_routes;
 
         usec_t lifetime;
         sd_event_source *expire;
@@ -96,3 +108,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_quickack);
 CONFIG_PARSER_PROTOTYPE(config_parse_fast_open_no_cookie);
 CONFIG_PARSER_PROTOTYPE(config_parse_route_ttl_propagate);
 CONFIG_PARSER_PROTOTYPE(config_parse_route_mtu);
+CONFIG_PARSER_PROTOTYPE(config_parse_multipath_route);
diff --git a/test/fuzz/fuzz-network-parser/directives.network b/test/fuzz/fuzz-network-parser/directives.network
index c8cb7d52f7..37d31e3a92 100644
--- a/test/fuzz/fuzz-network-parser/directives.network
+++ b/test/fuzz/fuzz-network-parser/directives.network
@@ -125,6 +125,7 @@ FastOpenNoCookie=
 Source=
 Metric=
 TTLPropagate=
+MultiPathRoute=
 [Network]
 IPv6DuplicateAddressDetection=
 IPMasquerade=
diff --git a/test/test-network/conf/25-route-static.network b/test/test-network/conf/25-route-static.network
index 9c8269e3b8..a4ba77a3e3 100644
--- a/test/test-network/conf/25-route-static.network
+++ b/test/test-network/conf/25-route-static.network
@@ -12,6 +12,10 @@ IPv4LLRoute=yes
 Destination=2001:1234:5:8fff:ff:ff:ff:ff/128
 Scope=link
 
+[Route]
+Destination=2001:1234:5:9fff:ff:ff:ff:ff/128
+Scope=link
+
 [Route]
 Destination=::/0
 Gateway=2001:1234:5:8fff:ff:ff:ff:ff
@@ -62,3 +66,18 @@ Destination=149.10.123.3
 [Route]
 Type=multicast
 Destination=149.10.123.4
+
+[Route]
+Destination=192.168.10.1/32
+MultiPathRoute=149.10.124.59@dummy98 10
+MultiPathRoute=149.10.124.60@dummy98 5
+
+[Route]
+Destination=2001:1234:5:7fff:ff:ff:ff:ff/128
+MultiPathRoute=2001:1234:5:8fff:ff:ff:ff:ff@dummy98 10
+MultiPathRoute=2001:1234:5:9fff:ff:ff:ff:ff@dummy98 5
+
+[Route]
+Destination=192.168.10.2/32
+MultiPathRoute=2001:1234:5:8fff:ff:ff:ff:ff@dummy98 10
+MultiPathRoute=2001:1234:5:9fff:ff:ff:ff:ff@dummy98 5
diff --git a/test/test-network/systemd-networkd-tests.py b/test/test-network/systemd-networkd-tests.py
index 3eadb1574d..90b1a8b140 100755
--- a/test/test-network/systemd-networkd-tests.py
+++ b/test/test-network/systemd-networkd-tests.py
@@ -1812,6 +1812,30 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
         print(output)
         self.assertRegex(output, 'prohibit 202.54.1.4 proto static')
 
+        print('### ip route show 192.168.10.1')
+        output = check_output('ip route show 192.168.10.1')
+        print(output)
+        self.assertRegex(output, '192.168.10.1 proto static')
+        self.assertRegex(output, 'nexthop via 149.10.124.59 dev dummy98 weight 10')
+        self.assertRegex(output, 'nexthop via 149.10.124.60 dev dummy98 weight 5')
+
+        print('### ip route show 192.168.10.2')
+        output = check_output('ip route show 192.168.10.2')
+        print(output)
+        # old ip command does not show IPv6 gateways...
+        self.assertRegex(output, '192.168.10.2 proto static')
+        self.assertRegex(output, 'nexthop')
+        self.assertRegex(output, 'dev dummy98 weight 10')
+        self.assertRegex(output, 'dev dummy98 weight 5')
+
+        print('### ip -6 route show 2001:1234:5:7fff:ff:ff:ff:ff')
+        output = check_output('ip -6 route show 2001:1234:5:7fff:ff:ff:ff:ff')
+        print(output)
+        # old ip command does not show 'nexthop' keyword and weight...
+        self.assertRegex(output, '2001:1234:5:7fff:ff:ff:ff:ff')
+        self.assertRegex(output, 'via 2001:1234:5:8fff:ff:ff:ff:ff dev dummy98')
+        self.assertRegex(output, 'via 2001:1234:5:9fff:ff:ff:ff:ff dev dummy98')
+
     def test_gateway_reconfigure(self):
         copy_unit_to_networkd_unit_path('25-gateway-static.network', '12-dummy.netdev')
         start_networkd()