Merge pull request #14194 from yuwata/network-multipath-routing-12541

network: introduce multipath routing
This commit is contained in:
Lennart Poettering 2020-01-03 15:38:03 +01:00 committed by GitHub
commit dc5737470e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 334 additions and 32 deletions

View File

@ -1378,6 +1378,16 @@
service type to CS6 (network control) or CS4 (Realtime). Defaults to CS6.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>MultiPathRoute=<replaceable>address</replaceable>[@<replaceable>name</replaceable>] [<replaceable>weight</replaceable>]</varname></term>
<listitem>
<para>Configures a multipath route. Multipath routing is the technique of using multiple
alternative paths through a network. Takes a gateway address. Optionally, takes a network
interface name or index separated with <literal>@</literal>, and a weight in the range 1..256
for this multipath route separated with whitespace. This setting can be specified multiple
times. If an empty string is assigned, then all previous assignments are cleared.</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -142,11 +142,9 @@ int sd_netlink_message_is_broadcast(const sd_netlink_message *m) {
/* If successful the updated message will be correctly aligned, if
unsuccessful the old message is untouched. */
static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *data, size_t data_length) {
uint32_t rta_length;
size_t message_length, padding_length;
size_t message_length;
struct nlmsghdr *new_hdr;
struct rtattr *rta;
char *padding;
unsigned i;
int offset;
@ -154,16 +152,10 @@ static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *da
assert(m->hdr);
assert(!m->sealed);
assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
assert(!data || data_length);
/* get offset of the new attribute */
offset = m->hdr->nlmsg_len;
/* get the size of the new rta attribute (with padding at the end) */
rta_length = RTA_LENGTH(data_length);
assert(!data || data_length > 0);
/* get the new message size (with padding at the end) */
message_length = offset + RTA_ALIGN(rta_length);
message_length = m->hdr->nlmsg_len + RTA_SPACE(data_length);
/* buffer should be smaller than both one page or 8K to be accepted by the kernel */
if (message_length > MIN(page_size(), 8192UL))
@ -176,33 +168,19 @@ static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *da
m->hdr = new_hdr;
/* get pointer to the attribute we are about to add */
rta = (struct rtattr *) ((uint8_t *) m->hdr + offset);
rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len);
rtattr_append_attribute_internal(rta, type, data, data_length);
/* if we are inside containers, extend them */
for (i = 0; i < m->n_containers; i++)
GET_CONTAINER(m, i)->rta_len += message_length - offset;
/* fill in the attribute */
rta->rta_type = type;
rta->rta_len = rta_length;
if (data)
/* we don't deal with the case where the user lies about the type
* and gives us too little data (so don't do that)
*/
padding = mempcpy(RTA_DATA(rta), data, data_length);
else
/* if no data was passed, make sure we still initialize the padding
note that we can have data_length > 0 (used by some containers) */
padding = RTA_DATA(rta);
/* make sure also the padding at the end of the message is initialized */
padding_length = (uint8_t*)m->hdr + message_length - (uint8_t*)padding;
memzero(padding, padding_length);
GET_CONTAINER(m, i)->rta_len += RTA_SPACE(data_length);
/* update message size */
offset = m->hdr->nlmsg_len;
m->hdr->nlmsg_len = message_length;
/* return old message size */
return offset;
}

View File

@ -2,6 +2,7 @@
#include "sd-netlink.h"
#include "memory-util.h"
#include "netlink-internal.h"
#include "netlink-util.h"
#include "strv.h"
@ -178,3 +179,60 @@ int rtnl_log_parse_error(int r) {
int rtnl_log_create_error(int r) {
return log_error_errno(r, "Failed to create netlink message: %m");
}
/* Writes one complete attribute at 'rta': the rtattr header, the payload (if any) and zeroed
 * bytes up to RTA_SPACE(data_length). The function writes RTA_SPACE(data_length) bytes starting
 * at 'rta', so the caller has to provide at least that much room.
 *
 * 'data' may be NULL while 'data_length' is non-zero; in that case the whole payload area is
 * zeroed instead of being copied into. */
void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length) {
        char *payload, *tail, *end;
        size_t tail_length;

        assert(rta);
        assert(!data || data_length > 0);

        /* attribute header */
        rta->rta_len = RTA_LENGTH(data_length);
        rta->rta_type = type;

        payload = RTA_DATA(rta);
        end = (char *) rta + RTA_SPACE(data_length);

        /* Copy the payload if we got one. We trust the caller that 'data' really holds
         * 'data_length' bytes. Without a payload, everything after the header counts as
         * "tail" and is cleared below. */
        if (data)
                tail = mempcpy(payload, data, data_length);
        else
                tail = payload;

        /* never leave the bytes behind the payload uninitialized */
        tail_length = end - tail;
        memzero(tail, tail_length);
}
/* Appends a nested attribute to the attribute buffer '*rta', growing the buffer with realloc().
 *
 * '*rta' must already point to a heap-allocated attribute whose rta_len is valid: the new
 * attribute is placed at RTA_ALIGN((*rta)->rta_len) and (*rta)->rta_len is then updated to cover
 * it. A NULL '*rta' is therefore a programming error and is caught by an assertion.
 *
 * On success '*rta' may have been moved by realloc(); callers must re-derive any pointers into
 * the old buffer.
 *
 * Returns 0 on success, -ENOBUFS if the resulting buffer would exceed the size limit checked
 * below, and -ENOMEM if the reallocation fails. On failure '*rta' is left untouched. */
int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length) {
        struct rtattr *new_rta, *sub_rta;
        size_t offset, message_length;

        assert(rta);
        assert(*rta);
        assert(!data || data_length > 0);

        /* where the new attribute starts, i.e. the current (aligned) end of the buffer */
        offset = RTA_ALIGN((*rta)->rta_len);

        /* get the new message size (with padding at the end) */
        message_length = offset + RTA_SPACE(data_length);

        /* the buffer must not exceed the smaller of one page and 8K to be accepted by the kernel */
        if (message_length > MIN(page_size(), 8192UL))
                return -ENOBUFS;

        /* realloc to fit the new attribute */
        new_rta = realloc(*rta, message_length);
        if (!new_rta)
                return -ENOMEM;
        *rta = new_rta;

        /* get pointer to the attribute we are about to add */
        sub_rta = (struct rtattr *) ((uint8_t *) *rta + offset);

        rtattr_append_attribute_internal(sub_rta, type, data, data_length);

        /* update rta_len */
        (*rta)->rta_len = message_length;

        return 0;
}

View File

@ -77,3 +77,6 @@ int rtnl_log_create_error(int r);
int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data);
int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data);
/* Fills in one attribute (header, optional payload, zeroed trailing bytes) at 'rta'. Writes
 * RTA_SPACE(data_length) bytes starting at 'rta'; does not allocate. */
void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length);
/* Grows the heap buffer '*rta' with realloc() and appends a nested attribute at its aligned end.
 * Returns 0 on success, -ENOBUFS if the buffer would become too large, -ENOMEM on allocation
 * failure. '*rta' may change on success. */
int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length);

View File

@ -1040,7 +1040,7 @@ int link_request_set_routes(Link *link) {
for (phase = 0; phase < _PHASE_MAX; phase++)
LIST_FOREACH(routes, rt, link->network->static_routes) {
if (in_addr_is_null(rt->family, &rt->gw) != (phase == PHASE_NON_GATEWAY))
if ((in_addr_is_null(rt->family, &rt->gw) && ordered_set_isempty(rt->multipath_routes)) != (phase == PHASE_NON_GATEWAY))
continue;
r = route_configure(rt, link, route_handler);

View File

@ -147,6 +147,7 @@ Route.InitialAdvertisedReceiveWindow, config_parse_tcp_window,
Route.QuickAck, config_parse_quickack, 0, 0
Route.FastOpenNoCookie, config_parse_fast_open_no_cookie, 0, 0
Route.TTLPropagate, config_parse_route_ttl_propagate, 0, 0
Route.MultiPathRoute, config_parse_multipath_route, 0, 0
NextHop.Id, config_parse_nexthop_id, 0, 0
NextHop.Gateway, config_parse_nexthop_gateway, 0, 0
DHCPv4.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)

View File

@ -144,6 +144,8 @@ void route_free(Route *route) {
set_remove(route->link->routes_foreign, route);
}
ordered_set_free_free(route->multipath_routes);
sd_event_source_unref(route->expire);
free(route);
@ -516,6 +518,88 @@ int route_expire_handler(sd_event_source *s, uint64_t usec, void *userdata) {
return 1;
}
/* Appends one nexthop entry for 'm' to the buffer '*rta': a struct rtnexthop written at byte
 * 'offset', followed by a gateway attribute. RTA_GATEWAY is used when the gateway has the same
 * address family as the route, RTA_VIA (family + address) otherwise.
 *
 * '*rta' may be moved by the reallocations done here. Returns 0 on success or a negative
 * errno-style value; on failure of the gateway attribute the rta_len adjustment made for the
 * nexthop header is rolled back. */
static int append_nexthop_one(Route *route, MultipathRoute *m, struct rtattr **rta, size_t offset) {
        struct rtattr *grown;
        struct rtnexthop *hop;
        unsigned short attr_type;
        const void *payload;
        size_t payload_size;
        int r;

        assert(route);
        assert(m);
        assert(rta);
        assert(*rta);

        /* make room for the nexthop header */
        grown = realloc(*rta, RTA_ALIGN((*rta)->rta_len) + RTA_SPACE(sizeof(struct rtnexthop)));
        if (!grown)
                return -ENOMEM;
        *rta = grown;

        hop = (struct rtnexthop *) ((uint8_t *) *rta + offset);
        *hop = (struct rtnexthop) {
                .rtnh_len = sizeof(*hop),
                .rtnh_ifindex = m->ifindex,
                /* the configured weight is stored minus one; an unset weight (0) stays 0 */
                .rtnh_hops = m->weight > 0 ? m->weight - 1 : 0,
        };

        (*rta)->rta_len += sizeof(struct rtnexthop);

        /* pick the attribute that carries the gateway */
        if (route->family == m->gateway.family) {
                attr_type = RTA_GATEWAY;
                payload = &m->gateway.address;
                payload_size = FAMILY_ADDRESS_SIZE(m->gateway.family);
        } else {
                attr_type = RTA_VIA;
                payload = &m->gateway;
                payload_size = FAMILY_ADDRESS_SIZE(m->gateway.family) + sizeof(m->gateway.family);
        }

        r = rtattr_append_attribute(rta, attr_type, payload, payload_size);
        if (r < 0) {
                /* undo the accounting for the nexthop header added above */
                (*rta)->rta_len -= sizeof(struct rtnexthop);
                return r;
        }

        /* the buffer may have moved, so look the nexthop header up again before extending it */
        hop = (struct rtnexthop *) ((uint8_t *) *rta + offset);
        hop->rtnh_len += RTA_SPACE(payload_size);

        return 0;
}
/* Builds an RTA_MULTIPATH attribute from route->multipath_routes in a temporary heap buffer and
 * appends its payload to 'req'. Does nothing and returns 0 if the route has no multipath
 * entries. Returns a negative errno-style value on failure. */
static int append_nexthops(Route *route, sd_netlink_message *req) {
        _cleanup_free_ struct rtattr *buf = NULL;
        MultipathRoute *entry;
        size_t cursor;
        Iterator it;
        int r;

        if (ordered_set_isempty(route->multipath_routes))
                return 0;

        /* start with an empty container attribute, i.e. just the header */
        buf = new(struct rtattr, 1);
        if (!buf)
                return -ENOMEM;

        buf->rta_type = RTA_MULTIPATH;
        buf->rta_len = RTA_LENGTH(0);

        /* byte offset of the first nexthop entry, right behind the container header */
        cursor = (uint8_t *) RTA_DATA(buf) - (uint8_t *) buf;

        ORDERED_SET_FOREACH(entry, route->multipath_routes, it) {
                struct rtnexthop *hop;

                r = append_nexthop_one(route, entry, &buf, cursor);
                if (r < 0)
                        return r;

                /* 'buf' may have been reallocated, so work with offsets instead of pointers */
                hop = (struct rtnexthop *) ((uint8_t *) buf + cursor);
                cursor = (uint8_t *) RTNH_NEXT(hop) - (uint8_t *) buf;
        }

        r = sd_netlink_message_append_data(req, RTA_MULTIPATH, RTA_DATA(buf), RTA_PAYLOAD(buf));
        if (r < 0)
                return r;

        return 0;
}
int route_configure(
Route *route,
Link *link,
@ -699,6 +783,10 @@ int route_configure(
if (r < 0)
return log_link_error_errno(link, r, "Could not append RTA_METRICS attribute: %m");
r = append_nexthops(route, req);
if (r < 0)
return log_link_error_errno(link, r, "Could not append RTA_MULTIPATH attribute: %m");
r = netlink_call_async(link->manager->rtnl, NULL, req, callback,
link_netlink_destroy_callback, link);
if (r < 0)
@ -1480,6 +1568,113 @@ int config_parse_route_ttl_propagate(
return 0;
}
/* Parses one "MultiPathRoute=" assignment of the form
 *
 *     ADDRESS[@INTERFACE] [WEIGHT]
 *
 * and stores the result in the multipath_routes set of the static route belonging to the
 * current section. INTERFACE is an interface name or index, WEIGHT must be in 1..256. An empty
 * value drops all entries collected so far for this route.
 *
 * Returns 0 when the assignment was handled (including the cases that are logged and
 * ignored), or a negative errno-style value on allocation failure or when the route object
 * cannot be acquired. */
int config_parse_multipath_route(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
        _cleanup_free_ char *word = NULL, *buf = NULL;
        _cleanup_free_ MultipathRoute *m = NULL;
        Network *network = userdata;
        const char *p, *ip, *dev;
        union in_addr_union a;
        int family, r;

        assert(filename);
        assert(section);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        r = route_new_static(network, filename, section_line, &n);
        if (r < 0)
                return r;

        if (isempty(rvalue)) {
                n->multipath_routes = ordered_set_free_free(n->multipath_routes);
                /* Resetting the list is a successful assignment, hence release 'n' exactly like
                 * the success path at the end of this function does. Otherwise the cleanup
                 * handler would run on the route (presumably freeing or invalidating it). */
                TAKE_PTR(n);
                return 0;
        }

        m = new0(MultipathRoute, 1);
        if (!m)
                return log_oom();

        /* first word: ADDRESS[@INTERFACE]; 'p' is left pointing at the remainder */
        p = rvalue;
        r = extract_first_word(&p, &word, NULL, 0);
        if (r == -ENOMEM)
                return log_oom();
        if (r <= 0) {
                log_syntax(unit, LOG_ERR, filename, line, r,
                           "Invalid multipath route option, ignoring assignment: %s", rvalue);
                return 0;
        }

        /* split off the optional "@INTERFACE" suffix */
        dev = strchr(word, '@');
        if (dev) {
                buf = strndup(word, dev - word);
                if (!buf)
                        return log_oom();
                ip = buf;
                dev++;
        } else
                ip = word;

        r = in_addr_from_string_auto(ip, &family, &a);
        if (r < 0) {
                log_syntax(unit, LOG_ERR, filename, line, r,
                           "Invalid multipath route gateway '%s', ignoring assignment: %m", rvalue);
                return 0;
        }
        m->gateway.address = a;
        m->gateway.family = family;

        if (dev) {
                r = parse_ifindex_or_ifname(dev, &m->ifindex);
                if (r < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, r,
                                   "Invalid interface name or index, ignoring assignment: %s", dev);
                        return 0;
                }
        }

        /* whatever follows the first word is the optional weight */
        if (!isempty(p)) {
                r = safe_atou32(p, &m->weight);
                if (r < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, r,
                                   "Invalid multipath route weight, ignoring assignment: %s", p);
                        return 0;
                }
                if (m->weight == 0 || m->weight > 256) {
                        log_syntax(unit, LOG_ERR, filename, line, 0,
                                   "Invalid multipath route weight, ignoring assignment: %s", p);
                        return 0;
                }
        }

        r = ordered_set_ensure_allocated(&n->multipath_routes, NULL);
        if (r < 0)
                return log_oom();

        r = ordered_set_put(n->multipath_routes, m);
        if (r < 0) {
                log_syntax(unit, LOG_ERR, filename, line, r,
                           "Failed to store multipath route, ignoring assignment: %m");
                return 0;
        }

        /* the set now references 'm' and the route is kept, so disarm both cleanup handlers */
        TAKE_PTR(m);
        TAKE_PTR(n);
        return 0;
}
int route_section_verify(Route *route, Network *network) {
if (section_is_invalid(route->section))
return -EINVAL;

View File

@ -10,6 +10,17 @@ typedef struct NetworkConfigSection NetworkConfigSection;
#include "networkd-network.h"
#include "networkd-util.h"
/* Gateway of one multipath nexthop: address family followed directly by the address. The
 * struct is packed; append_nexthop_one() passes its leading sizeof(family) + address-size
 * bytes as the payload of an RTA_VIA attribute. */
typedef struct MultipathRouteVia {
uint16_t family;
union in_addr_union address;
} _packed_ MultipathRouteVia;
/* One parsed "MultiPathRoute=" entry, stored in Route.multipath_routes. */
typedef struct MultipathRoute {
/* gateway address and its family */
MultipathRouteVia gateway;
/* interface index from the optional "@interface" part; stays 0 when none was given */
int ifindex;
/* configured weight, validated to 1..256 by the parser; 0 when none was given */
uint32_t weight;
} MultipathRoute;
struct Route {
Network *network;
NetworkConfigSection *section;
@ -42,6 +53,7 @@ struct Route {
union in_addr_union dst;
union in_addr_union src;
union in_addr_union prefsrc;
OrderedSet *multipath_routes;
usec_t lifetime;
sd_event_source *expire;
@ -96,3 +108,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_quickack);
CONFIG_PARSER_PROTOTYPE(config_parse_fast_open_no_cookie);
CONFIG_PARSER_PROTOTYPE(config_parse_route_ttl_propagate);
CONFIG_PARSER_PROTOTYPE(config_parse_route_mtu);
CONFIG_PARSER_PROTOTYPE(config_parse_multipath_route);

View File

@ -125,6 +125,7 @@ FastOpenNoCookie=
Source=
Metric=
TTLPropagate=
MultiPathRoute=
[Network]
IPv6DuplicateAddressDetection=
IPMasquerade=

View File

@ -12,6 +12,10 @@ IPv4LLRoute=yes
Destination=2001:1234:5:8fff:ff:ff:ff:ff/128
Scope=link
[Route]
Destination=2001:1234:5:9fff:ff:ff:ff:ff/128
Scope=link
[Route]
Destination=::/0
Gateway=2001:1234:5:8fff:ff:ff:ff:ff
@ -62,3 +66,18 @@ Destination=149.10.123.3
[Route]
Type=multicast
Destination=149.10.123.4
[Route]
Destination=192.168.10.1/32
MultiPathRoute=149.10.124.59@dummy98 10
MultiPathRoute=149.10.124.60@dummy98 5
[Route]
Destination=2001:1234:5:7fff:ff:ff:ff:ff/128
MultiPathRoute=2001:1234:5:8fff:ff:ff:ff:ff@dummy98 10
MultiPathRoute=2001:1234:5:9fff:ff:ff:ff:ff@dummy98 5
[Route]
Destination=192.168.10.2/32
MultiPathRoute=2001:1234:5:8fff:ff:ff:ff:ff@dummy98 10
MultiPathRoute=2001:1234:5:9fff:ff:ff:ff:ff@dummy98 5

View File

@ -1812,6 +1812,30 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
print(output)
self.assertRegex(output, 'prohibit 202.54.1.4 proto static')
print('### ip route show 192.168.10.1')
output = check_output('ip route show 192.168.10.1')
print(output)
self.assertRegex(output, '192.168.10.1 proto static')
self.assertRegex(output, 'nexthop via 149.10.124.59 dev dummy98 weight 10')
self.assertRegex(output, 'nexthop via 149.10.124.60 dev dummy98 weight 5')
print('### ip route show 192.168.10.2')
output = check_output('ip route show 192.168.10.2')
print(output)
# old ip command does not show IPv6 gateways...
self.assertRegex(output, '192.168.10.2 proto static')
self.assertRegex(output, 'nexthop')
self.assertRegex(output, 'dev dummy98 weight 10')
self.assertRegex(output, 'dev dummy98 weight 5')
print('### ip -6 route show 2001:1234:5:7fff:ff:ff:ff:ff')
output = check_output('ip -6 route show 2001:1234:5:7fff:ff:ff:ff:ff')
print(output)
# old ip command does not show 'nexthop' keyword and weight...
self.assertRegex(output, '2001:1234:5:7fff:ff:ff:ff:ff')
self.assertRegex(output, 'via 2001:1234:5:8fff:ff:ff:ff:ff dev dummy98')
self.assertRegex(output, 'via 2001:1234:5:9fff:ff:ff:ff:ff dev dummy98')
def test_gateway_reconfigure(self):
copy_unit_to_networkd_unit_path('25-gateway-static.network', '12-dummy.netdev')
start_networkd()