![Lennart Poettering](/assets/img/avatar_default.png)
If our netlink input buffer overruns, the kernel will send us ENOBUFS on the next recvmsg(). Don't consider this a complete failure resulting in closing of the netlink socket. Instead, simply continue (after debug logging). Of course, ideally we'd have a better strategy for this, and would have a way to resync if this happens (as well as a scheme for cancelling all ongoing asynchronous transactions), but for now let's at least not choke fatally, and simply accept that we lost some messages and continue. Note that if we lose messages when synchronously waiting for an operation to complete, we'll still propagate the ENOBUFS up, to make the individual transaction fail. See: #5398 (This commit does not properly fix the issue, hence we should leave the bug open.)
475 lines
14 KiB
C
475 lines
14 KiB
C
/***
|
|
This file is part of systemd.
|
|
|
|
Copyright 2013 Tom Gundersen <teg@jklm.no>
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
|
(at your option) any later version.
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
***/
|
|
|
|
#include <netinet/in.h>
|
|
#include <stdbool.h>
|
|
#include <unistd.h>
|
|
|
|
#include "sd-netlink.h"
|
|
|
|
#include "alloc-util.h"
|
|
#include "format-util.h"
|
|
#include "missing.h"
|
|
#include "netlink-internal.h"
|
|
#include "netlink-types.h"
|
|
#include "netlink-util.h"
|
|
#include "refcnt.h"
|
|
#include "socket-util.h"
|
|
#include "util.h"
|
|
|
|
/* Creates a raw netlink socket for the given netlink protocol family. The
 * socket is created non-blocking and with the close-on-exec flag set.
 *
 * Returns the new file descriptor on success, or a negative errno-style
 * error code if socket() fails. */
int socket_open(int family) {
        int sock = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);

        return sock >= 0 ? sock : -errno;
}
|
|
|
|
/* Asks the kernel which multicast groups the socket currently belongs to and
 * records every one of them in nl->broadcast_group_refs with a reference
 * count of one.
 *
 * If NETLINK_LIST_MEMBERSHIPS is rejected with ENOPROTOOPT the memberships
 * cannot be enumerated; nl->broadcast_group_dont_leave is set instead and 0
 * is returned.
 *
 * Returns 0 on success, or a negative errno-style error code. */
static int broadcast_groups_get(sd_netlink *nl) {
        _cleanup_free_ uint32_t *membership = NULL;
        socklen_t len = 0, expected_len;
        unsigned word, bit;
        int r;

        assert(nl);
        assert(nl->fd >= 0);

        /* first call: no buffer is passed, we only learn the length to use */
        if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len) < 0) {
                if (errno != ENOPROTOOPT)
                        return -errno;

                nl->broadcast_group_dont_leave = true;
                return 0;
        }

        if (len == 0)
                return 0;

        membership = new0(uint32_t, len);
        if (!membership)
                return -ENOMEM;

        expected_len = len;

        /* second call: fetch the membership bitmap itself */
        if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, membership, &len) < 0)
                return -errno;

        /* the reported length changed between the two calls */
        if (len != expected_len)
                return -EIO;

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r < 0)
                return r;

        for (word = 0; word < len; word++)
                for (bit = 0; bit < sizeof(uint32_t) * 8; bit++) {
                        unsigned group;

                        if ((membership[word] & (1U << bit)) == 0)
                                continue;

                        /* bit positions start at 0, group numbers at 1 */
                        group = word * sizeof(uint32_t) * 8 + bit + 1;

                        r = hashmap_put(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(1));
                        if (r < 0)
                                return r;
                }

        return 0;
}
|
|
|
|
/* Prepares the netlink socket for use: enables NETLINK_PKTINFO, binds the
 * socket to nl->sockaddr, reads the resulting address back into
 * nl->sockaddr and finally loads the current multicast group memberships.
 *
 * Returns 0 on success, or a negative errno-style error code. */
int socket_bind(sd_netlink *nl) {
        socklen_t addrlen = sizeof(nl->sockaddr);
        int enable = 1, r;

        if (setsockopt(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, &enable, sizeof(enable)) < 0)
                return -errno;

        /* EINVAL is tolerated so that an already bound socket can be opened */
        if (bind(nl->fd, &nl->sockaddr.sa, addrlen) < 0 && errno != EINVAL)
                return -errno;

        if (getsockname(nl->fd, &nl->sockaddr.sa, &addrlen) < 0)
                return -errno;

        r = broadcast_groups_get(nl);

        return r < 0 ? r : 0;
}
|
|
|
|
/* Returns the reference count stored for the given multicast group in
 * nl->broadcast_group_refs (presumably 0 when the group has no entry;
 * depends on what hashmap_get() returns for a missing key). */
static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
        void *stored;

        assert(nl);

        stored = hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group));

        return PTR_TO_UINT(stored);
}
|
|
|
|
/* Stores n_ref as the reference count of the given multicast group in
 * nl->broadcast_group_refs, replacing any previous value.
 *
 * Returns 0 on success, or the negative error code of hashmap_replace(). */
static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
        int r;

        assert(nl);

        r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));

        return r < 0 ? r : 0;
}
|
|
|
|
/* Subscribes the socket to the given multicast group via
 * NETLINK_ADD_MEMBERSHIP.
 *
 * Returns 0 on success, or a negative errno-style error code. */
static int broadcast_group_join(sd_netlink *nl, unsigned group) {
        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        if (setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group)) < 0)
                return -errno;

        return 0;
}
|
|
|
|
/* Takes one reference on the given multicast group. The socket joins the
 * group when the first reference is taken; further references only bump the
 * stored counter.
 *
 * If joining fails, the counter is restored to its previous value, so that a
 * later call attempts the join again instead of assuming the socket is
 * already a member.
 *
 * Returns 0 on success, or a negative errno-style error code. */
int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
        unsigned n_ref;
        int r;

        assert(nl);

        n_ref = broadcast_group_get_ref(nl, group);

        n_ref++;

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r < 0)
                return r;

        r = broadcast_group_set_ref(nl, group, n_ref);
        if (r < 0)
                return r;

        if (n_ref > 1)
                /* somebody else holds a reference, the group was joined before */
                return 0;

        r = broadcast_group_join(nl, group);
        if (r < 0) {
                /* undo the reference recorded above; the result is ignored
                 * because the join error is the one we report */
                (void) broadcast_group_set_ref(nl, group, n_ref - 1);
                return r;
        }

        return 0;
}
|
|
|
|
/* Unsubscribes the socket from the given multicast group via
 * NETLINK_DROP_MEMBERSHIP. Does nothing when
 * nl->broadcast_group_dont_leave is set.
 *
 * Returns 0 on success, or a negative errno-style error code. */
static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        if (!nl->broadcast_group_dont_leave &&
            setsockopt(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &group, sizeof(group)) < 0)
                return -errno;

        return 0;
}
|
|
|
|
/* Drops one reference on the given multicast group. When the last reference
 * goes away the socket leaves the group. The caller must hold at least one
 * reference (enforced by an assertion).
 *
 * Returns 0 on success, or a negative errno-style error code. */
int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
        unsigned n_ref;
        int r;

        assert(nl);

        n_ref = broadcast_group_get_ref(nl, group);
        assert(n_ref > 0);

        r = broadcast_group_set_ref(nl, group, --n_ref);
        if (r < 0)
                return r;

        if (n_ref == 0) {
                /* that was the last reference */
                r = broadcast_group_leave(nl, group);
                if (r < 0)
                        return r;
        }

        return 0;
}
|
|
|
|
/* returns the number of bytes sent, or a negative error code */
|
|
int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
|
|
union {
|
|
struct sockaddr sa;
|
|
struct sockaddr_nl nl;
|
|
} addr = {
|
|
.nl.nl_family = AF_NETLINK,
|
|
};
|
|
ssize_t k;
|
|
|
|
assert(nl);
|
|
assert(m);
|
|
assert(m->hdr);
|
|
|
|
k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len,
|
|
0, &addr.sa, sizeof(addr));
|
|
if (k < 0)
|
|
return -errno;
|
|
|
|
return k;
|
|
}
|
|
|
|
static int socket_recv_message(int fd, struct iovec *iov, uint32_t *_group, bool peek) {
|
|
union sockaddr_union sender;
|
|
uint8_t cmsg_buffer[CMSG_SPACE(sizeof(struct nl_pktinfo))];
|
|
struct msghdr msg = {
|
|
.msg_iov = iov,
|
|
.msg_iovlen = 1,
|
|
.msg_name = &sender,
|
|
.msg_namelen = sizeof(sender),
|
|
.msg_control = cmsg_buffer,
|
|
.msg_controllen = sizeof(cmsg_buffer),
|
|
};
|
|
struct cmsghdr *cmsg;
|
|
uint32_t group = 0;
|
|
int r;
|
|
|
|
assert(fd >= 0);
|
|
assert(iov);
|
|
|
|
r = recvmsg(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
|
|
if (r < 0) {
|
|
/* no data */
|
|
if (errno == ENOBUFS)
|
|
log_debug("rtnl: kernel receive buffer overrun");
|
|
else if (errno == EAGAIN)
|
|
log_debug("rtnl: no data in socket");
|
|
|
|
return IN_SET(errno, EAGAIN, EINTR) ? 0 : -errno;
|
|
}
|
|
|
|
if (sender.nl.nl_pid != 0) {
|
|
/* not from the kernel, ignore */
|
|
log_debug("rtnl: ignoring message from portid %"PRIu32, sender.nl.nl_pid);
|
|
|
|
if (peek) {
|
|
/* drop the message */
|
|
r = recvmsg(fd, &msg, 0);
|
|
if (r < 0)
|
|
return IN_SET(errno, EAGAIN, EINTR) ? 0 : -errno;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
CMSG_FOREACH(cmsg, &msg) {
|
|
if (cmsg->cmsg_level == SOL_NETLINK &&
|
|
cmsg->cmsg_type == NETLINK_PKTINFO &&
|
|
cmsg->cmsg_len == CMSG_LEN(sizeof(struct nl_pktinfo))) {
|
|
struct nl_pktinfo *pktinfo = (void *)CMSG_DATA(cmsg);
|
|
|
|
/* multi-cast group */
|
|
group = pktinfo->group;
|
|
}
|
|
}
|
|
|
|
if (_group)
|
|
*_group = group;
|
|
|
|
return r;
|
|
}
|
|
|
|
/* On success, the number of bytes received is returned and *ret points to the received message
|
|
* which has a valid header and the correct size.
|
|
* If nothing useful was received 0 is returned.
|
|
* On failure, a negative error code is returned.
|
|
*/
|
|
int socket_read_message(sd_netlink *rtnl) {
|
|
_cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL;
|
|
struct iovec iov = {};
|
|
uint32_t group = 0;
|
|
bool multi_part = false, done = false;
|
|
struct nlmsghdr *new_msg;
|
|
size_t len;
|
|
int r;
|
|
unsigned i = 0;
|
|
|
|
assert(rtnl);
|
|
assert(rtnl->rbuffer);
|
|
assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr));
|
|
|
|
/* read nothing, just get the pending message size */
|
|
r = socket_recv_message(rtnl->fd, &iov, NULL, true);
|
|
if (r <= 0)
|
|
return r;
|
|
else
|
|
len = (size_t)r;
|
|
|
|
/* make room for the pending message */
|
|
if (!greedy_realloc((void **)&rtnl->rbuffer,
|
|
&rtnl->rbuffer_allocated,
|
|
len, sizeof(uint8_t)))
|
|
return -ENOMEM;
|
|
|
|
iov.iov_base = rtnl->rbuffer;
|
|
iov.iov_len = rtnl->rbuffer_allocated;
|
|
|
|
/* read the pending message */
|
|
r = socket_recv_message(rtnl->fd, &iov, &group, false);
|
|
if (r <= 0)
|
|
return r;
|
|
else
|
|
len = (size_t)r;
|
|
|
|
if (len > rtnl->rbuffer_allocated)
|
|
/* message did not fit in read buffer */
|
|
return -EIO;
|
|
|
|
if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
|
|
multi_part = true;
|
|
|
|
for (i = 0; i < rtnl->rqueue_partial_size; i++) {
|
|
if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) ==
|
|
rtnl->rbuffer->nlmsg_seq) {
|
|
first = rtnl->rqueue_partial[i];
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
|
|
_cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
|
|
const NLType *nl_type;
|
|
|
|
if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid)
|
|
/* not broadcast and not for us */
|
|
continue;
|
|
|
|
if (new_msg->nlmsg_type == NLMSG_NOOP)
|
|
/* silently drop noop messages */
|
|
continue;
|
|
|
|
if (new_msg->nlmsg_type == NLMSG_DONE) {
|
|
/* finished reading multi-part message */
|
|
done = true;
|
|
|
|
/* if first is not defined, put NLMSG_DONE into the receive queue. */
|
|
if (first)
|
|
continue;
|
|
}
|
|
|
|
/* check that we support this message type */
|
|
r = type_system_get_type(&type_system_root, &nl_type, new_msg->nlmsg_type);
|
|
if (r < 0) {
|
|
if (r == -EOPNOTSUPP)
|
|
log_debug("sd-netlink: ignored message with unknown type: %i",
|
|
new_msg->nlmsg_type);
|
|
|
|
continue;
|
|
}
|
|
|
|
/* check that the size matches the message type */
|
|
if (new_msg->nlmsg_len < NLMSG_LENGTH(type_get_size(nl_type))) {
|
|
log_debug("sd-netlink: message larger than expected, dropping");
|
|
continue;
|
|
}
|
|
|
|
r = message_new_empty(rtnl, &m);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
m->broadcast = !!group;
|
|
|
|
m->hdr = memdup(new_msg, new_msg->nlmsg_len);
|
|
if (!m->hdr)
|
|
return -ENOMEM;
|
|
|
|
/* seal and parse the top-level message */
|
|
r = sd_netlink_message_rewind(m);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
/* push the message onto the multi-part message stack */
|
|
if (first)
|
|
m->next = first;
|
|
first = m;
|
|
m = NULL;
|
|
}
|
|
|
|
if (len)
|
|
log_debug("sd-netlink: discarding %zu bytes of incoming message", len);
|
|
|
|
if (!first)
|
|
return 0;
|
|
|
|
if (!multi_part || done) {
|
|
/* we got a complete message, push it on the read queue */
|
|
r = rtnl_rqueue_make_room(rtnl);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
rtnl->rqueue[rtnl->rqueue_size++] = first;
|
|
first = NULL;
|
|
|
|
if (multi_part && (i < rtnl->rqueue_partial_size)) {
|
|
/* remove the message form the partial read queue */
|
|
memmove(rtnl->rqueue_partial + i,rtnl->rqueue_partial + i + 1,
|
|
sizeof(sd_netlink_message*) * (rtnl->rqueue_partial_size - i - 1));
|
|
rtnl->rqueue_partial_size--;
|
|
}
|
|
|
|
return 1;
|
|
} else {
|
|
/* we only got a partial multi-part message, push it on the
|
|
partial read queue */
|
|
if (i < rtnl->rqueue_partial_size) {
|
|
rtnl->rqueue_partial[i] = first;
|
|
} else {
|
|
r = rtnl_rqueue_partial_make_room(rtnl);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
rtnl->rqueue_partial[rtnl->rqueue_partial_size++] = first;
|
|
}
|
|
first = NULL;
|
|
|
|
return 0;
|
|
}
|
|
}
|