Systemd/src/libsystemd/sd-netlink/netlink-socket.c
Lennart Poettering 71994cff31 sd-netlink: don't give up on netlink on ENOBUFS
If our netlink input buffer overruns the kernel will send us ENOBUFS on
the next recvmsg(). Don't consider this a complete failure resulting in
closing of the netlink socket. Instead, simply continue (after debug
logging).

Of course, ideally we'd have a better strategy for this, and would have
a way to resync if this happens (as well as a scheme for cancelling all
ongoing asynchronous transactions), but for now let's at least not choke
fatally, and simply accept that we lost some messages and continue.

Note that if we lose messages when synchronously waiting for an
operation to complete, we'll still propagate the ENOBUFS up, to make the
individual transaction fail.

See: #5398

(This commit does not properly fix the issue, hence we should leave the bug
open.)
2017-02-21 21:41:32 +01:00

475 lines
14 KiB
C

/***
This file is part of systemd.
Copyright 2013 Tom Gundersen <teg@jklm.no>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <netinet/in.h>
#include <stdbool.h>
#include <unistd.h>
#include "sd-netlink.h"
#include "alloc-util.h"
#include "format-util.h"
#include "missing.h"
#include "netlink-internal.h"
#include "netlink-types.h"
#include "netlink-util.h"
#include "refcnt.h"
#include "socket-util.h"
#include "util.h"
/* Creates a raw netlink socket for the given netlink protocol family. The
 * socket is opened close-on-exec and non-blocking.
 * Returns the new file descriptor, or a negative errno-style code if
 * socket() fails. */
int socket_open(int family) {
        int sock = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);

        return sock < 0 ? -errno : sock;
}
/* Queries the multicast group memberships the socket already has and records
 * each of them in nl->broadcast_group_refs with a reference count of 1.
 *
 * If the kernel answers ENOPROTOOPT to NETLINK_LIST_MEMBERSHIPS, nothing is
 * recorded and nl->broadcast_group_dont_leave is set instead.
 *
 * Returns 0 on success, a negative errno-style code on failure. */
static int broadcast_groups_get(sd_netlink *nl) {
        _cleanup_free_ uint32_t *memberships = NULL;
        socklen_t len = 0, requested_len;
        unsigned word, bit;
        int r;

        assert(nl);
        assert(nl->fd >= 0);

        /* first pass without a buffer: only learn the required length */
        if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len) < 0) {
                if (errno != ENOPROTOOPT)
                        return -errno;

                nl->broadcast_group_dont_leave = true;
                return 0;
        }

        if (len == 0)
                return 0;

        /* NOTE(review): len is used as the element count here and as the loop
         * bound below; confirm whether the kernel reports it in bytes or in
         * uint32_t elements. */
        memberships = new0(uint32_t, len);
        if (!memberships)
                return -ENOMEM;

        requested_len = len;

        /* second pass: fetch the actual membership bitmap */
        if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, memberships, &len) < 0)
                return -errno;

        /* the reported length changed between the two calls */
        if (len != requested_len)
                return -EIO;

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r < 0)
                return r;

        for (word = 0; word < len; word++)
                for (bit = 0; bit < sizeof(uint32_t) * 8; bit++) {
                        uint32_t mask = 1U << bit;
                        unsigned group;

                        if ((memberships[word] & mask) == 0)
                                continue;

                        /* group numbers are 1-based: bit 0 of word 0 is group 1 */
                        group = word * sizeof(uint32_t) * 8 + bit + 1;

                        r = hashmap_put(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(1));
                        if (r < 0)
                                return r;
                }

        return 0;
}
/* Prepares the socket in nl->fd for use: enables NETLINK_PKTINFO, binds the
 * socket to nl->sockaddr, reads the resulting address back into nl->sockaddr
 * and records the multicast groups the socket is already a member of.
 * Returns 0 on success, a negative errno-style code on failure. */
int socket_bind(sd_netlink *nl) {
        socklen_t addrlen = sizeof(nl->sockaddr);
        int enable = 1;
        int r;

        if (setsockopt(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, &enable, sizeof(enable)) < 0)
                return -errno;

        /* EINVAL is tolerated so that an already bound socket can be opened */
        if (bind(nl->fd, &nl->sockaddr.sa, addrlen) < 0 && errno != EINVAL)
                return -errno;

        if (getsockname(nl->fd, &nl->sockaddr.sa, &addrlen) < 0)
                return -errno;

        r = broadcast_groups_get(nl);

        return r < 0 ? r : 0;
}
/* Looks up the reference count stored for a multicast group in
 * nl->broadcast_group_refs and returns it (presumably 0 when the group has no
 * entry, i.e. when hashmap_get() yields NULL). */
static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
        void *stored;

        assert(nl);

        stored = hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group));

        return PTR_TO_UINT(stored);
}
/* Stores n_ref as the reference count of a multicast group in
 * nl->broadcast_group_refs, replacing any previous value.
 * Returns 0 on success, or the negative error reported by hashmap_replace(). */
static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
        int r;

        assert(nl);

        r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));

        return r < 0 ? r : 0;
}
/* Subscribes the socket to a multicast group via NETLINK_ADD_MEMBERSHIP.
 * Returns 0 on success, a negative errno-style code on failure. */
static int broadcast_group_join(sd_netlink *nl, unsigned group) {
        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        if (setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group)) < 0)
                return -errno;

        return 0;
}
/* Takes one reference on a multicast group. The socket joins the group when
 * the first reference is taken; further references only bump the counter kept
 * in nl->broadcast_group_refs.
 *
 * Returns 0 on success, a negative error code on failure. If joining the
 * group fails, the counter is restored to its previous value, so that a later
 * call attempts the join again instead of assuming membership. */
int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
        unsigned n_ref;
        int r;

        assert(nl);

        n_ref = broadcast_group_get_ref(nl, group) + 1;

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r < 0)
                return r;

        r = broadcast_group_set_ref(nl, group, n_ref);
        if (r < 0)
                return r;

        if (n_ref > 1)
                /* already in the group, only the counter had to be updated */
                return 0;

        r = broadcast_group_join(nl, group);
        if (r < 0) {
                /* We are not a member after all: undo the reference taken above.
                 * The entry already exists, so a failure here is not expected and
                 * the join error is the one worth reporting. */
                (void) broadcast_group_set_ref(nl, group, n_ref - 1);
                return r;
        }

        return 0;
}
/* Unsubscribes the socket from a multicast group via NETLINK_DROP_MEMBERSHIP.
 * Does nothing when nl->broadcast_group_dont_leave is set.
 * Returns 0 on success, a negative errno-style code on failure. */
static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        if (!nl->broadcast_group_dont_leave &&
            setsockopt(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &group, sizeof(group)) < 0)
                return -errno;

        return 0;
}
/* Drops one reference on a multicast group. The caller must hold at least one
 * reference (asserted). The decremented counter is stored first; the group is
 * left only when the counter reaches zero.
 * Returns 0 on success, a negative error code on failure. */
int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
        unsigned remaining;
        int r;

        assert(nl);

        remaining = broadcast_group_get_ref(nl, group);
        assert(remaining > 0);
        remaining--;

        r = broadcast_group_set_ref(nl, group, remaining);
        if (r < 0)
                return r;

        if (remaining == 0) {
                /* that was the last reference */
                r = broadcast_group_leave(nl, group);
                if (r < 0)
                        return r;
        }

        return 0;
}
/* Sends the netlink message m over the socket of nl. The destination address
 * only has its family set to AF_NETLINK; all other address fields are left
 * zero-initialized.
 * Returns the number of bytes sent, or a negative errno-style code. */
int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
        union {
                struct sockaddr sa;
                struct sockaddr_nl nl;
        } dest = {
                .nl.nl_family = AF_NETLINK,
        };
        ssize_t sent;

        assert(nl);
        assert(m);
        assert(m->hdr);

        /* the header's own length field determines how much is transmitted */
        sent = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len, 0, &dest.sa, sizeof(dest));

        return sent < 0 ? -errno : sent;
}
/* Receives one datagram from fd into iov, or only looks at it when peek is set.
 *
 * MSG_TRUNC is always passed to recvmsg(), so the returned length is presumably
 * the full datagram size even when iov is too small (see recvmsg(2)).
 *
 * If _group is non-NULL it receives the multicast group taken from the
 * NETLINK_PKTINFO control message, or 0 if there was none.
 *
 * Returns the value reported by recvmsg() on success; 0 if the call failed
 * with EAGAIN or EINTR, or if the datagram did not come from portid 0 and was
 * ignored; a negative errno-style code on any other failure (this includes
 * ENOBUFS). */
static int socket_recv_message(int fd, struct iovec *iov, uint32_t *_group, bool peek) {
        union sockaddr_union sender;
        uint8_t cmsg_buffer[CMSG_SPACE(sizeof(struct nl_pktinfo))];
        struct msghdr msg = {
                .msg_iov = iov,
                .msg_iovlen = 1,
                .msg_name = &sender,
                .msg_namelen = sizeof(sender),
                .msg_control = cmsg_buffer,
                .msg_controllen = sizeof(cmsg_buffer),
        };
        struct cmsghdr *cmsg;
        uint32_t group = 0;
        int flags = MSG_TRUNC;
        int r;

        assert(fd >= 0);
        assert(iov);

        if (peek)
                flags |= MSG_PEEK;

        r = recvmsg(fd, &msg, flags);
        if (r < 0) {
                /* no data */
                switch (errno) {

                case ENOBUFS:
                        log_debug("rtnl: kernel receive buffer overrun");
                        break;

                case EAGAIN:
                        log_debug("rtnl: no data in socket");
                        break;
                }

                return IN_SET(errno, EAGAIN, EINTR) ? 0 : -errno;
        }

        if (sender.nl.nl_pid != 0) {
                /* not from the kernel, ignore */
                log_debug("rtnl: ignoring message from portid %"PRIu32, sender.nl.nl_pid);

                /* when only peeking, the datagram is still queued: read it for
                 * real so that it is dropped */
                if (peek && recvmsg(fd, &msg, 0) < 0)
                        return IN_SET(errno, EAGAIN, EINTR) ? 0 : -errno;

                return 0;
        }

        CMSG_FOREACH(cmsg, &msg) {
                if (cmsg->cmsg_level == SOL_NETLINK &&
                    cmsg->cmsg_type == NETLINK_PKTINFO &&
                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct nl_pktinfo))) {
                        struct nl_pktinfo *info = (void *)CMSG_DATA(cmsg);

                        /* multi-cast group */
                        group = info->group;
                }
        }

        if (_group)
                *_group = group;

        return r;
}
/* Reads one datagram from the netlink socket and queues the messages it
 * contains.
 *
 * Returns 1 if a complete message (a single message, or a multi-part message
 * whose NLMSG_DONE has arrived) was appended to rtnl->rqueue.
 * Returns 0 if nothing useful was received, or if only a part of a multi-part
 * message arrived and was stored in rtnl->rqueue_partial.
 * On failure, a negative error code is returned.
 */
int socket_read_message(sd_netlink *rtnl) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL;
        struct iovec iov = {};
        uint32_t group = 0;
        bool multi_part = false, done = false;
        struct nlmsghdr *new_msg;
        size_t len;
        int r;
        unsigned i;

        assert(rtnl);
        assert(rtnl->rbuffer);
        assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr));

        /* read nothing, just get the pending message size */
        r = socket_recv_message(rtnl->fd, &iov, NULL, true);
        if (r <= 0)
                return r;
        len = (size_t) r;

        /* make room for the pending message */
        if (!greedy_realloc((void **)&rtnl->rbuffer,
                            &rtnl->rbuffer_allocated,
                            len, sizeof(uint8_t)))
                return -ENOMEM;

        iov.iov_base = rtnl->rbuffer;
        iov.iov_len = rtnl->rbuffer_allocated;

        /* read the pending message */
        r = socket_recv_message(rtnl->fd, &iov, &group, false);
        if (r <= 0)
                return r;
        len = (size_t) r;

        if (len > rtnl->rbuffer_allocated)
                /* message did not fit in read buffer */
                return -EIO;

        if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
                multi_part = true;

                /* Look for earlier parts of the same multi-part message. If found,
                 * take the entry out of the partial queue right away: 'first' is
                 * released by its cleanup handler on every error return below, so
                 * the queue must not keep pointing at the same message. */
                for (i = 0; i < rtnl->rqueue_partial_size; i++) {
                        if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) !=
                            rtnl->rbuffer->nlmsg_seq)
                                continue;

                        first = rtnl->rqueue_partial[i];

                        memmove(rtnl->rqueue_partial + i, rtnl->rqueue_partial + i + 1,
                                sizeof(sd_netlink_message*) * (rtnl->rqueue_partial_size - i - 1));
                        rtnl->rqueue_partial_size--;

                        break;
                }
        }

        for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
                _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
                const NLType *nl_type;

                if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid)
                        /* not broadcast and not for us */
                        continue;

                if (new_msg->nlmsg_type == NLMSG_NOOP)
                        /* silently drop noop messages */
                        continue;

                if (new_msg->nlmsg_type == NLMSG_DONE) {
                        /* finished reading multi-part message */
                        done = true;

                        /* if first is not defined, put NLMSG_DONE into the receive queue. */
                        if (first)
                                continue;
                }

                /* check that we support this message type */
                r = type_system_get_type(&type_system_root, &nl_type, new_msg->nlmsg_type);
                if (r < 0) {
                        if (r == -EOPNOTSUPP)
                                log_debug("sd-netlink: ignored message with unknown type: %i",
                                          new_msg->nlmsg_type);

                        continue;
                }

                /* check that the message is at least as large as its type requires */
                if (new_msg->nlmsg_len < NLMSG_LENGTH(type_get_size(nl_type))) {
                        log_debug("sd-netlink: message is shorter than expected, dropping");
                        continue;
                }

                r = message_new_empty(rtnl, &m);
                if (r < 0)
                        return r;

                m->broadcast = !!group;

                m->hdr = memdup(new_msg, new_msg->nlmsg_len);
                if (!m->hdr)
                        return -ENOMEM;

                /* seal and parse the top-level message */
                r = sd_netlink_message_rewind(m);
                if (r < 0)
                        return r;

                /* push the message onto the multi-part message stack */
                if (first)
                        m->next = first;
                first = m;
                m = NULL;
        }

        if (len)
                log_debug("sd-netlink: discarding %zu bytes of incoming message", len);

        if (!first)
                return 0;

        if (!multi_part || done) {
                /* we got a complete message, push it on the read queue */
                r = rtnl_rqueue_make_room(rtnl);
                if (r < 0)
                        return r;

                rtnl->rqueue[rtnl->rqueue_size++] = first;
                first = NULL;

                return 1;
        }

        /* we only got a partial multi-part message, push it (back) on the
         * partial read queue */
        r = rtnl_rqueue_partial_make_room(rtnl);
        if (r < 0)
                return r;

        rtnl->rqueue_partial[rtnl->rqueue_partial_size++] = first;
        first = NULL;

        return 0;
}