From 47ed20e1e08d800e722b05a3fb33ba6be4b48afc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Jun 2020 11:55:14 +0200 Subject: [PATCH 01/11] firewall-util: reject NULL source or address with prefixlen 0 Make sure we don't add masquerading rules without an explicitly specified network range we should be masquerading for. The only caller aside from the test case is networkd-address.c which never passes a NULL source. As it also passes the network prefix, that should always be > 0 as well. This causes expected test failure: Failed to modify firewall: Invalid argument Failed to modify firewall: Invalid argument Failed to modify firewall: Invalid argument Failed to modify firewall: Protocol not available Failed to modify firewall: Protocol not available Failed to modify firewall: Protocol not available Failed to modify firewall: Protocol not available The failing test cases are amended to expect failure on NULL source or prefix instead of success. --- src/shared/firewall-util.c | 3 +++ src/test/test-firewall-util.c | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c index 974803903d..df020ba7a2 100644 --- a/src/shared/firewall-util.c +++ b/src/shared/firewall-util.c @@ -98,6 +98,9 @@ int fw_add_masquerade( if (af != AF_INET) return -EOPNOTSUPP; + if (!source || source_prefixlen == 0) + return -EINVAL; + h = iptc_init("nat"); if (!h) return -errno; diff --git a/src/test/test-firewall-util.c b/src/test/test-firewall-util.c index 25c5a6cbf5..f223c0a4d9 100644 --- a/src/test/test-firewall-util.c +++ b/src/test/test-firewall-util.c @@ -9,16 +9,30 @@ int main(int argc, char *argv[]) { int r; test_setup_logging(LOG_DEBUG); + uint8_t prefixlen = 32; r = fw_add_masquerade(true, AF_INET, NULL, 0); + if (r == 0) + log_error("Expected failure: NULL source"); + + r = fw_add_masquerade(true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,0), 0); + if (r == 0) + log_error("Expected failure: 0 
prefixlen"); + + r = fw_add_masquerade(true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), prefixlen); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_masquerade(true, AF_INET, NULL, 0); + prefixlen = 28; + r = fw_add_masquerade(true, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_masquerade(false, AF_INET, NULL, 0); + r = fw_add_masquerade(false, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen); + if (r < 0) + log_error_errno(r, "Failed to modify firewall: %m"); + + r = fw_add_masquerade(false, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), 32); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); From 3122097217a6ea44709c6ec053e57097df921331 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Jun 2020 15:00:54 +0200 Subject: [PATCH 02/11] firewall-util: prepare for alternative to iptables backend In a nutshell: 1. git mv firewall-util.c firewall-util-iptables.c 2. existing external functions gain _iptables_ in their names 3. firewall-util.c provides old function names 4. build system always compiles firewall-util.c, firewall-util-iptables.c is conditional instead (libiptc). 5. The first call to any of the 'old' API functions performs a probe that should return the preferred backend. In a future step, can add firewall-util-FOOTYPE.c, add its probe function to firewall-util.c and then have calls to fw_add_masq/local_dnat handed to the detected backend. For now, only iptables backend exists, and no special probing takes place for it, i.e. when systemd was built with iptables, that will be used. If not, requests to add masquerade/dnat will fail with the same error (-EOPNOTSUPP) as before this change. 
For reference, the rules added by the libiptc/iptables backend look like this: for service export (via systemd-nspawn): [0:0] -A PREROUTING -p tcp -m tcp --dport $exportedport -m addrtype --dst-type LOCAL -j DNAT --to-destination $containerip:$port [0:0] -A OUTPUT ! -d 127.0.0.0/8 -p tcp -m tcp --dport $exportedport -m addrtype --dst-type LOCAL -j DNAT --to-destination $containerip:$port for ip masquerade: [0:0] -A POSTROUTING -s network/prefix -j MASQUERADE --- src/shared/firewall-util-iptables.c | 350 ++++++++++++++++++++++++++++ src/shared/firewall-util-private.h | 25 ++ src/shared/firewall-util.c | 342 +++------------------------ src/shared/firewall-util.h | 25 -- src/shared/meson.build | 4 +- 5 files changed, 410 insertions(+), 336 deletions(-) create mode 100644 src/shared/firewall-util-iptables.c create mode 100644 src/shared/firewall-util-private.h diff --git a/src/shared/firewall-util-iptables.c b/src/shared/firewall-util-iptables.c new file mode 100644 index 0000000000..ab3438843b --- /dev/null +++ b/src/shared/firewall-util-iptables.c @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +/* Temporary work-around for broken glibc vs. linux kernel header definitions + * This is already fixed upstream, remove this when distributions have updated. 
+ */ +#define _NET_IF_H 1 + +#include +#include +#include +#include +#include +#include +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "firewall-util.h" +#include "firewall-util-private.h" +#include "in-addr-util.h" +#include "macro.h" +#include "socket-util.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free); + +static int entry_fill_basics( + struct ipt_entry *entry, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + + assert(entry); + + if (out_interface && !ifname_valid(out_interface)) + return -EINVAL; + if (in_interface && !ifname_valid(in_interface)) + return -EINVAL; + + entry->ip.proto = protocol; + + if (in_interface) { + size_t l; + + l = strlen(in_interface); + assert(l < sizeof entry->ip.iniface); + assert(l < sizeof entry->ip.iniface_mask); + + strcpy(entry->ip.iniface, in_interface); + memset(entry->ip.iniface_mask, 0xFF, l + 1); + } + if (source) { + entry->ip.src = source->in; + in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen); + } + + if (out_interface) { + size_t l = strlen(out_interface); + assert(l < sizeof entry->ip.outiface); + assert(l < sizeof entry->ip.outiface_mask); + + strcpy(entry->ip.outiface, out_interface); + memset(entry->ip.outiface_mask, 0xFF, l + 1); + } + if (destination) { + entry->ip.dst = destination->in; + in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen); + } + + return 0; +} + +int fw_iptables_add_masquerade( + bool add, + int af, + const union in_addr_union *source, + unsigned source_prefixlen) { + + static const xt_chainlabel chain = "POSTROUTING"; + _cleanup_(iptc_freep) struct xtc_handle *h = NULL; + struct ipt_entry *entry, *mask; + struct ipt_entry_target *t; + size_t sz; + struct nf_nat_ipv4_multi_range_compat *mr; + 
int r, protocol = 0; + const char *out_interface = NULL; + const union in_addr_union *destination = NULL; + unsigned destination_prefixlen = 0; + + if (af != AF_INET) + return -EOPNOTSUPP; + + if (!source || source_prefixlen == 0) + return -EINVAL; + + h = iptc_init("nat"); + if (!h) + return -errno; + + sz = XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + + /* Put together the entry we want to add or remove */ + entry = alloca0(sz); + entry->next_offset = sz; + entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry)); + r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen); + if (r < 0) + return r; + + /* Fill in target part */ + t = ipt_get_target(entry); + t->u.target_size = + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name)); + mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; + mr->rangesize = 1; + + /* Create a search mask entry */ + mask = alloca(sz); + memset(mask, 0xFF, sz); + + if (add) { + if (iptc_check_entry(chain, entry, (unsigned char*) mask, h)) + return 0; + if (errno != ENOENT) /* if other error than not existing yet, fail */ + return -errno; + + if (!iptc_insert_entry(chain, entry, 0, h)) + return -errno; + } else { + if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h)) { + if (errno == ENOENT) /* if it's already gone, all is good! 
*/ + return 0; + + return -errno; + } + } + + if (!iptc_commit(h)) + return -errno; + + return 0; +} + +int fw_iptables_add_local_dnat( + bool add, + int af, + int protocol, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote) { + + static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT"; + _cleanup_(iptc_freep) struct xtc_handle *h = NULL; + struct ipt_entry *entry, *mask; + struct ipt_entry_target *t; + struct ipt_entry_match *m; + struct xt_addrtype_info_v1 *at; + struct nf_nat_ipv4_multi_range_compat *mr; + size_t sz, msz; + int r; + const char *in_interface = NULL; + const union in_addr_union *source = NULL; + unsigned source_prefixlen = 0; + const union in_addr_union *destination = NULL; + unsigned destination_prefixlen = 0; + + assert(add || !previous_remote); + + if (af != AF_INET) + return -EOPNOTSUPP; + + if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP)) + return -EOPNOTSUPP; + + if (local_port <= 0) + return -EINVAL; + + if (remote_port <= 0) + return -EINVAL; + + h = iptc_init("nat"); + if (!h) + return -errno; + + sz = XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + + if (protocol == IPPROTO_TCP) + msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_tcp)); + else + msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_udp)); + + sz += msz; + + /* Fill in basic part */ + entry = alloca0(sz); + entry->next_offset = sz; + entry->target_offset = + XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + + msz; + r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen); + if (r < 0) + return 
r; + + /* Fill in first match */ + m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry))); + m->u.match_size = msz; + if (protocol == IPPROTO_TCP) { + struct xt_tcp *tcp; + + strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name)); + tcp = (struct xt_tcp*) m->data; + tcp->dpts[0] = tcp->dpts[1] = local_port; + tcp->spts[0] = 0; + tcp->spts[1] = 0xFFFF; + + } else { + struct xt_udp *udp; + + strncpy(m->u.user.name, "udp", sizeof(m->u.user.name)); + udp = (struct xt_udp*) m->data; + udp->dpts[0] = udp->dpts[1] = local_port; + udp->spts[0] = 0; + udp->spts[1] = 0xFFFF; + } + + /* Fill in second match */ + m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz); + m->u.match_size = + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)); + strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name)); + m->u.user.revision = 1; + at = (struct xt_addrtype_info_v1*) m->data; + at->dest = XT_ADDRTYPE_LOCAL; + + /* Fill in target part */ + t = ipt_get_target(entry); + t->u.target_size = + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name)); + mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; + mr->rangesize = 1; + mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS; + mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; + if (protocol == IPPROTO_TCP) + mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port); + else + mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port); + + mask = alloca0(sz); + memset(mask, 0xFF, sz); + + if (add) { + /* Add the PREROUTING rule, if it is missing so far */ + if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -EINVAL; + + if (!iptc_insert_entry(chain_pre, entry, 0, h)) + return -errno; + } + + /* If a previous 
remote is set, remove its entry */ + if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { + mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; + + if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + + mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; + } + + /* Add the OUTPUT rule, if it is missing so far */ + if (!in_interface) { + + /* Don't apply onto loopback addresses */ + if (!destination) { + entry->ip.dst.s_addr = htobe32(0x7F000000); + entry->ip.dmsk.s_addr = htobe32(0xFF000000); + entry->ip.invflags = IPT_INV_DSTIP; + } + + if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + + if (!iptc_insert_entry(chain_output, entry, 0, h)) + return -errno; + } + + /* If a previous remote is set, remove its entry */ + if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { + mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; + + if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + } + } + } else { + if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + + if (!in_interface) { + if (!destination) { + entry->ip.dst.s_addr = htobe32(0x7F000000); + entry->ip.dmsk.s_addr = htobe32(0xFF000000); + entry->ip.invflags = IPT_INV_DSTIP; + } + + if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + } + } + + if (!iptc_commit(h)) + return -errno; + + return 0; +} diff --git a/src/shared/firewall-util-private.h b/src/shared/firewall-util-private.h new file mode 100644 index 0000000000..d7cb19353d --- /dev/null +++ b/src/shared/firewall-util-private.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include +#include + +#include "in-addr-util.h" + +#if 
HAVE_LIBIPTC + +int fw_iptables_add_masquerade( + bool add, + int af, + const union in_addr_union *source, + unsigned source_prefixlen); + +int fw_iptables_add_local_dnat( + bool add, + int af, + int protocol, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote); +#endif diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c index df020ba7a2..107056514e 100644 --- a/src/shared/firewall-util.c +++ b/src/shared/firewall-util.c @@ -1,81 +1,28 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -/* Temporary work-around for broken glibc vs. linux kernel header definitions - * This is already fixed upstream, remove this when distributions have updated. - */ -#define _NET_IF_H 1 - -#include -#include #include #include #include -#include -#ifndef IFNAMSIZ -#define IFNAMSIZ 16 -#endif -#include -#include -#include -#include -#include #include "alloc-util.h" #include "firewall-util.h" -#include "in-addr-util.h" -#include "macro.h" -#include "socket-util.h" +#include "firewall-util-private.h" -DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free); +enum FirewallBackend { + FW_BACKEND_NONE, +#if HAVE_LIBIPTC + FW_BACKEND_IPTABLES, +#endif +}; -static int entry_fill_basics( - struct ipt_entry *entry, - int protocol, - const char *in_interface, - const union in_addr_union *source, - unsigned source_prefixlen, - const char *out_interface, - const union in_addr_union *destination, - unsigned destination_prefixlen) { +static enum FirewallBackend FirewallBackend; - assert(entry); - - if (out_interface && !ifname_valid(out_interface)) - return -EINVAL; - if (in_interface && !ifname_valid(in_interface)) - return -EINVAL; - - entry->ip.proto = protocol; - - if (in_interface) { - size_t l; - - l = strlen(in_interface); - assert(l < sizeof entry->ip.iniface); - assert(l < sizeof entry->ip.iniface_mask); - - strcpy(entry->ip.iniface, in_interface); - memset(entry->ip.iniface_mask, 0xFF, l + 
1); - } - if (source) { - entry->ip.src = source->in; - in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen); - } - - if (out_interface) { - size_t l = strlen(out_interface); - assert(l < sizeof entry->ip.outiface); - assert(l < sizeof entry->ip.outiface_mask); - - strcpy(entry->ip.outiface, out_interface); - memset(entry->ip.outiface_mask, 0xFF, l + 1); - } - if (destination) { - entry->ip.dst = destination->in; - in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen); - } - - return 0; +static enum FirewallBackend firewall_backend_probe(void) { +#if HAVE_LIBIPTC + return FW_BACKEND_IPTABLES; +#else + return FW_BACKEND_NONE; +#endif } int fw_add_masquerade( @@ -84,73 +31,19 @@ int fw_add_masquerade( const union in_addr_union *source, unsigned source_prefixlen) { - static const xt_chainlabel chain = "POSTROUTING"; - _cleanup_(iptc_freep) struct xtc_handle *h = NULL; - struct ipt_entry *entry, *mask; - struct ipt_entry_target *t; - size_t sz; - struct nf_nat_ipv4_multi_range_compat *mr; - int r, protocol = 0; - const char *out_interface = NULL; - const union in_addr_union *destination = NULL; - unsigned destination_prefixlen = 0; + if (FirewallBackend == FW_BACKEND_NONE) + FirewallBackend = firewall_backend_probe(); - if (af != AF_INET) + switch (FirewallBackend) { + case FW_BACKEND_NONE: return -EOPNOTSUPP; - - if (!source || source_prefixlen == 0) - return -EINVAL; - - h = iptc_init("nat"); - if (!h) - return -errno; - - sz = XT_ALIGN(sizeof(struct ipt_entry)) + - XT_ALIGN(sizeof(struct ipt_entry_target)) + - XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); - - /* Put together the entry we want to add or remove */ - entry = alloca0(sz); - entry->next_offset = sz; - entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry)); - r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen); - if (r < 0) - return r; - - /* Fill in target part */ - t = ipt_get_target(entry); - 
t->u.target_size = - XT_ALIGN(sizeof(struct ipt_entry_target)) + - XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); - strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name)); - mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; - mr->rangesize = 1; - - /* Create a search mask entry */ - mask = alloca(sz); - memset(mask, 0xFF, sz); - - if (add) { - if (iptc_check_entry(chain, entry, (unsigned char*) mask, h)) - return 0; - if (errno != ENOENT) /* if other error than not existing yet, fail */ - return -errno; - - if (!iptc_insert_entry(chain, entry, 0, h)) - return -errno; - } else { - if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h)) { - if (errno == ENOENT) /* if it's already gone, all is good! */ - return 0; - - return -errno; - } +#if HAVE_LIBIPTC + case FW_BACKEND_IPTABLES: + return fw_iptables_add_masquerade(add, af, source, source_prefixlen); +#endif } - if (!iptc_commit(h)) - return -errno; - - return 0; + return -EOPNOTSUPP; } int fw_add_local_dnat( @@ -162,188 +55,17 @@ int fw_add_local_dnat( uint16_t remote_port, const union in_addr_union *previous_remote) { - static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT"; - _cleanup_(iptc_freep) struct xtc_handle *h = NULL; - struct ipt_entry *entry, *mask; - struct ipt_entry_target *t; - struct ipt_entry_match *m; - struct xt_addrtype_info_v1 *at; - struct nf_nat_ipv4_multi_range_compat *mr; - size_t sz, msz; - int r; - const char *in_interface = NULL; - const union in_addr_union *source = NULL; - unsigned source_prefixlen = 0; - const union in_addr_union *destination = NULL; - unsigned destination_prefixlen = 0; + if (FirewallBackend == FW_BACKEND_NONE) + FirewallBackend = firewall_backend_probe(); - assert(add || !previous_remote); - - if (af != AF_INET) + switch (FirewallBackend) { + case FW_BACKEND_NONE: return -EOPNOTSUPP; - - if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP)) - return -EOPNOTSUPP; - - if (local_port <= 0) - return -EINVAL; - - if 
(remote_port <= 0) - return -EINVAL; - - h = iptc_init("nat"); - if (!h) - return -errno; - - sz = XT_ALIGN(sizeof(struct ipt_entry)) + - XT_ALIGN(sizeof(struct ipt_entry_match)) + - XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + - XT_ALIGN(sizeof(struct ipt_entry_target)) + - XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); - - if (protocol == IPPROTO_TCP) - msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + - XT_ALIGN(sizeof(struct xt_tcp)); - else - msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + - XT_ALIGN(sizeof(struct xt_udp)); - - sz += msz; - - /* Fill in basic part */ - entry = alloca0(sz); - entry->next_offset = sz; - entry->target_offset = - XT_ALIGN(sizeof(struct ipt_entry)) + - XT_ALIGN(sizeof(struct ipt_entry_match)) + - XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + - msz; - r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen); - if (r < 0) - return r; - - /* Fill in first match */ - m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry))); - m->u.match_size = msz; - if (protocol == IPPROTO_TCP) { - struct xt_tcp *tcp; - - strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name)); - tcp = (struct xt_tcp*) m->data; - tcp->dpts[0] = tcp->dpts[1] = local_port; - tcp->spts[0] = 0; - tcp->spts[1] = 0xFFFF; - - } else { - struct xt_udp *udp; - - strncpy(m->u.user.name, "udp", sizeof(m->u.user.name)); - udp = (struct xt_udp*) m->data; - udp->dpts[0] = udp->dpts[1] = local_port; - udp->spts[0] = 0; - udp->spts[1] = 0xFFFF; +#if HAVE_LIBIPTC + case FW_BACKEND_IPTABLES: + return fw_iptables_add_local_dnat(add, af, protocol, local_port, remote, remote_port, previous_remote); +#endif } - /* Fill in second match */ - m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz); - m->u.match_size = - XT_ALIGN(sizeof(struct ipt_entry_match)) + - XT_ALIGN(sizeof(struct xt_addrtype_info_v1)); - strncpy(m->u.user.name, "addrtype", 
sizeof(m->u.user.name)); - m->u.user.revision = 1; - at = (struct xt_addrtype_info_v1*) m->data; - at->dest = XT_ADDRTYPE_LOCAL; - - /* Fill in target part */ - t = ipt_get_target(entry); - t->u.target_size = - XT_ALIGN(sizeof(struct ipt_entry_target)) + - XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); - strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name)); - mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; - mr->rangesize = 1; - mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS; - mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; - if (protocol == IPPROTO_TCP) - mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port); - else - mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port); - - mask = alloca0(sz); - memset(mask, 0xFF, sz); - - if (add) { - /* Add the PREROUTING rule, if it is missing so far */ - if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) { - if (errno != ENOENT) - return -EINVAL; - - if (!iptc_insert_entry(chain_pre, entry, 0, h)) - return -errno; - } - - /* If a previous remote is set, remove its entry */ - if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { - mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; - - if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) { - if (errno != ENOENT) - return -errno; - } - - mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; - } - - /* Add the OUTPUT rule, if it is missing so far */ - if (!in_interface) { - - /* Don't apply onto loopback addresses */ - if (!destination) { - entry->ip.dst.s_addr = htobe32(0x7F000000); - entry->ip.dmsk.s_addr = htobe32(0xFF000000); - entry->ip.invflags = IPT_INV_DSTIP; - } - - if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) { - if (errno != ENOENT) - return -errno; - - if (!iptc_insert_entry(chain_output, entry, 0, h)) - return -errno; - } - - /* If a previous remote is set, 
remove its entry */ - if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { - mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; - - if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) { - if (errno != ENOENT) - return -errno; - } - } - } - } else { - if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) { - if (errno != ENOENT) - return -errno; - } - - if (!in_interface) { - if (!destination) { - entry->ip.dst.s_addr = htobe32(0x7F000000); - entry->ip.dmsk.s_addr = htobe32(0xFF000000); - entry->ip.invflags = IPT_INV_DSTIP; - } - - if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) { - if (errno != ENOENT) - return -errno; - } - } - } - - if (!iptc_commit(h)) - return -errno; - - return 0; + return -EOPNOTSUPP; } diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h index f7191ba006..bb6dc5a0f0 100644 --- a/src/shared/firewall-util.h +++ b/src/shared/firewall-util.h @@ -6,8 +6,6 @@ #include "in-addr-util.h" -#if HAVE_LIBIPTC - int fw_add_masquerade( bool add, int af, @@ -22,26 +20,3 @@ int fw_add_local_dnat( const union in_addr_union *remote, uint16_t remote_port, const union in_addr_union *previous_remote); - -#else - -static inline int fw_add_masquerade( - bool add, - int af, - const union in_addr_union *source, - unsigned source_prefixlen) { - return -EOPNOTSUPP; -} - -static inline int fw_add_local_dnat( - bool add, - int af, - int protocol, - uint16_t local_port, - const union in_addr_union *remote, - uint16_t remote_port, - const union in_addr_union *previous_remote) { - return -EOPNOTSUPP; -} - -#endif diff --git a/src/shared/meson.build b/src/shared/meson.build index cc9501f5b2..de916e0a4c 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -101,7 +101,9 @@ shared_sources = files(''' fdset.h fileio-label.c fileio-label.h + firewall-util.c firewall-util.h + firewall-util-private.h format-table.c format-table.h fsck-util.h @@ -293,7 
+295,7 @@ if conf.get('HAVE_SECCOMP') == 1 endif if conf.get('HAVE_LIBIPTC') == 1 - shared_sources += files('firewall-util.c') + shared_sources += files('firewall-util-iptables.c') endif if conf.get('HAVE_KMOD') == 1 From f51343d0af559ed0d00f4dc82b9d3d9e784eea23 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Oct 2020 20:29:09 +0200 Subject: [PATCH 03/11] nspawn: pass userdata pointer, not inet_addr union Next patch will need to pass two pointers to the callback instead of just the addr mask. Caller will pass a compound structure, so make this 'void *userdata' to de-clutter the next patch. --- src/nspawn/nspawn-expose-ports.c | 6 +++--- src/nspawn/nspawn-expose-ports.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nspawn/nspawn-expose-ports.c b/src/nspawn/nspawn-expose-ports.c index d1e29d7b82..db076c50c0 100644 --- a/src/nspawn/nspawn-expose-ports.c +++ b/src/nspawn/nspawn-expose-ports.c @@ -188,7 +188,7 @@ int expose_port_watch_rtnl( sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, - union in_addr_union *exposed, + void *userdata, sd_netlink **ret) { _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; int fd, r; @@ -207,11 +207,11 @@ int expose_port_watch_rtnl( return log_error_errno(r, "Failed to create rtnl object: %m"); } - r = sd_netlink_add_match(rtnl, NULL, RTM_NEWADDR, handler, NULL, exposed, "nspawn-NEWADDR"); + r = sd_netlink_add_match(rtnl, NULL, RTM_NEWADDR, handler, NULL, userdata, "nspawn-NEWADDR"); if (r < 0) return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR messages: %m"); - r = sd_netlink_add_match(rtnl, NULL, RTM_DELADDR, handler, NULL, exposed, "nspawn-DELADDR"); + r = sd_netlink_add_match(rtnl, NULL, RTM_DELADDR, handler, NULL, userdata, "nspawn-DELADDR"); if (r < 0) return log_error_errno(r, "Failed to subscribe to RTM_DELADDR messages: %m"); diff --git a/src/nspawn/nspawn-expose-ports.h b/src/nspawn/nspawn-expose-ports.h index cc834a4197..d0c1cecbe8 100644 --- 
a/src/nspawn/nspawn-expose-ports.h +++ b/src/nspawn/nspawn-expose-ports.h @@ -19,7 +19,7 @@ typedef struct ExposePort { void expose_port_free_all(ExposePort *p); int expose_port_parse(ExposePort **l, const char *s); -int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, union in_addr_union *exposed, sd_netlink **ret); +int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, void *userdata, sd_netlink **ret); int expose_port_send_rtnl(int send_fd); int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed); From 761cf19d7bc4b5950caff33965508d9fb7bbb547 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Sep 2020 19:58:44 +0200 Subject: [PATCH 04/11] firewall-util: introduce context structure for planned nft backend We have three choices: - open/close a new nfnetlink socket for every operation - keep a nfnetlink socket open internally - expose an opaque fw_ctx and stash all internal data here. Originally I opted for the 2nd option, but during review it was suggested to avoid static storage duration because of perceived problems with threaded applications. This adds fw_ctx and new/free functions, then converts the existing api and nspawn and networkd to use it. 
--- src/network/networkd-address.c | 2 +- src/network/networkd-manager.c | 3 ++ src/network/networkd-manager.h | 3 ++ src/network/networkd.c | 5 +++ src/nspawn/nspawn-expose-ports.c | 12 ++++--- src/nspawn/nspawn-expose-ports.h | 6 ++-- src/nspawn/nspawn.c | 34 ++++++++++++------ src/shared/firewall-util-private.h | 11 ++++++ src/shared/firewall-util.c | 55 ++++++++++++++++++++++-------- src/shared/firewall-util.h | 9 +++++ src/test/test-firewall-util.c | 25 ++++++++------ 11 files changed, 122 insertions(+), 43 deletions(-) diff --git a/src/network/networkd-address.c b/src/network/networkd-address.c index 3ec47e30a3..4137b29945 100644 --- a/src/network/networkd-address.c +++ b/src/network/networkd-address.c @@ -282,7 +282,7 @@ static int address_set_masquerade(Address *address, bool add) { if (r < 0) return r; - r = fw_add_masquerade(add, AF_INET, &masked, address->prefixlen); + r = fw_add_masquerade(&address->link->manager->fw_ctx, add, AF_INET, &masked, address->prefixlen); if (r < 0) return r; diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c index 8af17b1194..4894d235b0 100644 --- a/src/network/networkd-manager.c +++ b/src/network/networkd-manager.c @@ -22,6 +22,7 @@ #include "dns-domain.h" #include "fd-util.h" #include "fileio.h" +#include "firewall-util.h" #include "local-addresses.h" #include "netlink-util.h" #include "network-internal.h" @@ -912,6 +913,8 @@ void manager_free(Manager *m) { safe_close(m->ethtool_fd); + m->fw_ctx = fw_ctx_free(m->fw_ctx); + free(m); } diff --git a/src/network/networkd-manager.h b/src/network/networkd-manager.h index b67116be59..25fb080dc9 100644 --- a/src/network/networkd-manager.h +++ b/src/network/networkd-manager.h @@ -9,6 +9,7 @@ #include "sd-resolve.h" #include "dhcp-identifier.h" +#include "firewall-util.h" #include "hashmap.h" #include "networkd-link.h" #include "networkd-network.h" @@ -74,6 +75,8 @@ struct Manager { bool dhcp4_prefix_root_cannot_set_table:1; bool 
bridge_mdb_on_master_not_supported:1; + + FirewallContext *fw_ctx; }; int manager_new(Manager **ret); diff --git a/src/network/networkd.c b/src/network/networkd.c index b448d9b011..ac2bc90d97 100644 --- a/src/network/networkd.c +++ b/src/network/networkd.c @@ -9,6 +9,7 @@ #include "capability-util.h" #include "daemon-util.h" +#include "firewall-util.h" #include "main-func.h" #include "mkdir.h" #include "networkd-conf.h" @@ -92,6 +93,10 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; + r = fw_ctx_new(&m->fw_ctx); + if (r < 0) + log_warning_errno(r, "Could not initialize firewall, IPMasquerade= option not available: %m"); + r = manager_start(m); if (r < 0) return log_error_errno(r, "Could not start manager: %m"); diff --git a/src/nspawn/nspawn-expose-ports.c b/src/nspawn/nspawn-expose-ports.c index db076c50c0..c368b20563 100644 --- a/src/nspawn/nspawn-expose-ports.c +++ b/src/nspawn/nspawn-expose-ports.c @@ -82,7 +82,7 @@ void expose_port_free_all(ExposePort *p) { } } -int expose_port_flush(ExposePort* l, union in_addr_union *exposed) { +int expose_port_flush(FirewallContext **fw_ctx, ExposePort* l, union in_addr_union *exposed) { ExposePort *p; int r, af = AF_INET; @@ -97,7 +97,8 @@ int expose_port_flush(ExposePort* l, union in_addr_union *exposed) { log_debug("Lost IP address."); LIST_FOREACH(ports, p, l) { - r = fw_add_local_dnat(false, + r = fw_add_local_dnat(fw_ctx, + false, af, p->protocol, p->host_port, @@ -112,7 +113,7 @@ int expose_port_flush(ExposePort* l, union in_addr_union *exposed) { return 0; } -int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed) { +int expose_port_execute(sd_netlink *rtnl, FirewallContext **fw_ctx, ExposePort *l, union in_addr_union *exposed) { _cleanup_free_ struct local_address *addresses = NULL; union in_addr_union new_exposed; ExposePort *p; @@ -136,7 +137,7 @@ int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *ex addresses[0].scope < RT_SCOPE_LINK; if 
(!add) - return expose_port_flush(l, exposed); + return expose_port_flush(fw_ctx, l, exposed); new_exposed = addresses[0].address; if (in_addr_equal(af, exposed, &new_exposed)) @@ -150,7 +151,8 @@ int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *ex LIST_FOREACH(ports, p, l) { - r = fw_add_local_dnat(true, + r = fw_add_local_dnat(fw_ctx, + true, af, p->protocol, p->host_port, diff --git a/src/nspawn/nspawn-expose-ports.h b/src/nspawn/nspawn-expose-ports.h index d0c1cecbe8..c1677cb61b 100644 --- a/src/nspawn/nspawn-expose-ports.h +++ b/src/nspawn/nspawn-expose-ports.h @@ -3,6 +3,8 @@ #include +#include "firewall-util.h" + #include "sd-event.h" #include "sd-netlink.h" @@ -22,5 +24,5 @@ int expose_port_parse(ExposePort **l, const char *s); int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, void *userdata, sd_netlink **ret); int expose_port_send_rtnl(int send_fd); -int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed); -int expose_port_flush(ExposePort* l, union in_addr_union *exposed); +int expose_port_execute(sd_netlink *rtnl, FirewallContext **fw_ctx, ExposePort *l, union in_addr_union *exposed); +int expose_port_flush(FirewallContext **fw_ctx, ExposePort* l, union in_addr_union *exposed); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index cfbc8f11bf..a6f64e8415 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2474,14 +2474,19 @@ static int setup_kmsg(int kmsg_socket) { return 0; } +struct ExposeArgs { + union in_addr_union address; + struct FirewallContext *fw_ctx; +}; + static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) { - union in_addr_union *exposed = userdata; + struct ExposeArgs *args = userdata; assert(rtnl); assert(m); - assert(exposed); + assert(args); - expose_port_execute(rtnl, arg_expose_ports, exposed); + expose_port_execute(rtnl, &args->fw_ctx, arg_expose_ports, &args->address); return 0; } 
@@ -4466,7 +4471,7 @@ static int run_container( bool secondary, FDSet *fds, char veth_name[IFNAMSIZ], bool *veth_created, - union in_addr_union *exposed, + struct ExposeArgs *expose_args, int *master, pid_t *pid, int *ret) { static const struct sigaction sa = { @@ -4895,11 +4900,11 @@ static int run_container( (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid)); if (arg_expose_ports) { - r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, exposed, &rtnl); + r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, expose_args, &rtnl); if (r < 0) return r; - (void) expose_port_execute(rtnl, arg_expose_ports, exposed); + (void) expose_port_execute(rtnl, &expose_args->fw_ctx, arg_expose_ports, &expose_args->address); } rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]); @@ -5026,7 +5031,7 @@ static int run_container( return 0; /* finito */ } - expose_port_flush(arg_expose_ports, exposed); + expose_port_flush(&expose_args->fw_ctx, arg_expose_ports, &expose_args->address); (void) remove_veth_links(veth_name, arg_network_veth_extra); *veth_created = false; @@ -5155,12 +5160,13 @@ static int run(int argc, char *argv[]) { _cleanup_fdset_free_ FDSet *fds = NULL; int r, n_fd_passed, ret = EXIT_SUCCESS; char veth_name[IFNAMSIZ] = ""; - union in_addr_union exposed = {}; + struct ExposeArgs expose_args = {}; _cleanup_(release_lock_file) LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT; char tmprootdir[] = "/tmp/nspawn-root-XXXXXX"; _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL; _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; + _cleanup_(fw_ctx_freep) FirewallContext *fw_ctx = NULL; pid_t pid = 0; log_parse_environment(); @@ -5517,12 +5523,20 @@ static int run(int argc, char *argv[]) { goto finish; } + if (arg_expose_ports) { + r = fw_ctx_new(&fw_ctx); + if (r < 0) { + 
log_error_errno(r, "Cannot expose configured ports, firewall initialization failed: %m"); + goto finish; + } + expose_args.fw_ctx = fw_ctx; + } for (;;) { r = run_container(dissected_image, secondary, fds, veth_name, &veth_created, - &exposed, &master, + &expose_args, &master, &pid, &ret); if (r <= 0) break; @@ -5572,7 +5586,7 @@ finish: (void) rm_rf(p, REMOVE_ROOT); } - expose_port_flush(arg_expose_ports, &exposed); + expose_port_flush(&fw_ctx, arg_expose_ports, &expose_args.address); if (veth_created) (void) remove_veth_links(veth_name, arg_network_veth_extra); diff --git a/src/shared/firewall-util-private.h b/src/shared/firewall-util-private.h index d7cb19353d..7f9efbc513 100644 --- a/src/shared/firewall-util-private.h +++ b/src/shared/firewall-util-private.h @@ -6,6 +6,17 @@ #include "in-addr-util.h" +enum FirewallBackend { + FW_BACKEND_NONE, +#if HAVE_LIBIPTC + FW_BACKEND_IPTABLES, +#endif +}; + +struct FirewallContext { + enum FirewallBackend firewall_backend; +}; + #if HAVE_LIBIPTC int fw_iptables_add_masquerade( diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c index 107056514e..edfe5787b1 100644 --- a/src/shared/firewall-util.c +++ b/src/shared/firewall-util.c @@ -8,15 +8,6 @@ #include "firewall-util.h" #include "firewall-util-private.h" -enum FirewallBackend { - FW_BACKEND_NONE, -#if HAVE_LIBIPTC - FW_BACKEND_IPTABLES, -#endif -}; - -static enum FirewallBackend FirewallBackend; - static enum FirewallBackend firewall_backend_probe(void) { #if HAVE_LIBIPTC return FW_BACKEND_IPTABLES; @@ -25,16 +16,41 @@ static enum FirewallBackend firewall_backend_probe(void) { #endif } +int fw_ctx_new(FirewallContext **ret) { + _cleanup_free_ FirewallContext *ctx = NULL; + + ctx = new0(FirewallContext, 1); + if (!ctx) + return -ENOMEM; + + *ret = TAKE_PTR(ctx); + return 0; +} + +FirewallContext *fw_ctx_free(FirewallContext *ctx) { + return mfree(ctx); +} + int fw_add_masquerade( + FirewallContext **fw_ctx, bool add, int af, const union in_addr_union 
*source, unsigned source_prefixlen) { + FirewallContext *ctx; + int r; - if (FirewallBackend == FW_BACKEND_NONE) - FirewallBackend = firewall_backend_probe(); + if (!*fw_ctx) { + r = fw_ctx_new(fw_ctx); + if (r < 0) + return r; + } - switch (FirewallBackend) { + ctx = *fw_ctx; + if (ctx->firewall_backend == FW_BACKEND_NONE) + ctx->firewall_backend = firewall_backend_probe(); + + switch (ctx->firewall_backend) { case FW_BACKEND_NONE: return -EOPNOTSUPP; #if HAVE_LIBIPTC @@ -47,6 +63,7 @@ int fw_add_masquerade( } int fw_add_local_dnat( + FirewallContext **fw_ctx, bool add, int af, int protocol, @@ -54,11 +71,19 @@ int fw_add_local_dnat( const union in_addr_union *remote, uint16_t remote_port, const union in_addr_union *previous_remote) { + FirewallContext *ctx; - if (FirewallBackend == FW_BACKEND_NONE) - FirewallBackend = firewall_backend_probe(); + if (!*fw_ctx) { + int ret = fw_ctx_new(fw_ctx); + if (ret < 0) + return ret; + } - switch (FirewallBackend) { + ctx = *fw_ctx; + if (ctx->firewall_backend == FW_BACKEND_NONE) + ctx->firewall_backend = firewall_backend_probe(); + + switch (ctx->firewall_backend) { case FW_BACKEND_NONE: return -EOPNOTSUPP; #if HAVE_LIBIPTC diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h index bb6dc5a0f0..5180b429d3 100644 --- a/src/shared/firewall-util.h +++ b/src/shared/firewall-util.h @@ -6,13 +6,22 @@ #include "in-addr-util.h" +typedef struct FirewallContext FirewallContext; + +int fw_ctx_new(FirewallContext **ret); +FirewallContext *fw_ctx_free(FirewallContext *fw_ctx); + +DEFINE_TRIVIAL_CLEANUP_FUNC(FirewallContext *, fw_ctx_free); + int fw_add_masquerade( + FirewallContext **fw_ctx, bool add, int af, const union in_addr_union *source, unsigned source_prefixlen); int fw_add_local_dnat( + FirewallContext **fw_ctx, bool add, int af, int protocol, diff --git a/src/test/test-firewall-util.c b/src/test/test-firewall-util.c index f223c0a4d9..14678c048d 100644 --- a/src/test/test-firewall-util.c +++ 
b/src/test/test-firewall-util.c @@ -7,48 +7,53 @@ #define MAKE_IN_ADDR_UNION(a,b,c,d) (union in_addr_union) { .in.s_addr = htobe32((uint32_t) (a) << 24 | (uint32_t) (b) << 16 | (uint32_t) (c) << 8 | (uint32_t) (d))} int main(int argc, char *argv[]) { + _cleanup_(fw_ctx_freep) FirewallContext *ctx; int r; test_setup_logging(LOG_DEBUG); uint8_t prefixlen = 32; - r = fw_add_masquerade(true, AF_INET, NULL, 0); + r = fw_ctx_new(&ctx); + if (r < 0) + return log_error_errno(r, "Failed to init firewall: %m"); + + r = fw_add_masquerade(&ctx, true, AF_INET, NULL, 0); if (r == 0) log_error("Expected failure: NULL source"); - r = fw_add_masquerade(true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,0), 0); + r = fw_add_masquerade(&ctx, true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,0), 0); if (r == 0) log_error("Expected failure: 0 prefixlen"); - r = fw_add_masquerade(true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), prefixlen); + r = fw_add_masquerade(&ctx, true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), prefixlen); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); prefixlen = 28; - r = fw_add_masquerade(true, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen); + r = fw_add_masquerade(&ctx, true, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_masquerade(false, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen); + r = fw_add_masquerade(&ctx, false, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_masquerade(false, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), 32); + r = fw_add_masquerade(&ctx, false, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), 32); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL); + r = fw_add_local_dnat(&ctx, true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL); if (r < 0) 
log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL); + r = fw_add_local_dnat(&ctx, true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, &MAKE_IN_ADDR_UNION(1, 2, 3, 4)); + r = fw_add_local_dnat(&ctx, true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, &MAKE_IN_ADDR_UNION(1, 2, 3, 4)); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); - r = fw_add_local_dnat(false, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, NULL); + r = fw_add_local_dnat(&ctx, false, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, NULL); if (r < 0) log_error_errno(r, "Failed to modify firewall: %m"); From bcd1a2bb5c5e34f68261e74f2b72b07b4688f462 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 14 Mar 2020 15:21:19 +0100 Subject: [PATCH 05/11] linux: import nf_tables and nfnetlink headers from Linux 5.8 Will be used/needed in the upcoming nfnetlink/nftables support. This follows existing model where kernel uapi headers are cached locally. 
--- src/basic/linux/netfilter/nf_tables.h | 1869 +++++++++++++++++++++++++ src/basic/linux/netfilter/nfnetlink.h | 81 ++ 2 files changed, 1950 insertions(+) create mode 100644 src/basic/linux/netfilter/nf_tables.h create mode 100644 src/basic/linux/netfilter/nfnetlink.h diff --git a/src/basic/linux/netfilter/nf_tables.h b/src/basic/linux/netfilter/nf_tables.h new file mode 100644 index 0000000000..4565456c0e --- /dev/null +++ b/src/basic/linux/netfilter/nf_tables.h @@ -0,0 +1,1869 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_NAME_MAXLEN 256 +#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_USERDATA_MAXLEN 256 +#define NFT_OSF_MAXGENRELEN 16 + +/** + * enum nft_registers - nf_tables registers + * + * nf_tables used to have five registers: a verdict register and four data + * registers of size 16. The data registers have been changed to 16 registers + * of size 4. For compatibility reasons, the NFT_REG_[1-4] registers still + * map to areas of size 16, the 4 byte registers are addressed using + * NFT_REG32_00 - NFT_REG32_15. 
+ */ +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX, + + NFT_REG32_00 = 8, + NFT_REG32_01, + NFT_REG32_02, + NFT_REG32_03, + NFT_REG32_04, + NFT_REG32_05, + NFT_REG32_06, + NFT_REG32_07, + NFT_REG32_08, + NFT_REG32_09, + NFT_REG32_10, + NFT_REG32_11, + NFT_REG32_12, + NFT_REG32_13, + NFT_REG32_14, + NFT_REG32_15, +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +#define NFT_REG_SIZE 16 +#define NFT_REG32_SIZE 4 +#define NFT_REG32_COUNT (NFT_REG32_15 - NFT_REG32_00 + 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. 
+ */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes) + * @NFT_MSG_GETSET: get a set (enum nft_set_attributes) + * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes) + * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) + * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) + * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) + * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) + * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) + * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) + * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) + * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) + */ +enum nf_tables_msg_types { + 
NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_NEWSET, + NFT_MSG_GETSET, + NFT_MSG_DELSET, + NFT_MSG_NEWSETELEM, + NFT_MSG_GETSETELEM, + NFT_MSG_DELSETELEM, + NFT_MSG_NEWGEN, + NFT_MSG_GETGEN, + NFT_MSG_TRACE, + NFT_MSG_NEWOBJ, + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, + NFT_MSG_NEWFLOWTABLE, + NFT_MSG_GETFLOWTABLE, + NFT_MSG_DELFLOWTABLE, + NFT_MSG_MAX, +}; + +/** + * enum nft_list_attributes - nf_tables generic list netlink attributes + * + * @NFTA_LIST_ELEM: list element (NLA_NESTED) + */ +enum nft_list_attributes { + NFTA_LIST_UNPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_HOOK_DEV: netdevice name (NLA_STRING) + * @NFTA_HOOK_DEVS: list of netdevices (NLA_NESTED) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + NFTA_HOOK_DEV, + NFTA_HOOK_DEVS, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_flags - nf_tables table flags + * + * @NFT_TABLE_F_DORMANT: this table is not active + */ +enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, +}; + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) + * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, + NFTA_TABLE_USE, + NFTA_TABLE_HANDLE, + NFTA_TABLE_PAD, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +/** + * enum nft_chain_attributes - nf_tables chain netlink 
attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32) + * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32) + * @NFTA_CHAIN_TYPE: type name of the chain (NLA_NUL_STRING) + * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) + * @NFTA_CHAIN_FLAGS: chain flags + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + NFTA_CHAIN_POLICY, + NFTA_CHAIN_USE, + NFTA_CHAIN_TYPE, + NFTA_CHAIN_COUNTERS, + NFTA_CHAIN_PAD, + NFTA_CHAIN_FLAGS, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) + * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) + * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) + * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) + * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + NFTA_RULE_COMPAT, + NFTA_RULE_POSITION, + NFTA_RULE_USERDATA, + NFTA_RULE_PAD, + NFTA_RULE_ID, + NFTA_RULE_POSITION_ID, + __NFTA_RULE_MAX +};
+#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +/** + * enum nft_rule_compat_flags - nf_tables rule compat flags + * + * @NFT_RULE_COMPAT_F_INV: invert the check result + */ +enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_INV = (1 << 1), + NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, +}; + +/** + * enum nft_rule_compat_attributes - nf_tables rule compat attributes + * + * @NFTA_RULE_COMPAT_PROTO: numeric value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) + */ +enum nft_rule_compat_attributes { + NFTA_RULE_COMPAT_UNSPEC, + NFTA_RULE_COMPAT_PROTO, + NFTA_RULE_COMPAT_FLAGS, + __NFTA_RULE_COMPAT_MAX +}; +#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1) + +/** + * enum nft_set_flags - nf_tables set flags + * + * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink + * @NFT_SET_CONSTANT: set contents may not change while bound + * @NFT_SET_INTERVAL: set contains intervals + * @NFT_SET_MAP: set is used as a dictionary + * @NFT_SET_TIMEOUT: set uses timeouts + * @NFT_SET_EVAL: set can be updated from the evaluation path + * @NFT_SET_OBJECT: set contains stateful objects + * @NFT_SET_CONCAT: set contains a concatenation + */ +enum nft_set_flags { + NFT_SET_ANONYMOUS = 0x1, + NFT_SET_CONSTANT = 0x2, + NFT_SET_INTERVAL = 0x4, + NFT_SET_MAP = 0x8, + NFT_SET_TIMEOUT = 0x10, + NFT_SET_EVAL = 0x20, + NFT_SET_OBJECT = 0x40, + NFT_SET_CONCAT = 0x80, +}; + +/** + * enum nft_set_policies - set selection policy + * + * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use + * @NFT_SET_POL_MEMORY: prefer low memory use over high performance + */ +enum nft_set_policies { + NFT_SET_POL_PERFORMANCE, + NFT_SET_POL_MEMORY, +}; + +/** + * enum nft_set_desc_attributes - set element description + * + * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32) + * @NFTA_SET_DESC_CONCAT: description of field concatenation (NLA_NESTED) + */ +enum nft_set_desc_attributes { + NFTA_SET_DESC_UNSPEC, 
+ NFTA_SET_DESC_SIZE, + NFTA_SET_DESC_CONCAT, + __NFTA_SET_DESC_MAX +}; +#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1) + +/** + * enum nft_set_field_attributes - attributes of concatenated fields + * + * @NFTA_SET_FIELD_LEN: length of single field, in bits (NLA_U32) + */ +enum nft_set_field_attributes { + NFTA_SET_FIELD_UNSPEC, + NFTA_SET_FIELD_LEN, + __NFTA_SET_FIELD_MAX +}; +#define NFTA_SET_FIELD_MAX (__NFTA_SET_FIELD_MAX - 1) + +/** + * enum nft_set_attributes - nf_tables set netlink attributes + * + * @NFTA_SET_TABLE: table name (NLA_STRING) + * @NFTA_SET_NAME: set name (NLA_STRING) + * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32) + * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32) + * @NFTA_SET_KEY_LEN: key data length (NLA_U32) + * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) + * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + * @NFTA_SET_POLICY: selection policy (NLA_U32) + * @NFTA_SET_DESC: set description (NLA_NESTED) + * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) + * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) + * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) + * @NFTA_SET_HANDLE: set handle (NLA_U64) + * @NFTA_SET_EXPR: set expression (NLA_NESTED: nft_expr_attributes) + */ +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_TABLE, + NFTA_SET_NAME, + NFTA_SET_FLAGS, + NFTA_SET_KEY_TYPE, + NFTA_SET_KEY_LEN, + NFTA_SET_DATA_TYPE, + NFTA_SET_DATA_LEN, + NFTA_SET_POLICY, + NFTA_SET_DESC, + NFTA_SET_ID, + NFTA_SET_TIMEOUT, + NFTA_SET_GC_INTERVAL, + NFTA_SET_USERDATA, + NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, + NFTA_SET_HANDLE, + NFTA_SET_EXPR, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +/** + * enum nft_set_elem_flags - nf_tables set element flags + * + * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval + */ +enum 
nft_set_elem_flags { + NFT_SET_ELEM_INTERVAL_END = 0x1, +}; + +/** + * enum nft_set_elem_attributes - nf_tables set element netlink attributes + * + * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) + * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) + * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) + * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING) + * @NFTA_SET_ELEM_KEY_END: closing key value (NLA_NESTED: nft_data) + */ +enum nft_set_elem_attributes { + NFTA_SET_ELEM_UNSPEC, + NFTA_SET_ELEM_KEY, + NFTA_SET_ELEM_DATA, + NFTA_SET_ELEM_FLAGS, + NFTA_SET_ELEM_TIMEOUT, + NFTA_SET_ELEM_EXPIRATION, + NFTA_SET_ELEM_USERDATA, + NFTA_SET_ELEM_EXPR, + NFTA_SET_ELEM_PAD, + NFTA_SET_ELEM_OBJREF, + NFTA_SET_ELEM_KEY_END, + __NFTA_SET_ELEM_MAX +}; +#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) + +/** + * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes + * + * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + */ +enum nft_set_elem_list_attributes { + NFTA_SET_ELEM_LIST_UNSPEC, + NFTA_SET_ELEM_LIST_TABLE, + NFTA_SET_ELEM_LIST_SET, + NFTA_SET_ELEM_LIST_ELEMENTS, + NFTA_SET_ELEM_LIST_SET_ID, + __NFTA_SET_ELEM_LIST_MAX +}; +#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) + +/** + * enum nft_data_types - nf_tables data types + * + * @NFT_DATA_VALUE: generic data + * @NFT_DATA_VERDICT: netfilter verdict + * + * The type of data is usually determined by the kernel directly 
and is not + * explicitly specified by userspace. The only exception is sets, where + * userspace specifies the key and mapping data types. + * + * The values 0xffffff00-0xffffffff are reserved for internally used types. + * The remaining range can be freely used by userspace to encode types, all + * values are equivalent to NFT_DATA_VALUE. + */ +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT = 0xffffff00U, +}; + +#define NFT_DATA_RESERVED_MASK 0xffffff00U + +/** + * enum nft_data_attributes - nf_tables data netlink attributes + * + * @NFTA_DATA_VALUE: generic data (NLA_BINARY) + * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes) + */ +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + __NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/* Maximum length of a value */ +#define NFT_DATA_VALUE_MAXLEN 64 + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, +
NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_ops - nf_tables bitwise operations + * + * @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and + * XOR boolean operations + * @NFT_BITWISE_LSHIFT: left-shift operation + * @NFT_BITWISE_RSHIFT: right-shift operation + */ +enum nft_bitwise_ops { + NFT_BITWISE_BOOL, + NFT_BITWISE_LSHIFT, + NFT_BITWISE_RSHIFT, +}; + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops) + * @NFTA_BITWISE_DATA: argument for non-boolean operations + * (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression supports boolean and shift operations. 
It implements + * the boolean operations by performing the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * with these mask and xor values: + * + * mask xor + * NOT: 1 1 + * OR: ~x x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + NFTA_BITWISE_OP, + NFTA_BITWISE_DATA, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network operator + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation 
(NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: range operation (NLA_U32: nft_range_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + +enum nft_lookup_flags { + NFT_LOOKUP_F_INV = (1 << 0), +}; + +/** + * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes + * + * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) + * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags) + */ +enum nft_lookup_attributes { + NFTA_LOOKUP_UNSPEC, + NFTA_LOOKUP_SET, + NFTA_LOOKUP_SREG, + NFTA_LOOKUP_DREG, + NFTA_LOOKUP_SET_ID, + NFTA_LOOKUP_FLAGS, + __NFTA_LOOKUP_MAX +}; +#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) + +enum nft_dynset_ops { + NFT_DYNSET_OP_ADD, + NFT_DYNSET_OP_UPDATE, + NFT_DYNSET_OP_DELETE, +}; + +enum nft_dynset_flags { + NFT_DYNSET_F_INV = (1 << 0), +}; + +/** + * enum nft_dynset_attributes - dynset
expression attributes + * + * @NFTA_DYNSET_SET_NAME: name of the set to add data to (NLA_STRING) + * @NFTA_DYNSET_SET_ID: unique identifier of the set in the transaction (NLA_U32) + * @NFTA_DYNSET_OP: operation (NLA_U32) + * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) + * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) + * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) + * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_DYNSET_FLAGS: flags (NLA_U32) + */ +enum nft_dynset_attributes { + NFTA_DYNSET_UNSPEC, + NFTA_DYNSET_SET_NAME, + NFTA_DYNSET_SET_ID, + NFTA_DYNSET_OP, + NFTA_DYNSET_SREG_KEY, + NFTA_DYNSET_SREG_DATA, + NFTA_DYNSET_TIMEOUT, + NFTA_DYNSET_EXPR, + NFTA_DYNSET_PAD, + NFTA_DYNSET_FLAGS, + __NFTA_DYNSET_MAX, +}; +#define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, +}; + +/** + * enum nft_payload_csum_types - nf_tables payload expression checksum types + * + * @NFT_PAYLOAD_CSUM_NONE: no checksumming + * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791) + */ +enum nft_payload_csum_types { + NFT_PAYLOAD_CSUM_NONE, + NFT_PAYLOAD_CSUM_INET, +}; + +enum nft_payload_csum_flags { + NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), +}; + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32:
nft_registers) + * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) + * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + NFTA_PAYLOAD_SREG, + NFTA_PAYLOAD_CSUM_TYPE, + NFTA_PAYLOAD_CSUM_OFFSET, + NFTA_PAYLOAD_CSUM_FLAGS, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +enum nft_exthdr_flags { + NFT_EXTHDR_F_PRESENT = (1 << 0), +}; + +/** + * enum nft_exthdr_op - nf_tables match options + * + * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers + * @NFT_EXTHDR_OP_TCPOPT: match against tcp options + * @NFT_EXTHDR_OP_IPV4: match against ipv4 options + */ +enum nft_exthdr_op { + NFT_EXTHDR_OP_IPV6, + NFT_EXTHDR_OP_TCPOPT, + NFT_EXTHDR_OP_IPV4, + __NFT_EXTHDR_OP_MAX +}; +#define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) + * @NFTA_EXTHDR_OP: option match type (NLA_U32) + * @NFTA_EXTHDR_SREG: source register (NLA_U32: nft_registers) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + NFTA_EXTHDR_FLAGS, + NFTA_EXTHDR_OP, + NFTA_EXTHDR_SREG, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority 
(skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + * @NFT_META_NFPROTO: netfilter protocol + * @NFT_META_L4PROTO: layer 4 protocol number + * @NFT_META_BRI_IIFNAME: packet input bridge interface name + * @NFT_META_BRI_OIFNAME: packet output bridge interface name + * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback + * @NFT_META_CPU: cpu id through smp_processor_id() + * @NFT_META_IIFGROUP: packet input interface group + * @NFT_META_OIFGROUP: packet output interface group + * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) + * @NFT_META_PRANDOM: a 32bit pseudo-random number + * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp) + * @NFT_META_IIFKIND: packet input interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_BRI_IIFPVID: packet input bridge port pvid + * @NFT_META_BRI_IIFVPROTO: packet input bridge vlan proto + * @NFT_META_TIME_NS: time since epoch (in nanoseconds) + * @NFT_META_TIME_DAY: day of week (from 0 = Sunday to 6 = Saturday) + * @NFT_META_TIME_HOUR: hour of day (in seconds) + * @NFT_META_SDIF: slave device interface index + * @NFT_META_SDIFNAME: slave device interface name + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + 
NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IIFTYPE, + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, + NFT_META_NFPROTO, + NFT_META_L4PROTO, + NFT_META_BRI_IIFNAME, + NFT_META_BRI_OIFNAME, + NFT_META_PKTTYPE, + NFT_META_CPU, + NFT_META_IIFGROUP, + NFT_META_OIFGROUP, + NFT_META_CGROUP, + NFT_META_PRANDOM, + NFT_META_SECPATH, + NFT_META_IIFKIND, + NFT_META_OIFKIND, + NFT_META_BRI_IIFPVID, + NFT_META_BRI_IIFVPROTO, + NFT_META_TIME_NS, + NFT_META_TIME_DAY, + NFT_META_TIME_HOUR, + NFT_META_SDIF, + NFT_META_SDIFNAME, +}; + +/** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + * @NFT_RT_TCPMSS: fetch current path tcp mss + * @NFT_RT_XFRM: boolean, skb->dst->xfrm != NULL + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, + NFT_RT_TCPMSS, + NFT_RT_XFRM, + __NFT_RT_MAX +}; +#define NFT_RT_MAX (__NFT_RT_MAX - 1) + +/** + * enum nft_hash_types - nf_tables hash expression types + * + * @NFT_HASH_JENKINS: Jenkins Hash + * @NFT_HASH_SYM: Symmetric Hash + */ +enum nft_hash_types { + NFT_HASH_JENKINS, + NFT_HASH_SYM, +}; + +/** + * enum nft_hash_attributes - nf_tables hash expression netlink attributes + * + * @NFTA_HASH_SREG: source register (NLA_U32) + * @NFTA_HASH_DREG: destination register (NLA_U32) + * @NFTA_HASH_LEN: source data length (NLA_U32) + * @NFTA_HASH_MODULUS: modulus value (NLA_U32) + * @NFTA_HASH_SEED: seed value (NLA_U32) + * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) + * @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types) + * @NFTA_HASH_SET_NAME: name of the map to lookup (NLA_STRING) + * @NFTA_HASH_SET_ID: id of the map (NLA_U32) + */ +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_SREG, + 
NFTA_HASH_DREG, + NFTA_HASH_LEN, + NFTA_HASH_MODULUS, + NFTA_HASH_SEED, + NFTA_HASH_OFFSET, + NFTA_HASH_TYPE, + NFTA_HASH_SET_NAME, /* deprecated */ + NFTA_HASH_SET_ID, /* deprecated */ + __NFTA_HASH_MAX, +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + * @NFTA_META_SREG: source register (NLA_U32) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + NFTA_META_SREG, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + +/** + * enum nft_socket_attributes - nf_tables socket expression netlink attributes + * + * @NFTA_SOCKET_KEY: socket key to match + * @NFTA_SOCKET_DREG: destination register + */ +enum nft_socket_attributes { + NFTA_SOCKET_UNSPEC, + NFTA_SOCKET_KEY, + NFTA_SOCKET_DREG, + __NFTA_SOCKET_MAX +}; +#define NFTA_SOCKET_MAX (__NFTA_SOCKET_MAX - 1) + +/* + * enum nft_socket_keys - nf_tables socket expression keys + * + * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option + * @NFT_SOCKET_MARK: Value of the socket mark + */ +enum nft_socket_keys { + NFT_SOCKET_TRANSPARENT, + NFT_SOCKET_MARK, + __NFT_SOCKET_MAX +}; +#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * 
@NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + * @NFT_CT_LABELS: conntrack labels + * @NFT_CT_PKTS: conntrack packets + * @NFT_CT_BYTES: conntrack bytes + * @NFT_CT_AVGPKT: conntrack average bytes per packet + * @NFT_CT_ZONE: conntrack zone + * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack + * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address) + * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address) + * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address) + * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address) + * @NFT_CT_ID: conntrack id + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, + NFT_CT_LABELS, + NFT_CT_PKTS, + NFT_CT_BYTES, + NFT_CT_AVGPKT, + NFT_CT_ZONE, + NFT_CT_EVENTMASK, + NFT_CT_SRC_IP, + NFT_CT_DST_IP, + NFT_CT_SRC_IP6, + NFT_CT_DST_IP6, + NFT_CT_ID, + __NFT_CT_MAX +}; +#define NFT_CT_MAX (__NFT_CT_MAX - 1) + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + * @NFTA_CT_SREG: source register 
(NLA_U32) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + NFTA_CT_SREG, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_offload_attributes - ct offload expression attributes + * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) + */ +enum nft_offload_attributes { + NFTA_FLOW_UNSPEC, + NFTA_FLOW_TABLE_NAME, + __NFTA_FLOW_MAX, +}; +#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1) + +enum nft_limit_type { + NFT_LIMIT_PKTS, + NFT_LIMIT_PKT_BYTES +}; + +enum nft_limit_flags { + NFT_LIMIT_F_INV = (1 << 0), +}; + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + * @NFTA_LIMIT_BURST: burst (NLA_U32) + * @NFTA_LIMIT_TYPE: type of limit (NLA_U32: enum nft_limit_type) + * @NFTA_LIMIT_FLAGS: flags (NLA_U32: enum nft_limit_flags) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + NFTA_LIMIT_BURST, + NFTA_LIMIT_TYPE, + NFTA_LIMIT_FLAGS, + NFTA_LIMIT_PAD, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +enum nft_connlimit_flags { + NFT_CONNLIMIT_F_INV = (1 << 0), +}; + +/** + * enum nft_connlimit_attributes - nf_tables connlimit expression netlink attributes + * + * @NFTA_CONNLIMIT_COUNT: number of connections (NLA_U32) + * @NFTA_CONNLIMIT_FLAGS: flags (NLA_U32: enum nft_connlimit_flags) + */ +enum nft_connlimit_attributes { + NFTA_CONNLIMIT_UNSPEC, + NFTA_CONNLIMIT_COUNT, + NFTA_CONNLIMIT_FLAGS, + __NFTA_CONNLIMIT_MAX +}; +#define NFTA_CONNLIMIT_MAX (__NFTA_CONNLIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + NFTA_COUNTER_PAD, + 
__NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + * @NFTA_LOG_LEVEL: log level (NLA_U32) + * @NFTA_LOG_FLAGS: logging flags (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + NFTA_LOG_LEVEL, + NFTA_LOG_FLAGS, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_log_level - nf_tables log levels + * + * @NFT_LOGLEVEL_EMERG: system is unusable + * @NFT_LOGLEVEL_ALERT: action must be taken immediately + * @NFT_LOGLEVEL_CRIT: critical conditions + * @NFT_LOGLEVEL_ERR: error conditions + * @NFT_LOGLEVEL_WARNING: warning conditions + * @NFT_LOGLEVEL_NOTICE: normal but significant condition + * @NFT_LOGLEVEL_INFO: informational + * @NFT_LOGLEVEL_DEBUG: debug-level messages + * @NFT_LOGLEVEL_AUDIT: enabling audit logging + */ +enum nft_log_level { + NFT_LOGLEVEL_EMERG, + NFT_LOGLEVEL_ALERT, + NFT_LOGLEVEL_CRIT, + NFT_LOGLEVEL_ERR, + NFT_LOGLEVEL_WARNING, + NFT_LOGLEVEL_NOTICE, + NFT_LOGLEVEL_INFO, + NFT_LOGLEVEL_DEBUG, + NFT_LOGLEVEL_AUDIT, + __NFT_LOGLEVEL_MAX +}; +#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX - 1) + +/** + * enum nft_queue_attributes - nf_tables queue expression netlink attributes + * + * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) + * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) + * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers) + */ +enum nft_queue_attributes { + NFTA_QUEUE_UNSPEC, + NFTA_QUEUE_NUM, + NFTA_QUEUE_TOTAL, + NFTA_QUEUE_FLAGS, + 
NFTA_QUEUE_SREG_QNUM, + __NFTA_QUEUE_MAX +}; +#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) + +#define NFT_QUEUE_FLAG_BYPASS 0x01 /* for compatibility with v2 */ +#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ +#define NFT_QUEUE_FLAG_MASK 0x03 + +enum nft_quota_flags { + NFT_QUOTA_F_INV = (1 << 0), + NFT_QUOTA_F_DEPLETED = (1 << 1), +}; + +/** + * enum nft_quota_attributes - nf_tables quota expression netlink attributes + * + * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U64) + * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + * @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64) + */ +enum nft_quota_attributes { + NFTA_QUOTA_UNSPEC, + NFTA_QUOTA_BYTES, + NFTA_QUOTA_FLAGS, + NFTA_QUOTA_PAD, + NFTA_QUOTA_CONSUMED, + __NFTA_QUOTA_MAX +}; +#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) + +/** + * enum nft_secmark_attributes - nf_tables secmark object netlink attributes + * + * @NFTA_SECMARK_CTX: security context (NLA_STRING) + */ +enum nft_secmark_attributes { + NFTA_SECMARK_UNSPEC, + NFTA_SECMARK_CTX, + __NFTA_SECMARK_MAX, +}; +#define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) + +/* Max security context length */ +#define NFT_SECMARK_CTX_MAXLEN 256 + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + * @NFT_REJECT_ICMPX_UNREACH: abstracted ICMP unreachable for bridge and inet + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, + NFT_REJECT_ICMPX_UNREACH, +}; + +/** + * enum nft_reject_inet_code - Generic reject codes for IPv4/IPv6 + * + * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable + * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable + * @NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable + * @NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratively prohibited + * + * These codes are mapped to real ICMP and ICMPv6 codes. 
+ */ +enum nft_reject_inet_code { + NFT_REJECT_ICMPX_NO_ROUTE = 0, + NFT_REJECT_ICMPX_PORT_UNREACH, + NFT_REJECT_ICMPX_HOST_UNREACH, + NFT_REJECT_ICMPX_ADMIN_PROHIBITED, + __NFT_REJECT_ICMPX_MAX +}; +#define NFT_REJECT_ICMPX_MAX (__NFT_REJECT_ICMPX_MAX - 1) + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_FAMILY: NAT family (NLA_U32) + * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_NAT_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_FAMILY, + NFTA_NAT_REG_ADDR_MIN, + NFTA_NAT_REG_ADDR_MAX, + NFTA_NAT_REG_PROTO_MIN, + NFTA_NAT_REG_PROTO_MAX, + NFTA_NAT_FLAGS, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +/** + * enum nft_tproxy_attributes - nf_tables tproxy expression netlink attributes + * + * NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers) + * NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers) + * 
NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers) + */ +enum nft_tproxy_attributes { + NFTA_TPROXY_UNSPEC, + NFTA_TPROXY_FAMILY, + NFTA_TPROXY_REG_ADDR, + NFTA_TPROXY_REG_PORT, + __NFTA_TPROXY_MAX +}; +#define NFTA_TPROXY_MAX (__NFTA_TPROXY_MAX - 1) + +/** + * enum nft_masq_attributes - nf_tables masquerade expression attributes + * + * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + * @NFTA_MASQ_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_MASQ_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_masq_attributes { + NFTA_MASQ_UNSPEC, + NFTA_MASQ_FLAGS, + NFTA_MASQ_REG_PROTO_MIN, + NFTA_MASQ_REG_PROTO_MAX, + __NFTA_MASQ_MAX +}; +#define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) + +/** + * enum nft_redir_attributes - nf_tables redirect expression netlink attributes + * + * @NFTA_REDIR_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_REDIR_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_REDIR_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_redir_attributes { + NFTA_REDIR_UNSPEC, + NFTA_REDIR_REG_PROTO_MIN, + NFTA_REDIR_REG_PROTO_MAX, + NFTA_REDIR_FLAGS, + __NFTA_REDIR_MAX +}; +#define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1) + +/** + * enum nft_dup_attributes - nf_tables dup expression netlink attributes + * + * @NFTA_DUP_SREG_ADDR: source register of address (NLA_U32: nft_registers) + * @NFTA_DUP_SREG_DEV: source register of output interface (NLA_U32: nft_register) + */ +enum nft_dup_attributes { + NFTA_DUP_UNSPEC, + NFTA_DUP_SREG_ADDR, + NFTA_DUP_SREG_DEV, + __NFTA_DUP_MAX +}; +#define NFTA_DUP_MAX (__NFTA_DUP_MAX - 1) + +/** + * enum nft_fwd_attributes - nf_tables fwd expression netlink attributes + * + * @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register) + * @NFTA_FWD_SREG_ADDR: source 
register of destination address (NLA_U32: nft_register) + * @NFTA_FWD_NFPROTO: layer 3 family of source register address (NLA_U32: enum nfproto) + */ +enum nft_fwd_attributes { + NFTA_FWD_UNSPEC, + NFTA_FWD_SREG_DEV, + NFTA_FWD_SREG_ADDR, + NFTA_FWD_NFPROTO, + __NFTA_FWD_MAX +}; +#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) + +/** + * enum nft_objref_attributes - nf_tables stateful object expression netlink attributes + * + * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register) + * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING) + * @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING) + * @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction (NLA_U32) + */ +enum nft_objref_attributes { + NFTA_OBJREF_UNSPEC, + NFTA_OBJREF_IMM_TYPE, + NFTA_OBJREF_IMM_NAME, + NFTA_OBJREF_SET_SREG, + NFTA_OBJREF_SET_NAME, + NFTA_OBJREF_SET_ID, + __NFTA_OBJREF_MAX +}; +#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1) + +/** + * enum nft_gen_attributes - nf_tables ruleset generation attributes + * + * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) + */ +enum nft_gen_attributes { + NFTA_GEN_UNSPEC, + NFTA_GEN_ID, + NFTA_GEN_PROC_PID, + NFTA_GEN_PROC_NAME, + __NFTA_GEN_MAX +}; +#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) + +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. 
+ */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ + NFTA_FIB_F_PRESENT = 1 << 5, /* check existence only */ +}; + +enum nft_ct_helper_attributes { + NFTA_CT_HELPER_UNSPEC, + NFTA_CT_HELPER_NAME, + NFTA_CT_HELPER_L3PROTO, + NFTA_CT_HELPER_L4PROTO, + __NFTA_CT_HELPER_MAX, +}; +#define NFTA_CT_HELPER_MAX (__NFTA_CT_HELPER_MAX - 1) + +enum nft_ct_timeout_timeout_attributes { + NFTA_CT_TIMEOUT_UNSPEC, + NFTA_CT_TIMEOUT_L3PROTO, + NFTA_CT_TIMEOUT_L4PROTO, + NFTA_CT_TIMEOUT_DATA, + __NFTA_CT_TIMEOUT_MAX, +}; +#define NFTA_CT_TIMEOUT_MAX (__NFTA_CT_TIMEOUT_MAX - 1) + +enum nft_ct_expectation_attributes { + NFTA_CT_EXPECT_UNSPEC, + NFTA_CT_EXPECT_L3PROTO, + NFTA_CT_EXPECT_L4PROTO, + NFTA_CT_EXPECT_DPORT, + NFTA_CT_EXPECT_TIMEOUT, + NFTA_CT_EXPECT_SIZE, + __NFTA_CT_EXPECT_MAX, +}; +#define NFTA_CT_EXPECT_MAX (__NFTA_CT_EXPECT_MAX - 1) + +#define NFT_OBJECT_UNSPEC 0 +#define NFT_OBJECT_COUNTER 1 +#define NFT_OBJECT_QUOTA 2 +#define NFT_OBJECT_CT_HELPER 3 +#define NFT_OBJECT_LIMIT 4 +#define NFT_OBJECT_CONNLIMIT 5 +#define NFT_OBJECT_TUNNEL 6 +#define NFT_OBJECT_CT_TIMEOUT 7 +#define NFT_OBJECT_SECMARK 8 +#define NFT_OBJECT_CT_EXPECT 9 +#define NFT_OBJECT_SYNPROXY 10 +#define __NFT_OBJECT_MAX 11 +#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) + +/** + * enum nft_object_attributes - nf_tables stateful object netlink attributes + * + * @NFTA_OBJ_TABLE: name of the table containing the expression 
(NLA_STRING) + * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING) + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + * @NFTA_OBJ_HANDLE: object handle (NLA_U64) + */ +enum nft_object_attributes { + NFTA_OBJ_UNSPEC, + NFTA_OBJ_TABLE, + NFTA_OBJ_NAME, + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, + NFTA_OBJ_HANDLE, + NFTA_OBJ_PAD, + __NFTA_OBJ_MAX +}; +#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) + +/** + * enum nft_flowtable_flags - nf_tables flowtable flags + * + * @NFT_FLOWTABLE_HW_OFFLOAD: flowtable hardware offload is enabled + * @NFT_FLOWTABLE_COUNTER: enable flow counters + */ +enum nft_flowtable_flags { + NFT_FLOWTABLE_HW_OFFLOAD = 0x1, + NFT_FLOWTABLE_COUNTER = 0x2, + NFT_FLOWTABLE_MASK = (NFT_FLOWTABLE_HW_OFFLOAD | + NFT_FLOWTABLE_COUNTER) +}; + +/** + * enum nft_flowtable_attributes - nf_tables flow table netlink attributes + * + * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) + * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32) + */ +enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, + NFTA_FLOWTABLE_TABLE, + NFTA_FLOWTABLE_NAME, + NFTA_FLOWTABLE_HOOK, + NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, + NFTA_FLOWTABLE_FLAGS, + __NFTA_FLOWTABLE_MAX +}; +#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) + +/** + * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes + * + * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED) + */ +enum 
nft_flowtable_hook_attributes { + NFTA_FLOWTABLE_HOOK_UNSPEC, + NFTA_FLOWTABLE_HOOK_NUM, + NFTA_FLOWTABLE_HOOK_PRIORITY, + NFTA_FLOWTABLE_HOOK_DEVS, + __NFTA_FLOWTABLE_HOOK_MAX +}; +#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) + +/** + * enum nft_osf_attributes - nftables osf expression netlink attributes + * + * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8) + * @NFTA_OSF_FLAGS: flags (NLA_U32) + */ +enum nft_osf_attributes { + NFTA_OSF_UNSPEC, + NFTA_OSF_DREG, + NFTA_OSF_TTL, + NFTA_OSF_FLAGS, + __NFTA_OSF_MAX, +}; +#define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) + +enum nft_osf_flags { + NFT_OSF_F_VERSION = (1 << 0), +}; + +/** + * enum nft_synproxy_attributes - nf_tables synproxy expression netlink attributes + * + * @NFTA_SYNPROXY_MSS: mss value sent to the backend (NLA_U16) + * @NFTA_SYNPROXY_WSCALE: wscale value sent to the backend (NLA_U8) + * @NFTA_SYNPROXY_FLAGS: flags (NLA_U32) + */ +enum nft_synproxy_attributes { + NFTA_SYNPROXY_UNSPEC, + NFTA_SYNPROXY_MSS, + NFTA_SYNPROXY_WSCALE, + NFTA_SYNPROXY_FLAGS, + __NFTA_SYNPROXY_MAX, +}; +#define NFTA_SYNPROXY_MAX (__NFTA_SYNPROXY_MAX - 1) + +/** + * enum nft_devices_attributes - nf_tables device netlink attributes + * + * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + */ +enum nft_devices_attributes { + NFTA_DEVICE_UNSPEC, + NFTA_DEVICE_NAME, + __NFTA_DEVICE_MAX +}; +#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) + +/* + * enum nft_xfrm_attributes - nf_tables xfrm expr netlink attributes + * + * @NFTA_XFRM_DREG: destination register (NLA_U32) + * @NFTA_XFRM_KEY: enum nft_xfrm_keys (NLA_U32) + * @NFTA_XFRM_DIR: direction (NLA_U8) + * @NFTA_XFRM_SPNUM: index in secpath array (NLA_U32) + */ +enum nft_xfrm_attributes { + NFTA_XFRM_UNSPEC, + NFTA_XFRM_DREG, + NFTA_XFRM_KEY, + NFTA_XFRM_DIR, + NFTA_XFRM_SPNUM, + __NFTA_XFRM_MAX +}; +#define NFTA_XFRM_MAX (__NFTA_XFRM_MAX - 1) + +enum nft_xfrm_keys { + NFT_XFRM_KEY_UNSPEC, + 
NFT_XFRM_KEY_DADDR_IP4, + NFT_XFRM_KEY_DADDR_IP6, + NFT_XFRM_KEY_SADDR_IP4, + NFT_XFRM_KEY_SADDR_IP6, + NFT_XFRM_KEY_REQID, + NFT_XFRM_KEY_SPI, + __NFT_XFRM_KEY_MAX, +}; +#define NFT_XFRM_KEY_MAX (__NFT_XFRM_KEY_MAX - 1) + +/** + * enum nft_trace_attributes - nf_tables trace netlink attributes + * + * @NFTA_TRACE_TABLE: name of the table (NLA_STRING) + * @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING) + * @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types) + * @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts) + * @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32) + * @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY) + * @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY) + * @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY) + * @NFTA_TRACE_IIF: indev ifindex (NLA_U32) + * @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16) + * @NFTA_TRACE_OIF: outdev ifindex (NLA_U32) + * @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16) + * @NFTA_TRACE_MARK: nfmark (NLA_U32) + * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) + * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) + */ +enum nft_trace_attributes { + NFTA_TRACE_UNSPEC, + NFTA_TRACE_TABLE, + NFTA_TRACE_CHAIN, + NFTA_TRACE_RULE_HANDLE, + NFTA_TRACE_TYPE, + NFTA_TRACE_VERDICT, + NFTA_TRACE_ID, + NFTA_TRACE_LL_HEADER, + NFTA_TRACE_NETWORK_HEADER, + NFTA_TRACE_TRANSPORT_HEADER, + NFTA_TRACE_IIF, + NFTA_TRACE_IIFTYPE, + NFTA_TRACE_OIF, + NFTA_TRACE_OIFTYPE, + NFTA_TRACE_MARK, + NFTA_TRACE_NFPROTO, + NFTA_TRACE_POLICY, + NFTA_TRACE_PAD, + __NFTA_TRACE_MAX +}; +#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) + +enum nft_trace_types { + NFT_TRACETYPE_UNSPEC, + NFT_TRACETYPE_POLICY, + NFT_TRACETYPE_RETURN, + NFT_TRACETYPE_RULE, + __NFT_TRACETYPE_MAX +}; +#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) + +/** + * enum nft_ng_attributes - nf_tables number generator 
expression netlink attributes + * + * @NFTA_NG_DREG: destination register (NLA_U32) + * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) + * @NFTA_NG_TYPE: operation type (NLA_U32) + * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) + * @NFTA_NG_SET_NAME: name of the map to lookup (NLA_STRING) + * @NFTA_NG_SET_ID: id of the map (NLA_U32) + */ +enum nft_ng_attributes { + NFTA_NG_UNSPEC, + NFTA_NG_DREG, + NFTA_NG_MODULUS, + NFTA_NG_TYPE, + NFTA_NG_OFFSET, + NFTA_NG_SET_NAME, /* deprecated */ + NFTA_NG_SET_ID, /* deprecated */ + __NFTA_NG_MAX +}; +#define NFTA_NG_MAX (__NFTA_NG_MAX - 1) + +enum nft_ng_types { + NFT_NG_INCREMENTAL, + NFT_NG_RANDOM, + __NFT_NG_MAX +}; +#define NFT_NG_MAX (__NFT_NG_MAX - 1) + +enum nft_tunnel_key_ip_attributes { + NFTA_TUNNEL_KEY_IP_UNSPEC, + NFTA_TUNNEL_KEY_IP_SRC, + NFTA_TUNNEL_KEY_IP_DST, + __NFTA_TUNNEL_KEY_IP_MAX +}; +#define NFTA_TUNNEL_KEY_IP_MAX (__NFTA_TUNNEL_KEY_IP_MAX - 1) + +enum nft_tunnel_ip6_attributes { + NFTA_TUNNEL_KEY_IP6_UNSPEC, + NFTA_TUNNEL_KEY_IP6_SRC, + NFTA_TUNNEL_KEY_IP6_DST, + NFTA_TUNNEL_KEY_IP6_FLOWLABEL, + __NFTA_TUNNEL_KEY_IP6_MAX +}; +#define NFTA_TUNNEL_KEY_IP6_MAX (__NFTA_TUNNEL_KEY_IP6_MAX - 1) + +enum nft_tunnel_opts_attributes { + NFTA_TUNNEL_KEY_OPTS_UNSPEC, + NFTA_TUNNEL_KEY_OPTS_VXLAN, + NFTA_TUNNEL_KEY_OPTS_ERSPAN, + NFTA_TUNNEL_KEY_OPTS_GENEVE, + __NFTA_TUNNEL_KEY_OPTS_MAX +}; +#define NFTA_TUNNEL_KEY_OPTS_MAX (__NFTA_TUNNEL_KEY_OPTS_MAX - 1) + +enum nft_tunnel_opts_vxlan_attributes { + NFTA_TUNNEL_KEY_VXLAN_UNSPEC, + NFTA_TUNNEL_KEY_VXLAN_GBP, + __NFTA_TUNNEL_KEY_VXLAN_MAX +}; +#define NFTA_TUNNEL_KEY_VXLAN_MAX (__NFTA_TUNNEL_KEY_VXLAN_MAX - 1) + +enum nft_tunnel_opts_erspan_attributes { + NFTA_TUNNEL_KEY_ERSPAN_UNSPEC, + NFTA_TUNNEL_KEY_ERSPAN_VERSION, + NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX, + NFTA_TUNNEL_KEY_ERSPAN_V2_HWID, + NFTA_TUNNEL_KEY_ERSPAN_V2_DIR, + __NFTA_TUNNEL_KEY_ERSPAN_MAX +}; +#define NFTA_TUNNEL_KEY_ERSPAN_MAX (__NFTA_TUNNEL_KEY_ERSPAN_MAX - 1) + +enum 
nft_tunnel_opts_geneve_attributes { + NFTA_TUNNEL_KEY_GENEVE_UNSPEC, + NFTA_TUNNEL_KEY_GENEVE_CLASS, + NFTA_TUNNEL_KEY_GENEVE_TYPE, + NFTA_TUNNEL_KEY_GENEVE_DATA, + __NFTA_TUNNEL_KEY_GENEVE_MAX +}; +#define NFTA_TUNNEL_KEY_GENEVE_MAX (__NFTA_TUNNEL_KEY_GENEVE_MAX - 1) + +enum nft_tunnel_flags { + NFT_TUNNEL_F_ZERO_CSUM_TX = (1 << 0), + NFT_TUNNEL_F_DONT_FRAGMENT = (1 << 1), + NFT_TUNNEL_F_SEQ_NUMBER = (1 << 2), +}; +#define NFT_TUNNEL_F_MASK (NFT_TUNNEL_F_ZERO_CSUM_TX | \ + NFT_TUNNEL_F_DONT_FRAGMENT | \ + NFT_TUNNEL_F_SEQ_NUMBER) + +enum nft_tunnel_key_attributes { + NFTA_TUNNEL_KEY_UNSPEC, + NFTA_TUNNEL_KEY_ID, + NFTA_TUNNEL_KEY_IP, + NFTA_TUNNEL_KEY_IP6, + NFTA_TUNNEL_KEY_FLAGS, + NFTA_TUNNEL_KEY_TOS, + NFTA_TUNNEL_KEY_TTL, + NFTA_TUNNEL_KEY_SPORT, + NFTA_TUNNEL_KEY_DPORT, + NFTA_TUNNEL_KEY_OPTS, + __NFTA_TUNNEL_KEY_MAX +}; +#define NFTA_TUNNEL_KEY_MAX (__NFTA_TUNNEL_KEY_MAX - 1) + +enum nft_tunnel_keys { + NFT_TUNNEL_PATH, + NFT_TUNNEL_ID, + __NFT_TUNNEL_MAX +}; +#define NFT_TUNNEL_MAX (__NFT_TUNNEL_MAX - 1) + +enum nft_tunnel_mode { + NFT_TUNNEL_MODE_NONE, + NFT_TUNNEL_MODE_RX, + NFT_TUNNEL_MODE_TX, + __NFT_TUNNEL_MODE_MAX +}; +#define NFT_TUNNEL_MODE_MAX (__NFT_TUNNEL_MODE_MAX - 1) + +enum nft_tunnel_attributes { + NFTA_TUNNEL_UNSPEC, + NFTA_TUNNEL_KEY, + NFTA_TUNNEL_DREG, + NFTA_TUNNEL_MODE, + __NFTA_TUNNEL_MAX +}; +#define NFTA_TUNNEL_MAX (__NFTA_TUNNEL_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/src/basic/linux/netfilter/nfnetlink.h b/src/basic/linux/netfilter/nfnetlink.h new file mode 100644 index 0000000000..a89f3a56a3 --- /dev/null +++ b/src/basic/linux/netfilter/nfnetlink.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _NFNETLINK_H +#define _NFNETLINK_H +#include <linux/types.h> +#include <linux/netfilter/nfnetlink_compat.h> + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE 
NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES + NFNLGRP_ACCT_QUOTA, +#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA + NFNLGRP_NFTRACE, +#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + __u8 nfgen_family; /* AF_xxx */ + __u8 version; /* nfnetlink version */ + __be16 res_id; /* resource id */ +}; + +#define NFNETLINK_V0 0 + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. + */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_OSF 5 +#define NFNL_SUBSYS_IPSET 6 +#define NFNL_SUBSYS_ACCT 7 +#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 +#define NFNL_SUBSYS_CTHELPER 9 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_NFT_COMPAT 11 +#define NFNL_SUBSYS_COUNT 12 + +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 + +/** + * enum nfnl_batch_attributes - nfnetlink batch netlink attributes + * + * @NFNL_BATCH_GENID: generation ID for this changeset (NLA_U32) + */ +enum nfnl_batch_attributes { + NFNL_BATCH_UNSPEC, + NFNL_BATCH_GENID, + __NFNL_BATCH_MAX +}; +#define NFNL_BATCH_MAX 
(__NFNL_BATCH_MAX - 1) + +#endif /* _NFNETLINK_H */ From 6f00fd9b7c9471e191c3639561072d2bcadd5447 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jun 2020 14:03:03 +0200 Subject: [PATCH 06/11] sd-netlink: add nfnetlink/nftables type system Will be used by upcoming nftables support -- it will use the netlink interface directly rather than add another library dependency. --- src/libsystemd/sd-netlink/netlink-types.c | 246 +++++++++++++++++++++- src/libsystemd/sd-netlink/netlink-types.h | 17 ++ 2 files changed, 262 insertions(+), 1 deletion(-) diff --git a/src/libsystemd/sd-netlink/netlink-types.c b/src/libsystemd/sd-netlink/netlink-types.c index 6fb6c147d9..167433337f 100644 --- a/src/libsystemd/sd-netlink/netlink-types.c +++ b/src/libsystemd/sd-netlink/netlink-types.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -1312,6 +1314,243 @@ static const NLType genl_families[] = { [SD_GENL_NL80211] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_cmds_type_system }, }; +static const NLType nfnl_nft_table_types[] = { + [NFTA_TABLE_NAME] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_TABLE_FLAGS] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLTypeSystem nfnl_nft_table_type_system = { + .count = ELEMENTSOF(nfnl_nft_table_types), + .types = nfnl_nft_table_types, +}; + +static const NLType nfnl_nft_chain_hook_types[] = { + [NFTA_HOOK_HOOKNUM] = { .type = NETLINK_TYPE_U32 }, + [NFTA_HOOK_PRIORITY] = { .type = NETLINK_TYPE_U32 }, + [NFTA_HOOK_DEV] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ - 1 }, +}; + +static const NLTypeSystem nfnl_nft_chain_hook_type_system = { + .count = ELEMENTSOF(nfnl_nft_chain_hook_types), + .types = nfnl_nft_chain_hook_types, +}; + +static const NLType nfnl_nft_chain_types[] = { + [NFTA_CHAIN_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_CHAIN_NAME] = { .type = NETLINK_TYPE_STRING, .size = 
NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_CHAIN_HOOK] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_chain_hook_type_system }, + [NFTA_CHAIN_TYPE] = { .type = NETLINK_TYPE_STRING, .size = 16 }, + [NFTA_CHAIN_FLAGS] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLTypeSystem nfnl_nft_chain_type_system = { + .count = ELEMENTSOF(nfnl_nft_chain_types), + .types = nfnl_nft_chain_types, +}; + +static const NLType nfnl_nft_expr_meta_types[] = { + [NFTA_META_DREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_META_KEY] = { .type = NETLINK_TYPE_U32 }, + [NFTA_META_SREG] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLType nfnl_nft_expr_payload_types[] = { + [NFTA_PAYLOAD_DREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NETLINK_TYPE_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NETLINK_TYPE_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLType nfnl_nft_expr_nat_types[] = { + [NFTA_NAT_TYPE] = { .type = NETLINK_TYPE_U32 }, + [NFTA_NAT_FAMILY] = { .type = NETLINK_TYPE_U32 }, + [NFTA_NAT_REG_ADDR_MIN] = { .type = NETLINK_TYPE_U32 }, + [NFTA_NAT_REG_ADDR_MAX] = { .type = NETLINK_TYPE_U32 }, + [NFTA_NAT_REG_PROTO_MIN] = { .type = NETLINK_TYPE_U32 }, + [NFTA_NAT_REG_PROTO_MAX] = { .type = NETLINK_TYPE_U32 }, + [NFTA_NAT_FLAGS] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLType nfnl_nft_data_types[] = { + [NFTA_DATA_VALUE] = { .type = NETLINK_TYPE_BINARY }, +}; + +static const NLTypeSystem nfnl_nft_data_type_system = { + .count = ELEMENTSOF(nfnl_nft_data_types), + .types = nfnl_nft_data_types, +}; + +static const NLType nfnl_nft_expr_bitwise_types[] = { + [NFTA_BITWISE_SREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_BITWISE_DREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_BITWISE_LEN] = { .type = NETLINK_TYPE_U32 }, + [NFTA_BITWISE_MASK] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system }, + [NFTA_BITWISE_XOR] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system }, +}; + 
+static const NLType nfnl_nft_expr_cmp_types[] = { + [NFTA_CMP_SREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_CMP_OP] = { .type = NETLINK_TYPE_U32 }, + [NFTA_CMP_DATA] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system }, +}; + +static const NLType nfnl_nft_expr_fib_types[] = { + [NFTA_FIB_DREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_FIB_RESULT] = { .type = NETLINK_TYPE_U32 }, + [NFTA_FIB_FLAGS] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLType nfnl_nft_expr_lookup_types[] = { + [NFTA_LOOKUP_SET] = { .type = NETLINK_TYPE_STRING }, + [NFTA_LOOKUP_SREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_LOOKUP_DREG] = { .type = NETLINK_TYPE_U32 }, + [NFTA_LOOKUP_FLAGS] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLType nfnl_nft_expr_masq_types[] = { + [NFTA_MASQ_FLAGS] = { .type = NETLINK_TYPE_U32 }, + [NFTA_MASQ_REG_PROTO_MIN] = { .type = NETLINK_TYPE_U32 }, + [NFTA_MASQ_REG_PROTO_MAX] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLTypeSystem nfnl_expr_data_type_systems[] = { + [NL_UNION_NFT_EXPR_DATA_BITWISE] = { .count = ELEMENTSOF(nfnl_nft_expr_bitwise_types), + .types = nfnl_nft_expr_bitwise_types }, + [NL_UNION_NFT_EXPR_DATA_CMP] = { .count = ELEMENTSOF(nfnl_nft_expr_cmp_types), + .types = nfnl_nft_expr_cmp_types }, + [NL_UNION_NFT_EXPR_DATA_FIB] = { .count = ELEMENTSOF(nfnl_nft_expr_fib_types), + .types = nfnl_nft_expr_fib_types }, + [NL_UNION_NFT_EXPR_DATA_LOOKUP] = { .count = ELEMENTSOF(nfnl_nft_expr_lookup_types), + .types = nfnl_nft_expr_lookup_types }, + [NL_UNION_NFT_EXPR_DATA_MASQ] = { .count = ELEMENTSOF(nfnl_nft_expr_masq_types), + .types = nfnl_nft_expr_masq_types }, + [NL_UNION_NFT_EXPR_DATA_META] = { .count = ELEMENTSOF(nfnl_nft_expr_meta_types), + .types = nfnl_nft_expr_meta_types }, + [NL_UNION_NFT_EXPR_DATA_NAT] = { .count = ELEMENTSOF(nfnl_nft_expr_nat_types), + .types = nfnl_nft_expr_nat_types }, + [NL_UNION_NFT_EXPR_DATA_PAYLOAD] = { .count = ELEMENTSOF(nfnl_nft_expr_payload_types), + .types = 
nfnl_nft_expr_payload_types }, +}; + +static const char* const nl_union_nft_expr_data_table[] = { + [NL_UNION_NFT_EXPR_DATA_BITWISE] = "bitwise", + [NL_UNION_NFT_EXPR_DATA_CMP] = "cmp", + [NL_UNION_NFT_EXPR_DATA_LOOKUP] = "lookup", + [NL_UNION_NFT_EXPR_DATA_META] = "meta", + [NL_UNION_NFT_EXPR_DATA_FIB] = "fib", + [NL_UNION_NFT_EXPR_DATA_MASQ] = "masq", + [NL_UNION_NFT_EXPR_DATA_NAT] = "nat", + [NL_UNION_NFT_EXPR_DATA_PAYLOAD] = "payload", +}; + +DEFINE_STRING_TABLE_LOOKUP(nl_union_nft_expr_data, NLUnionNFTExprData); + +static const NLTypeSystemUnion nfnl_nft_data_expr_type_system_union = { + .num = _NL_UNION_NFT_EXPR_DATA_MAX, + .lookup = nl_union_nft_expr_data_from_string, + .type_systems = nfnl_expr_data_type_systems, + .match_type = NL_MATCH_SIBLING, + .match = NFTA_EXPR_NAME, +}; + +static const NLType nfnl_nft_rule_expr_types[] = { + [NFTA_EXPR_NAME] = { .type = NETLINK_TYPE_STRING, .size = 16 }, + [NFTA_EXPR_DATA] = { .type = NETLINK_TYPE_UNION, + .type_system_union = &nfnl_nft_data_expr_type_system_union }, +}; + +static const NLTypeSystem nfnl_nft_rule_expr_type_system = { + .count = ELEMENTSOF(nfnl_nft_rule_expr_types), + .types = nfnl_nft_rule_expr_types, +}; + +static const NLType nfnl_nft_rule_types[] = { + [NFTA_RULE_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_RULE_CHAIN] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_RULE_EXPRESSIONS] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_rule_expr_type_system } +}; + +static const NLTypeSystem nfnl_nft_rule_type_system = { + .count = ELEMENTSOF(nfnl_nft_rule_types), + .types = nfnl_nft_rule_types, +}; + +static const NLType nfnl_nft_set_types[] = { + [NFTA_SET_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_SET_NAME] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_SET_FLAGS] = { .type = NETLINK_TYPE_U32 }, + [NFTA_SET_KEY_TYPE] = { .type = NETLINK_TYPE_U32 }, + 
[NFTA_SET_KEY_LEN] = { .type = NETLINK_TYPE_U32 }, + [NFTA_SET_DATA_TYPE] = { .type = NETLINK_TYPE_U32 }, + [NFTA_SET_DATA_LEN] = { .type = NETLINK_TYPE_U32 }, + [NFTA_SET_POLICY] = { .type = NETLINK_TYPE_U32 }, + [NFTA_SET_ID] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLTypeSystem nfnl_nft_set_type_system = { + .count = ELEMENTSOF(nfnl_nft_set_types), + .types = nfnl_nft_set_types, +}; + +static const NLType nfnl_nft_setelem_types[] = { + [NFTA_SET_ELEM_KEY] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system }, + [NFTA_SET_ELEM_DATA] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system }, + [NFTA_SET_ELEM_FLAGS] = { .type = NETLINK_TYPE_U32 }, +}; + +static const NLTypeSystem nfnl_nft_setelem_type_system = { + .count = ELEMENTSOF(nfnl_nft_setelem_types), + .types = nfnl_nft_setelem_types, +}; + +static const NLType nfnl_nft_setelem_list_types[] = { + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_SET_ELEM_LIST_SET] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_setelem_type_system }, +}; + +static const NLTypeSystem nfnl_nft_setelem_list_type_system = { + .count = ELEMENTSOF(nfnl_nft_setelem_list_types), + .types = nfnl_nft_setelem_list_types, +}; + +static const NLType nfnl_nft_msg_types [] = { + [NFT_MSG_DELTABLE] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_table_type_system, .size = sizeof(struct nfgenmsg) }, + [NFT_MSG_NEWTABLE] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_table_type_system, .size = sizeof(struct nfgenmsg) }, + [NFT_MSG_NEWCHAIN] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_chain_type_system, .size = sizeof(struct nfgenmsg) }, + [NFT_MSG_NEWRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_rule_type_system, .size = sizeof(struct nfgenmsg) }, + [NFT_MSG_NEWSET] = { .type 
= NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_set_type_system, .size = sizeof(struct nfgenmsg) }, + [NFT_MSG_NEWSETELEM] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_setelem_list_type_system, .size = sizeof(struct nfgenmsg) }, + [NFT_MSG_DELSETELEM] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_setelem_list_type_system, .size = sizeof(struct nfgenmsg) }, +}; + +static const NLTypeSystem nfnl_nft_msg_type_system = { + .count = ELEMENTSOF(nfnl_nft_msg_types), + .types = nfnl_nft_msg_types, +}; + +static const NLType nfnl_msg_batch_types [] = { + [NFNL_BATCH_GENID] = { .type = NETLINK_TYPE_U32 } +}; + +static const NLTypeSystem nfnl_msg_batch_type_system = { + .count = ELEMENTSOF(nfnl_msg_batch_types), + .types = nfnl_msg_batch_types, +}; + +static const NLType nfnl_types[] = { + [NLMSG_DONE] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = 0 }, + [NLMSG_ERROR] = { .type = NETLINK_TYPE_NESTED, .type_system = &error_type_system, .size = sizeof(struct nlmsgerr) }, + [NFNL_MSG_BATCH_BEGIN] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_msg_batch_type_system, .size = sizeof(struct nfgenmsg) }, + [NFNL_MSG_BATCH_END] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_msg_batch_type_system, .size = sizeof(struct nfgenmsg) }, + [NFNL_SUBSYS_NFTABLES] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_msg_type_system, .size = sizeof(struct nfgenmsg) }, +}; + +const NLTypeSystem nfnl_type_system_root = { + .count = ELEMENTSOF(nfnl_types), + .types = nfnl_types, +}; + /* Mainly used when sending message */ const NLTypeSystem genl_family_type_system_root = { .count = ELEMENTSOF(genl_families), @@ -1368,6 +1607,8 @@ const NLTypeSystem *type_system_get_root(int protocol) { switch (protocol) { case NETLINK_GENERIC: return &genl_type_system_root; + case NETLINK_NETFILTER: + return &nfnl_type_system_root; default: /* NETLINK_ROUTE: */ return &rtnl_type_system_root; } @@ -1378,9 +1619,12 @@ int 
type_system_root_get_type(sd_netlink *nl, const NLType **ret, uint16_t type) const NLType *nl_type; int r; - if (!nl || nl->protocol != NETLINK_GENERIC) + if (!nl) return type_system_get_type(&rtnl_type_system_root, ret, type); + if (nl->protocol != NETLINK_GENERIC) + return type_system_get_type(type_system_get_root(nl->protocol), ret, type); + r = nlmsg_type_to_genl_family(nl, type, &family); if (r < 0) return r; diff --git a/src/libsystemd/sd-netlink/netlink-types.h b/src/libsystemd/sd-netlink/netlink-types.h index b14e66fbb1..1e93ed78cf 100644 --- a/src/libsystemd/sd-netlink/netlink-types.h +++ b/src/libsystemd/sd-netlink/netlink-types.h @@ -21,6 +21,7 @@ enum { NETLINK_TYPE_NESTED, /* NLA_NESTED */ NETLINK_TYPE_UNION, NETLINK_TYPE_SOCKADDR, + NETLINK_TYPE_BINARY, }; typedef enum NLMatchType { @@ -117,3 +118,19 @@ typedef enum NLUnionTCAOptionData { const char *nl_union_tca_option_data_to_string(NLUnionTCAOptionData p) _const_; NLUnionTCAOptionData nl_union_tca_option_data_from_string(const char *p) _pure_; + +typedef enum NLUnionNFTExprData { + NL_UNION_NFT_EXPR_DATA_BITWISE, + NL_UNION_NFT_EXPR_DATA_CMP, + NL_UNION_NFT_EXPR_DATA_FIB, + NL_UNION_NFT_EXPR_DATA_LOOKUP, + NL_UNION_NFT_EXPR_DATA_PAYLOAD, + NL_UNION_NFT_EXPR_DATA_MASQ, + NL_UNION_NFT_EXPR_DATA_META, + NL_UNION_NFT_EXPR_DATA_NAT, + _NL_UNION_NFT_EXPR_DATA_MAX, + _NL_UNION_NFT_EXPR_DATA_INVALID = -1, +} NLUnionNFTExprData; + +const char *nl_union_nft_expr_data_to_string(NLUnionNFTExprData p) _const_; +NLUnionNFTExprData nl_union_nft_expr_data_from_string(const char *p) _pure_; From 347ea16797ee3962b745973f109951ab9fb1ce23 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jun 2020 14:42:31 +0200 Subject: [PATCH 07/11] sd-netlink: add nfnetlink helper routines add nfnetlink_nftables helper functions to: * open a new nfnetlink socket to kernel * add tables, chains, rules, sets and maps * delete/flush table * add and delete elements from sets/maps --- src/libsystemd/meson.build | 1 + 
src/libsystemd/sd-netlink/nfnl-message.c | 318 +++++++++++++++++++++++ src/systemd/sd-netlink.h | 26 ++ 3 files changed, 345 insertions(+) create mode 100644 src/libsystemd/sd-netlink/nfnl-message.c diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index 50716f7b94..d22a7754e2 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -81,6 +81,7 @@ libsystemd_sources = files(''' sd-netlink/netlink-types.h sd-netlink/netlink-util.c sd-netlink/netlink-util.h + sd-netlink/nfnl-message.c sd-netlink/rtnl-message.c sd-netlink/sd-netlink.c sd-network/network-util.c diff --git a/src/libsystemd/sd-netlink/nfnl-message.c b/src/libsystemd/sd-netlink/nfnl-message.c new file mode 100644 index 0000000000..d7bcbf8ba8 --- /dev/null +++ b/src/libsystemd/sd-netlink/nfnl-message.c @@ -0,0 +1,318 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "sd-netlink.h" + +#include "format-util.h" +#include "netlink-internal.h" +#include "netlink-types.h" +#include "netlink-util.h" +#include "socket-util.h" +#include "util.h" + +static int nft_message_new(sd_netlink *nfnl, sd_netlink_message **ret, int family, uint16_t type, uint16_t flags) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + struct nfgenmsg *nfh; + const NLType *nl_type; + size_t size; + int r; + + assert_return(nfnl, -EINVAL); + + r = type_system_root_get_type(nfnl, &nl_type, NFNL_SUBSYS_NFTABLES); + if (r < 0) + return r; + + if (type_get_type(nl_type) != NETLINK_TYPE_NESTED) + return -EINVAL; + + r = message_new_empty(nfnl, &m); + if (r < 0) + return r; + + size = NLMSG_SPACE(type_get_size(nl_type)); + + assert(size >= sizeof(struct nlmsghdr)); + m->hdr = malloc0(size); + if (!m->hdr) + return -ENOMEM; + + m->hdr->nlmsg_flags = NLM_F_REQUEST | flags; + + type_get_type_system(nl_type, &m->containers[0].type_system); + + r = type_system_get_type_system(m->containers[0].type_system, + 
&m->containers[0].type_system, + type); + if (r < 0) + return r; + + m->hdr->nlmsg_len = size; + m->hdr->nlmsg_type = NFNL_SUBSYS_NFTABLES << 8 | type; + + nfh = NLMSG_DATA(m->hdr); + nfh->nfgen_family = family; + nfh->version = NFNETLINK_V0; + nfh->res_id = nfnl->serial; + + *ret = TAKE_PTR(m); + return 0; +} + +static int sd_nfnl_message_batch(sd_netlink *nfnl, sd_netlink_message **ret, int v) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + struct nfgenmsg *nfh; + int r; + + r = message_new(nfnl, &m, v); + if (r < 0) + return r; + + nfh = NLMSG_DATA(m->hdr); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = NFNL_SUBSYS_NFTABLES; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_message_batch_begin(sd_netlink *nfnl, sd_netlink_message **ret) { + return sd_nfnl_message_batch(nfnl, ret, NFNL_MSG_BATCH_BEGIN); +} + +int sd_nfnl_message_batch_end(sd_netlink *nfnl, sd_netlink_message **ret) { + return sd_nfnl_message_batch(nfnl, ret, NFNL_MSG_BATCH_END); +} + +int sd_nfnl_nft_message_new_basechain(sd_netlink *nfnl, sd_netlink_message **ret, + int family, + const char *table, const char *chain, + const char *type, + uint8_t hook, int prio) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_NEWCHAIN, NLM_F_CREATE | NLM_F_ACK); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_CHAIN_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_CHAIN_NAME, chain); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_CHAIN_TYPE, type); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_CHAIN_HOOK); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_HOOK_HOOKNUM, htobe32(hook)); + if (r < 0) + goto cancel; + + r = sd_netlink_message_append_u32(m, NFTA_HOOK_PRIORITY, htobe32(prio)); + if (r < 0) + goto cancel; + + r = 
sd_netlink_message_close_container(m); + if (r < 0) + goto cancel; + + *ret = TAKE_PTR(m); + return 0; +cancel: + sd_netlink_message_cancel_array(m); + return r; +} + +int sd_nfnl_nft_message_del_table(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_DELTABLE, NLM_F_CREATE | NLM_F_ACK); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_TABLE_NAME, table); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_table(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, uint16_t flags) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_NEWTABLE, NLM_F_CREATE | flags); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_TABLE_NAME, table); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_rule(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *chain) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_NEWRULE, NLM_F_CREATE | NLM_F_ACK); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_RULE_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_RULE_CHAIN, chain); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_set(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *set_name, + uint32_t set_id, uint32_t klen) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_NEWSET, NLM_F_CREATE | NLM_F_ACK); + if (r < 0) + return r; + + r = 
sd_netlink_message_append_string(m, NFTA_SET_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_NAME, set_name); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_SET_ID, ++set_id); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_SET_KEY_LEN, htobe32(klen)); + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *set_name) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_NEWSETELEM, NLM_F_CREATE | NLM_F_ACK); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_SET, set_name); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_SET_ELEM_LIST_ELEMENTS); + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_del_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *set_name) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_message_new(nfnl, &m, family, NFT_MSG_DELSETELEM, NLM_F_ACK); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_SET, set_name); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_SET_ELEM_LIST_ELEMENTS); + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return r; +} + +static int sd_nfnl_add_data(sd_netlink_message *m, uint16_t attr, const void *data, uint32_t dlen) { + int r = sd_netlink_message_open_container(m, attr); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(m, NFTA_DATA_VALUE, 
data, dlen); + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); /* attr */ +} + +int sd_nfnl_nft_message_add_setelem(sd_netlink_message *m, + uint32_t num, + const void *key, uint32_t klen, + const void *data, uint32_t dlen) { + int r; + + r = sd_netlink_message_open_array(m, num); + if (r < 0) + return r; + + r = sd_nfnl_add_data(m, NFTA_SET_ELEM_KEY, key, klen); + if (r < 0) + goto cancel; + + if (data) { + r = sd_nfnl_add_data(m, NFTA_SET_ELEM_DATA, data, dlen); + if (r < 0) + goto cancel; + } + + return r; +cancel: + sd_netlink_message_cancel_array(m); + return r; +} + +int sd_nfnl_nft_message_add_setelem_end(sd_netlink_message *m) { + return sd_netlink_message_close_container(m); /* NFTA_SET_ELEM_LIST_ELEMENTS */ +} + +int sd_nfnl_socket_open(sd_netlink **ret) { + return netlink_open_family(ret, NETLINK_NETFILTER); +} diff --git a/src/systemd/sd-netlink.h b/src/systemd/sd-netlink.h index bf6d1e47ff..15fa84de28 100644 --- a/src/systemd/sd-netlink.h +++ b/src/systemd/sd-netlink.h @@ -219,6 +219,32 @@ int sd_rtnl_message_set_tclass_handle(sd_netlink_message *m, uint32_t handle); int sd_rtnl_message_new_mdb(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int mdb_ifindex); +/* nfnl */ +int sd_nfnl_socket_open(sd_netlink **nl); +int sd_nfnl_message_batch_begin(sd_netlink *nfnl, sd_netlink_message **ret); +int sd_nfnl_message_batch_end(sd_netlink *nfnl, sd_netlink_message **ret); +int sd_nfnl_nft_message_del_table(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table); +int sd_nfnl_nft_message_new_table(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, uint16_t nl_flags); +int sd_nfnl_nft_message_new_basechain(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *chain, + const char *type, uint8_t hook, int prio); +int sd_nfnl_nft_message_new_rule(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *chain); 
+int sd_nfnl_nft_message_new_set(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *set_name, + uint32_t setid, uint32_t klen); +int sd_nfnl_nft_message_new_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *set_name); +int sd_nfnl_nft_message_del_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *table, const char *set_name); +int sd_nfnl_nft_message_add_setelem(sd_netlink_message *m, + uint32_t num, + const void *key, uint32_t klen, + const void *data, uint32_t dlen); +int sd_nfnl_nft_message_add_setelem_end(sd_netlink_message *m); + /* genl */ int sd_genl_socket_open(sd_netlink **nl); int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **m); From 99c41c0de4e65bd881bccda3796481d3dc2007d2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jun 2020 14:58:41 +0200 Subject: [PATCH 08/11] sd-netlink: add sd_netlink_sendv nftables uses a transaction-based netlink model: one netlink write comes with multiple messages. A 'BEGIN' message to tell nf_tables/kernel that a new transaction starts. Then, one or more messages to add/delete tables/chains/rules etc. Lastly, an END message that commits all changes. This function will be used to send all the individual messages that should make up a single transaction as a single write.
--- src/libsystemd/sd-netlink/netlink-internal.h | 1 + src/libsystemd/sd-netlink/netlink-socket.c | 25 ++++++++++++++ src/libsystemd/sd-netlink/sd-netlink.c | 35 ++++++++++++++++++++ src/systemd/sd-netlink.h | 1 + 4 files changed, 62 insertions(+) diff --git a/src/libsystemd/sd-netlink/netlink-internal.h b/src/libsystemd/sd-netlink/netlink-internal.h index 1240f0d66d..2845700ffb 100644 --- a/src/libsystemd/sd-netlink/netlink-internal.h +++ b/src/libsystemd/sd-netlink/netlink-internal.h @@ -139,6 +139,7 @@ int socket_bind(sd_netlink *nl); int socket_broadcast_group_ref(sd_netlink *nl, unsigned group); int socket_broadcast_group_unref(sd_netlink *nl, unsigned group); int socket_write_message(sd_netlink *nl, sd_netlink_message *m); +int socket_writev_message(sd_netlink *nl, sd_netlink_message *m[], size_t msgcount); int socket_read_message(sd_netlink *nl); int rtnl_rqueue_make_room(sd_netlink *rtnl); diff --git a/src/libsystemd/sd-netlink/netlink-socket.c b/src/libsystemd/sd-netlink/netlink-socket.c index 228e38df90..a1a839f57a 100644 --- a/src/libsystemd/sd-netlink/netlink-socket.c +++ b/src/libsystemd/sd-netlink/netlink-socket.c @@ -238,6 +238,31 @@ int socket_write_message(sd_netlink *nl, sd_netlink_message *m) { return k; } +int socket_writev_message(sd_netlink *nl, sd_netlink_message *m[], size_t msgcount) { + _cleanup_free_ struct iovec *iovs = NULL; + ssize_t k; + size_t i; + + assert(nl); + assert(msgcount); + + iovs = new0(struct iovec, msgcount); + if (!iovs) + return -ENOMEM; + + for (i = 0; i < msgcount; i++) { + assert(m[i]->hdr != NULL); + assert(m[i]->hdr->nlmsg_len > 0); + iovs[i] = IOVEC_MAKE(m[i]->hdr, m[i]->hdr->nlmsg_len); + } + + k = writev(nl->fd, iovs, msgcount); + if (k < 0) + return -errno; + + return k; +} + static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_group, bool peek) { union sockaddr_union sender; CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control; diff --git 
a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c index 7801101807..5a8b4d9322 100644 --- a/src/libsystemd/sd-netlink/sd-netlink.c +++ b/src/libsystemd/sd-netlink/sd-netlink.c @@ -226,6 +226,41 @@ int sd_netlink_send(sd_netlink *nl, return 1; } +int sd_netlink_sendv(sd_netlink *nl, + sd_netlink_message *messages[], + size_t msgcount, + uint32_t **ret_serial) { + _cleanup_free_ uint32_t *serials = NULL; + unsigned i; + int r; + + assert_return(nl, -EINVAL); + assert_return(!rtnl_pid_changed(nl), -ECHILD); + assert_return(messages, -EINVAL); + + if (ret_serial) { + serials = new0(uint32_t, msgcount); + if (!serials) + return -ENOMEM; + } + + for (i = 0; i < msgcount; i++) { + assert_return(!messages[i]->sealed, -EPERM); + rtnl_seal_message(nl, messages[i]); + if (serials) + serials[i] = rtnl_message_get_serial(messages[i]); + } + + r = socket_writev_message(nl, messages, msgcount); + if (r < 0) + return r; + + if (ret_serial) + *ret_serial = TAKE_PTR(serials); + + return r; +} + int rtnl_rqueue_make_room(sd_netlink *rtnl) { assert(rtnl); diff --git a/src/systemd/sd-netlink.h b/src/systemd/sd-netlink.h index 15fa84de28..2b52c4ca88 100644 --- a/src/systemd/sd-netlink.h +++ b/src/systemd/sd-netlink.h @@ -60,6 +60,7 @@ sd_netlink *sd_netlink_ref(sd_netlink *nl); sd_netlink *sd_netlink_unref(sd_netlink *nl); int sd_netlink_send(sd_netlink *nl, sd_netlink_message *message, uint32_t *serial); +int sd_netlink_sendv(sd_netlink *nl, sd_netlink_message *messages[], size_t msgcnt, uint32_t **ret_serial); int sd_netlink_call_async(sd_netlink *nl, sd_netlink_slot **ret_slot, sd_netlink_message *message, sd_netlink_message_handler_t callback, sd_netlink_destroy_t destoy_callback, void *userdata, uint64_t usec, const char *description); From 4df42cd99d9a484d6045e2b26601f52538237e82 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Sep 2020 01:04:51 +0200 Subject: [PATCH 09/11] sd-netlink: add a read function Will be used by nftables 
nfnetlink backend. It sends a series of netlink messages that form a nftables update transaction. The transaction will then generate a series of ack messages (or an error). This function will be used to read these acks. --- src/libsystemd/sd-netlink/sd-netlink.c | 32 ++++++++++++++++++-------- src/systemd/sd-netlink.h | 1 + 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c index 5a8b4d9322..ceb8333cbe 100644 --- a/src/libsystemd/sd-netlink/sd-netlink.c +++ b/src/libsystemd/sd-netlink/sd-netlink.c @@ -621,21 +621,15 @@ int sd_netlink_call_async( return k; } -int sd_netlink_call(sd_netlink *rtnl, - sd_netlink_message *message, - uint64_t usec, - sd_netlink_message **ret) { +int sd_netlink_read(sd_netlink *rtnl, + uint32_t serial, + uint64_t usec, + sd_netlink_message **ret) { usec_t timeout; - uint32_t serial; int r; assert_return(rtnl, -EINVAL); assert_return(!rtnl_pid_changed(rtnl), -ECHILD); - assert_return(message, -EINVAL); - - r = sd_netlink_send(rtnl, message, &serial); - if (r < 0) - return r; timeout = calc_elapse(usec); @@ -705,6 +699,24 @@ int sd_netlink_call(sd_netlink *rtnl, } } +int sd_netlink_call(sd_netlink *rtnl, + sd_netlink_message *message, + uint64_t usec, + sd_netlink_message **ret) { + uint32_t serial; + int r; + + assert_return(rtnl, -EINVAL); + assert_return(!rtnl_pid_changed(rtnl), -ECHILD); + assert_return(message, -EINVAL); + + r = sd_netlink_send(rtnl, message, &serial); + if (r < 0) + return r; + + return sd_netlink_read(rtnl, serial, usec, ret); +} + int sd_netlink_get_events(const sd_netlink *rtnl) { assert_return(rtnl, -EINVAL); assert_return(!rtnl_pid_changed(rtnl), -ECHILD); diff --git a/src/systemd/sd-netlink.h b/src/systemd/sd-netlink.h index 2b52c4ca88..41a7d89b60 100644 --- a/src/systemd/sd-netlink.h +++ b/src/systemd/sd-netlink.h @@ -66,6 +66,7 @@ int sd_netlink_call_async(sd_netlink *nl, sd_netlink_slot **ret_slot, sd_netlink void 
*userdata, uint64_t usec, const char *description); int sd_netlink_call(sd_netlink *nl, sd_netlink_message *message, uint64_t timeout, sd_netlink_message **reply); +int sd_netlink_read(sd_netlink *nl, uint32_t serial, uint64_t timeout, sd_netlink_message **reply); int sd_netlink_get_events(const sd_netlink *nl); int sd_netlink_get_timeout(const sd_netlink *nl, uint64_t *timeout); From 715a70e7218710d6a6c033e9157bf97fdf5d8ede Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jun 2020 15:53:03 +0200 Subject: [PATCH 10/11] firewall-util: add nftables backend Idea is to use a static ruleset, added when the first attempt to add a masquerade or dnat rule is made. The alternative would be to add the ruleset when the init function is called. The disadvantage is that this enables connection tracking and NAT in the kernel (as the ruleset needs this to work), which comes with some overhead that might not be needed (no nspawn usage and no IPMasquerade option set). There is no additional dependency on the 'nft' userspace binary or other libraries. sd-netlinks nfnetlink backend is used to modify the nftables ruleset. The commit message/comments still use nft syntax since that is what users will see when they use the nft tool to list the ruleset. The added initial skeleton (added on first fw_add_masquerade/local_dnat call) looks like this: table ip io.systemd.nat { set masq_saddr { type ipv4_addr flags interval elements = { 192.168.59.160/28 } } map map_port_ipport { type inet_proto . inet_service : ipv4_addr . inet_service elements = { tcp . 2222 : 192.168.59.169 . 22 } } chain prerouting { type nat hook prerouting priority dstnat + 1; policy accept; fib daddr type local dnat ip addr . port to meta l4proto . th dport map @map_port_ipport } chain output { type nat hook output priority -99; policy accept; ip daddr != 127.0.0.0/8 oif "lo" dnat ip addr . port to meta l4proto . 
th dport map @map_port_ipport } chain postrouting { type nat hook postrouting priority srcnat + 1; policy accept; ip saddr @masq_saddr masquerade } } Next calls to fw_add_masquerade/add_local_dnat will then only add/delete the element/mapping to masq_saddr and map_port_ipport, i.e. the ruleset doesn't change -- only the set/map content does. Running test-firewall-util with this backend gives following output on a parallel 'nft monitor': $ nft monitor add table ip io.systemd.nat add chain ip io.systemd.nat prerouting { type nat hook prerouting priority dstnat + 1; policy accept; } add chain ip io.systemd.nat output { type nat hook output priority -99; policy accept; } add chain ip io.systemd.nat postrouting { type nat hook postrouting priority srcnat + 1; policy accept; } add set ip io.systemd.nat masq_saddr { type ipv4_addr; flags interval; } add map ip io.systemd.nat map_port_ipport { type inet_proto . inet_service : ipv4_addr . inet_service; } add rule ip io.systemd.nat prerouting fib daddr type local dnat ip addr . port to meta l4proto . th dport map @map_port_ipport add rule ip io.systemd.nat output ip daddr != 127.0.0.0/8 fib daddr type local dnat ip addr . port to meta l4proto . th dport map @map_port_ipport add rule ip io.systemd.nat postrouting ip saddr @masq_saddr masquerade add element ip io.systemd.nat masq_saddr { 10.1.2.3 } add element ip io.systemd.nat masq_saddr { 10.0.2.0/28 } delete element ip io.systemd.nat masq_saddr { 10.0.2.0/28 } delete element ip io.systemd.nat masq_saddr { 10.1.2.3 } add element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.4 . 815 } delete element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.4 . 815 } add element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.5 . 815 } delete element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.5 . 815 } CTRL-C Things not implemented/supported: 1. Change monitoring. 
The kernel allows userspace to learn about changes made by other clients (using nfnetlink notifications). It would be possible to detect when e.g. someone removes the systemd nat table. This would need more work. It's also not clear how to react to external changes -- it doesn't seem like a good idea to just auto-undo everything. 2. 'set masq_saddr' doesn't handle overlaps. Example: fw_add_masquerade(true, AF_INET, "10.0.0.0" , 16); fw_add_masquerade(true, AF_INET, "10.0.0.0" , 8); /* fails */ With the iptables backend the second call works, as it adds an independent iptables rule. With the nftables backend, the range 10.0.0.0-10.255.255.255 clashes with the existing range of 10.0.0.0-10.0.255.255 so 2nd add gets rejected by the kernel. This will generate an error message from networkd ("Could not enable IP masquerading: File exists"). To resolve this it would be necessary to keep track of the added elements and perform range merging when overlaps are detected. However, the add requests are done using the configured network on a device, so no overlaps should occur in normal setups. IPv6 support is added in an extra changeset. Fixes: #13307 --- src/nspawn/nspawn.c | 5 - src/shared/firewall-util-nft.c | 903 +++++++++++++++++++++++++++++ src/shared/firewall-util-private.h | 23 + src/shared/firewall-util.c | 29 +- src/shared/meson.build | 1 + src/test/meson.build | 3 +- 6 files changed, 952 insertions(+), 12 deletions(-) create mode 100644 src/shared/firewall-util-nft.c diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index a6f64e8415..7e4989f489 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1770,11 +1770,6 @@ static int verify_arguments(void) { if (arg_expose_ports && !arg_private_network) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking."); -#if ! 
HAVE_LIBIPTC - if (arg_expose_ports) - return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--port= is not supported, compiled without libiptc support."); -#endif - if (arg_caps_ambient) { if (arg_caps_ambient == (uint64_t)-1) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "AmbientCapability= does not support the value all."); diff --git a/src/shared/firewall-util-nft.c b/src/shared/firewall-util-nft.c new file mode 100644 index 0000000000..6c72956e04 --- /dev/null +++ b/src/shared/firewall-util-nft.c @@ -0,0 +1,903 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "firewall-util.h" +#include "firewall-util-private.h" +#include "in-addr-util.h" +#include "macro.h" +#include "socket-util.h" +#include "time-util.h" + +#define NFT_SYSTEMD_DNAT_MAP_NAME "map_port_ipport" +#define NFT_SYSTEMD_TABLE_NAME "io.systemd.nat" +#define NFT_SYSTEMD_MASQ_SET_NAME "masq_saddr" + +#define NFNL_DEFAULT_TIMEOUT_USECS (1ULL * USEC_PER_SEC) + +#define UDP_DPORT_OFFSET 2 + +static int nfnl_netlink_sendv(sd_netlink *nfnl, + sd_netlink_message *messages[], + size_t msgcount) { + _cleanup_free_ uint32_t *serial = NULL; + size_t i; + int r; + + assert(msgcount > 0); + + r = sd_netlink_sendv(nfnl, messages, msgcount, &serial); + if (r < 0) + return r; + + r = 0; + for (i = 1; i < msgcount - 1; i++) { + int tmp; + + /* If message is an error, this returns embedded errno */ + tmp = sd_netlink_read(nfnl, serial[i], NFNL_DEFAULT_TIMEOUT_USECS, NULL); + if (tmp < 0 && r == 0) + r = tmp; + } + + return r; +} + +static int nfnl_add_open_expr_container(sd_netlink_message *m, const char *name) { + int r; + + r = sd_netlink_message_open_array(m, NFTA_LIST_ELEM); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_EXPR_NAME, name); + if (r < 0) + return r; + + return sd_netlink_message_open_container_union(m, NFTA_EXPR_DATA, 
name); +} + +static int nfnl_add_expr_fib(sd_netlink_message *m, uint32_t nft_fib_flags, + enum nft_fib_result result, + enum nft_registers dreg) { + int r; + + r = nfnl_add_open_expr_container(m, "fib"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_FIB_FLAGS, htobe32(nft_fib_flags)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_FIB_RESULT, htobe32(result)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_FIB_DREG, htobe32(dreg)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_meta(sd_netlink_message *m, enum nft_meta_keys key, + enum nft_registers dreg) { + int r; + + r = nfnl_add_open_expr_container(m, "meta"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_META_KEY, htobe32(key)); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_META_DREG, htobe32(dreg)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_payload(sd_netlink_message *m, enum nft_payload_bases pb, + uint32_t offset, uint32_t len, enum nft_registers dreg) { + int r; + + r = nfnl_add_open_expr_container(m, "payload"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_PAYLOAD_DREG, htobe32(dreg)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_PAYLOAD_BASE, htobe32(pb)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_PAYLOAD_OFFSET, htobe32(offset)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_PAYLOAD_LEN, htobe32(len)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + return 
sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_lookup_set_data(sd_netlink_message *m, const char *set_name, + enum nft_registers sreg) { + int r; + + r = nfnl_add_open_expr_container(m, "lookup"); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_LOOKUP_SET, set_name); + if (r < 0) + return r; + + return sd_netlink_message_append_u32(m, NFTA_LOOKUP_SREG, htobe32(sreg)); +} + +static int nfnl_add_expr_lookup_set(sd_netlink_message *m, const char *set_name, + enum nft_registers sreg) { + int r; + + r = nfnl_add_expr_lookup_set_data(m, set_name, sreg); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_lookup_map(sd_netlink_message *m, const char *set_name, + enum nft_registers sreg, enum nft_registers dreg) { + int r; + + r = nfnl_add_expr_lookup_set_data(m, set_name, sreg); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_LOOKUP_DREG, htobe32(dreg)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_data(sd_netlink_message *m, int attr, const void *data, uint32_t dlen) { + int r; + + r = sd_netlink_message_open_container(m, attr); + if (r < 0) + return r; + r = sd_netlink_message_append_data(m, NFTA_DATA_VALUE, data, dlen); + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); /* attr */ +} + +static int nfnl_add_expr_cmp_data(sd_netlink_message *m, const void *data, uint32_t dlen) { + return nfnl_add_expr_data(m, NFTA_CMP_DATA, data, dlen); +} + +static int nfnl_add_expr_cmp(sd_netlink_message *m, enum nft_cmp_ops cmp_op, + enum nft_registers sreg, const void *data, uint32_t dlen) { + int r; + + r = 
nfnl_add_open_expr_container(m, "cmp"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_CMP_OP, htobe32(cmp_op)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_CMP_SREG, htobe32(sreg)); + if (r < 0) + return r; + + r = nfnl_add_expr_cmp_data(m, data, dlen); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_bitwise(sd_netlink_message *m, + enum nft_registers sreg, + enum nft_registers dreg, + const void *and, + const void *xor, uint32_t len) { + int r; + + r = nfnl_add_open_expr_container(m, "bitwise"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_BITWISE_SREG, htobe32(sreg)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_BITWISE_DREG, htobe32(dreg)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_BITWISE_LEN, htobe32(len)); + if (r < 0) + return r; + + r = nfnl_add_expr_data(m, NFTA_BITWISE_MASK, and, len); + if (r < 0) + return r; + + r = nfnl_add_expr_data(m, NFTA_BITWISE_XOR, xor, len); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */ + if (r < 0) + return r; + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +static int nfnl_add_expr_dnat(sd_netlink_message *m, + int family, + enum nft_registers areg, + enum nft_registers preg) { + int r; + + r = nfnl_add_open_expr_container(m, "nat"); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_NAT_TYPE, htobe32(NFT_NAT_DNAT)); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_NAT_FAMILY, htobe32(family)); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_NAT_REG_ADDR_MIN, htobe32(areg)); + if (r < 0) + return r; + r = sd_netlink_message_append_u32(m, NFTA_NAT_REG_PROTO_MIN, htobe32(preg)); + if (r < 0) + return 
r; + r = sd_netlink_message_close_container(m); + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); +} + +static int nfnl_add_expr_masq(sd_netlink_message *m) { + int r; + + r = sd_netlink_message_open_array(m, NFTA_LIST_ELEM); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_EXPR_NAME, "masq"); + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */ +} + +/* -t nat -A POSTROUTING -p protocol -s source/pflen -o out_interface -d destionation/pflen -j MASQUERADE */ +static int sd_nfnl_message_new_masq_rule(sd_netlink *nfnl, sd_netlink_message **ret, int family, + const char *chain) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_nft_message_new_rule(nfnl, &m, family, NFT_SYSTEMD_TABLE_NAME, chain); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_RULE_EXPRESSIONS); + if (r < 0) + return r; + + /* 1st statement: ip saddr @masq_saddr. Place iph->saddr in reg1. */ + r = nfnl_add_expr_payload(m, NFT_PAYLOAD_NETWORK_HEADER, offsetof(struct iphdr, saddr), + sizeof(uint32_t), NFT_REG32_01); + if (r < 0) + return r; + + /* 1st statement: use reg1 content to make lookup in @masq_saddr set. */ + r = nfnl_add_expr_lookup_set(m, NFT_SYSTEMD_MASQ_SET_NAME, NFT_REG32_01); + if (r < 0) + return r; + + /* 2nd statement: masq. Only executed by kernel if the previous lookup was successful. 
*/ + r = nfnl_add_expr_masq(m); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_RULE_EXPRESSIONS */ + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return 0; +} + +/* -t nat -A PREROUTING -p protocol --dport local_port -i in_interface -s source/pflen -d destionation/pflen -j DNAT --to-destination remote_addr:remote_port */ +static int sd_nfnl_message_new_dnat_rule_pre(sd_netlink *nfnl, sd_netlink_message **ret, int family, + const char *chain) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + enum nft_registers proto_reg; + uint32_t local = RTN_LOCAL; + int r; + + r = sd_nfnl_nft_message_new_rule(nfnl, &m, family, NFT_SYSTEMD_TABLE_NAME, chain); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_RULE_EXPRESSIONS); + if (r < 0) + return r; + + /* 1st statement: fib daddr type local */ + r = nfnl_add_expr_fib(m, NFTA_FIB_F_DADDR, NFT_FIB_RESULT_ADDRTYPE, NFT_REG32_01); + if (r < 0) + return r; + + /* 1st statement (cont.): compare RTN_LOCAL */ + r = nfnl_add_expr_cmp(m, NFT_CMP_EQ, NFT_REG32_01, &local, sizeof(local)); + if (r < 0) + return r; + + /* 2nd statement: lookup local port in map, fetch address:dport to map to */ + r = nfnl_add_expr_meta(m, NFT_META_L4PROTO, NFT_REG32_01); + if (r < 0) + return r; + + r = nfnl_add_expr_payload(m, NFT_PAYLOAD_TRANSPORT_HEADER, UDP_DPORT_OFFSET, + sizeof(uint16_t), NFT_REG32_02); + if (r < 0) + return r; + + /* 3rd statement: lookup 'l4proto . dport', e.g. 'tcp . 22' as key and + * store address and port for the dnat mapping in REG1/REG2. 
+ */ + r = nfnl_add_expr_lookup_map(m, NFT_SYSTEMD_DNAT_MAP_NAME, NFT_REG32_01, NFT_REG32_01); + if (r < 0) + return r; + + proto_reg = NFT_REG32_02; + r = nfnl_add_expr_dnat(m, family, NFT_REG32_01, proto_reg); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_RULE_EXPRESSIONS */ + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return 0; +} + +static int sd_nfnl_message_new_dnat_rule_out(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *chain) { + static const uint32_t zero, one = 1; + + uint32_t lonet = htobe32(0x7F000000), lomask = htobe32(0xff000000); + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + enum nft_registers proto_reg; + int r; + + r = sd_nfnl_nft_message_new_rule(nfnl, &m, family, NFT_SYSTEMD_TABLE_NAME, chain); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_RULE_EXPRESSIONS); + if (r < 0) + return r; + + /* 1st statement: exclude 127.0.0.1/8: ip daddr != 127.0.0.1/8 */ + r = nfnl_add_expr_payload(m, NFT_PAYLOAD_NETWORK_HEADER, offsetof(struct iphdr, daddr), + sizeof(uint32_t), NFT_REG32_01); + if (r < 0) + return r; + + /* 1st statement (cont.): bitops/prefix */ + r = nfnl_add_expr_bitwise(m, NFT_REG32_01, NFT_REG32_01, &lomask, &zero, sizeof(lomask)); + if (r < 0) + return r; + + /* 1st statement (cont.): compare reg1 with 127/8 */ + r = nfnl_add_expr_cmp(m, NFT_CMP_NEQ, NFT_REG32_01, &lonet, sizeof(lonet)); + if (r < 0) + return r; + + /* 2nd statement: meta oif lo */ + r = nfnl_add_expr_meta(m, NFT_META_OIF, NFT_REG32_01); + if (r < 0) + return r; + + /* 2nd statement (cont.): compare to lo ifindex (1) */ + r = nfnl_add_expr_cmp(m, NFT_CMP_EQ, NFT_REG32_01, &one, sizeof(one)); + if (r < 0) + return r; + + /* 3rd statement: meta l4proto . th dport dnat ip . 
port to map @map_port_ipport */ + r = nfnl_add_expr_meta(m, NFT_META_L4PROTO, NFT_REG32_01); + if (r < 0) + return r; + + /* 3rd statement (cont): store the port number in reg2 */ + r = nfnl_add_expr_payload(m, NFT_PAYLOAD_TRANSPORT_HEADER, UDP_DPORT_OFFSET, + sizeof(uint16_t), NFT_REG32_02); + if (r < 0) + return r; + + /* 3rd statement (cont): use reg1 and reg2 and retrieve + * the new destination ip and port number. + * + * reg1 and reg2 are clobbered and will then contain the new + * address/port number. + */ + r = nfnl_add_expr_lookup_map(m, NFT_SYSTEMD_DNAT_MAP_NAME, NFT_REG32_01, NFT_REG32_01); + if (r < 0) + return r; + + /* 4th statement: dnat connection to address/port retrieved by the + * preceeding expression. */ + proto_reg = NFT_REG32_02; + r = nfnl_add_expr_dnat(m, family, NFT_REG32_01, proto_reg); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); /* NFTA_RULE_EXPRESSIONS */ + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return 0; +} + +static int nft_new_set(struct sd_netlink *nfnl, + sd_netlink_message **ret, + int family, const char *set_name, + uint32_t set_id, + uint32_t flags, uint32_t type, uint32_t klen) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_nft_message_new_set(nfnl, &m, family, NFT_SYSTEMD_TABLE_NAME, set_name, set_id, klen); + if (r < 0) + return r; + + if (flags != 0) { + r = sd_netlink_message_append_u32(m, NFTA_SET_FLAGS, htobe32(flags)); + if (r < 0) + return r; + } + + r = sd_netlink_message_append_u32(m, NFTA_SET_KEY_TYPE, htobe32(type)); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +static int nft_new_map(struct sd_netlink *nfnl, + sd_netlink_message **ret, + int family, const char *set_name, uint32_t set_id, + uint32_t flags, uint32_t type, uint32_t klen, uint32_t dtype, uint32_t dlen) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = nft_new_set(nfnl, &m, family, set_name, set_id, flags | 
NFT_SET_MAP, type, klen); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_SET_DATA_TYPE, htobe32(dtype)); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_SET_DATA_LEN, htobe32(dlen)); + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return 0; +} + +static int nft_add_element(sd_netlink *nfnl, sd_netlink_message **ret, + int family, const char *set_name, + const void *key, uint32_t klen, + const void *data, uint32_t dlen) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + /* + * Ideally there would be an API that provides: + * + * 1) a init function to add the main ruleset skeleton + * 2) a function that populates the sets with all known address/port pairs to s/dnat for + * 3) a function that can remove address/port pairs again. + * + * At this time, the existing API is used which is built on a + * 'add/delete a rule' paradigm. + * + * This replicated here and each element gets added to the set + * one-by-one. 
+ */ + r = sd_nfnl_nft_message_new_setelems_begin(nfnl, &m, family, NFT_SYSTEMD_TABLE_NAME, set_name); + if (r < 0) + return r; + + r = sd_nfnl_nft_message_add_setelem(m, 0, key, klen, data, dlen); + if (r < 0) + return r; + + /* could theoretically append more set elements to add here */ + r = sd_nfnl_nft_message_add_setelem_end(m); + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return 0; +} + +static int nft_del_element(sd_netlink *nfnl, + sd_netlink_message **ret, int family, const char *set_name, + const void *key, uint32_t klen, + const void *data, uint32_t dlen) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_nft_message_del_setelems_begin(nfnl, &m, family, NFT_SYSTEMD_TABLE_NAME, set_name); + if (r < 0) + return r; + + r = sd_nfnl_nft_message_add_setelem(m, 0, key, klen, data, dlen); + if (r < 0) + return r; + + r = sd_nfnl_nft_message_add_setelem_end(m); + if (r < 0) + return r; + *ret = TAKE_PTR(m); + return 0; +} + +/* This is needed so 'nft' userspace tool can properly format the contents + * of the set/map when someone uses 'nft' to inspect their content. + * + * The values cannot be changed, they are part of the nft tool type identifier ABI. 
+ */ +#define TYPE_BITS 6 + +enum nft_key_types { + TYPE_IPADDR = 7, + TYPE_IP6ADDR = 8, + TYPE_INET_PROTOCOL = 12, + TYPE_INET_SERVICE = 13, +}; + +static uint32_t concat_types2(enum nft_key_types a, enum nft_key_types b) { + uint32_t type = (uint32_t)a; + + type <<= TYPE_BITS; + type |= (uint32_t)b; + + return type; +} + +/* enough space to hold netlink messages for table skeleton */ +#define NFT_INIT_MSGS 16 +static int fw_nftables_init_family(sd_netlink *nfnl, int family) { + sd_netlink_message *batch[NFT_INIT_MSGS] = {}; + size_t ip_type_size = sizeof(uint32_t); + int ip_type = TYPE_IPADDR, r; + size_t msgcnt = 0, i; + uint32_t set_id = 0; + + r = sd_nfnl_message_batch_begin(nfnl, &batch[msgcnt]); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + /* Set F_EXCL so table add fails if the table already exists. */ + r = sd_nfnl_nft_message_new_table(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME, NLM_F_EXCL | NLM_F_ACK); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + + r = sd_nfnl_nft_message_new_basechain(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME, + "prerouting", "nat", + NF_INET_PRE_ROUTING, NF_IP_PRI_NAT_DST + 1); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + r = sd_nfnl_nft_message_new_basechain(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME, + "output", "nat", + NF_INET_LOCAL_OUT, NF_IP_PRI_NAT_DST + 1); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + r = sd_nfnl_nft_message_new_basechain(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME, + "postrouting", "nat", + NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 1); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + /* set to store ip address ranges we should masquerade for */ + r = nft_new_set(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_MASQ_SET_NAME, ++set_id, NFT_SET_INTERVAL, ip_type, ip_type_size); + if (r < 0) + goto 
out_unref; + + /* + * map to store ip address:port pair to dnat to. elements in concatenation + * are rounded up to 4 bytes. + * + * Example: ip protocol . tcp daddr is sizeof(uint32_t) + sizeof(uint32_t), not + * sizeof(uint8_t) + sizeof(uint16_t). + */ + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + r = nft_new_map(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_DNAT_MAP_NAME, ++set_id, 0, + concat_types2(TYPE_INET_PROTOCOL, TYPE_INET_SERVICE), sizeof(uint32_t) * 2, + concat_types2(ip_type, TYPE_INET_SERVICE), ip_type_size + sizeof(uint32_t)); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + r = sd_nfnl_message_new_dnat_rule_pre(nfnl, &batch[msgcnt], family, "prerouting"); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + r = sd_nfnl_message_new_dnat_rule_out(nfnl, &batch[msgcnt], family, "output"); + if (r < 0) + goto out_unref; + + msgcnt++; + r = sd_nfnl_message_new_masq_rule(nfnl, &batch[msgcnt], family, "postrouting"); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt < NFT_INIT_MSGS); + r = sd_nfnl_message_batch_end(nfnl, &batch[msgcnt]); + if (r < 0) + goto out_unref; + + msgcnt++; + assert(msgcnt <= NFT_INIT_MSGS); + r = nfnl_netlink_sendv(nfnl, batch, msgcnt); + if (r == -EEXIST) + r = 0; + +out_unref: + for (i = 0; i < msgcnt; i++) + sd_netlink_message_unref(batch[i]); + + return r; +} + +int fw_nftables_init(FirewallContext *ctx) { + _cleanup_(sd_netlink_unrefp) sd_netlink *nfnl = NULL; + int r; + + r = sd_nfnl_socket_open(&nfnl); + if (r < 0) + return r; + + r = fw_nftables_init_family(nfnl, AF_INET); + if (r < 0) + return r; + + ctx->nfnl = TAKE_PTR(nfnl); + return 0; +} + +void fw_nftables_exit(FirewallContext *ctx) { + ctx->nfnl = sd_netlink_unref(ctx->nfnl); +} + +static int nft_message_add_setelem_iprange(sd_netlink_message *m, + const union in_addr_union *source, + unsigned int prefixlen) { + uint32_t mask, start, end; + unsigned int nplen; + int r; + + assert(prefixlen <= 32); 
+        nplen = 32 - prefixlen;
+
+        /* Build the network mask from the number of host bits. fw_nftables_add_masquerade()
+         * rejects a prefix length of 0 before calling this helper, so for that caller
+         * nplen stays below 32 and the shifts below are well defined. */
+        mask = (1U << nplen) - 1U;
+        mask = htobe32(~mask);
+        start = source->in.s_addr & mask;
+
+        /* Element 0: first address of the interval. */
+        r = sd_nfnl_nft_message_add_setelem(m, 0, &start, sizeof(start), NULL, 0);
+        if (r < 0)
+                return r;
+
+        r = sd_nfnl_nft_message_add_setelem_end(m);
+        if (r < 0)
+                return r;
+
+        /* Element 1: first address past the interval. If the addition wraps around
+         * the 32 bit address space, 0 is used as the end marker. */
+        end = be32toh(start) + (1U << nplen);
+        if (end < be32toh(start))
+                end = 0U;
+        end = htobe32(end);
+
+        r = sd_nfnl_nft_message_add_setelem(m, 1, &end, sizeof(end), NULL, 0);
+        if (r < 0)
+                return r;
+
+        r = sd_netlink_message_append_u32(m, NFTA_SET_ELEM_FLAGS, htobe32(NFT_SET_ELEM_INTERVAL_END));
+        if (r < 0)
+                return r;
+
+        r = sd_nfnl_nft_message_add_setelem_end(m);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+/* batch begin + set element message + batch end */
+#define NFT_MASQ_MSGS 3
+
+/* Adds (add == true) or removes (add == false) the range source/source_prefixlen
+ * to/from the masq_saddr set. The request is sent as one nfnetlink batch made of
+ * a batch-begin message, the set element message and a batch-end message.
+ *
+ * Returns 0 on success, -EINVAL for a NULL source or a prefix length of 0,
+ * -EAFNOSUPPORT for any family other than AF_INET (only the IPv4 member of the
+ * address union is read and only the AF_INET table is set up by
+ * fw_nftables_init()), or a negative errno from message construction or from
+ * sending the batch. */
+int fw_nftables_add_masquerade(
+                FirewallContext *ctx,
+                bool add,
+                int af,
+                const union in_addr_union *source,
+                unsigned int source_prefixlen) {
+        sd_netlink_message *transaction[NFT_MASQ_MSGS] = {};
+        size_t tsize;
+        int r;
+
+        if (!source || source_prefixlen == 0)
+                return -EINVAL;
+
+        /* Same restriction as fw_nftables_add_local_dnat(). */
+        if (af != AF_INET)
+                return -EAFNOSUPPORT;
+
+        r = sd_nfnl_message_batch_begin(ctx->nfnl, &transaction[0]);
+        if (r < 0)
+                return r;
+        tsize = 1;
+
+        if (add)
+                r = sd_nfnl_nft_message_new_setelems_begin(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_TABLE_NAME, NFT_SYSTEMD_MASQ_SET_NAME);
+        else
+                r = sd_nfnl_nft_message_del_setelems_begin(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_TABLE_NAME, NFT_SYSTEMD_MASQ_SET_NAME);
+        if (r < 0)
+                goto out_unref;
+
+        /* The set element message exists from here on: count it right away so the
+         * cleanup loop below releases it on every later error path. */
+        ++tsize;
+
+        r = nft_message_add_setelem_iprange(transaction[tsize - 1], source, source_prefixlen);
+        if (r < 0)
+                goto out_unref;
+
+        assert(tsize < NFT_MASQ_MSGS);
+        r = sd_nfnl_message_batch_end(ctx->nfnl, &transaction[tsize]);
+        if (r < 0)
+                goto out_unref; /* do not leak the messages created so far */
+        ++tsize;
+
+        r = nfnl_netlink_sendv(ctx->nfnl, transaction, tsize);
+
+out_unref:
+        while (tsize > 0)
+                sd_netlink_message_unref(transaction[--tsize]);
+        return r < 0 ? r : 0;
+}
+
+/* batch begin + optional removal of the previous mapping + add/delete + batch end */
+#define NFT_DNAT_MSGS 4
+
+/* Adds (add == true) or removes (add == false) the mapping
+ * "protocol . local_port : remote . remote_port" in the map_port_ipport map.
+ * When adding with a previous_remote whose address differs from remote, the
+ * mapping for the previous address is deleted in the same batch.
+ *
+ * Returns 0 on success, -EAFNOSUPPORT for any family other than AF_INET,
+ * -EPROTONOSUPPORT for protocols other than TCP and UDP, -EINVAL for a zero
+ * local or remote port, -EOPNOTSUPP for a NULL remote, or a negative errno from
+ * message construction or from sending the batch. */
+int fw_nftables_add_local_dnat(
+                FirewallContext *ctx,
+                bool add,
+                int af,
+                int protocol,
+                uint16_t local_port,
+                const union in_addr_union *remote,
+                uint16_t remote_port,
+                const union in_addr_union *previous_remote) {
+        uint32_t data[2], key[2];
+        sd_netlink_message *transaction[NFT_DNAT_MSGS] = {};
+        size_t tsize;
+        int r;
+
+        assert(add || !previous_remote);
+
+        if (af != AF_INET)
+                return -EAFNOSUPPORT;
+
+        if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
+                return -EPROTONOSUPPORT;
+
+        if (local_port <= 0)
+                return -EINVAL;
+
+        key[0] = protocol;
+        key[1] = htobe16(local_port);
+
+        if (!remote)
+                return -EOPNOTSUPP;
+
+        if (remote_port <= 0)
+                return -EINVAL;
+
+        data[1] = htobe16(remote_port);
+
+        r = sd_nfnl_message_batch_begin(ctx->nfnl, &transaction[0]);
+        if (r < 0)
+                return r;
+
+        tsize = 1;
+        /* If a previous remote is set, remove its entry */
+        if (add && previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+                data[0] = previous_remote->in.s_addr;
+
+                r = nft_del_element(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_DNAT_MAP_NAME, key, sizeof(key), data, sizeof(data));
+                if (r < 0)
+                        goto out_unref;
+
+                tsize++;
+        }
+
+        data[0] = remote->in.s_addr;
+
+        assert(tsize < NFT_DNAT_MSGS);
+        if (add)
+                r = nft_add_element(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_DNAT_MAP_NAME, key, sizeof(key), data, sizeof(data));
+        else
+                r = nft_del_element(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_DNAT_MAP_NAME, key, sizeof(key), data, sizeof(data));
+        /* A failed construction leaves this slot empty; bail out instead of
+         * counting it and handing a NULL message to the send path. */
+        if (r < 0)
+                goto out_unref;
+
+        tsize++;
+        assert(tsize < NFT_DNAT_MSGS);
+
+        r = sd_nfnl_message_batch_end(ctx->nfnl, &transaction[tsize]);
+        if (r < 0)
+                goto out_unref;
+
+        tsize++;
+        assert(tsize <= NFT_DNAT_MSGS);
+        r = nfnl_netlink_sendv(ctx->nfnl, transaction, tsize);
+
+out_unref:
+        while (tsize > 0)
+                sd_netlink_message_unref(transaction[--tsize]);
+        return r < 0 ?
r : 0; +} diff --git a/src/shared/firewall-util-private.h b/src/shared/firewall-util-private.h index 7f9efbc513..f034af180e 100644 --- a/src/shared/firewall-util-private.h +++ b/src/shared/firewall-util-private.h @@ -5,18 +5,41 @@ #include #include "in-addr-util.h" +#include "sd-netlink.h" enum FirewallBackend { FW_BACKEND_NONE, #if HAVE_LIBIPTC FW_BACKEND_IPTABLES, #endif + FW_BACKEND_NFTABLES, }; struct FirewallContext { enum FirewallBackend firewall_backend; + sd_netlink *nfnl; }; +int fw_nftables_init(FirewallContext *ctx); +void fw_nftables_exit(FirewallContext *ctx); + +int fw_nftables_add_masquerade( + FirewallContext *ctx, + bool add, + int af, + const union in_addr_union *source, + unsigned source_prefixlen); + +int fw_nftables_add_local_dnat( + FirewallContext *ctx, + bool add, + int af, + int protocol, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote); + #if HAVE_LIBIPTC int fw_iptables_add_masquerade( diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c index edfe5787b1..3bed941127 100644 --- a/src/shared/firewall-util.c +++ b/src/shared/firewall-util.c @@ -8,7 +8,9 @@ #include "firewall-util.h" #include "firewall-util-private.h" -static enum FirewallBackend firewall_backend_probe(void) { +static enum FirewallBackend firewall_backend_probe(FirewallContext *ctx) { + if (fw_nftables_init(ctx) == 0) + return FW_BACKEND_NFTABLES; #if HAVE_LIBIPTC return FW_BACKEND_IPTABLES; #else @@ -23,11 +25,24 @@ int fw_ctx_new(FirewallContext **ret) { if (!ctx) return -ENOMEM; - *ret = TAKE_PTR(ctx); - return 0; + /* could probe here. However, this means that we will load + * iptable_nat or nf_tables, both will enable connection tracking. + * + * Alternative would be to probe here but only call + * fw_ctx_new when nspawn/networkd know they will call + * fw_add_masquerade/local_dnat later anyway. 
+ */ + *ret = TAKE_PTR(ctx); + return 0; } FirewallContext *fw_ctx_free(FirewallContext *ctx) { + if (!ctx) + return NULL; + + if (ctx->firewall_backend == FW_BACKEND_NFTABLES) + fw_nftables_exit(ctx); + return mfree(ctx); } @@ -48,7 +63,7 @@ int fw_add_masquerade( ctx = *fw_ctx; if (ctx->firewall_backend == FW_BACKEND_NONE) - ctx->firewall_backend = firewall_backend_probe(); + ctx->firewall_backend = firewall_backend_probe(ctx); switch (ctx->firewall_backend) { case FW_BACKEND_NONE: @@ -57,6 +72,8 @@ int fw_add_masquerade( case FW_BACKEND_IPTABLES: return fw_iptables_add_masquerade(add, af, source, source_prefixlen); #endif + case FW_BACKEND_NFTABLES: + return fw_nftables_add_masquerade(ctx, add, af, source, source_prefixlen); } return -EOPNOTSUPP; @@ -81,11 +98,13 @@ int fw_add_local_dnat( ctx = *fw_ctx; if (ctx->firewall_backend == FW_BACKEND_NONE) - ctx->firewall_backend = firewall_backend_probe(); + ctx->firewall_backend = firewall_backend_probe(ctx); switch (ctx->firewall_backend) { case FW_BACKEND_NONE: return -EOPNOTSUPP; + case FW_BACKEND_NFTABLES: + return fw_nftables_add_local_dnat(ctx, add, af, protocol, local_port, remote, remote_port, previous_remote); #if HAVE_LIBIPTC case FW_BACKEND_IPTABLES: return fw_iptables_add_local_dnat(add, af, protocol, local_port, remote, remote_port, previous_remote); diff --git a/src/shared/meson.build b/src/shared/meson.build index de916e0a4c..18a22a6389 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -102,6 +102,7 @@ shared_sources = files(''' fileio-label.c fileio-label.h firewall-util.c + firewall-util-nft.c firewall-util.h firewall-util-private.h format-table.c diff --git a/src/test/meson.build b/src/test/meson.build index 3afe5d58cb..e016f40ab1 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -568,8 +568,7 @@ tests += [ [['src/test/test-firewall-util.c'], [libshared], - [], - 'HAVE_LIBIPTC'], + []], [['src/test/test-netlink-manual.c'], [], From 
bc5a9b82d5e8649dcd753cd5f2a90eeb07526563 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Nov 2020 11:15:31 +0100 Subject: [PATCH 11/11] firewall-util-nft: attempt table recreation when add operation fails When someone runs 'nft flush ruleset' in the same net namespace this will also tear down the systemd nat table. Unlike iptables -t nat -F, which will remove all rules added by the systemd iptables backend, iptables has builtin chains that cannot be deleted. IOW, the next add operation will 'just work'. In the nftables case however, the entire table gets removed. When the systemd nat table is removed by an external entity next attempt to add a set element will yield -ENOENT. If this happens, recreate the table, and, if successful, re-do the add operation. Note that this doesn't protect against external sabotage such as a running 'while true; nft flush ruleset;done'. However, there is nothing that could be done short of extending the kernel to allow tables to be "frozen" or otherwise tied to a process such as systemd-networkd. --- src/shared/firewall-util-nft.c | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/shared/firewall-util-nft.c b/src/shared/firewall-util-nft.c index 6c72956e04..69bc233164 100644 --- a/src/shared/firewall-util-nft.c +++ b/src/shared/firewall-util-nft.c @@ -778,6 +778,40 @@ static int nft_message_add_setelem_iprange(sd_netlink_message *m, return 0; } +/* When someone runs 'nft flush ruleset' in the same net namespace + * this will also tear down the systemd nat table. + * + * Unlike iptables -t nat -F (which will remove all rules added by the + * systemd iptables backend), iptables has builtin chains that cannot be + * deleted -- the next add operation will 'just work'. + * + * In the nftables case, everything gets removed. The next add operation + * will yield -ENOENT. + * + * If we see -ENOENT on add, replay the initial table setup. + * If that works, re-do the add operation. 
+ * + * Note that this doesn't protect against external sabotage such as a + * 'while true; nft flush ruleset;done'. There is nothing that could be + * done about that short of extending the kernel to allow tables to be + * owned by systemd-networkd and making them non-deletable except by + * the 'owning process'. + */ +static int fw_nftables_recreate_table(sd_netlink *nfnl, int af, sd_netlink_message **old, size_t size) { + int r = fw_nftables_init_family(nfnl, af); + + if (r != 0) + return r; + + while (size > 0) { + size_t i = --size; + + old[i] = sd_netlink_message_unref(old[i]); + } + + return 0; +} + #define NFT_MASQ_MSGS 3 int fw_nftables_add_masquerade( @@ -787,12 +821,14 @@ int fw_nftables_add_masquerade( const union in_addr_union *source, unsigned int source_prefixlen) { sd_netlink_message *transaction[NFT_MASQ_MSGS] = {}; + bool retry = true; size_t tsize; int r; if (!source || source_prefixlen == 0) return -EINVAL; +again: r = sd_nfnl_message_batch_begin(ctx->nfnl, &transaction[0]); if (r < 0) return r; @@ -817,6 +853,14 @@ int fw_nftables_add_masquerade( ++tsize; r = nfnl_netlink_sendv(ctx->nfnl, transaction, tsize); + if (retry && r == -ENOENT) { + int tmp = fw_nftables_recreate_table(ctx->nfnl, af, transaction, tsize); + if (tmp == 0) { + retry = false; + goto again; + } + } + out_unref: while (tsize > 0) sd_netlink_message_unref(transaction[--tsize]); @@ -836,6 +880,7 @@ int fw_nftables_add_local_dnat( const union in_addr_union *previous_remote) { uint32_t data[2], key[2]; sd_netlink_message *transaction[NFT_DNAT_MSGS] = {}; + bool retry = true; size_t tsize; int r; @@ -850,6 +895,7 @@ int fw_nftables_add_local_dnat( if (local_port <= 0) return -EINVAL; +again: key[0] = protocol; key[1] = htobe16(local_port); @@ -896,6 +942,15 @@ int fw_nftables_add_local_dnat( assert(tsize <= NFT_DNAT_MSGS); r = nfnl_netlink_sendv(ctx->nfnl, transaction, tsize); + if (retry && r == -ENOENT) { + int tmp = fw_nftables_recreate_table(ctx->nfnl, af, transaction, 
tsize); + + if (tmp == 0) { + retry = false; + goto again; + } + } + out_unref: while (tsize > 0) sd_netlink_message_unref(transaction[--tsize]);