Merge pull request #17026 from fw-strlen/nft_16

add networkd/nspawn nftables backend
This commit is contained in:
Lennart Poettering 2020-12-16 19:18:22 +01:00 committed by GitHub
commit a8af734e75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 4173 additions and 377 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _NFNETLINK_H
#define _NFNETLINK_H
#include <linux/types.h>
#include <linux/netfilter/nfnetlink_compat.h>
/* nfnetlink group identifiers. Each enumerator is shadowed by a macro of the
 * same name, which makes every individual group visible to the preprocessor. */
enum nfnetlink_groups {
	NFNLGRP_NONE,
#define NFNLGRP_NONE			NFNLGRP_NONE
	NFNLGRP_CONNTRACK_NEW,
#define NFNLGRP_CONNTRACK_NEW		NFNLGRP_CONNTRACK_NEW
	NFNLGRP_CONNTRACK_UPDATE,
#define NFNLGRP_CONNTRACK_UPDATE	NFNLGRP_CONNTRACK_UPDATE
	NFNLGRP_CONNTRACK_DESTROY,
#define NFNLGRP_CONNTRACK_DESTROY	NFNLGRP_CONNTRACK_DESTROY
	NFNLGRP_CONNTRACK_EXP_NEW,
#define NFNLGRP_CONNTRACK_EXP_NEW	NFNLGRP_CONNTRACK_EXP_NEW
	NFNLGRP_CONNTRACK_EXP_UPDATE,
#define NFNLGRP_CONNTRACK_EXP_UPDATE	NFNLGRP_CONNTRACK_EXP_UPDATE
	NFNLGRP_CONNTRACK_EXP_DESTROY,
#define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
	NFNLGRP_NFTABLES,
#define NFNLGRP_NFTABLES		NFNLGRP_NFTABLES
	NFNLGRP_ACCT_QUOTA,
#define NFNLGRP_ACCT_QUOTA		NFNLGRP_ACCT_QUOTA
	NFNLGRP_NFTRACE,
#define NFNLGRP_NFTRACE			NFNLGRP_NFTRACE
	__NFNLGRP_MAX,
};

/* Highest valid group number (the sentinel above is one past it). */
#define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
/*
 * General form of address family dependent message.
 *
 * Three fields packed into four bytes: two single-byte fields followed by a
 * 16-bit field that is declared big-endian.
 */
struct nfgenmsg {
	__u8	nfgen_family;	/* AF_xxx */
	__u8	version;	/* nfnetlink version */
	__be16	res_id;		/* resource id */
};

/* The only nfnetlink version number defined here. */
#define NFNETLINK_V0	0
/* netfilter netlink message types are split in two pieces:
 * 8 bit subsystem (upper byte), 8 bit operation (lower byte).
 *
 * The argument is parenthesized so that a compound expression such as
 * `a | b` is masked as a whole; without that, `&` would bind to the last
 * operand only.
 */
#define NFNL_SUBSYS_ID(x)	(((x) & 0xff00) >> 8)
#define NFNL_MSG_TYPE(x)	((x) & 0x00ff)
/* Subsystem identifiers (the upper byte of an nfnetlink message type).
 *
 * No enum here: an enum would break the __stringify() trick of
 * MODULE_ALIAS_NFNL_SUBSYS(). */
#define NFNL_SUBSYS_NONE		 0
#define NFNL_SUBSYS_CTNETLINK		 1
#define NFNL_SUBSYS_CTNETLINK_EXP	 2
#define NFNL_SUBSYS_QUEUE		 3
#define NFNL_SUBSYS_ULOG		 4
#define NFNL_SUBSYS_OSF			 5
#define NFNL_SUBSYS_IPSET		 6
#define NFNL_SUBSYS_ACCT		 7
#define NFNL_SUBSYS_CTNETLINK_TIMEOUT	 8
#define NFNL_SUBSYS_CTHELPER		 9
#define NFNL_SUBSYS_NFTABLES		10
#define NFNL_SUBSYS_NFT_COMPAT		11
#define NFNL_SUBSYS_COUNT		12
/* Reserved control nfnetlink messages.
 *
 * The END value is wrapped in parentheses so the macro expands to a single
 * operand wherever it is used (e.g. `2 * NFNL_MSG_BATCH_END`). */
#define NFNL_MSG_BATCH_BEGIN		NLMSG_MIN_TYPE
#define NFNL_MSG_BATCH_END		(NLMSG_MIN_TYPE + 1)
/**
 * enum nfnl_batch_attributes - nfnetlink batch netlink attributes
 *
 * @NFNL_BATCH_UNSPEC: placeholder occupying value 0
 * @NFNL_BATCH_GENID: generation ID for this changeset (NLA_U32)
 */
enum nfnl_batch_attributes {
	NFNL_BATCH_UNSPEC,
	NFNL_BATCH_GENID,
	__NFNL_BATCH_MAX
};

/* Highest defined batch attribute number. */
#define NFNL_BATCH_MAX	(__NFNL_BATCH_MAX - 1)
#endif /* _NFNETLINK_H */

View File

@ -81,6 +81,7 @@ libsystemd_sources = files('''
sd-netlink/netlink-types.h
sd-netlink/netlink-util.c
sd-netlink/netlink-util.h
sd-netlink/nfnl-message.c
sd-netlink/rtnl-message.c
sd-netlink/sd-netlink.c
sd-network/network-util.c

View File

@ -139,6 +139,7 @@ int socket_bind(sd_netlink *nl);
int socket_broadcast_group_ref(sd_netlink *nl, unsigned group);
int socket_broadcast_group_unref(sd_netlink *nl, unsigned group);
int socket_write_message(sd_netlink *nl, sd_netlink_message *m);
int socket_writev_message(sd_netlink *nl, sd_netlink_message *m[], size_t msgcount);
int socket_read_message(sd_netlink *nl);
int rtnl_rqueue_make_room(sd_netlink *rtnl);

View File

@ -238,6 +238,31 @@ int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
return k;
}
/* Sends msgcount netlink messages with a single writev() call, using one
 * iovec per message that covers hdr->nlmsg_len bytes starting at the header.
 *
 * Returns the byte count reported by writev() on success, -ENOMEM if the
 * iovec array cannot be allocated, or -errno if writev() fails. */
int socket_writev_message(sd_netlink *nl, sd_netlink_message *m[], size_t msgcount) {
        _cleanup_free_ struct iovec *iovs = NULL;
        ssize_t k;
        size_t i;

        assert(nl);
        assert(m); /* dereferenced in the loop below, so check it like nl */
        assert(msgcount);

        iovs = new0(struct iovec, msgcount);
        if (!iovs)
                return -ENOMEM;

        for (i = 0; i < msgcount; i++) {
                assert(m[i]);
                assert(m[i]->hdr != NULL);
                assert(m[i]->hdr->nlmsg_len > 0);

                iovs[i] = IOVEC_MAKE(m[i]->hdr, m[i]->hdr->nlmsg_len);
        }

        k = writev(nl->fd, iovs, msgcount);
        if (k < 0)
                return -errno;

        return k;
}
static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_group, bool peek) {
union sockaddr_union sender;
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control;

View File

@ -19,6 +19,8 @@
#include <linux/if_macsec.h>
#include <linux/if_tunnel.h>
#include <linux/l2tp.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/nexthop.h>
#include <linux/nl80211.h>
#include <linux/pkt_sched.h>
@ -1312,6 +1314,243 @@ static const NLType genl_families[] = {
[SD_GENL_NL80211] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_cmds_type_system },
};
static const NLType nfnl_nft_table_types[] = {
[NFTA_TABLE_NAME] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NETLINK_TYPE_U32 },
};
static const NLTypeSystem nfnl_nft_table_type_system = {
.count = ELEMENTSOF(nfnl_nft_table_types),
.types = nfnl_nft_table_types,
};
static const NLType nfnl_nft_chain_hook_types[] = {
[NFTA_HOOK_HOOKNUM] = { .type = NETLINK_TYPE_U32 },
[NFTA_HOOK_PRIORITY] = { .type = NETLINK_TYPE_U32 },
[NFTA_HOOK_DEV] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ - 1 },
};
static const NLTypeSystem nfnl_nft_chain_hook_type_system = {
.count = ELEMENTSOF(nfnl_nft_chain_hook_types),
.types = nfnl_nft_chain_hook_types,
};
static const NLType nfnl_nft_chain_types[] = {
[NFTA_CHAIN_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_CHAIN_NAME] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_chain_hook_type_system },
[NFTA_CHAIN_TYPE] = { .type = NETLINK_TYPE_STRING, .size = 16 },
[NFTA_CHAIN_FLAGS] = { .type = NETLINK_TYPE_U32 },
};
static const NLTypeSystem nfnl_nft_chain_type_system = {
.count = ELEMENTSOF(nfnl_nft_chain_types),
.types = nfnl_nft_chain_types,
};
/* Attributes of the "meta" expression. */
static const NLType nfnl_nft_expr_meta_types[] = {
        [NFTA_META_DREG] = { .type = NETLINK_TYPE_U32 },
        [NFTA_META_KEY]  = { .type = NETLINK_TYPE_U32 },
        [NFTA_META_SREG] = { .type = NETLINK_TYPE_U32 },
};
/* Attributes of the "payload" expression. */
static const NLType nfnl_nft_expr_payload_types[] = {
        [NFTA_PAYLOAD_DREG]   = { .type = NETLINK_TYPE_U32 },
        [NFTA_PAYLOAD_BASE]   = { .type = NETLINK_TYPE_U32 },
        [NFTA_PAYLOAD_OFFSET] = { .type = NETLINK_TYPE_U32 },
        [NFTA_PAYLOAD_LEN]    = { .type = NETLINK_TYPE_U32 },
};
/* Attributes of the "nat" expression. */
static const NLType nfnl_nft_expr_nat_types[] = {
        [NFTA_NAT_TYPE]          = { .type = NETLINK_TYPE_U32 },
        [NFTA_NAT_FAMILY]        = { .type = NETLINK_TYPE_U32 },
        [NFTA_NAT_REG_ADDR_MIN]  = { .type = NETLINK_TYPE_U32 },
        [NFTA_NAT_REG_ADDR_MAX]  = { .type = NETLINK_TYPE_U32 },
        [NFTA_NAT_REG_PROTO_MIN] = { .type = NETLINK_TYPE_U32 },
        [NFTA_NAT_REG_PROTO_MAX] = { .type = NETLINK_TYPE_U32 },
        [NFTA_NAT_FLAGS]         = { .type = NETLINK_TYPE_U32 },
};
static const NLType nfnl_nft_data_types[] = {
[NFTA_DATA_VALUE] = { .type = NETLINK_TYPE_BINARY },
};
static const NLTypeSystem nfnl_nft_data_type_system = {
.count = ELEMENTSOF(nfnl_nft_data_types),
.types = nfnl_nft_data_types,
};
/* Attributes of the "bitwise" expression; mask and xor are nested data values. */
static const NLType nfnl_nft_expr_bitwise_types[] = {
        [NFTA_BITWISE_SREG] = { .type = NETLINK_TYPE_U32 },
        [NFTA_BITWISE_DREG] = { .type = NETLINK_TYPE_U32 },
        [NFTA_BITWISE_LEN]  = { .type = NETLINK_TYPE_U32 },
        [NFTA_BITWISE_MASK] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system },
        [NFTA_BITWISE_XOR]  = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system },
};
/* Attributes of the "cmp" expression; the comparison operand is a nested data value. */
static const NLType nfnl_nft_expr_cmp_types[] = {
        [NFTA_CMP_SREG] = { .type = NETLINK_TYPE_U32 },
        [NFTA_CMP_OP]   = { .type = NETLINK_TYPE_U32 },
        [NFTA_CMP_DATA] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system },
};
/* Attributes of the "fib" expression. */
static const NLType nfnl_nft_expr_fib_types[] = {
        [NFTA_FIB_DREG]   = { .type = NETLINK_TYPE_U32 },
        [NFTA_FIB_RESULT] = { .type = NETLINK_TYPE_U32 },
        [NFTA_FIB_FLAGS]  = { .type = NETLINK_TYPE_U32 },
};
/* Attributes of the "lookup" expression. No size limit is declared for the set name. */
static const NLType nfnl_nft_expr_lookup_types[] = {
        [NFTA_LOOKUP_SET]   = { .type = NETLINK_TYPE_STRING },
        [NFTA_LOOKUP_SREG]  = { .type = NETLINK_TYPE_U32 },
        [NFTA_LOOKUP_DREG]  = { .type = NETLINK_TYPE_U32 },
        [NFTA_LOOKUP_FLAGS] = { .type = NETLINK_TYPE_U32 },
};
/* Attributes of the "masq" expression. */
static const NLType nfnl_nft_expr_masq_types[] = {
        [NFTA_MASQ_FLAGS]         = { .type = NETLINK_TYPE_U32 },
        [NFTA_MASQ_REG_PROTO_MIN] = { .type = NETLINK_TYPE_U32 },
        [NFTA_MASQ_REG_PROTO_MAX] = { .type = NETLINK_TYPE_U32 },
};
/* One attribute type system per supported expression kind, indexed by NLUnionNFTExprData. */
static const NLTypeSystem nfnl_expr_data_type_systems[] = {
        [NL_UNION_NFT_EXPR_DATA_BITWISE] = { .types = nfnl_nft_expr_bitwise_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_bitwise_types) },
        [NL_UNION_NFT_EXPR_DATA_CMP]     = { .types = nfnl_nft_expr_cmp_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_cmp_types) },
        [NL_UNION_NFT_EXPR_DATA_FIB]     = { .types = nfnl_nft_expr_fib_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_fib_types) },
        [NL_UNION_NFT_EXPR_DATA_LOOKUP]  = { .types = nfnl_nft_expr_lookup_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_lookup_types) },
        [NL_UNION_NFT_EXPR_DATA_MASQ]    = { .types = nfnl_nft_expr_masq_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_masq_types) },
        [NL_UNION_NFT_EXPR_DATA_META]    = { .types = nfnl_nft_expr_meta_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_meta_types) },
        [NL_UNION_NFT_EXPR_DATA_NAT]     = { .types = nfnl_nft_expr_nat_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_nat_types) },
        [NL_UNION_NFT_EXPR_DATA_PAYLOAD] = { .types = nfnl_nft_expr_payload_types,
                                             .count = ELEMENTSOF(nfnl_nft_expr_payload_types) },
};
/* Expression name strings keyed by NLUnionNFTExprData; the lookup helpers are
 * generated from this table by DEFINE_STRING_TABLE_LOOKUP() below. */
static const char* const nl_union_nft_expr_data_table[] = {
        [NL_UNION_NFT_EXPR_DATA_BITWISE] = "bitwise",
        [NL_UNION_NFT_EXPR_DATA_CMP]     = "cmp",
        [NL_UNION_NFT_EXPR_DATA_FIB]     = "fib",
        [NL_UNION_NFT_EXPR_DATA_LOOKUP]  = "lookup",
        [NL_UNION_NFT_EXPR_DATA_PAYLOAD] = "payload",
        [NL_UNION_NFT_EXPR_DATA_MASQ]    = "masq",
        [NL_UNION_NFT_EXPR_DATA_META]    = "meta",
        [NL_UNION_NFT_EXPR_DATA_NAT]     = "nat",
};

DEFINE_STRING_TABLE_LOOKUP(nl_union_nft_expr_data, NLUnionNFTExprData);
/* Union describing NFTA_EXPR_DATA: the concrete type system is chosen from the
 * string carried by the sibling attribute NFTA_EXPR_NAME. */
static const NLTypeSystemUnion nfnl_nft_data_expr_type_system_union = {
        .match_type   = NL_MATCH_SIBLING,
        .match        = NFTA_EXPR_NAME,
        .lookup       = nl_union_nft_expr_data_from_string,
        .type_systems = nfnl_expr_data_type_systems,
        .num          = _NL_UNION_NFT_EXPR_DATA_MAX,
};
static const NLType nfnl_nft_rule_expr_types[] = {
[NFTA_EXPR_NAME] = { .type = NETLINK_TYPE_STRING, .size = 16 },
[NFTA_EXPR_DATA] = { .type = NETLINK_TYPE_UNION,
.type_system_union = &nfnl_nft_data_expr_type_system_union },
};
static const NLTypeSystem nfnl_nft_rule_expr_type_system = {
.count = ELEMENTSOF(nfnl_nft_rule_expr_types),
.types = nfnl_nft_rule_expr_types,
};
static const NLType nfnl_nft_rule_types[] = {
[NFTA_RULE_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_RULE_CHAIN] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_RULE_EXPRESSIONS] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_rule_expr_type_system }
};
static const NLTypeSystem nfnl_nft_rule_type_system = {
.count = ELEMENTSOF(nfnl_nft_rule_types),
.types = nfnl_nft_rule_types,
};
static const NLType nfnl_nft_set_types[] = {
[NFTA_SET_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_NAME] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_FLAGS] = { .type = NETLINK_TYPE_U32 },
[NFTA_SET_KEY_TYPE] = { .type = NETLINK_TYPE_U32 },
[NFTA_SET_KEY_LEN] = { .type = NETLINK_TYPE_U32 },
[NFTA_SET_DATA_TYPE] = { .type = NETLINK_TYPE_U32 },
[NFTA_SET_DATA_LEN] = { .type = NETLINK_TYPE_U32 },
[NFTA_SET_POLICY] = { .type = NETLINK_TYPE_U32 },
[NFTA_SET_ID] = { .type = NETLINK_TYPE_U32 },
};
static const NLTypeSystem nfnl_nft_set_type_system = {
.count = ELEMENTSOF(nfnl_nft_set_types),
.types = nfnl_nft_set_types,
};
static const NLType nfnl_nft_setelem_types[] = {
[NFTA_SET_ELEM_KEY] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system },
[NFTA_SET_ELEM_DATA] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_data_type_system },
[NFTA_SET_ELEM_FLAGS] = { .type = NETLINK_TYPE_U32 },
};
static const NLTypeSystem nfnl_nft_setelem_type_system = {
.count = ELEMENTSOF(nfnl_nft_setelem_types),
.types = nfnl_nft_setelem_types,
};
static const NLType nfnl_nft_setelem_list_types[] = {
[NFTA_SET_ELEM_LIST_TABLE] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_SET] = { .type = NETLINK_TYPE_STRING, .size = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_setelem_type_system },
};
static const NLTypeSystem nfnl_nft_setelem_list_type_system = {
.count = ELEMENTSOF(nfnl_nft_setelem_list_types),
.types = nfnl_nft_setelem_list_types,
};
static const NLType nfnl_nft_msg_types [] = {
[NFT_MSG_DELTABLE] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_table_type_system, .size = sizeof(struct nfgenmsg) },
[NFT_MSG_NEWTABLE] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_table_type_system, .size = sizeof(struct nfgenmsg) },
[NFT_MSG_NEWCHAIN] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_chain_type_system, .size = sizeof(struct nfgenmsg) },
[NFT_MSG_NEWRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_rule_type_system, .size = sizeof(struct nfgenmsg) },
[NFT_MSG_NEWSET] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_set_type_system, .size = sizeof(struct nfgenmsg) },
[NFT_MSG_NEWSETELEM] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_setelem_list_type_system, .size = sizeof(struct nfgenmsg) },
[NFT_MSG_DELSETELEM] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_setelem_list_type_system, .size = sizeof(struct nfgenmsg) },
};
static const NLTypeSystem nfnl_nft_msg_type_system = {
.count = ELEMENTSOF(nfnl_nft_msg_types),
.types = nfnl_nft_msg_types,
};
static const NLType nfnl_msg_batch_types [] = {
[NFNL_BATCH_GENID] = { .type = NETLINK_TYPE_U32 }
};
static const NLTypeSystem nfnl_msg_batch_type_system = {
.count = ELEMENTSOF(nfnl_msg_batch_types),
.types = nfnl_msg_batch_types,
};
static const NLType nfnl_types[] = {
[NLMSG_DONE] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = 0 },
[NLMSG_ERROR] = { .type = NETLINK_TYPE_NESTED, .type_system = &error_type_system, .size = sizeof(struct nlmsgerr) },
[NFNL_MSG_BATCH_BEGIN] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_msg_batch_type_system, .size = sizeof(struct nfgenmsg) },
[NFNL_MSG_BATCH_END] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_msg_batch_type_system, .size = sizeof(struct nfgenmsg) },
[NFNL_SUBSYS_NFTABLES] = { .type = NETLINK_TYPE_NESTED, .type_system = &nfnl_nft_msg_type_system, .size = sizeof(struct nfgenmsg) },
};
const NLTypeSystem nfnl_type_system_root = {
.count = ELEMENTSOF(nfnl_types),
.types = nfnl_types,
};
/* Mainly used when sending message */
const NLTypeSystem genl_family_type_system_root = {
.count = ELEMENTSOF(genl_families),
@ -1368,6 +1607,8 @@ const NLTypeSystem *type_system_get_root(int protocol) {
switch (protocol) {
case NETLINK_GENERIC:
return &genl_type_system_root;
case NETLINK_NETFILTER:
return &nfnl_type_system_root;
default: /* NETLINK_ROUTE: */
return &rtnl_type_system_root;
}
@ -1378,9 +1619,12 @@ int type_system_root_get_type(sd_netlink *nl, const NLType **ret, uint16_t type)
const NLType *nl_type;
int r;
if (!nl || nl->protocol != NETLINK_GENERIC)
if (!nl)
return type_system_get_type(&rtnl_type_system_root, ret, type);
if (nl->protocol != NETLINK_GENERIC)
return type_system_get_type(type_system_get_root(nl->protocol), ret, type);
r = nlmsg_type_to_genl_family(nl, type, &family);
if (r < 0)
return r;

View File

@ -21,6 +21,7 @@ enum {
NETLINK_TYPE_NESTED, /* NLA_NESTED */
NETLINK_TYPE_UNION,
NETLINK_TYPE_SOCKADDR,
NETLINK_TYPE_BINARY,
};
typedef enum NLMatchType {
@ -117,3 +118,19 @@ typedef enum NLUnionTCAOptionData {
const char *nl_union_tca_option_data_to_string(NLUnionTCAOptionData p) _const_;
NLUnionTCAOptionData nl_union_tca_option_data_from_string(const char *p) _pure_;
/* Supported nftables expression kinds. The numeric values index the
 * per-expression tables, so the declaration order must stay as it is. */
typedef enum NLUnionNFTExprData {
        NL_UNION_NFT_EXPR_DATA_BITWISE,
        NL_UNION_NFT_EXPR_DATA_CMP,
        NL_UNION_NFT_EXPR_DATA_FIB,
        NL_UNION_NFT_EXPR_DATA_LOOKUP,
        NL_UNION_NFT_EXPR_DATA_PAYLOAD,
        NL_UNION_NFT_EXPR_DATA_MASQ,
        NL_UNION_NFT_EXPR_DATA_META,
        NL_UNION_NFT_EXPR_DATA_NAT,
        _NL_UNION_NFT_EXPR_DATA_MAX,            /* number of kinds above */
        _NL_UNION_NFT_EXPR_DATA_INVALID = -1,
} NLUnionNFTExprData;
const char *nl_union_nft_expr_data_to_string(NLUnionNFTExprData p) _const_;
NLUnionNFTExprData nl_union_nft_expr_data_from_string(const char *p) _pure_;

View File

@ -0,0 +1,318 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <netinet/in.h>
#include <linux/if_addrlabel.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/nexthop.h>
#include <stdbool.h>
#include <unistd.h>
#include "sd-netlink.h"
#include "format-util.h"
#include "netlink-internal.h"
#include "netlink-types.h"
#include "netlink-util.h"
#include "socket-util.h"
#include "util.h"
/* Allocates a new nftables message of NFT_MSG_* type `type`.
 *
 * The netlink header is followed by a zero-initialized payload sized after the
 * NFNL_SUBSYS_NFTABLES root entry, which holds a struct nfgenmsg carrying
 * `family`. NLM_F_REQUEST is always set in addition to `flags`, and the
 * top-level attribute type system is the one registered for `type`.
 *
 * Returns 0 and stores the message in *ret, or a negative errno-style value. */
static int nft_message_new(sd_netlink *nfnl, sd_netlink_message **ret, int family, uint16_t type, uint16_t flags) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        const NLType *subsys_type;
        struct nfgenmsg *nfh;
        size_t len;
        int r;

        assert_return(nfnl, -EINVAL);

        r = type_system_root_get_type(nfnl, &subsys_type, NFNL_SUBSYS_NFTABLES);
        if (r < 0)
                return r;

        if (type_get_type(subsys_type) != NETLINK_TYPE_NESTED)
                return -EINVAL;

        r = message_new_empty(nfnl, &msg);
        if (r < 0)
                return r;

        len = NLMSG_SPACE(type_get_size(subsys_type));
        assert(len >= sizeof(struct nlmsghdr));

        msg->hdr = malloc0(len);
        if (!msg->hdr)
                return -ENOMEM;

        msg->hdr->nlmsg_flags = NLM_F_REQUEST | flags;

        /* Start from the subsystem's type system, then narrow it to the one of the message type */
        type_get_type_system(subsys_type, &msg->containers[0].type_system);
        r = type_system_get_type_system(msg->containers[0].type_system,
                                        &msg->containers[0].type_system,
                                        type);
        if (r < 0)
                return r;

        msg->hdr->nlmsg_len = len;
        /* subsystem in the upper byte, operation in the lower byte */
        msg->hdr->nlmsg_type = (NFNL_SUBSYS_NFTABLES << 8) | type;

        nfh = NLMSG_DATA(msg->hdr);
        nfh->nfgen_family = family;
        nfh->version = NFNETLINK_V0;
        /* NOTE(review): res_id is declared __be16, yet the serial is stored without
         * any conversion — confirm this is intended. */
        nfh->res_id = nfnl->serial;

        *ret = TAKE_PTR(msg);
        return 0;
}
/* Creates a batch control message of type `v` (batch begin or batch end) and
 * fills in its struct nfgenmsg: AF_UNSPEC family, version 0 and the nftables
 * subsystem id as resource id.
 *
 * Returns the non-negative result of message_new() on success, or a negative
 * errno-style value. */
static int sd_nfnl_message_batch(sd_netlink *nfnl, sd_netlink_message **ret, int v) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        struct nfgenmsg *nfh;
        int r;

        r = message_new(nfnl, &msg, v);
        if (r < 0)
                return r;

        nfh = NLMSG_DATA(msg->hdr);
        nfh->nfgen_family = AF_UNSPEC;
        nfh->version = NFNETLINK_V0;
        /* NOTE(review): res_id is declared __be16, but the subsystem id is stored in
         * host byte order — confirm this is intended. */
        nfh->res_id = NFNL_SUBSYS_NFTABLES;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Creates the control message that opens an nfnetlink batch. */
int sd_nfnl_message_batch_begin(sd_netlink *nfnl, sd_netlink_message **ret) {
        return sd_nfnl_message_batch(nfnl, ret, NFNL_MSG_BATCH_BEGIN);
}
/* Creates the control message that closes an nfnetlink batch. */
int sd_nfnl_message_batch_end(sd_netlink *nfnl, sd_netlink_message **ret) {
        return sd_nfnl_message_batch(nfnl, ret, NFNL_MSG_BATCH_END);
}
/* Builds an NFT_MSG_NEWCHAIN request (NLM_F_CREATE | NLM_F_ACK) for a base
 * chain: table name, chain name and chain type, followed by a nested
 * NFTA_CHAIN_HOOK container with the hook number and priority, both stored in
 * big-endian byte order.
 *
 * Returns 0 and stores the message in *ret, or a negative errno-style value.
 *
 * There is no explicit rollback on failure: the partially built message is
 * released by its cleanup handler on every early return, and the only thing
 * opened here is a container, so sd_netlink_message_cancel_array() does not
 * apply. */
int sd_nfnl_nft_message_new_basechain(sd_netlink *nfnl, sd_netlink_message **ret,
                                      int family,
                                      const char *table, const char *chain,
                                      const char *type,
                                      uint8_t hook, int prio) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
        int r;

        r = nft_message_new(nfnl, &m, family, NFT_MSG_NEWCHAIN, NLM_F_CREATE | NLM_F_ACK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(m, NFTA_CHAIN_TABLE, table);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(m, NFTA_CHAIN_NAME, chain);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(m, NFTA_CHAIN_TYPE, type);
        if (r < 0)
                return r;

        r = sd_netlink_message_open_container(m, NFTA_CHAIN_HOOK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_u32(m, NFTA_HOOK_HOOKNUM, htobe32(hook));
        if (r < 0)
                return r;

        r = sd_netlink_message_append_u32(m, NFTA_HOOK_PRIORITY, htobe32(prio));
        if (r < 0)
                return r;

        r = sd_netlink_message_close_container(m);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(m);
        return 0;
}
/* Builds an NFT_MSG_DELTABLE request naming `table` in address family `family`.
 *
 * NOTE(review): the request carries NLM_F_CREATE although it is a delete; the
 * set-element delete builder passes NLM_F_ACK only — confirm this is intended.
 *
 * Returns a non-negative value and stores the message in *ret, or a negative
 * errno-style value. */
int sd_nfnl_nft_message_del_table(sd_netlink *nfnl, sd_netlink_message **ret,
                                  int family, const char *table) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        int r;

        r = nft_message_new(nfnl, &msg, family, NFT_MSG_DELTABLE, NLM_F_CREATE | NLM_F_ACK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_TABLE_NAME, table);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Builds an NFT_MSG_NEWTABLE request naming `table` in address family
 * `family`. NLM_F_CREATE is always set; `flags` supplies further netlink
 * header flags chosen by the caller.
 *
 * Returns a non-negative value and stores the message in *ret, or a negative
 * errno-style value. */
int sd_nfnl_nft_message_new_table(sd_netlink *nfnl, sd_netlink_message **ret,
                                  int family, const char *table, uint16_t flags) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        int r;

        r = nft_message_new(nfnl, &msg, family, NFT_MSG_NEWTABLE, NLM_F_CREATE | flags);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_TABLE_NAME, table);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Starts an NFT_MSG_NEWRULE request (NLM_F_CREATE | NLM_F_ACK) addressed to
 * `chain` in `table`. Only the table and chain names are appended here.
 *
 * Returns a non-negative value and stores the message in *ret, or a negative
 * errno-style value. */
int sd_nfnl_nft_message_new_rule(sd_netlink *nfnl, sd_netlink_message **ret,
                                 int family, const char *table, const char *chain) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        int r;

        r = nft_message_new(nfnl, &msg, family, NFT_MSG_NEWRULE, NLM_F_CREATE | NLM_F_ACK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_RULE_TABLE, table);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_RULE_CHAIN, chain);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Builds an NFT_MSG_NEWSET request (NLM_F_CREATE | NLM_F_ACK) for `set_name`
 * in `table`.
 *
 * The id placed in NFTA_SET_ID is set_id + 1 and is appended as-is, while the
 * key length is converted to big-endian.
 *
 * Returns a non-negative value and stores the message in *ret, or a negative
 * errno-style value. */
int sd_nfnl_nft_message_new_set(sd_netlink *nfnl, sd_netlink_message **ret,
                                int family, const char *table, const char *set_name,
                                uint32_t set_id, uint32_t klen) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        int r;

        r = nft_message_new(nfnl, &msg, family, NFT_MSG_NEWSET, NLM_F_CREATE | NLM_F_ACK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_SET_TABLE, table);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_SET_NAME, set_name);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_u32(msg, NFTA_SET_ID, set_id + 1);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_u32(msg, NFTA_SET_KEY_LEN, htobe32(klen));
        if (r < 0)
                return r;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Starts an NFT_MSG_NEWSETELEM request (NLM_F_CREATE | NLM_F_ACK) for
 * `set_name` in `table`.
 *
 * The NFTA_SET_ELEM_LIST_ELEMENTS container is opened but not closed here, so
 * the message is returned with that container still open.
 *
 * Returns a non-negative value and stores the message in *ret, or a negative
 * errno-style value. */
int sd_nfnl_nft_message_new_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret,
                                           int family, const char *table, const char *set_name) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        int r;

        r = nft_message_new(nfnl, &msg, family, NFT_MSG_NEWSETELEM, NLM_F_CREATE | NLM_F_ACK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_SET_ELEM_LIST_TABLE, table);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_SET_ELEM_LIST_SET, set_name);
        if (r < 0)
                return r;

        r = sd_netlink_message_open_container(msg, NFTA_SET_ELEM_LIST_ELEMENTS);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Starts an NFT_MSG_DELSETELEM request (NLM_F_ACK only) for `set_name` in
 * `table`.
 *
 * The NFTA_SET_ELEM_LIST_ELEMENTS container is opened but not closed here, so
 * the message is returned with that container still open.
 *
 * Returns a non-negative value and stores the message in *ret, or a negative
 * errno-style value. */
int sd_nfnl_nft_message_del_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret,
                                           int family, const char *table, const char *set_name) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *msg = NULL;
        int r;

        r = nft_message_new(nfnl, &msg, family, NFT_MSG_DELSETELEM, NLM_F_ACK);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_SET_ELEM_LIST_TABLE, table);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_string(msg, NFTA_SET_ELEM_LIST_SET, set_name);
        if (r < 0)
                return r;

        r = sd_netlink_message_open_container(msg, NFTA_SET_ELEM_LIST_ELEMENTS);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(msg);
        return r;
}
/* Appends a nested attribute `attr` containing a single NFTA_DATA_VALUE of
 * `dlen` bytes taken from `data`.
 *
 * Returns the result of closing the container, or the first negative
 * errno-style value encountered. */
static int sd_nfnl_add_data(sd_netlink_message *m, uint16_t attr, const void *data, uint32_t dlen) {
        int r;

        r = sd_netlink_message_open_container(m, attr);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_data(m, NFTA_DATA_VALUE, data, dlen);
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* attr */
}
/* Opens array entry `num` and appends one set element to it: the key, plus
 * the data value when `data` is non-NULL.
 *
 * The array entry is left open on success. If appending the key or the data
 * fails, the array entry is cancelled before the error is returned.
 *
 * Returns a non-negative value on success, or a negative errno-style value. */
int sd_nfnl_nft_message_add_setelem(sd_netlink_message *m,
                                    uint32_t num,
                                    const void *key, uint32_t klen,
                                    const void *data, uint32_t dlen) {
        int r;

        r = sd_netlink_message_open_array(m, num);
        if (r < 0)
                return r;

        r = sd_nfnl_add_data(m, NFTA_SET_ELEM_KEY, key, klen);
        if (r >= 0 && data)
                r = sd_nfnl_add_data(m, NFTA_SET_ELEM_DATA, data, dlen);

        if (r < 0)
                sd_netlink_message_cancel_array(m);

        return r;
}
/* Closes the innermost open container of `m` and returns the result of that call. */
int sd_nfnl_nft_message_add_setelem_end(sd_netlink_message *m) {
        return sd_netlink_message_close_container(m); /* NFTA_SET_ELEM_LIST_ELEMENTS */
}
/* Opens a netlink connection of family NETLINK_NETFILTER and stores it in *ret. */
int sd_nfnl_socket_open(sd_netlink **ret) {
        return netlink_open_family(ret, NETLINK_NETFILTER);
}

View File

@ -226,6 +226,41 @@ int sd_netlink_send(sd_netlink *nl,
return 1;
}
/* Seals and sends msgcount messages with one vectored write.
 *
 * Each message must be non-NULL and not yet sealed. Messages are sealed in
 * array order. If ret_serial is non-NULL, a newly allocated array holding the
 * serial of every message is stored in *ret_serial on success, and ownership
 * of that array passes to the caller.
 *
 * Returns the value reported by socket_writev_message() on success. Returns
 * -EINVAL for a NULL argument, a NULL array entry or msgcount == 0; -ECHILD
 * if the pid changed; -EPERM if a message is already sealed; -ENOMEM on
 * allocation failure; or the write error. */
int sd_netlink_sendv(sd_netlink *nl,
                     sd_netlink_message *messages[],
                     size_t msgcount,
                     uint32_t **ret_serial) {
        _cleanup_free_ uint32_t *serials = NULL;
        size_t i; /* same type as msgcount, so the loop bound cannot be truncated */
        int r;

        assert_return(nl, -EINVAL);
        assert_return(!rtnl_pid_changed(nl), -ECHILD);
        assert_return(messages, -EINVAL);
        /* socket_writev_message() asserts on an empty vector; refuse it gracefully here */
        assert_return(msgcount > 0, -EINVAL);

        if (ret_serial) {
                serials = new0(uint32_t, msgcount);
                if (!serials)
                        return -ENOMEM;
        }

        for (i = 0; i < msgcount; i++) {
                assert_return(messages[i], -EINVAL);
                assert_return(!messages[i]->sealed, -EPERM);

                rtnl_seal_message(nl, messages[i]);
                if (serials)
                        serials[i] = rtnl_message_get_serial(messages[i]);
        }

        r = socket_writev_message(nl, messages, msgcount);
        if (r < 0)
                return r;

        if (ret_serial)
                *ret_serial = TAKE_PTR(serials);

        return r;
}
int rtnl_rqueue_make_room(sd_netlink *rtnl) {
assert(rtnl);
@ -586,21 +621,15 @@ int sd_netlink_call_async(
return k;
}
int sd_netlink_call(sd_netlink *rtnl,
sd_netlink_message *message,
uint64_t usec,
sd_netlink_message **ret) {
int sd_netlink_read(sd_netlink *rtnl,
uint32_t serial,
uint64_t usec,
sd_netlink_message **ret) {
usec_t timeout;
uint32_t serial;
int r;
assert_return(rtnl, -EINVAL);
assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
assert_return(message, -EINVAL);
r = sd_netlink_send(rtnl, message, &serial);
if (r < 0)
return r;
timeout = calc_elapse(usec);
@ -670,6 +699,24 @@ int sd_netlink_call(sd_netlink *rtnl,
}
}
/* Sends `message` and then reads the reply that carries its serial number.
 *
 * `usec` and `ret` are passed through to sd_netlink_read(). Returns the result
 * of sd_netlink_read(), or the negative error from sd_netlink_send() if the
 * message could not be sent. */
int sd_netlink_call(sd_netlink *rtnl,
                    sd_netlink_message *message,
                    uint64_t usec,
                    sd_netlink_message **ret) {
        uint32_t serial;
        int r;

        assert_return(rtnl, -EINVAL);
        assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
        assert_return(message, -EINVAL);

        r = sd_netlink_send(rtnl, message, &serial);

        return r < 0 ? r : sd_netlink_read(rtnl, serial, usec, ret);
}
int sd_netlink_get_events(const sd_netlink *rtnl) {
assert_return(rtnl, -EINVAL);
assert_return(!rtnl_pid_changed(rtnl), -ECHILD);

View File

@ -282,7 +282,7 @@ static int address_set_masquerade(Address *address, bool add) {
if (r < 0)
return r;
r = fw_add_masquerade(add, AF_INET, &masked, address->prefixlen);
r = fw_add_masquerade(&address->link->manager->fw_ctx, add, AF_INET, &masked, address->prefixlen);
if (r < 0)
return r;

View File

@ -22,6 +22,7 @@
#include "dns-domain.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "local-addresses.h"
#include "netlink-util.h"
#include "network-internal.h"
@ -912,6 +913,8 @@ void manager_free(Manager *m) {
safe_close(m->ethtool_fd);
m->fw_ctx = fw_ctx_free(m->fw_ctx);
free(m);
}

View File

@ -9,6 +9,7 @@
#include "sd-resolve.h"
#include "dhcp-identifier.h"
#include "firewall-util.h"
#include "hashmap.h"
#include "networkd-link.h"
#include "networkd-network.h"
@ -74,6 +75,8 @@ struct Manager {
bool dhcp4_prefix_root_cannot_set_table:1;
bool bridge_mdb_on_master_not_supported:1;
FirewallContext *fw_ctx;
};
int manager_new(Manager **ret);

View File

@ -9,6 +9,7 @@
#include "capability-util.h"
#include "daemon-util.h"
#include "firewall-util.h"
#include "main-func.h"
#include "mkdir.h"
#include "networkd-conf.h"
@ -92,6 +93,10 @@ static int run(int argc, char *argv[]) {
if (r < 0)
return r;
r = fw_ctx_new(&m->fw_ctx);
if (r < 0)
log_warning_errno(r, "Could not initialize firewall, IPMasquerade= option not available: %m");
r = manager_start(m);
if (r < 0)
return log_error_errno(r, "Could not start manager: %m");

View File

@ -82,7 +82,7 @@ void expose_port_free_all(ExposePort *p) {
}
}
int expose_port_flush(ExposePort* l, union in_addr_union *exposed) {
int expose_port_flush(FirewallContext **fw_ctx, ExposePort* l, union in_addr_union *exposed) {
ExposePort *p;
int r, af = AF_INET;
@ -97,7 +97,8 @@ int expose_port_flush(ExposePort* l, union in_addr_union *exposed) {
log_debug("Lost IP address.");
LIST_FOREACH(ports, p, l) {
r = fw_add_local_dnat(false,
r = fw_add_local_dnat(fw_ctx,
false,
af,
p->protocol,
p->host_port,
@ -112,7 +113,7 @@ int expose_port_flush(ExposePort* l, union in_addr_union *exposed) {
return 0;
}
int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed) {
int expose_port_execute(sd_netlink *rtnl, FirewallContext **fw_ctx, ExposePort *l, union in_addr_union *exposed) {
_cleanup_free_ struct local_address *addresses = NULL;
union in_addr_union new_exposed;
ExposePort *p;
@ -136,7 +137,7 @@ int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *ex
addresses[0].scope < RT_SCOPE_LINK;
if (!add)
return expose_port_flush(l, exposed);
return expose_port_flush(fw_ctx, l, exposed);
new_exposed = addresses[0].address;
if (in_addr_equal(af, exposed, &new_exposed))
@ -150,7 +151,8 @@ int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *ex
LIST_FOREACH(ports, p, l) {
r = fw_add_local_dnat(true,
r = fw_add_local_dnat(fw_ctx,
true,
af,
p->protocol,
p->host_port,
@ -188,7 +190,7 @@ int expose_port_watch_rtnl(
sd_event *event,
int recv_fd,
sd_netlink_message_handler_t handler,
union in_addr_union *exposed,
void *userdata,
sd_netlink **ret) {
_cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
int fd, r;
@ -207,11 +209,11 @@ int expose_port_watch_rtnl(
return log_error_errno(r, "Failed to create rtnl object: %m");
}
r = sd_netlink_add_match(rtnl, NULL, RTM_NEWADDR, handler, NULL, exposed, "nspawn-NEWADDR");
r = sd_netlink_add_match(rtnl, NULL, RTM_NEWADDR, handler, NULL, userdata, "nspawn-NEWADDR");
if (r < 0)
return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR messages: %m");
r = sd_netlink_add_match(rtnl, NULL, RTM_DELADDR, handler, NULL, exposed, "nspawn-DELADDR");
r = sd_netlink_add_match(rtnl, NULL, RTM_DELADDR, handler, NULL, userdata, "nspawn-DELADDR");
if (r < 0)
return log_error_errno(r, "Failed to subscribe to RTM_DELADDR messages: %m");

View File

@ -3,6 +3,8 @@
#include <inttypes.h>
#include "firewall-util.h"
#include "sd-event.h"
#include "sd-netlink.h"
@ -19,8 +21,8 @@ typedef struct ExposePort {
void expose_port_free_all(ExposePort *p);
int expose_port_parse(ExposePort **l, const char *s);
int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, union in_addr_union *exposed, sd_netlink **ret);
int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, void *userdata, sd_netlink **ret);
int expose_port_send_rtnl(int send_fd);
int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed);
int expose_port_flush(ExposePort* l, union in_addr_union *exposed);
int expose_port_execute(sd_netlink *rtnl, FirewallContext **fw_ctx, ExposePort *l, union in_addr_union *exposed);
int expose_port_flush(FirewallContext **fw_ctx, ExposePort* l, union in_addr_union *exposed);

View File

@ -1771,11 +1771,6 @@ static int verify_arguments(void) {
if (arg_expose_ports && !arg_private_network)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking.");
#if ! HAVE_LIBIPTC
if (arg_expose_ports)
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--port= is not supported, compiled without libiptc support.");
#endif
if (arg_caps_ambient) {
if (arg_caps_ambient == (uint64_t)-1)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "AmbientCapability= does not support the value all.");
@ -2475,14 +2470,19 @@ static int setup_kmsg(int kmsg_socket) {
return 0;
}
/* State handed to the port-exposure code, also used as userdata of
 * on_address_change(): the address the ports are currently exposed on, and the
 * firewall context passed to expose_port_execute() / expose_port_flush(). */
struct ExposeArgs {
        union in_addr_union address;
        struct FirewallContext *fw_ctx;
};
static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
union in_addr_union *exposed = userdata;
struct ExposeArgs *args = userdata;
assert(rtnl);
assert(m);
assert(exposed);
assert(args);
expose_port_execute(rtnl, arg_expose_ports, exposed);
expose_port_execute(rtnl, &args->fw_ctx, arg_expose_ports, &args->address);
return 0;
}
@ -4467,7 +4467,7 @@ static int run_container(
bool secondary,
FDSet *fds,
char veth_name[IFNAMSIZ], bool *veth_created,
union in_addr_union *exposed,
struct ExposeArgs *expose_args,
int *master, pid_t *pid, int *ret) {
static const struct sigaction sa = {
@ -4896,11 +4896,11 @@ static int run_container(
(void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
if (arg_expose_ports) {
r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, exposed, &rtnl);
r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, expose_args, &rtnl);
if (r < 0)
return r;
(void) expose_port_execute(rtnl, arg_expose_ports, exposed);
(void) expose_port_execute(rtnl, &expose_args->fw_ctx, arg_expose_ports, &expose_args->address);
}
rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
@ -5027,7 +5027,7 @@ static int run_container(
return 0; /* finito */
}
expose_port_flush(arg_expose_ports, exposed);
expose_port_flush(&expose_args->fw_ctx, arg_expose_ports, &expose_args->address);
(void) remove_veth_links(veth_name, arg_network_veth_extra);
*veth_created = false;
@ -5156,12 +5156,13 @@ static int run(int argc, char *argv[]) {
_cleanup_fdset_free_ FDSet *fds = NULL;
int r, n_fd_passed, ret = EXIT_SUCCESS;
char veth_name[IFNAMSIZ] = "";
union in_addr_union exposed = {};
struct ExposeArgs expose_args = {};
_cleanup_(release_lock_file) LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
char tmprootdir[] = "/tmp/nspawn-root-XXXXXX";
_cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
_cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
_cleanup_(fw_ctx_freep) FirewallContext *fw_ctx = NULL;
pid_t pid = 0;
log_parse_environment();
@ -5518,12 +5519,20 @@ static int run(int argc, char *argv[]) {
goto finish;
}
if (arg_expose_ports) {
r = fw_ctx_new(&fw_ctx);
if (r < 0) {
log_error_errno(r, "Cannot expose configured ports, firewall initialization failed: %m");
goto finish;
}
expose_args.fw_ctx = fw_ctx;
}
for (;;) {
r = run_container(dissected_image,
secondary,
fds,
veth_name, &veth_created,
&exposed, &master,
&expose_args, &master,
&pid, &ret);
if (r <= 0)
break;
@ -5573,7 +5582,7 @@ finish:
(void) rm_rf(p, REMOVE_ROOT);
}
expose_port_flush(arg_expose_ports, &exposed);
expose_port_flush(&fw_ctx, arg_expose_ports, &expose_args.address);
if (veth_created)
(void) remove_veth_links(veth_name, arg_network_veth_extra);

View File

@ -0,0 +1,350 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
/* Temporary work-around for broken glibc vs. linux kernel header definitions
* This is already fixed upstream, remove this when distributions have updated.
*/
#define _NET_IF_H 1
#include <arpa/inet.h>
#include <endian.h>
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <net/if.h>
#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif
#include <linux/if.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter/nf_nat.h>
#include <linux/netfilter/xt_addrtype.h>
#include <libiptc/libiptc.h>
#include "alloc-util.h"
#include "firewall-util.h"
#include "firewall-util-private.h"
#include "in-addr-util.h"
#include "macro.h"
#include "socket-util.h"
/* Generates the iptc_freep() helper that the functions below attach to their libiptc handle via
 * _cleanup_(iptc_freep); presumably it calls iptc_free() on the handle when the variable goes out
 * of scope (macro semantics are defined outside this file -- confirm). */
DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free);
/* Fills the generic IPv4 match part of an iptables entry: the protocol, the optional input/output
 * interface names together with their byte masks, and the optional source/destination address with
 * the netmask derived from the given prefix length.
 *
 * Returns -EINVAL if an interface name is supplied but rejected by ifname_valid(), 0 otherwise. */
static int entry_fill_basics(
                struct ipt_entry *entry,
                int protocol,
                const char *in_interface,
                const union in_addr_union *source,
                unsigned source_prefixlen,
                const char *out_interface,
                const union in_addr_union *destination,
                unsigned destination_prefixlen) {

        assert(entry);

        /* Validate both names up front, before anything is written to the entry */
        if ((out_interface && !ifname_valid(out_interface)) ||
            (in_interface && !ifname_valid(in_interface)))
                return -EINVAL;

        entry->ip.proto = protocol;

        if (in_interface) {
                size_t n = strlen(in_interface);

                assert(n < sizeof entry->ip.iniface);
                assert(n < sizeof entry->ip.iniface_mask);

                /* Copy the name including its NUL and flag exactly those bytes in the mask */
                memcpy(entry->ip.iniface, in_interface, n + 1);
                memset(entry->ip.iniface_mask, 0xFF, n + 1);
        }

        if (source) {
                entry->ip.src = source->in;
                in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
        }

        if (out_interface) {
                size_t n = strlen(out_interface);

                assert(n < sizeof entry->ip.outiface);
                assert(n < sizeof entry->ip.outiface_mask);

                memcpy(entry->ip.outiface, out_interface, n + 1);
                memset(entry->ip.outiface_mask, 0xFF, n + 1);
        }

        if (destination) {
                entry->ip.dst = destination->in;
                in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
        }

        return 0;
}
/* Adds (add=true) or removes (add=false) a MASQUERADE rule for source/source_prefixlen in the
 * POSTROUTING chain of the iptables "nat" table.
 *
 * Returns 0 on success, also when the rule to add already exists or the rule to delete is already
 * gone. Returns -EOPNOTSUPP for any family but AF_INET, -EINVAL for a missing source or a zero
 * prefix length, and -errno when a libiptc call fails. */
int fw_iptables_add_masquerade(
                bool add,
                int af,
                const union in_addr_union *source,
                unsigned source_prefixlen) {

        static const xt_chainlabel chain = "POSTROUTING";
        _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
        struct nf_nat_ipv4_multi_range_compat *range;
        struct ipt_entry_target *target;
        struct ipt_entry *entry, *mask;
        size_t target_sz, sz;
        int r;

        if (af != AF_INET)
                return -EOPNOTSUPP;

        if (!source || source_prefixlen == 0)
                return -EINVAL;

        h = iptc_init("nat");
        if (!h)
                return -errno;

        /* The entry consists of the basic header followed directly by the target */
        target_sz = XT_ALIGN(sizeof(struct ipt_entry_target)) +
                    XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
        sz = XT_ALIGN(sizeof(struct ipt_entry)) + target_sz;

        /* Put together the entry we want to add or remove */
        entry = alloca0(sz);
        entry->next_offset = sz;
        entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry));

        /* Only the source network is matched: no protocol, interfaces or destination */
        r = entry_fill_basics(entry, 0, NULL, source, source_prefixlen, NULL, NULL, 0);
        if (r < 0)
                return r;

        /* Fill in target part */
        target = ipt_get_target(entry);
        target->u.target_size = target_sz;
        strncpy(target->u.user.name, "MASQUERADE", sizeof(target->u.user.name));
        range = (struct nf_nat_ipv4_multi_range_compat*) target->data;
        range->rangesize = 1;

        /* Search mask: every byte of the entry is significant */
        mask = alloca(sz);
        memset(mask, 0xFF, sz);

        if (add) {
                if (iptc_check_entry(chain, entry, (unsigned char*) mask, h))
                        return 0; /* already there, nothing to commit */
                if (errno != ENOENT) /* anything but "does not exist yet" is fatal */
                        return -errno;

                if (!iptc_insert_entry(chain, entry, 0, h))
                        return -errno;
        } else if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h))
                /* if it's already gone, all is good! */
                return errno == ENOENT ? 0 : -errno;

        if (!iptc_commit(h))
                return -errno;

        return 0;
}
int fw_iptables_add_local_dnat(
bool add,
int af,
int protocol,
uint16_t local_port,
const union in_addr_union *remote,
uint16_t remote_port,
const union in_addr_union *previous_remote) {
static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT";
_cleanup_(iptc_freep) struct xtc_handle *h = NULL;
struct ipt_entry *entry, *mask;
struct ipt_entry_target *t;
struct ipt_entry_match *m;
struct xt_addrtype_info_v1 *at;
struct nf_nat_ipv4_multi_range_compat *mr;
size_t sz, msz;
int r;
const char *in_interface = NULL;
const union in_addr_union *source = NULL;
unsigned source_prefixlen = 0;
const union in_addr_union *destination = NULL;
unsigned destination_prefixlen = 0;
assert(add || !previous_remote);
if (af != AF_INET)
return -EOPNOTSUPP;
if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
return -EOPNOTSUPP;
if (local_port <= 0)
return -EINVAL;
if (remote_port <= 0)
return -EINVAL;
h = iptc_init("nat");
if (!h)
return -errno;
sz = XT_ALIGN(sizeof(struct ipt_entry)) +
XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
XT_ALIGN(sizeof(struct ipt_entry_target)) +
XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
if (protocol == IPPROTO_TCP)
msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_tcp));
else
msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_udp));
sz += msz;
/* Fill in basic part */
entry = alloca0(sz);
entry->next_offset = sz;
entry->target_offset =
XT_ALIGN(sizeof(struct ipt_entry)) +
XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
msz;
r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen);
if (r < 0)
return r;
/* Fill in first match */
m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)));
m->u.match_size = msz;
if (protocol == IPPROTO_TCP) {
struct xt_tcp *tcp;
strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name));
tcp = (struct xt_tcp*) m->data;
tcp->dpts[0] = tcp->dpts[1] = local_port;
tcp->spts[0] = 0;
tcp->spts[1] = 0xFFFF;
} else {
struct xt_udp *udp;
strncpy(m->u.user.name, "udp", sizeof(m->u.user.name));
udp = (struct xt_udp*) m->data;
udp->dpts[0] = udp->dpts[1] = local_port;
udp->spts[0] = 0;
udp->spts[1] = 0xFFFF;
}
/* Fill in second match */
m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz);
m->u.match_size =
XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_addrtype_info_v1));
strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name));
m->u.user.revision = 1;
at = (struct xt_addrtype_info_v1*) m->data;
at->dest = XT_ADDRTYPE_LOCAL;
/* Fill in target part */
t = ipt_get_target(entry);
t->u.target_size =
XT_ALIGN(sizeof(struct ipt_entry_target)) +
XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name));
mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
mr->rangesize = 1;
mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS;
mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
if (protocol == IPPROTO_TCP)
mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port);
else
mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port);
mask = alloca0(sz);
memset(mask, 0xFF, sz);
if (add) {
/* Add the PREROUTING rule, if it is missing so far */
if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -EINVAL;
if (!iptc_insert_entry(chain_pre, entry, 0, h))
return -errno;
}
/* If a previous remote is set, remove its entry */
if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
}
/* Add the OUTPUT rule, if it is missing so far */
if (!in_interface) {
/* Don't apply onto loopback addresses */
if (!destination) {
entry->ip.dst.s_addr = htobe32(0x7F000000);
entry->ip.dmsk.s_addr = htobe32(0xFF000000);
entry->ip.invflags = IPT_INV_DSTIP;
}
if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
if (!iptc_insert_entry(chain_output, entry, 0, h))
return -errno;
}
/* If a previous remote is set, remove its entry */
if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
}
}
} else {
if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
if (!in_interface) {
if (!destination) {
entry->ip.dst.s_addr = htobe32(0x7F000000);
entry->ip.dmsk.s_addr = htobe32(0xFF000000);
entry->ip.invflags = IPT_INV_DSTIP;
}
if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
}
}
if (!iptc_commit(h))
return -errno;
return 0;
}

View File

@ -0,0 +1,958 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <arpa/inet.h>
#include <endian.h>
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/netfilter/nf_nat.h>
#include <linux/netfilter_ipv4.h>
#include <netinet/ip.h>
#include "sd-netlink.h"
#include "alloc-util.h"
#include "firewall-util.h"
#include "firewall-util-private.h"
#include "in-addr-util.h"
#include "macro.h"
#include "socket-util.h"
#include "time-util.h"
#define NFT_SYSTEMD_DNAT_MAP_NAME "map_port_ipport"
#define NFT_SYSTEMD_TABLE_NAME "io.systemd.nat"
#define NFT_SYSTEMD_MASQ_SET_NAME "masq_saddr"
#define NFNL_DEFAULT_TIMEOUT_USECS (1ULL * USEC_PER_SEC)
#define UDP_DPORT_OFFSET 2
/* Sends msgcount messages in one go and then reads the reply for every message except the first
 * and the last one (all callers in this file place the batch begin/end markers there).
 *
 * Returns a negative errno if sending fails, otherwise the first negative result seen while reading
 * the replies, or 0 if there was none. All replies are read even after a failure was recorded. */
static int nfnl_netlink_sendv(sd_netlink *nfnl,
                              sd_netlink_message *messages[],
                              size_t msgcount) {
        _cleanup_free_ uint32_t *serial = NULL;
        int first_error = 0, r;

        assert(msgcount > 0);

        r = sd_netlink_sendv(nfnl, messages, msgcount, &serial);
        if (r < 0)
                return r;

        for (size_t idx = 1; idx + 1 < msgcount; idx++) {
                /* If message is an error, this returns embedded errno */
                int reply = sd_netlink_read(nfnl, serial[idx], NFNL_DEFAULT_TIMEOUT_USECS, NULL);

                if (first_error == 0 && reply < 0)
                        first_error = reply;
        }

        return first_error;
}
/* Starts a new expression in message m: opens an NFTA_LIST_ELEM array entry, stores the expression
 * name and opens the NFTA_EXPR_DATA container selected by that name. Both are left open for the
 * caller to fill and close. Returns a negative errno from the first failing step. */
static int nfnl_add_open_expr_container(sd_netlink_message *m, const char *name) {
        int r = sd_netlink_message_open_array(m, NFTA_LIST_ELEM);

        if (r >= 0)
                r = sd_netlink_message_append_string(m, NFTA_EXPR_NAME, name);
        if (r < 0)
                return r;

        return sd_netlink_message_open_container_union(m, NFTA_EXPR_DATA, name);
}
/* Appends a complete "fib" expression to m: flags, requested result type and destination
 * register, each written as a big-endian u32. Returns a negative errno from the first failing
 * step, otherwise the result of closing the list element. */
static int nfnl_add_expr_fib(sd_netlink_message *m, uint32_t nft_fib_flags,
                             enum nft_fib_result result,
                             enum nft_registers dreg) {
        /* Attributes in the order they are emitted */
        const struct {
                unsigned short attr;
                uint32_t value;
        } fields[] = {
                { NFTA_FIB_FLAGS,  nft_fib_flags },
                { NFTA_FIB_RESULT, result        },
                { NFTA_FIB_DREG,   dreg          },
        };
        int r;

        r = nfnl_add_open_expr_container(m, "fib");
        if (r < 0)
                return r;

        for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
                r = sd_netlink_message_append_u32(m, fields[i].attr, htobe32(fields[i].value));
                if (r < 0)
                        return r;
        }

        r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Appends a complete "meta" expression to m that loads the given meta key into register dreg.
 * Returns a negative errno from the first failing step. */
static int nfnl_add_expr_meta(sd_netlink_message *m, enum nft_meta_keys key,
                              enum nft_registers dreg) {
        int r = nfnl_add_open_expr_container(m, "meta");

        /* Each step only runs if everything before it succeeded */
        if (r >= 0)
                r = sd_netlink_message_append_u32(m, NFTA_META_KEY, htobe32(key));
        if (r >= 0)
                r = sd_netlink_message_append_u32(m, NFTA_META_DREG, htobe32(dreg));
        if (r >= 0)
                r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Appends a complete "payload" expression to m: load len bytes found at offset within header base
 * pb into register dreg. All four values are written as big-endian u32 attributes. Returns a
 * negative errno from the first failing step. */
static int nfnl_add_expr_payload(sd_netlink_message *m, enum nft_payload_bases pb,
                                 uint32_t offset, uint32_t len, enum nft_registers dreg) {
        /* Attributes in the order they are emitted */
        const struct {
                unsigned short attr;
                uint32_t value;
        } fields[] = {
                { NFTA_PAYLOAD_DREG,   dreg   },
                { NFTA_PAYLOAD_BASE,   pb     },
                { NFTA_PAYLOAD_OFFSET, offset },
                { NFTA_PAYLOAD_LEN,    len    },
        };
        int r;

        r = nfnl_add_open_expr_container(m, "payload");
        if (r < 0)
                return r;

        for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
                r = sd_netlink_message_append_u32(m, fields[i].attr, htobe32(fields[i].value));
                if (r < 0)
                        return r;
        }

        r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Opens a "lookup" expression in m and writes the set name and the source register holding the
 * key. The expression is left open so the caller can add further attributes and must close it.
 * Returns a negative errno from the first failing step. */
static int nfnl_add_expr_lookup_set_data(sd_netlink_message *m, const char *set_name,
                                         enum nft_registers sreg) {
        int r = nfnl_add_open_expr_container(m, "lookup");

        if (r >= 0)
                r = sd_netlink_message_append_string(m, NFTA_LOOKUP_SET, set_name);
        if (r < 0)
                return r;

        return sd_netlink_message_append_u32(m, NFTA_LOOKUP_SREG, htobe32(sreg));
}
/* Appends a complete "lookup" expression that looks up the key held in sreg in the named set.
 * Returns a negative errno from the first failing step. */
static int nfnl_add_expr_lookup_set(sd_netlink_message *m, const char *set_name,
                                    enum nft_registers sreg) {
        int r = nfnl_add_expr_lookup_set_data(m, set_name, sreg);

        if (r >= 0)
                r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Appends a complete "lookup" expression for a map: same as a set lookup, plus a destination
 * register dreg written as NFTA_LOOKUP_DREG. Returns a negative errno from the first failing
 * step. */
static int nfnl_add_expr_lookup_map(sd_netlink_message *m, const char *set_name,
                                    enum nft_registers sreg, enum nft_registers dreg) {
        int r = nfnl_add_expr_lookup_set_data(m, set_name, sreg);

        if (r >= 0)
                r = sd_netlink_message_append_u32(m, NFTA_LOOKUP_DREG, htobe32(dreg));
        if (r >= 0)
                r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Wraps dlen bytes of data as an NFTA_DATA_VALUE attribute inside a container of type attr.
 * Returns a negative errno from the first failing step. */
static int nfnl_add_expr_data(sd_netlink_message *m, int attr, const void *data, uint32_t dlen) {
        int r = sd_netlink_message_open_container(m, attr);

        if (r >= 0)
                r = sd_netlink_message_append_data(m, NFTA_DATA_VALUE, data, dlen);
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* attr */
}
/* Adds the comparison operand of a "cmp" expression, i.e. data wrapped in NFTA_CMP_DATA. */
static int nfnl_add_expr_cmp_data(sd_netlink_message *m, const void *data, uint32_t dlen) {
        const int container = NFTA_CMP_DATA;

        return nfnl_add_expr_data(m, container, data, dlen);
}
/* Appends a complete "cmp" expression to m: compare register sreg against the dlen bytes at data
 * using operator cmp_op. Returns a negative errno from the first failing step. */
static int nfnl_add_expr_cmp(sd_netlink_message *m, enum nft_cmp_ops cmp_op,
                             enum nft_registers sreg, const void *data, uint32_t dlen) {
        /* u32 attributes that precede the operand, in emission order */
        const struct {
                unsigned short attr;
                uint32_t value;
        } fields[] = {
                { NFTA_CMP_OP,   cmp_op },
                { NFTA_CMP_SREG, sreg   },
        };
        int r;

        r = nfnl_add_open_expr_container(m, "cmp");
        if (r < 0)
                return r;

        for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
                r = sd_netlink_message_append_u32(m, fields[i].attr, htobe32(fields[i].value));
                if (r < 0)
                        return r;
        }

        r = nfnl_add_expr_cmp_data(m, data, dlen);
        if (r < 0)
                return r;

        r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Appends a complete "bitwise" expression to m, operating on len bytes from register sreg into
 * register dreg. The `and` operand is sent as NFTA_BITWISE_MASK and the `xor` operand as
 * NFTA_BITWISE_XOR; both must point to len bytes. Returns a negative errno from the first failing
 * step. */
static int nfnl_add_expr_bitwise(sd_netlink_message *m,
                                 enum nft_registers sreg,
                                 enum nft_registers dreg,
                                 const void *and,
                                 const void *xor, uint32_t len) {
        /* u32 attributes that precede the two operands, in emission order */
        const struct {
                unsigned short attr;
                uint32_t value;
        } fields[] = {
                { NFTA_BITWISE_SREG, sreg },
                { NFTA_BITWISE_DREG, dreg },
                { NFTA_BITWISE_LEN,  len  },
        };
        int r;

        r = nfnl_add_open_expr_container(m, "bitwise");
        if (r < 0)
                return r;

        for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
                r = sd_netlink_message_append_u32(m, fields[i].attr, htobe32(fields[i].value));
                if (r < 0)
                        return r;
        }

        r = nfnl_add_expr_data(m, NFTA_BITWISE_MASK, and, len);
        if (r >= 0)
                r = nfnl_add_expr_data(m, NFTA_BITWISE_XOR, xor, len);
        if (r >= 0)
                r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Appends a complete "nat" expression of type NFT_NAT_DNAT to m. The new address is taken from
 * register areg (NFTA_NAT_REG_ADDR_MIN) and the new port from register preg
 * (NFTA_NAT_REG_PROTO_MIN). Returns a negative errno from the first failing step. */
static int nfnl_add_expr_dnat(sd_netlink_message *m,
                              int family,
                              enum nft_registers areg,
                              enum nft_registers preg) {
        /* Attributes in the order they are emitted */
        const struct {
                unsigned short attr;
                uint32_t value;
        } fields[] = {
                { NFTA_NAT_TYPE,          NFT_NAT_DNAT },
                { NFTA_NAT_FAMILY,        family       },
                { NFTA_NAT_REG_ADDR_MIN,  areg         },
                { NFTA_NAT_REG_PROTO_MIN, preg         },
        };
        int r;

        r = nfnl_add_open_expr_container(m, "nat");
        if (r < 0)
                return r;

        for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
                r = sd_netlink_message_append_u32(m, fields[i].attr, htobe32(fields[i].value));
                if (r < 0)
                        return r;
        }

        r = sd_netlink_message_close_container(m); /* NFTA_EXPR_DATA */
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* Appends a "masq" expression to m. It carries only its name and no NFTA_EXPR_DATA container.
 * Returns a negative errno from the first failing step. */
static int nfnl_add_expr_masq(sd_netlink_message *m) {
        int r = sd_netlink_message_open_array(m, NFTA_LIST_ELEM);

        if (r >= 0)
                r = sd_netlink_message_append_string(m, NFTA_EXPR_NAME, "masq");
        if (r < 0)
                return r;

        return sd_netlink_message_close_container(m); /* NFTA_LIST_ELEM */
}
/* -t nat -A POSTROUTING -p protocol -s source/pflen -o out_interface -d destination/pflen -j MASQUERADE */
/* Builds the rule message "ip saddr @masq_saddr masquerade" for the given chain of the systemd
 * table. On success the new message is handed to the caller through *ret and 0 is returned; on
 * failure a negative errno is returned and *ret is left untouched. */
static int sd_nfnl_message_new_masq_rule(sd_netlink *nfnl, sd_netlink_message **ret, int family,
                                         const char *chain) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *rule = NULL;
        int r;

        r = sd_nfnl_nft_message_new_rule(nfnl, &rule, family, NFT_SYSTEMD_TABLE_NAME, chain);
        if (r < 0)
                return r;

        r = sd_netlink_message_open_container(rule, NFTA_RULE_EXPRESSIONS);
        if (r < 0)
                return r;

        /* Statement 1a: load the IPv4 source address (iph->saddr) into reg1 */
        r = nfnl_add_expr_payload(rule, NFT_PAYLOAD_NETWORK_HEADER, offsetof(struct iphdr, saddr),
                                  sizeof(uint32_t), NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 1b: look reg1 up in the @masq_saddr set */
        r = nfnl_add_expr_lookup_set(rule, NFT_SYSTEMD_MASQ_SET_NAME, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 2: masq. Only executed by kernel if the previous lookup was successful. */
        r = nfnl_add_expr_masq(rule);
        if (r < 0)
                return r;

        r = sd_netlink_message_close_container(rule); /* NFTA_RULE_EXPRESSIONS */
        if (r < 0)
                return r;

        *ret = TAKE_PTR(rule);
        return 0;
}
/* -t nat -A PREROUTING -p protocol --dport local_port -i in_interface -s source/pflen -d destination/pflen -j DNAT --to-destination remote_addr:remote_port */
/* Builds the DNAT rule message for the given chain of the systemd table: packets whose destination
 * address is of type local get their 'l4proto . dport' looked up in the DNAT map and are
 * redirected to the address/port found there. On success the message is handed out through *ret
 * and 0 is returned; on failure a negative errno is returned and *ret is left untouched. */
static int sd_nfnl_message_new_dnat_rule_pre(sd_netlink *nfnl, sd_netlink_message **ret, int family,
                                             const char *chain) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *rule = NULL;
        uint32_t addrtype_local = RTN_LOCAL;
        int r;

        r = sd_nfnl_nft_message_new_rule(nfnl, &rule, family, NFT_SYSTEMD_TABLE_NAME, chain);
        if (r < 0)
                return r;

        r = sd_netlink_message_open_container(rule, NFTA_RULE_EXPRESSIONS);
        if (r < 0)
                return r;

        /* Statement 1a: fib daddr type -> reg1 */
        r = nfnl_add_expr_fib(rule, NFTA_FIB_F_DADDR, NFT_FIB_RESULT_ADDRTYPE, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 1b: require reg1 == RTN_LOCAL */
        r = nfnl_add_expr_cmp(rule, NFT_CMP_EQ, NFT_REG32_01, &addrtype_local, sizeof(addrtype_local));
        if (r < 0)
                return r;

        /* Statement 2a: layer 4 protocol -> reg1 */
        r = nfnl_add_expr_meta(rule, NFT_META_L4PROTO, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 2b: 16 bit destination port from the transport header -> reg2 */
        r = nfnl_add_expr_payload(rule, NFT_PAYLOAD_TRANSPORT_HEADER, UDP_DPORT_OFFSET,
                                  sizeof(uint16_t), NFT_REG32_02);
        if (r < 0)
                return r;

        /* Statement 3: lookup 'l4proto . dport', e.g. 'tcp . 22' as key and
         * store address and port for the dnat mapping in REG1/REG2. */
        r = nfnl_add_expr_lookup_map(rule, NFT_SYSTEMD_DNAT_MAP_NAME, NFT_REG32_01, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 4: dnat to address in reg1, port in reg2 */
        r = nfnl_add_expr_dnat(rule, family, NFT_REG32_01, NFT_REG32_02);
        if (r < 0)
                return r;

        r = sd_netlink_message_close_container(rule); /* NFTA_RULE_EXPRESSIONS */
        if (r < 0)
                return r;

        *ret = TAKE_PTR(rule);
        return 0;
}
/* Builds the DNAT rule message for locally generated traffic in the given chain of the systemd
 * table: destination not in 127.0.0.0/8, output interface index 1, then the same
 * 'l4proto . dport' map lookup and dnat as for incoming traffic. On success the message is handed
 * out through *ret and 0 is returned; on failure a negative errno is returned and *ret is left
 * untouched. */
static int sd_nfnl_message_new_dnat_rule_out(sd_netlink *nfnl, sd_netlink_message **ret,
                                             int family, const char *chain) {
        static const uint32_t no_xor = 0, lo_ifindex = 1;
        uint32_t loopback_net = htobe32(0x7F000000), loopback_mask = htobe32(0xff000000);
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *rule = NULL;
        int r;

        r = sd_nfnl_nft_message_new_rule(nfnl, &rule, family, NFT_SYSTEMD_TABLE_NAME, chain);
        if (r < 0)
                return r;

        r = sd_netlink_message_open_container(rule, NFTA_RULE_EXPRESSIONS);
        if (r < 0)
                return r;

        /* Statement 1a: exclude 127.0.0.1/8 (ip daddr != 127.0.0.1/8): load iph->daddr into reg1 */
        r = nfnl_add_expr_payload(rule, NFT_PAYLOAD_NETWORK_HEADER, offsetof(struct iphdr, daddr),
                                  sizeof(uint32_t), NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 1b: reduce reg1 to its /8 prefix */
        r = nfnl_add_expr_bitwise(rule, NFT_REG32_01, NFT_REG32_01, &loopback_mask, &no_xor, sizeof(loopback_mask));
        if (r < 0)
                return r;

        /* Statement 1c: continue only if reg1 differs from 127/8 */
        r = nfnl_add_expr_cmp(rule, NFT_CMP_NEQ, NFT_REG32_01, &loopback_net, sizeof(loopback_net));
        if (r < 0)
                return r;

        /* Statement 2a: meta oif -> reg1 */
        r = nfnl_add_expr_meta(rule, NFT_META_OIF, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 2b: compare to lo ifindex (1) */
        r = nfnl_add_expr_cmp(rule, NFT_CMP_EQ, NFT_REG32_01, &lo_ifindex, sizeof(lo_ifindex));
        if (r < 0)
                return r;

        /* Statement 3a: meta l4proto . th dport dnat ip . port to map @map_port_ipport;
         * the protocol goes into reg1 */
        r = nfnl_add_expr_meta(rule, NFT_META_L4PROTO, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 3b: the port number goes into reg2 */
        r = nfnl_add_expr_payload(rule, NFT_PAYLOAD_TRANSPORT_HEADER, UDP_DPORT_OFFSET,
                                  sizeof(uint16_t), NFT_REG32_02);
        if (r < 0)
                return r;

        /* Statement 3c: use reg1 and reg2 as key and retrieve the new destination ip and port
         * number. reg1 and reg2 are clobbered and will then contain the new address/port
         * number. */
        r = nfnl_add_expr_lookup_map(rule, NFT_SYSTEMD_DNAT_MAP_NAME, NFT_REG32_01, NFT_REG32_01);
        if (r < 0)
                return r;

        /* Statement 4: dnat connection to the address/port retrieved by the preceding
         * expression. */
        r = nfnl_add_expr_dnat(rule, family, NFT_REG32_01, NFT_REG32_02);
        if (r < 0)
                return r;

        r = sd_netlink_message_close_container(rule); /* NFTA_RULE_EXPRESSIONS */
        if (r < 0)
                return r;

        *ret = TAKE_PTR(rule);
        return 0;
}
/* Builds the message that creates set set_name (identified by set_id) in the systemd table, with
 * key type `type` and key length klen. NFTA_SET_FLAGS is only written when flags is non-zero.
 * On success the message is handed out through *ret and the result of the last append is
 * returned; on failure a negative errno is returned and *ret is left untouched. */
static int nft_new_set(struct sd_netlink *nfnl,
                       sd_netlink_message **ret,
                       int family, const char *set_name,
                       uint32_t set_id,
                       uint32_t flags, uint32_t type, uint32_t klen) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *set = NULL;
        int r;

        r = sd_nfnl_nft_message_new_set(nfnl, &set, family, NFT_SYSTEMD_TABLE_NAME, set_name, set_id, klen);
        if (r < 0)
                return r;

        if (flags != 0)
                r = sd_netlink_message_append_u32(set, NFTA_SET_FLAGS, htobe32(flags));
        if (r < 0)
                return r;

        r = sd_netlink_message_append_u32(set, NFTA_SET_KEY_TYPE, htobe32(type));
        if (r < 0)
                return r;

        *ret = TAKE_PTR(set);
        return r;
}
/* Builds the message that creates a map: a set with NFT_SET_MAP added to its flags, plus the type
 * (dtype) and length (dlen) of the data stored for each key. On success the message is handed out
 * through *ret and 0 is returned; on failure a negative errno is returned and *ret is left
 * untouched. */
static int nft_new_map(struct sd_netlink *nfnl,
                       sd_netlink_message **ret,
                       int family, const char *set_name, uint32_t set_id,
                       uint32_t flags, uint32_t type, uint32_t klen, uint32_t dtype, uint32_t dlen) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *map = NULL;
        int r;

        r = nft_new_set(nfnl, &map, family, set_name, set_id, flags | NFT_SET_MAP, type, klen);
        if (r >= 0)
                r = sd_netlink_message_append_u32(map, NFTA_SET_DATA_TYPE, htobe32(dtype));
        if (r >= 0)
                r = sd_netlink_message_append_u32(map, NFTA_SET_DATA_LEN, htobe32(dlen));
        if (r < 0)
                return r;

        *ret = TAKE_PTR(map);
        return 0;
}
/* Builds a message that adds a single element (key, plus optional data) to set_name in the
 * systemd table. On success the message is handed out through *ret and 0 is returned; on failure
 * a negative errno is returned and *ret is left untouched.
 *
 * Ideally there would be an API that provides:
 *
 * 1) an init function to add the main ruleset skeleton
 * 2) a function that populates the sets with all known address/port pairs to s/dnat for
 * 3) a function that can remove address/port pairs again.
 *
 * At this time, the existing API is used which is built on an 'add/delete a rule' paradigm.
 * This is replicated here and each element gets added to the set one-by-one. */
static int nft_add_element(sd_netlink *nfnl, sd_netlink_message **ret,
                           int family, const char *set_name,
                           const void *key, uint32_t klen,
                           const void *data, uint32_t dlen) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *elems = NULL;
        int r;

        r = sd_nfnl_nft_message_new_setelems_begin(nfnl, &elems, family, NFT_SYSTEMD_TABLE_NAME, set_name);
        if (r >= 0)
                r = sd_nfnl_nft_message_add_setelem(elems, 0, key, klen, data, dlen);
        if (r >= 0)
                /* could theoretically append more set elements to add before this point */
                r = sd_nfnl_nft_message_add_setelem_end(elems);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(elems);
        return 0;
}
/* Builds a message that deletes a single element (key, plus optional data) from set_name in the
 * systemd table. On success the message is handed out through *ret and 0 is returned; on failure
 * a negative errno is returned and *ret is left untouched. */
static int nft_del_element(sd_netlink *nfnl,
                           sd_netlink_message **ret, int family, const char *set_name,
                           const void *key, uint32_t klen,
                           const void *data, uint32_t dlen) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *elems = NULL;
        int r;

        r = sd_nfnl_nft_message_del_setelems_begin(nfnl, &elems, family, NFT_SYSTEMD_TABLE_NAME, set_name);
        if (r >= 0)
                r = sd_nfnl_nft_message_add_setelem(elems, 0, key, klen, data, dlen);
        if (r >= 0)
                r = sd_nfnl_nft_message_add_setelem_end(elems);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(elems);
        return 0;
}
/* Type identifiers attached to sets/maps so that the 'nft' userspace tool can format their
 * contents properly when someone inspects them.
 *
 * The values cannot be changed, they are part of the nft tool type identifier ABI.
 */
#define TYPE_BITS 6

enum nft_key_types {
        TYPE_IPADDR = 7,
        TYPE_IP6ADDR = 8,
        TYPE_INET_PROTOCOL = 12,
        TYPE_INET_SERVICE = 13,
};

/* Combines two type identifiers into the identifier of their concatenation 'a . b':
 * a occupies the bits above the low TYPE_BITS bits, b the low bits. */
static uint32_t concat_types2(enum nft_key_types a, enum nft_key_types b) {
        return ((uint32_t) a << TYPE_BITS) | (uint32_t) b;
}
/* enough space to hold netlink messages for table skeleton */
#define NFT_INIT_MSGS 16

/* Creates the systemd NAT table skeleton for the given family in a single batch: the table, the
 * prerouting/output/postrouting base chains, the masquerade set, the DNAT map and one rule per
 * chain. The table is created with NLM_F_EXCL, and an -EEXIST result of the batch is treated as
 * success. Returns 0 on success or a negative errno. All messages built here are released before
 * returning. */
static int fw_nftables_init_family(sd_netlink *nfnl, int family) {
        sd_netlink_message *batch[NFT_INIT_MSGS] = {};
        size_t ip_type_size = sizeof(uint32_t);
        int ip_type = TYPE_IPADDR, r;
        size_t msgcnt = 0, i;
        uint32_t set_id = 0;

        /* msgcnt always indexes the next free slot; it is only advanced after the slot was
         * filled successfully, so out_unref releases exactly the messages built so far. */
        r = sd_nfnl_message_batch_begin(nfnl, &batch[msgcnt]);
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        /* Set F_EXCL so table add fails if the table already exists. */
        r = sd_nfnl_nft_message_new_table(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME, NLM_F_EXCL | NLM_F_ACK);
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);

        r = sd_nfnl_nft_message_new_basechain(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME,
                                              "prerouting", "nat",
                                              NF_INET_PRE_ROUTING, NF_IP_PRI_NAT_DST + 1);
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        r = sd_nfnl_nft_message_new_basechain(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME,
                                              "output", "nat",
                                              NF_INET_LOCAL_OUT, NF_IP_PRI_NAT_DST + 1);
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        r = sd_nfnl_nft_message_new_basechain(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_TABLE_NAME,
                                              "postrouting", "nat",
                                              NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 1);
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        /* set to store ip address ranges we should masquerade for */
        r = nft_new_set(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_MASQ_SET_NAME, ++set_id, NFT_SET_INTERVAL, ip_type, ip_type_size);
        if (r < 0)
                goto out_unref;

        /*
         * map to store ip address:port pair to dnat to. elements in concatenation
         * are rounded up to 4 bytes.
         *
         * Example: ip protocol . tcp daddr is sizeof(uint32_t) + sizeof(uint32_t), not
         * sizeof(uint8_t) + sizeof(uint16_t).
         */
        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        r = nft_new_map(nfnl, &batch[msgcnt], family, NFT_SYSTEMD_DNAT_MAP_NAME, ++set_id, 0,
                        concat_types2(TYPE_INET_PROTOCOL, TYPE_INET_SERVICE), sizeof(uint32_t) * 2,
                        concat_types2(ip_type, TYPE_INET_SERVICE), ip_type_size + sizeof(uint32_t));
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        r = sd_nfnl_message_new_dnat_rule_pre(nfnl, &batch[msgcnt], family, "prerouting");
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        r = sd_nfnl_message_new_dnat_rule_out(nfnl, &batch[msgcnt], family, "output");
        if (r < 0)
                goto out_unref;

        msgcnt++;
        /* Same capacity check as for every other slot */
        assert(msgcnt < NFT_INIT_MSGS);
        r = sd_nfnl_message_new_masq_rule(nfnl, &batch[msgcnt], family, "postrouting");
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt < NFT_INIT_MSGS);
        r = sd_nfnl_message_batch_end(nfnl, &batch[msgcnt]);
        if (r < 0)
                goto out_unref;

        msgcnt++;
        assert(msgcnt <= NFT_INIT_MSGS);
        r = nfnl_netlink_sendv(nfnl, batch, msgcnt);
        /* The table being there already is fine */
        if (r == -EEXIST)
                r = 0;

out_unref:
        for (i = 0; i < msgcnt; i++)
                sd_netlink_message_unref(batch[i]);

        return r;
}
/* Opens an nfnetlink socket, sets up the systemd NAT table for AF_INET on it and, if both
 * succeed, stores the socket in ctx->nfnl. Returns 0 on success or a negative errno, in which
 * case ctx is not modified. */
int fw_nftables_init(FirewallContext *ctx) {
        _cleanup_(sd_netlink_unrefp) sd_netlink *sock = NULL;
        int r;

        r = sd_nfnl_socket_open(&sock);
        if (r >= 0)
                r = fw_nftables_init_family(sock, AF_INET);
        if (r < 0)
                return r;

        ctx->nfnl = TAKE_PTR(sock);
        return 0;
}
/* Drops the context's reference to its nfnetlink socket and stores whatever sd_netlink_unref()
 * returns back into ctx->nfnl. */
void fw_nftables_exit(FirewallContext *ctx) {
        sd_netlink *released = sd_netlink_unref(ctx->nfnl);

        ctx->nfnl = released;
}
/* Appends the two set elements that describe the IPv4 range source/prefixlen to m: element 0 is
 * the first address of the prefix, element 1 is the first address after it, flagged with
 * NFT_SET_ELEM_INTERVAL_END. If the prefix ends at the top of the address space, the end element
 * is 0. Both addresses are stored in network byte order.
 *
 * prefixlen must be in 0..32. Returns 0 on success or a negative errno from the first failing
 * step. */
static int nft_message_add_setelem_iprange(sd_netlink_message *m,
                                           const union in_addr_union *source,
                                           unsigned int prefixlen) {
        uint32_t mask, start, end;
        uint64_t span, end64;
        unsigned int nplen;
        int r;

        assert(source);
        assert(prefixlen <= 32);
        nplen = 32 - prefixlen;

        /* Number of addresses covered by the prefix. Computed in 64 bit because nplen may be 32
         * (prefixlen == 0), and shifting a 32 bit value by 32 is undefined. */
        span = UINT64_C(1) << nplen;

        mask = htobe32(~(uint32_t) (span - 1));
        start = source->in.s_addr & mask;

        r = sd_nfnl_nft_message_add_setelem(m, 0, &start, sizeof(start), NULL, 0);
        if (r < 0)
                return r;

        r = sd_nfnl_nft_message_add_setelem_end(m);
        if (r < 0)
                return r;

        /* First address past the range; wraps to 0 when the range reaches 255.255.255.255 */
        end64 = (uint64_t) be32toh(start) + span;
        end = end64 > UINT32_MAX ? 0U : (uint32_t) end64;
        end = htobe32(end);

        r = sd_nfnl_nft_message_add_setelem(m, 1, &end, sizeof(end), NULL, 0);
        if (r < 0)
                return r;

        r = sd_netlink_message_append_u32(m, NFTA_SET_ELEM_FLAGS, htobe32(NFT_SET_ELEM_INTERVAL_END));
        if (r < 0)
                return r;

        r = sd_nfnl_nft_message_add_setelem_end(m);
        if (r < 0)
                return r;

        return 0;
}
/* When someone runs 'nft flush ruleset' in the same net namespace
 * this will also tear down the systemd nat table.
 *
 * This differs from iptables: 'iptables -t nat -F' will remove all rules
 * added by the systemd iptables backend, but iptables has builtin chains
 * that cannot be deleted -- the next add operation will 'just work'.
 *
 * In the nftables case, everything gets removed. The next add operation
 * will yield -ENOENT.
 *
 * If we see -ENOENT on add, replay the initial table setup.
 * If that works, re-do the add operation.
 *
 * Note that this doesn't protect against external sabotage such as a
 * 'while true; nft flush ruleset;done'. There is nothing that could be
 * done about that short of extending the kernel to allow tables to be
 * owned by systemd-networkd and making them non-deleteable except by
 * the 'owning process'.
 */
/* Re-runs the table setup for family af. If that returns 0, the `size` messages in old[] are
 * unreferenced, last slot first, and each slot is overwritten with the value returned by
 * sd_netlink_message_unref() (presumably NULL), so the caller can rebuild its transaction.
 * Returns 0 on success; any other result of the setup is passed through and old[] is left
 * alone. */
static int fw_nftables_recreate_table(sd_netlink *nfnl, int af, sd_netlink_message **old, size_t size) {
        int r;

        r = fw_nftables_init_family(nfnl, af);
        if (r != 0)
                return r;

        for (size_t remaining = size; remaining > 0; remaining--)
                old[remaining - 1] = sd_netlink_message_unref(old[remaining - 1]);

        return 0;
}
#define NFT_MASQ_MSGS 3

/* Adds an IPv4 source range to, or removes it from, the masquerade set
 * (NFT_SYSTEMD_MASQ_SET_NAME in NFT_SYSTEMD_TABLE_NAME).
 *
 * The request is sent as one batch: batch-begin, one set-element message
 * carrying the range, batch-end. If sending fails with -ENOENT the table is
 * re-created once via fw_nftables_recreate_table() and the whole batch is
 * rebuilt and sent again.
 *
 * ctx:              context whose nfnl socket is used
 * add:              true to add the range, false to delete it
 * af:               address family; only AF_INET is handled, since the range
 *                   helper reads the IPv4 member of 'source'
 * source:           network address, must not be NULL
 * source_prefixlen: prefix length, 1..32
 *
 * Returns 0 on success, -EINVAL for a NULL source or a prefix length outside
 * 1..32, -EAFNOSUPPORT for any family other than AF_INET, or the negative
 * error of the failing netlink operation. */
int fw_nftables_add_masquerade(
                FirewallContext *ctx,
                bool add,
                int af,
                const union in_addr_union *source,
                unsigned int source_prefixlen) {

        sd_netlink_message *transaction[NFT_MASQ_MSGS] = {};
        bool retry = true;
        size_t tsize;
        int r;

        if (!source || source_prefixlen == 0)
                return -EINVAL;

        /* The range helper only understands IPv4 and asserts prefixlen <= 32;
         * reject anything else here instead of tripping that assertion. */
        if (af != AF_INET)
                return -EAFNOSUPPORT;

        if (source_prefixlen > 32)
                return -EINVAL;

again:
        r = sd_nfnl_message_batch_begin(ctx->nfnl, &transaction[0]);
        if (r < 0)
                return r;
        tsize = 1;

        if (add)
                r = sd_nfnl_nft_message_new_setelems_begin(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_TABLE_NAME, NFT_SYSTEMD_MASQ_SET_NAME);
        else
                r = sd_nfnl_nft_message_del_setelems_begin(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_TABLE_NAME, NFT_SYSTEMD_MASQ_SET_NAME);
        if (r < 0)
                goto out_unref;

        /* Account for the new message right away, so that out_unref releases
         * it even if filling it in fails. */
        tsize++;

        r = nft_message_add_setelem_iprange(transaction[tsize - 1], source, source_prefixlen);
        if (r < 0)
                goto out_unref;

        assert(tsize < NFT_MASQ_MSGS);
        r = sd_nfnl_message_batch_end(ctx->nfnl, &transaction[tsize]);
        if (r < 0)
                goto out_unref; /* do not leak the messages built so far */
        tsize++;

        r = nfnl_netlink_sendv(ctx->nfnl, transaction, tsize);
        if (retry && r == -ENOENT) {
                /* On success the helper has already released and cleared the
                 * messages, so the batch can be rebuilt from scratch. */
                int tmp = fw_nftables_recreate_table(ctx->nfnl, af, transaction, tsize);
                if (tmp == 0) {
                        retry = false;
                        goto again;
                }
        }

out_unref:
        while (tsize > 0)
                sd_netlink_message_unref(transaction[--tsize]);

        return r < 0 ? r : 0;
}
#define NFT_DNAT_MSGS 4

/* Adds or removes an entry in the DNAT map (NFT_SYSTEMD_DNAT_MAP_NAME).
 *
 * The map key is { protocol, htobe16(local_port) } and the mapped data is
 * { remote IPv4 address, htobe16(remote_port) }. When adding with a
 * 'previous_remote' whose address differs from 'remote', the entry for the
 * previous address is deleted in the same batch before the new one is added.
 *
 * If sending the batch fails with -ENOENT the table is re-created once via
 * fw_nftables_recreate_table() and the batch is rebuilt and sent again.
 *
 * ctx:             context whose nfnl socket is used
 * add:             true to add the mapping, false to delete it
 * af:              must be AF_INET
 * protocol:        IPPROTO_TCP or IPPROTO_UDP
 * local_port:      local port, non-zero
 * remote:          destination address, must not be NULL
 * remote_port:     destination port, non-zero
 * previous_remote: optional address of a mapping to replace; must be NULL
 *                  when 'add' is false (asserted)
 *
 * Returns 0 on success, -EAFNOSUPPORT, -EPROTONOSUPPORT, -EOPNOTSUPP (NULL
 * remote) or -EINVAL (zero port) for rejected arguments, or the negative
 * error of the failing netlink operation. */
int fw_nftables_add_local_dnat(
                FirewallContext *ctx,
                bool add,
                int af,
                int protocol,
                uint16_t local_port,
                const union in_addr_union *remote,
                uint16_t remote_port,
                const union in_addr_union *previous_remote) {

        uint32_t data[2], key[2];
        sd_netlink_message *transaction[NFT_DNAT_MSGS] = {};
        bool retry = true;
        size_t tsize;
        int r;

        assert(add || !previous_remote);

        if (af != AF_INET)
                return -EAFNOSUPPORT;

        if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
                return -EPROTONOSUPPORT;

        if (local_port <= 0)
                return -EINVAL;

again:
        key[0] = protocol;
        key[1] = htobe16(local_port);

        if (!remote)
                return -EOPNOTSUPP;

        if (remote_port <= 0)
                return -EINVAL;

        data[1] = htobe16(remote_port);

        r = sd_nfnl_message_batch_begin(ctx->nfnl, &transaction[0]);
        if (r < 0)
                return r;

        tsize = 1;
        /* If a previous remote is set, remove its entry */
        if (add && previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
                data[0] = previous_remote->in.s_addr;

                r = nft_del_element(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_DNAT_MAP_NAME, key, sizeof(key), data, sizeof(data));
                if (r < 0)
                        goto out_unref;

                tsize++;
        }

        data[0] = remote->in.s_addr;

        assert(tsize < NFT_DNAT_MSGS);
        if (add)
                r = nft_add_element(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_DNAT_MAP_NAME, key, sizeof(key), data, sizeof(data));
        else
                r = nft_del_element(ctx->nfnl, &transaction[tsize], af, NFT_SYSTEMD_DNAT_MAP_NAME, key, sizeof(key), data, sizeof(data));
        /* A failure here must abort the batch: otherwise a slot that was never
         * filled would be counted and handed to the send and unref calls. */
        if (r < 0)
                goto out_unref;

        tsize++;
        assert(tsize < NFT_DNAT_MSGS);

        r = sd_nfnl_message_batch_end(ctx->nfnl, &transaction[tsize]);
        if (r < 0)
                goto out_unref;

        tsize++;
        assert(tsize <= NFT_DNAT_MSGS);

        r = nfnl_netlink_sendv(ctx->nfnl, transaction, tsize);
        if (retry && r == -ENOENT) {
                /* On success the helper has already released and cleared the
                 * messages, so the batch can be rebuilt from scratch. */
                int tmp = fw_nftables_recreate_table(ctx->nfnl, af, transaction, tsize);
                if (tmp == 0) {
                        retry = false;
                        goto again;
                }
        }

out_unref:
        while (tsize > 0)
                sd_netlink_message_unref(transaction[--tsize]);

        return r < 0 ? r : 0;
}

View File

@ -0,0 +1,59 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include "in-addr-util.h"
#include "sd-netlink.h"
/* Identifies which firewall implementation a FirewallContext uses. */
enum FirewallBackend {
/* No backend selected. fw_add_masquerade() and fw_add_local_dnat() run the
 * backend probe when they find this value in the context. */
FW_BACKEND_NONE,
#if HAVE_LIBIPTC
/* iptables backend; this enumerator only exists when built with libiptc. */
FW_BACKEND_IPTABLES,
#endif
/* nftables backend, driven through the context's nfnetlink socket. */
FW_BACKEND_NFTABLES,
};
/* State carried between firewall helper calls. */
struct FirewallContext {
/* Backend in use; stays FW_BACKEND_NONE until a probe has picked one. */
enum FirewallBackend firewall_backend;
/* nfnetlink socket stored by fw_nftables_init() and released by
 * fw_nftables_exit(). */
sd_netlink *nfnl;
};
/* nftables backend entry points. */

/* Opens an nfnetlink socket, runs the AF_INET table setup and, on success,
 * stores the socket in ctx->nfnl. Returns 0 on success, negative on error. */
int fw_nftables_init(FirewallContext *ctx);
/* Releases the socket reference held in ctx->nfnl. */
void fw_nftables_exit(FirewallContext *ctx);
/* Adds (add == true) or removes the given source network to/from the
 * masquerade set. Returns 0 on success, negative on error. */
int fw_nftables_add_masquerade(
FirewallContext *ctx,
bool add,
int af,
const union in_addr_union *source,
unsigned source_prefixlen);
/* Adds (add == true) or removes a protocol/local_port -> remote/remote_port
 * mapping in the DNAT map. When adding, previous_remote may name an older
 * destination whose mapping is removed in the same transaction.
 * Returns 0 on success, negative on error. */
int fw_nftables_add_local_dnat(
FirewallContext *ctx,
bool add,
int af,
int protocol,
uint16_t local_port,
const union in_addr_union *remote,
uint16_t remote_port,
const union in_addr_union *previous_remote);
/* iptables backend entry points, declared only when built with libiptc.
 * They take the same arguments as the nftables variants, minus the
 * FirewallContext. */
#if HAVE_LIBIPTC
int fw_iptables_add_masquerade(
bool add,
int af,
const union in_addr_union *source,
unsigned source_prefixlen);
int fw_iptables_add_local_dnat(
bool add,
int af,
int protocol,
uint16_t local_port,
const union in_addr_union *remote,
uint16_t remote_port,
const union in_addr_union *previous_remote);
#endif

View File

@ -1,156 +1,86 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/* Temporary work-around for broken glibc vs. linux kernel header definitions
* This is already fixed upstream, remove this when distributions have updated.
*/
#define _NET_IF_H 1
#include <arpa/inet.h>
#include <endian.h>
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <net/if.h>
#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif
#include <linux/if.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter/nf_nat.h>
#include <linux/netfilter/xt_addrtype.h>
#include <libiptc/libiptc.h>
#include "alloc-util.h"
#include "firewall-util.h"
#include "in-addr-util.h"
#include "macro.h"
#include "socket-util.h"
#include "firewall-util-private.h"
DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free);
/* Chooses the backend for a context.
 *
 * nftables is selected when fw_nftables_init() reports 0. Otherwise the
 * result is the iptables backend if built with libiptc, and
 * FW_BACKEND_NONE if not. */
static enum FirewallBackend firewall_backend_probe(FirewallContext *ctx) {
        enum FirewallBackend fallback;

#if HAVE_LIBIPTC
        fallback = FW_BACKEND_IPTABLES;
#else
        fallback = FW_BACKEND_NONE;
#endif

        return fw_nftables_init(ctx) == 0 ? FW_BACKEND_NFTABLES : fallback;
}
static int entry_fill_basics(
struct ipt_entry *entry,
int protocol,
const char *in_interface,
const union in_addr_union *source,
unsigned source_prefixlen,
const char *out_interface,
const union in_addr_union *destination,
unsigned destination_prefixlen) {
int fw_ctx_new(FirewallContext **ret) {
_cleanup_free_ FirewallContext *ctx = NULL;
assert(entry);
if (out_interface && !ifname_valid(out_interface))
return -EINVAL;
if (in_interface && !ifname_valid(in_interface))
return -EINVAL;
entry->ip.proto = protocol;
if (in_interface) {
size_t l;
l = strlen(in_interface);
assert(l < sizeof entry->ip.iniface);
assert(l < sizeof entry->ip.iniface_mask);
strcpy(entry->ip.iniface, in_interface);
memset(entry->ip.iniface_mask, 0xFF, l + 1);
}
if (source) {
entry->ip.src = source->in;
in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
}
if (out_interface) {
size_t l = strlen(out_interface);
assert(l < sizeof entry->ip.outiface);
assert(l < sizeof entry->ip.outiface_mask);
strcpy(entry->ip.outiface, out_interface);
memset(entry->ip.outiface_mask, 0xFF, l + 1);
}
if (destination) {
entry->ip.dst = destination->in;
in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
}
ctx = new0(FirewallContext, 1);
if (!ctx)
return -ENOMEM;
/* could probe here. However, this means that we will load
* iptable_nat or nf_tables, both will enable connection tracking.
*
* Alternative would be to probe here but only call
* fw_ctx_new when nspawn/networkd know they will call
* fw_add_masquerade/local_dnat later anyway.
*/
*ret = TAKE_PTR(ctx);
return 0;
}
/* Destroys a firewall context.
 *
 * A NULL context is accepted and yields NULL. For a context using the
 * nftables backend, fw_nftables_exit() is called first; the context is then
 * released with mfree(), whose result is returned. */
FirewallContext *fw_ctx_free(FirewallContext *ctx) {
        if (ctx) {
                bool uses_nft = ctx->firewall_backend == FW_BACKEND_NFTABLES;

                if (uses_nft)
                        fw_nftables_exit(ctx);

                return mfree(ctx);
        }

        return NULL;
}
int fw_add_masquerade(
FirewallContext **fw_ctx,
bool add,
int af,
const union in_addr_union *source,
unsigned source_prefixlen) {
FirewallContext *ctx;
int r;
static const xt_chainlabel chain = "POSTROUTING";
_cleanup_(iptc_freep) struct xtc_handle *h = NULL;
struct ipt_entry *entry, *mask;
struct ipt_entry_target *t;
size_t sz;
struct nf_nat_ipv4_multi_range_compat *mr;
int r, protocol = 0;
const char *out_interface = NULL;
const union in_addr_union *destination = NULL;
unsigned destination_prefixlen = 0;
if (af != AF_INET)
return -EOPNOTSUPP;
h = iptc_init("nat");
if (!h)
return -errno;
sz = XT_ALIGN(sizeof(struct ipt_entry)) +
XT_ALIGN(sizeof(struct ipt_entry_target)) +
XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
/* Put together the entry we want to add or remove */
entry = alloca0(sz);
entry->next_offset = sz;
entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry));
r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen);
if (r < 0)
return r;
/* Fill in target part */
t = ipt_get_target(entry);
t->u.target_size =
XT_ALIGN(sizeof(struct ipt_entry_target)) +
XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name));
mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
mr->rangesize = 1;
/* Create a search mask entry */
mask = alloca(sz);
memset(mask, 0xFF, sz);
if (add) {
if (iptc_check_entry(chain, entry, (unsigned char*) mask, h))
return 0;
if (errno != ENOENT) /* if other error than not existing yet, fail */
return -errno;
if (!iptc_insert_entry(chain, entry, 0, h))
return -errno;
} else {
if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h)) {
if (errno == ENOENT) /* if it's already gone, all is good! */
return 0;
return -errno;
}
if (!*fw_ctx) {
r = fw_ctx_new(fw_ctx);
if (r < 0)
return r;
}
if (!iptc_commit(h))
return -errno;
ctx = *fw_ctx;
if (ctx->firewall_backend == FW_BACKEND_NONE)
ctx->firewall_backend = firewall_backend_probe(ctx);
return 0;
switch (ctx->firewall_backend) {
case FW_BACKEND_NONE:
return -EOPNOTSUPP;
#if HAVE_LIBIPTC
case FW_BACKEND_IPTABLES:
return fw_iptables_add_masquerade(add, af, source, source_prefixlen);
#endif
case FW_BACKEND_NFTABLES:
return fw_nftables_add_masquerade(ctx, add, af, source, source_prefixlen);
}
return -EOPNOTSUPP;
}
int fw_add_local_dnat(
FirewallContext **fw_ctx,
bool add,
int af,
int protocol,
@ -158,189 +88,28 @@ int fw_add_local_dnat(
const union in_addr_union *remote,
uint16_t remote_port,
const union in_addr_union *previous_remote) {
FirewallContext *ctx;
static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT";
_cleanup_(iptc_freep) struct xtc_handle *h = NULL;
struct ipt_entry *entry, *mask;
struct ipt_entry_target *t;
struct ipt_entry_match *m;
struct xt_addrtype_info_v1 *at;
struct nf_nat_ipv4_multi_range_compat *mr;
size_t sz, msz;
int r;
const char *in_interface = NULL;
const union in_addr_union *source = NULL;
unsigned source_prefixlen = 0;
const union in_addr_union *destination = NULL;
unsigned destination_prefixlen = 0;
assert(add || !previous_remote);
if (af != AF_INET)
return -EOPNOTSUPP;
if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
return -EOPNOTSUPP;
if (local_port <= 0)
return -EINVAL;
if (remote_port <= 0)
return -EINVAL;
h = iptc_init("nat");
if (!h)
return -errno;
sz = XT_ALIGN(sizeof(struct ipt_entry)) +
XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
XT_ALIGN(sizeof(struct ipt_entry_target)) +
XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
if (protocol == IPPROTO_TCP)
msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_tcp));
else
msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_udp));
sz += msz;
/* Fill in basic part */
entry = alloca0(sz);
entry->next_offset = sz;
entry->target_offset =
XT_ALIGN(sizeof(struct ipt_entry)) +
XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
msz;
r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen);
if (r < 0)
return r;
/* Fill in first match */
m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)));
m->u.match_size = msz;
if (protocol == IPPROTO_TCP) {
struct xt_tcp *tcp;
strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name));
tcp = (struct xt_tcp*) m->data;
tcp->dpts[0] = tcp->dpts[1] = local_port;
tcp->spts[0] = 0;
tcp->spts[1] = 0xFFFF;
} else {
struct xt_udp *udp;
strncpy(m->u.user.name, "udp", sizeof(m->u.user.name));
udp = (struct xt_udp*) m->data;
udp->dpts[0] = udp->dpts[1] = local_port;
udp->spts[0] = 0;
udp->spts[1] = 0xFFFF;
if (!*fw_ctx) {
int ret = fw_ctx_new(fw_ctx);
if (ret < 0)
return ret;
}
/* Fill in second match */
m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz);
m->u.match_size =
XT_ALIGN(sizeof(struct ipt_entry_match)) +
XT_ALIGN(sizeof(struct xt_addrtype_info_v1));
strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name));
m->u.user.revision = 1;
at = (struct xt_addrtype_info_v1*) m->data;
at->dest = XT_ADDRTYPE_LOCAL;
ctx = *fw_ctx;
if (ctx->firewall_backend == FW_BACKEND_NONE)
ctx->firewall_backend = firewall_backend_probe(ctx);
/* Fill in target part */
t = ipt_get_target(entry);
t->u.target_size =
XT_ALIGN(sizeof(struct ipt_entry_target)) +
XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name));
mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
mr->rangesize = 1;
mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS;
mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
if (protocol == IPPROTO_TCP)
mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port);
else
mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port);
mask = alloca0(sz);
memset(mask, 0xFF, sz);
if (add) {
/* Add the PREROUTING rule, if it is missing so far */
if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -EINVAL;
if (!iptc_insert_entry(chain_pre, entry, 0, h))
return -errno;
}
/* If a previous remote is set, remove its entry */
if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
}
/* Add the OUTPUT rule, if it is missing so far */
if (!in_interface) {
/* Don't apply onto loopback addresses */
if (!destination) {
entry->ip.dst.s_addr = htobe32(0x7F000000);
entry->ip.dmsk.s_addr = htobe32(0xFF000000);
entry->ip.invflags = IPT_INV_DSTIP;
}
if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
if (!iptc_insert_entry(chain_output, entry, 0, h))
return -errno;
}
/* If a previous remote is set, remove its entry */
if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
}
}
} else {
if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
if (!in_interface) {
if (!destination) {
entry->ip.dst.s_addr = htobe32(0x7F000000);
entry->ip.dmsk.s_addr = htobe32(0xFF000000);
entry->ip.invflags = IPT_INV_DSTIP;
}
if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
if (errno != ENOENT)
return -errno;
}
}
switch (ctx->firewall_backend) {
case FW_BACKEND_NONE:
return -EOPNOTSUPP;
case FW_BACKEND_NFTABLES:
return fw_nftables_add_local_dnat(ctx, add, af, protocol, local_port, remote, remote_port, previous_remote);
#if HAVE_LIBIPTC
case FW_BACKEND_IPTABLES:
return fw_iptables_add_local_dnat(add, af, protocol, local_port, remote, remote_port, previous_remote);
#endif
}
if (!iptc_commit(h))
return -errno;
return 0;
return -EOPNOTSUPP;
}

View File

@ -6,15 +6,22 @@
#include "in-addr-util.h"
#if HAVE_LIBIPTC
typedef struct FirewallContext FirewallContext;
int fw_ctx_new(FirewallContext **ret);
FirewallContext *fw_ctx_free(FirewallContext *fw_ctx);
DEFINE_TRIVIAL_CLEANUP_FUNC(FirewallContext *, fw_ctx_free);
int fw_add_masquerade(
FirewallContext **fw_ctx,
bool add,
int af,
const union in_addr_union *source,
unsigned source_prefixlen);
int fw_add_local_dnat(
FirewallContext **fw_ctx,
bool add,
int af,
int protocol,
@ -22,26 +29,3 @@ int fw_add_local_dnat(
const union in_addr_union *remote,
uint16_t remote_port,
const union in_addr_union *previous_remote);
#else
static inline int fw_add_masquerade(
bool add,
int af,
const union in_addr_union *source,
unsigned source_prefixlen) {
return -EOPNOTSUPP;
}
static inline int fw_add_local_dnat(
bool add,
int af,
int protocol,
uint16_t local_port,
const union in_addr_union *remote,
uint16_t remote_port,
const union in_addr_union *previous_remote) {
return -EOPNOTSUPP;
}
#endif

View File

@ -101,7 +101,10 @@ shared_sources = files('''
fdset.h
fileio-label.c
fileio-label.h
firewall-util.c
firewall-util-nft.c
firewall-util.h
firewall-util-private.h
format-table.c
format-table.h
fsck-util.h
@ -295,7 +298,7 @@ if conf.get('HAVE_SECCOMP') == 1
endif
if conf.get('HAVE_LIBIPTC') == 1
shared_sources += files('firewall-util.c')
shared_sources += files('firewall-util-iptables.c')
endif
if conf.get('HAVE_KMOD') == 1

View File

@ -60,11 +60,13 @@ sd_netlink *sd_netlink_ref(sd_netlink *nl);
sd_netlink *sd_netlink_unref(sd_netlink *nl);
int sd_netlink_send(sd_netlink *nl, sd_netlink_message *message, uint32_t *serial);
int sd_netlink_sendv(sd_netlink *nl, sd_netlink_message *messages[], size_t msgcnt, uint32_t **ret_serial);
int sd_netlink_call_async(sd_netlink *nl, sd_netlink_slot **ret_slot, sd_netlink_message *message,
sd_netlink_message_handler_t callback, sd_netlink_destroy_t destoy_callback,
void *userdata, uint64_t usec, const char *description);
int sd_netlink_call(sd_netlink *nl, sd_netlink_message *message, uint64_t timeout,
sd_netlink_message **reply);
int sd_netlink_read(sd_netlink *nl, uint32_t serial, uint64_t timeout, sd_netlink_message **reply);
int sd_netlink_get_events(const sd_netlink *nl);
int sd_netlink_get_timeout(const sd_netlink *nl, uint64_t *timeout);
@ -219,6 +221,32 @@ int sd_rtnl_message_set_tclass_handle(sd_netlink_message *m, uint32_t handle);
int sd_rtnl_message_new_mdb(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int mdb_ifindex);
/* nfnl */
int sd_nfnl_socket_open(sd_netlink **nl);
int sd_nfnl_message_batch_begin(sd_netlink *nfnl, sd_netlink_message **ret);
int sd_nfnl_message_batch_end(sd_netlink *nfnl, sd_netlink_message **ret);
int sd_nfnl_nft_message_del_table(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table);
int sd_nfnl_nft_message_new_table(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table, uint16_t nl_flags);
int sd_nfnl_nft_message_new_basechain(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table, const char *chain,
const char *type, uint8_t hook, int prio);
int sd_nfnl_nft_message_new_rule(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table, const char *chain);
int sd_nfnl_nft_message_new_set(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table, const char *set_name,
uint32_t setid, uint32_t klen);
int sd_nfnl_nft_message_new_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table, const char *set_name);
int sd_nfnl_nft_message_del_setelems_begin(sd_netlink *nfnl, sd_netlink_message **ret,
int family, const char *table, const char *set_name);
int sd_nfnl_nft_message_add_setelem(sd_netlink_message *m,
uint32_t num,
const void *key, uint32_t klen,
const void *data, uint32_t dlen);
int sd_nfnl_nft_message_add_setelem_end(sd_netlink_message *m);
/* genl */
int sd_genl_socket_open(sd_netlink **nl);
int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **m);

View File

@ -559,8 +559,7 @@ tests += [
[['src/test/test-firewall-util.c'],
[libshared],
[],
'HAVE_LIBIPTC'],
[]],
[['src/test/test-netlink-manual.c'],
[],

View File

@ -7,34 +7,53 @@
#define MAKE_IN_ADDR_UNION(a,b,c,d) (union in_addr_union) { .in.s_addr = htobe32((uint32_t) (a) << 24 | (uint32_t) (b) << 16 | (uint32_t) (c) << 8 | (uint32_t) (d))}
int main(int argc, char *argv[]) {
_cleanup_(fw_ctx_freep) FirewallContext *ctx;
int r;
test_setup_logging(LOG_DEBUG);
uint8_t prefixlen = 32;
r = fw_add_masquerade(true, AF_INET, NULL, 0);
r = fw_ctx_new(&ctx);
if (r < 0)
return log_error_errno(r, "Failed to init firewall: %m");
r = fw_add_masquerade(&ctx, true, AF_INET, NULL, 0);
if (r == 0)
log_error("Expected failure: NULL source");
r = fw_add_masquerade(&ctx, true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,0), 0);
if (r == 0)
log_error("Expected failure: 0 prefixlen");
r = fw_add_masquerade(&ctx, true, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), prefixlen);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_masquerade(true, AF_INET, NULL, 0);
prefixlen = 28;
r = fw_add_masquerade(&ctx, true, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_masquerade(false, AF_INET, NULL, 0);
r = fw_add_masquerade(&ctx, false, AF_INET, &MAKE_IN_ADDR_UNION(10,0,2,0), prefixlen);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
r = fw_add_masquerade(&ctx, false, AF_INET, &MAKE_IN_ADDR_UNION(10,1,2,3), 32);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
r = fw_add_local_dnat(&ctx, true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, &MAKE_IN_ADDR_UNION(1, 2, 3, 4));
r = fw_add_local_dnat(&ctx, true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_local_dnat(false, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, NULL);
r = fw_add_local_dnat(&ctx, true, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, &MAKE_IN_ADDR_UNION(1, 2, 3, 4));
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");
r = fw_add_local_dnat(&ctx, false, AF_INET, IPPROTO_TCP, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, NULL);
if (r < 0)
log_error_errno(r, "Failed to modify firewall: %m");