Systemd/src/shared/firewall-util.c

116 lines
3.3 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include "alloc-util.h"
#include "firewall-util.h"
#include "firewall-util-private.h"
firewall-util: add nftables backend Idea is to use a static ruleset, added when the first attempt to add a masquerade or dnat rule is made. The alternative would be to add the ruleset when the init function is called. The disadvantage is that this enables connection tracking and NAT in the kernel (as the ruleset needs this to work), which comes with some overhead that might not be needed (no nspawn usage and no IPMasquerade option set). There is no additional dependency on the 'nft' userspace binary or other libraries. sd-netlinks nfnetlink backend is used to modify the nftables ruleset. The commit message/comments still use nft syntax since that is what users will see when they use the nft tool to list the ruleset. The added initial skeleton (added on first fw_add_masquerade/local_dnat call) looks like this: table ip io.systemd.nat { set masq_saddr { type ipv4_addr flags interval elements = { 192.168.59.160/28 } } map map_port_ipport { type inet_proto . inet_service : ipv4_addr . inet_service elements = { tcp . 2222 : 192.168.59.169 . 22 } } chain prerouting { type nat hook prerouting priority dstnat + 1; policy accept; fib daddr type local dnat ip addr . port to meta l4proto . th dport map @map_port_ipport } chain output { type nat hook output priority -99; policy accept; ip daddr != 127.0.0.0/8 oif "lo" dnat ip addr . port to meta l4proto . th dport map @map_port_ipport } chain postrouting { type nat hook postrouting priority srcnat + 1; policy accept; ip saddr @masq_saddr masquerade } } Next calls to fw_add_masquerade/add_local_dnat will then only add/delete the element/mapping to masq_saddr and map_port_ipport, i.e. the ruleset doesn't change -- only the set/map content does. 
Running test-firewall-util with this backend gives the following output on a parallel 'nft monitor': $ nft monitor add table ip io.systemd.nat add chain ip io.systemd.nat prerouting { type nat hook prerouting priority dstnat + 1; policy accept; } add chain ip io.systemd.nat output { type nat hook output priority -99; policy accept; } add chain ip io.systemd.nat postrouting { type nat hook postrouting priority srcnat + 1; policy accept; } add set ip io.systemd.nat masq_saddr { type ipv4_addr; flags interval; } add map ip io.systemd.nat map_port_ipport { type inet_proto . inet_service : ipv4_addr . inet_service; } add rule ip io.systemd.nat prerouting fib daddr type local dnat ip addr . port to meta l4proto . th dport map @map_port_ipport add rule ip io.systemd.nat output ip daddr != 127.0.0.0/8 fib daddr type local dnat ip addr . port to meta l4proto . th dport map @map_port_ipport add rule ip io.systemd.nat postrouting ip saddr @masq_saddr masquerade add element ip io.systemd.nat masq_saddr { 10.1.2.3 } add element ip io.systemd.nat masq_saddr { 10.0.2.0/28 } delete element ip io.systemd.nat masq_saddr { 10.0.2.0/28 } delete element ip io.systemd.nat masq_saddr { 10.1.2.3 } add element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.4 . 815 } delete element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.4 . 815 } add element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.5 . 815 } delete element ip io.systemd.nat map_port_ipport { tcp . 4711 : 1.2.3.5 . 815 } CTRL-C Things not implemented/supported: 1. Change monitoring. The kernel allows userspace to learn about changes made by other clients (using nfnetlink notifications). It would be possible to detect when e.g. someone removes the systemd nat table. This would need more work. It's also not clear how to react to external changes -- it doesn't seem like a good idea to just auto-undo everything. 2. 'set masq_saddr' doesn't handle overlaps. 
Example: fw_add_masquerade(true, AF_INET, "10.0.0.0" , 16); fw_add_masquerade(true, AF_INET, "10.0.0.0" , 8); /* fails */ With the iptables backend the second call works, as it adds an independent iptables rule. With the nftables backend, the range 10.0.0.0-10.255.255.255 clashes with the existing range of 10.0.0.0-10.0.255.255, so the 2nd add gets rejected by the kernel. This will generate an error message from networkd ("Could not enable IP masquerading: File exists"). To resolve this it would be necessary to keep track of the added elements and perform range merging when overlaps are detected. However, the add requests are done using the configured network on a device, so no overlaps should occur in normal setups. IPv6 support is added in an extra changeset. Fixes: #13307
2020-06-19 15:53:03 +02:00
/* Picks the firewall backend to use for the given context.
 *
 * nftables is tried first: if fw_nftables_init() returns exactly 0, FW_BACKEND_NFTABLES is
 * selected. Any other result falls back to FW_BACKEND_IPTABLES when built with HAVE_LIBIPTC,
 * and to FW_BACKEND_NONE otherwise. */
static enum FirewallBackend firewall_backend_probe(FirewallContext *ctx) {
#if HAVE_LIBIPTC
        const enum FirewallBackend fallback = FW_BACKEND_IPTABLES;
#else
        const enum FirewallBackend fallback = FW_BACKEND_NONE;
#endif
        int r;

        r = fw_nftables_init(ctx);

        return r == 0 ? FW_BACKEND_NFTABLES : fallback;
}
/* Allocates a fresh FirewallContext with new0() and hands ownership to the caller via *ret.
 *
 * No backend is probed here; fw_add_masquerade() and fw_add_local_dnat() probe on demand when
 * they find the context's backend still set to FW_BACKEND_NONE (presumably the zero value of
 * the enum -- confirm in firewall-util-private.h).
 *
 * Returns 0 on success, or -ENOMEM if the allocation fails, in which case *ret is not written. */
int fw_ctx_new(FirewallContext **ret) {
        _cleanup_free_ FirewallContext *ctx = NULL;

        ctx = new0(FirewallContext, 1);
        if (!ctx)
                return -ENOMEM;

        /* We could probe for a backend here. However, that would mean loading iptable_nat or
         * nf_tables, and both enable connection tracking.
         *
         * An alternative would be to probe here, but have nspawn/networkd call fw_ctx_new() only
         * once they know they will call fw_add_masquerade()/fw_add_local_dnat() later anyway. */

        *ret = TAKE_PTR(ctx);
        return 0;
}
/* Releases a FirewallContext.
 *
 * A NULL context is accepted and ignored. If the context ended up using the nftables backend,
 * fw_nftables_exit() is called first so that backend can release whatever it set up.
 *
 * Returns NULL for a NULL argument, otherwise the result of mfree() (presumably always NULL, so
 * callers can write "ctx = fw_ctx_free(ctx)" -- mfree() is defined outside this file). */
FirewallContext *fw_ctx_free(FirewallContext *ctx) {
        if (!ctx)
                return NULL;

        if (ctx->firewall_backend == FW_BACKEND_NFTABLES)
                fw_nftables_exit(ctx);

        return mfree(ctx);
}
/* Adds or removes a masquerading entry for the given source network.
 *
 * fw_ctx             in/out; if *fw_ctx is NULL a new context is allocated with fw_ctx_new() and
 *                    stored there, so the caller ends up owning it.
 * add                forwarded unchanged to the backend (presumably true adds the entry and
 *                    false removes it).
 * af                 address family, forwarded unchanged to the backend.
 * source             source address, forwarded unchanged to the backend.
 * source_prefixlen   prefix length of the source network, forwarded unchanged to the backend.
 *
 * The backend is probed lazily: as long as the context's backend is FW_BACKEND_NONE, every call
 * probes again.
 *
 * Returns the backend's result, a negative errno-style error from fw_ctx_new(), or -EOPNOTSUPP
 * when no usable backend is available.
 *
 * NOTE: according to the change description that introduced the nftables backend, its source
 * set does not handle overlapping ranges; e.g. adding 10.0.0.0/8 after 10.0.0.0/16 is rejected
 * by the kernel ("File exists"), whereas the iptables backend accepts it because it adds an
 * independent rule. */
int fw_add_masquerade(
                FirewallContext **fw_ctx,
                bool add,
                int af,
                const union in_addr_union *source,
                unsigned source_prefixlen) {

        FirewallContext *ctx;
        int r;

        if (!*fw_ctx) {
                r = fw_ctx_new(fw_ctx);
                if (r < 0)
                        return r;
        }

        ctx = *fw_ctx;

        /* No backend chosen yet (or none found on an earlier attempt): probe now. */
        if (ctx->firewall_backend == FW_BACKEND_NONE)
                ctx->firewall_backend = firewall_backend_probe(ctx);

        switch (ctx->firewall_backend) {
        case FW_BACKEND_NONE:
                return -EOPNOTSUPP;
#if HAVE_LIBIPTC
        case FW_BACKEND_IPTABLES:
                return fw_iptables_add_masquerade(add, af, source, source_prefixlen);
#endif
        case FW_BACKEND_NFTABLES:
                return fw_nftables_add_masquerade(ctx, add, af, source, source_prefixlen);
        }

        /* Reached only for a backend value that has no case above. */
        return -EOPNOTSUPP;
}
/* Adds or removes a local DNAT (port forwarding) entry.
 *
 * fw_ctx            in/out; if *fw_ctx is NULL a new context is allocated with fw_ctx_new() and
 *                   stored there, so the caller ends up owning it.
 * add               forwarded unchanged to the backend (presumably true adds the mapping and
 *                   false removes it).
 * af                address family, forwarded unchanged to the backend.
 * protocol          forwarded unchanged to the backend (presumably the L4 protocol, e.g. TCP).
 * local_port        forwarded unchanged to the backend (presumably the local port to redirect).
 * remote            forwarded unchanged to the backend (presumably the address to redirect to).
 * remote_port       forwarded unchanged to the backend (presumably the port to redirect to).
 * previous_remote   forwarded unchanged to the backend (presumably an earlier target that this
 *                   mapping replaces -- confirm against the backend implementations).
 *
 * The backend is probed lazily: as long as the context's backend is FW_BACKEND_NONE, every call
 * probes again.
 *
 * Returns the backend's result, a negative errno-style error from fw_ctx_new(), or -EOPNOTSUPP
 * when no usable backend is available. */
int fw_add_local_dnat(
                FirewallContext **fw_ctx,
                bool add,
                int af,
                int protocol,
                uint16_t local_port,
                const union in_addr_union *remote,
                uint16_t remote_port,
                const union in_addr_union *previous_remote) {

        FirewallContext *ctx;
        int r;

        if (!*fw_ctx) {
                r = fw_ctx_new(fw_ctx);
                if (r < 0)
                        return r;
        }

        ctx = *fw_ctx;

        /* No backend chosen yet (or none found on an earlier attempt): probe now. */
        if (ctx->firewall_backend == FW_BACKEND_NONE)
                ctx->firewall_backend = firewall_backend_probe(ctx);

        switch (ctx->firewall_backend) {
        case FW_BACKEND_NONE:
                return -EOPNOTSUPP;
#if HAVE_LIBIPTC
        case FW_BACKEND_IPTABLES:
                return fw_iptables_add_local_dnat(add, af, protocol, local_port, remote, remote_port, previous_remote);
#endif
        case FW_BACKEND_NFTABLES:
                return fw_nftables_add_local_dnat(ctx, add, af, protocol, local_port, remote, remote_port, previous_remote);
        }

        /* Reached only for a backend value that has no case above. */
        return -EOPNOTSUPP;
}