network: Introduce SR-IOV

SR-IOV provides the ability to partition a single physical PCI
resource into virtual PCI functions which can then be injected
into a VM. In the case of network VFs, SR-IOV improves north-south
network performance (that is, traffic with endpoints outside the
host machine) by allowing traffic to bypass the host machine’s network stack.
This commit is contained in:
Susant Sahani 2020-06-21 11:17:34 +00:00 committed by Yu Watanabe
parent 5908ddd763
commit 518cd6b527
10 changed files with 583 additions and 0 deletions

View File

@ -237,6 +237,79 @@
</variablelist>
</refsect1>
<refsect1>
<title>[SR-IOV] Section Options</title>
<para>The <literal>[SR-IOV]</literal> section accepts the
following keys. Specify several <literal>[SR-IOV]</literal>
sections to configure several virtual functions. SR-IOV provides the ability to partition a single physical PCI resource
into virtual PCI functions which can then be injected into a VM. In the case of network VFs, SR-IOV improves
north-south network performance (that is, traffic with endpoints outside the host machine) by allowing traffic to
bypass the host machine's network stack.</para>
<variablelist class='network-directives'>
<varlistentry>
<term><varname>VirtualFunction=</varname></term>
<listitem>
<para>Specifies a Virtual Function (VF), lightweight PCIe function designed solely to move data
in and out. Takes an unsigned integer in the range 0..2147483646. This option is compulsory.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>VLANId=</varname></term>
<listitem>
<para>Specifies VLAN ID of the virtual function. Takes an unsigned integer in the range 1..4095.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>QualityOfService=</varname></term>
<listitem>
<para>Specifies quality of service of the virtual function. Takes an unsigned integer in the range 1..4294967294.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>MACSpoofCheck=</varname></term>
<listitem>
<para>Takes a boolean. Controls the MAC spoof checking. When unset, the kernel's default will be used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>QueryReceiveSideScaling=</varname></term>
<listitem>
<para>Takes a boolean. Toggle the ability of querying the receive side scaling (RSS)
configuration of the virtual function (VF). The VF RSS information like RSS hash key may be
considered sensitive on some devices where this information is shared between VF and the
physical function (PF). When unset, the kernel's default will be used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>Trust=</varname></term>
<listitem>
<para>Takes a boolean. Allows to set trust mode of the virtual function (VF). When set, VF
users can set a specific feature which may impact security and/or performance. When unset,
the kernel's default will be used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>LinkState=</varname></term>
<listitem>
<para>Allows to set the link state of the virtual function (VF). Takes a boolean or a
special value <literal>auto</literal>. Setting to <literal>auto</literal> means a
reflection of the physical function (PF) link state, <literal>yes</literal> lets the VF
communicate with other VFs on this host even if the PF link state is down,
<literal>no</literal> causes the hardware to drop any packets sent by the VF. When unset,
the kernel's default will be used.</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>[Network] Section Options</title>

View File

@ -105,6 +105,8 @@ sources = files('''
networkd-routing-policy-rule.h
networkd-speed-meter.c
networkd-speed-meter.h
networkd-sriov.c
networkd-sriov.h
networkd-util.c
networkd-util.h
networkd-wifi.c

View File

@ -3,6 +3,7 @@
#include <netinet/in.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_link.h>
#include <unistd.h>
#include "alloc-util.h"
@ -31,6 +32,7 @@
#include "networkd-manager.h"
#include "networkd-ndisc.h"
#include "networkd-neighbor.h"
#include "networkd-sriov.h"
#include "networkd-radv.h"
#include "networkd-routing-policy-rule.h"
#include "networkd-wifi.h"
@ -1127,6 +1129,9 @@ void link_check_ready(Link *link) {
if (!link->tc_configured)
return;
if (!link->sr_iov_configured)
return;
if (link_has_carrier(link) || !link->network->configure_without_carrier) {
if (link_ipv4ll_enabled(link, ADDRESS_FAMILY_IPV4) && !link->ipv4ll_address)
@ -2836,6 +2841,28 @@ static int link_configure_traffic_control(Link *link) {
return 0;
}
/* Issues one SR-IOV request per [SR-IOV] section of the link's network.
 *
 * The pending-reply counter and the "configured" flag are reset first. If no request ends up
 * pending afterwards, the link is flagged as SR-IOV-configured right away; otherwise the flag is
 * left for sr_iov_handler() to set when the last reply arrives.
 *
 * Returns 0 on success, or the negative error of the first sr_iov_configure() call that fails. */
static int link_configure_sr_iov(Link *link) {
        SRIOV *vf_config;
        Iterator it;

        /* Start from a clean slate: no replies pending, nothing configured yet. */
        link->sr_iov_messages = 0;
        link->sr_iov_configured = false;

        ORDERED_HASHMAP_FOREACH(vf_config, link->network->sr_iov_by_section, it) {
                int k;

                k = sr_iov_configure(link, vf_config);
                if (k < 0)
                        return k;
        }

        if (link->sr_iov_messages != 0) {
                log_link_debug(link, "Configuring SR-IOV");
                return 0;
        }

        /* Nothing was sent, so there is nothing to wait for. */
        link->sr_iov_configured = true;
        return 0;
}
static int link_configure(Link *link) {
int r;
@ -2847,6 +2874,10 @@ static int link_configure(Link *link) {
if (r < 0)
return r;
r = link_configure_sr_iov(link);
if (r < 0)
return r;
if (link->iftype == ARPHRD_CAN)
return link_configure_can(link);

View File

@ -82,6 +82,7 @@ typedef struct Link {
unsigned routing_policy_rule_messages;
unsigned routing_policy_rule_remove_messages;
unsigned tc_messages;
unsigned sr_iov_messages;
unsigned enslaving;
Set *addresses;
@ -118,6 +119,7 @@ typedef struct Link {
bool static_nexthops_configured:1;
bool routing_policy_rules_configured:1;
bool tc_configured:1;
bool sr_iov_configured:1;
bool setting_mtu:1;
bool setting_genmode:1;
bool ipv6_mtu_set:1;

View File

@ -15,6 +15,7 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
#include "networkd-ipv4ll.h"
#include "networkd-ndisc.h"
#include "networkd-network.h"
#include "networkd-sriov.h"
#include "qdisc.h"
#include "tclass.h"
#include "vlan-util.h"
@ -53,6 +54,13 @@ Link.Multicast, config_parse_tristate,
Link.AllMulticast, config_parse_tristate, 0, offsetof(Network, allmulticast)
Link.Unmanaged, config_parse_bool, 0, offsetof(Network, unmanaged)
Link.RequiredForOnline, config_parse_required_for_online, 0, 0
SR-IOV.VirtualFunction, config_parse_sr_iov_uint32, 0, 0
SR-IOV.VLANId, config_parse_sr_iov_uint32, 0, 0
SR-IOV.QualityOfService, config_parse_sr_iov_uint32, 0, 0
SR-IOV.MACSpoofCheck, config_parse_sr_iov_boolean, 0, 0
SR-IOV.QueryReceiveSideScaling, config_parse_sr_iov_boolean, 0, 0
SR-IOV.Trust, config_parse_sr_iov_boolean, 0, 0
SR-IOV.LinkState, config_parse_sr_iov_link_state, 0, 0
Network.Description, config_parse_string, 0, offsetof(Network, description)
Network.Bridge, config_parse_ifname, 0, offsetof(Network, bridge_name)
Network.Bond, config_parse_ifname, 0, offsetof(Network, bond_name)

View File

@ -16,6 +16,7 @@
#include "network-internal.h"
#include "networkd-manager.h"
#include "networkd-network.h"
#include "networkd-sriov.h"
#include "parse-util.h"
#include "path-lookup.h"
#include "set.h"
@ -158,6 +159,7 @@ int network_verify(Network *network) {
Route *route, *route_next;
FdbEntry *fdb, *fdb_next;
TrafficControl *tc;
SRIOV *sr_iov;
Iterator i;
assert(network);
@ -330,6 +332,10 @@ int network_verify(Network *network) {
if (traffic_control_section_verify(tc, &has_root, &has_clsact) < 0)
traffic_control_free(tc);
ORDERED_HASHMAP_FOREACH(sr_iov, network->sr_iov_by_section, i)
if (sr_iov_section_verify(sr_iov) < 0)
sr_iov_free(sr_iov);
return 0;
}
@ -484,6 +490,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi
filename, NETWORK_DIRS, dropin_dirname,
"Match\0"
"Link\0"
"SR-IOV\0"
"Network\0"
"Address\0"
"Neighbor\0"
@ -731,6 +738,7 @@ static Network *network_free(Network *network) {
hashmap_free(network->prefixes_by_section);
hashmap_free(network->route_prefixes_by_section);
hashmap_free(network->rules_by_section);
ordered_hashmap_free_with_destructor(network->sr_iov_by_section, sr_iov_free);
ordered_hashmap_free_with_destructor(network->tc_by_section, traffic_control_free);
if (network->manager &&

View File

@ -311,6 +311,7 @@ struct Network {
Hashmap *route_prefixes_by_section;
Hashmap *rules_by_section;
OrderedHashmap *tc_by_section;
OrderedHashmap *sr_iov_by_section;
/* All kinds of DNS configuration */
struct in_addr_data *dns;

View File

@ -0,0 +1,408 @@
/* SPDX-License-Identifier: LGPL-2.1+
* Copyright © 2020 VMware, Inc. */
#include "alloc-util.h"
#include "netlink-util.h"
#include "networkd-manager.h"
#include "networkd-sriov.h"
#include "parse-util.h"
#include "set.h"
#include "string-util.h"
/* Allocates an SRIOV object with every tristate/enum setting in its "unset" state and hands it
 * out through *ret.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails (in which case *ret is untouched). */
static int sr_iov_new(SRIOV **ret) {
        SRIOV *fresh = new(SRIOV, 1);

        if (!fresh)
                return -ENOMEM;

        /* Members not named below (section, network, vlan, qos) are zero-initialized by the
         * compound literal. */
        *fresh = (SRIOV) {
                .link_state = _SR_IOV_LINK_STATE_INVALID,
                .query_rss = -1,
                .trust = -1,
                .vf_spoof_check_setting = -1,
                .vf = (uint32_t) -1,
        };

        *ret = fresh;
        return 0;
}
/* Looks up, or creates and registers, the SRIOV object that belongs to the configuration section
 * identified by (filename, section_line).
 *
 * If network->sr_iov_by_section already holds an entry for that section it is returned as-is.
 * Otherwise a new object is allocated, bound to the network and the section key, inserted into
 * the map (which is allocated on first use), and returned through *ret.
 *
 * Returns 0 on success or a negative errno-style value from one of the helpers. */
static int sr_iov_new_static(Network *network, const char *filename, unsigned section_line, SRIOV **ret) {
        _cleanup_(network_config_section_freep) NetworkConfigSection *key = NULL;
        _cleanup_(sr_iov_freep) SRIOV *created = NULL;
        SRIOV *found;
        int r;

        assert(network);
        assert(ret);
        assert(filename);
        assert(section_line > 0);

        r = network_config_section_new(filename, section_line, &key);
        if (r < 0)
                return r;

        /* Reuse the object of a section we have already seen. */
        found = ordered_hashmap_get(network->sr_iov_by_section, key);
        if (found) {
                *ret = found;
                return 0;
        }

        r = sr_iov_new(&created);
        if (r < 0)
                return r;

        /* From here on the object owns the section key. */
        created->network = network;
        created->section = TAKE_PTR(key);

        r = ordered_hashmap_ensure_allocated(&network->sr_iov_by_section, &network_config_hash_ops);
        if (r < 0)
                return r;

        r = ordered_hashmap_put(network->sr_iov_by_section, created->section, created);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(created);
        return 0;
}
/* Destroys an SRIOV object: unregisters it from its network's sr_iov_by_section map (when it has
 * both a network and a section), frees its section key and finally the object itself.
 *
 * Accepts NULL, in which case NULL is returned and nothing happens. Otherwise returns whatever
 * mfree() returns, so the result can be assigned back to the caller's pointer. */
SRIOV *sr_iov_free(SRIOV *sr_iov) {
        Network *owner;
        NetworkConfigSection *key;

        if (!sr_iov)
                return NULL;

        owner = sr_iov->network;
        key = sr_iov->section;

        if (owner && key)
                ordered_hashmap_remove(owner->sr_iov_by_section, key);

        network_config_section_free(key);

        return mfree(sr_iov);
}
/* Netlink reply callback for the requests sent by sr_iov_configure().
 *
 * Decrements the link's count of pending SR-IOV replies. Replies for links in the FAILED or
 * LINGER state are otherwise ignored. An error reply other than -EEXIST puts the link into the
 * failed state; once the last pending reply has arrived without such an error, the link is
 * flagged as SR-IOV-configured and link_check_ready() is invoked.
 *
 * Always returns 1. */
static int sr_iov_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
        int err;

        assert(link);
        assert(link->sr_iov_messages > 0);

        link->sr_iov_messages--;

        if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
                return 1;

        err = sd_netlink_message_get_errno(m);
        if (err < 0 && err != -EEXIST) {
                log_link_message_error_errno(link, m, err, "Could not set up SR-IOV");
                link_enter_failed(link);
        } else if (link->sr_iov_messages == 0) {
                log_link_debug(link, "SR-IOV configured");
                link->sr_iov_configured = true;
                link_check_ready(link);
        }

        return 1;
}
/* Builds and asynchronously sends one RTM_SETLINK request that applies the settings of a single
 * [SR-IOV] section to the given link.
 *
 * The request carries an IFLA_VFINFO_LIST container holding one IFLA_VF_INFO container. Only
 * settings that were actually configured are appended: tristates that are still negative, a link
 * state that is still negative, and a VLAN of 0 are skipped. QualityOfService= is transmitted as
 * part of the VLAN attribute, so it only takes effect together with VLANId=.
 *
 * On success the link's pending SR-IOV message counter is incremented and a reference on the link
 * is taken for the in-flight request (presumably released via link_netlink_destroy_callback);
 * the reply is processed by sr_iov_handler().
 *
 * Returns 0 on success, or a negative errno-style value (already logged) on failure. */
int sr_iov_configure(Link *link, SRIOV *sr_iov) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
        int r;

        assert(link);
        assert(link->manager);
        assert(link->manager->rtnl);
        assert(link->ifindex > 0);
        /* sr_iov is dereferenced unconditionally below, guard it like the other arguments. */
        assert(sr_iov);

        log_link_debug(link, "Setting SR-IOV virtual function %"PRIu32, sr_iov->vf);

        r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
        if (r < 0)
                return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");

        r = sd_netlink_message_open_container(req, IFLA_VFINFO_LIST);
        if (r < 0)
                return log_link_error_errno(link, r, "Could not open IFLA_VFINFO_LIST container: %m");

        r = sd_netlink_message_open_container(req, IFLA_VF_INFO);
        if (r < 0)
                return log_link_error_errno(link, r, "Could not open IFLA_VF_INFO container: %m");

        /* MACSpoofCheck= */
        if (sr_iov->vf_spoof_check_setting >= 0) {
                struct ifla_vf_spoofchk ivs = {
                        .vf = sr_iov->vf,
                        .setting = sr_iov->vf_spoof_check_setting,
                };

                r = sd_netlink_message_append_data(req, IFLA_VF_SPOOFCHK, &ivs, sizeof(struct ifla_vf_spoofchk));
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not append IFLA_VF_SPOOFCHK: %m");
        }

        /* QueryReceiveSideScaling= */
        if (sr_iov->query_rss >= 0) {
                struct ifla_vf_rss_query_en ivrq = {
                        .vf = sr_iov->vf,
                        .setting = sr_iov->query_rss,
                };

                r = sd_netlink_message_append_data(req, IFLA_VF_RSS_QUERY_EN, &ivrq, sizeof(struct ifla_vf_rss_query_en));
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not append IFLA_VF_RSS_QUERY_EN: %m");
        }

        /* Trust= */
        if (sr_iov->trust >= 0) {
                struct ifla_vf_trust ivt = {
                        .vf = sr_iov->vf,
                        .setting = sr_iov->trust,
                };

                r = sd_netlink_message_append_data(req, IFLA_VF_TRUST, &ivt, sizeof(struct ifla_vf_trust));
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not append IFLA_VF_TRUST: %m");
        }

        /* LinkState= */
        if (sr_iov->link_state >= 0) {
                struct ifla_vf_link_state ivl = {
                        .vf = sr_iov->vf,
                        .link_state = sr_iov->link_state,
                };

                r = sd_netlink_message_append_data(req, IFLA_VF_LINK_STATE, &ivl, sizeof(struct ifla_vf_link_state));
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not append IFLA_VF_LINK_STATE: %m");
        }

        /* VLANId= (and QualityOfService=, which rides along in the same attribute) */
        if (sr_iov->vlan > 0) {
                /* Because of padding, first the buffer must be initialized with 0. */
                struct ifla_vf_vlan_info ivvi = {};

                ivvi.vf = sr_iov->vf;
                ivvi.vlan = sr_iov->vlan;
                ivvi.qos = sr_iov->qos;
                ivvi.vlan_proto = htobe16(ETH_P_8021Q);

                r = sd_netlink_message_open_container(req, IFLA_VF_VLAN_LIST);
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not open IFLA_VF_VLAN_LIST container: %m");

                r = sd_netlink_message_append_data(req, IFLA_VF_VLAN_INFO, &ivvi, sizeof(struct ifla_vf_vlan_info));
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not append IFLA_VF_VLAN_INFO: %m");

                r = sd_netlink_message_close_container(req);
                if (r < 0)
                        return log_link_error_errno(link, r, "Could not close IFLA_VF_VLAN_LIST container: %m");
        }

        r = sd_netlink_message_close_container(req);
        if (r < 0)
                return log_link_error_errno(link, r, "Could not close IFLA_VF_INFO container: %m");

        r = sd_netlink_message_close_container(req);
        if (r < 0)
                return log_link_error_errno(link, r, "Could not close IFLA_VFINFO_LIST container: %m");

        r = netlink_call_async(link->manager->rtnl, NULL, req, sr_iov_handler,
                               link_netlink_destroy_callback, link);
        if (r < 0)
                return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");

        /* The request is in flight now: keep the link alive and count the pending reply. */
        link_ref(link);
        link->sr_iov_messages++;

        return 0;
}
/* Checks whether a parsed [SR-IOV] section is usable.
 *
 * Returns 0 if it is. Returns -EINVAL if the section was flagged invalid during parsing, or if
 * the compulsory VirtualFunction= setting is still at its "unset" value; the latter case is
 * logged as a warning. */
int sr_iov_section_verify(SRIOV *sr_iov) {
        assert(sr_iov);

        if (section_is_invalid(sr_iov->section))
                return -EINVAL;

        /* Name the section exactly as the user has to write it in the .network file. */
        if (sr_iov->vf == (uint32_t) -1)
                return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
                                         "%s: [SR-IOV] section without VirtualFunction= field configured. "
                                         "Ignoring [SR-IOV] section from line %u.",
                                         sr_iov->section->filename, sr_iov->section->line);

        return 0;
}
/* Config parser for the unsigned integer settings of the [SR-IOV] section: VirtualFunction=,
 * VLANId= and QualityOfService=.
 *
 * An empty value resets the setting to its default (VirtualFunction= to "unset", the other two
 * to 0). VLANId= must be in 1..4095 and VirtualFunction= must be below INT_MAX;
 * QualityOfService= is stored without a range check.
 *
 * Returns 0 both on success and on a rejected value (which is logged), or a negative errno-style
 * value if the section object cannot be obtained. On the rejected-value paths the section object
 * is left to the sr_iov_free_or_set_invalidp cleanup handler instead of being released with
 * TAKE_PTR(). */
int config_parse_sr_iov_uint32(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
        Network *network = data;
        uint32_t k;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        r = sr_iov_new_static(network, filename, section_line, &sr_iov);
        if (r < 0)
                return r;

        if (isempty(rvalue)) {
                if (streq(lvalue, "VirtualFunction"))
                        sr_iov->vf = (uint32_t) -1;
                else if (streq(lvalue, "VLANId"))
                        sr_iov->vlan = 0;
                else if (streq(lvalue, "QualityOfService"))
                        sr_iov->qos = 0;
                else
                        assert_not_reached("Invalid lvalue");

                TAKE_PTR(sr_iov);
                return 0;
        }

        r = safe_atou32(rvalue, &k);
        if (r < 0) {
                log_syntax(unit, LOG_ERR, filename, line, r,
                           "Failed to parse SR-IOV '%s=', ignoring assignment: %s", lvalue, rvalue);
                return 0;
        }

        if (streq(lvalue, "VLANId")) {
                if (k == 0 || k > 4095) {
                        /* k is unsigned 32 bit: print it as such, so that large values are not
                         * shown as negative numbers. */
                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid SR-IOV VLANId: %"PRIu32, k);
                        return 0;
                }
                sr_iov->vlan = k;
        } else if (streq(lvalue, "VirtualFunction")) {
                if (k >= INT_MAX) {
                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid SR-IOV virtual function: %"PRIu32, k);
                        return 0;
                }
                sr_iov->vf = k;
        } else if (streq(lvalue, "QualityOfService"))
                sr_iov->qos = k;
        else
                assert_not_reached("Invalid lvalue");

        TAKE_PTR(sr_iov);
        return 0;
}
int config_parse_sr_iov_link_state(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
Network *network = data;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
r = sr_iov_new_static(network, filename, section_line, &sr_iov);
if (r < 0)
return r;
/* Unfortunately, SR_IOV_LINK_STATE_DISABLE is 2, not 0. So, we cannot use
* DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN() macro. */
if (isempty(rvalue)) {
sr_iov->link_state = _SR_IOV_LINK_STATE_INVALID;
TAKE_PTR(sr_iov);
return 0;
}
if (streq(rvalue, "auto")) {
sr_iov->link_state = SR_IOV_LINK_STATE_AUTO;
TAKE_PTR(sr_iov);
return 0;
}
r = parse_boolean(rvalue);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse SR-IOV '%s=', ignoring assignment: %s", lvalue, rvalue);
return 0;
}
sr_iov->link_state = r ? SR_IOV_LINK_STATE_ENABLE : SR_IOV_LINK_STATE_DISABLE;
TAKE_PTR(sr_iov);
return 0;
}
/* Config parser for the tristate settings of the [SR-IOV] section: MACSpoofCheck=,
 * QueryReceiveSideScaling= and Trust=.
 *
 * An empty value stores -1 ("unset"); anything else is parsed as a boolean and stored as 0 or 1.
 *
 * Returns 0 both on success and on an unparsable value (which is logged), or a negative
 * errno-style value if the section object cannot be obtained. */
int config_parse_sr_iov_boolean(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
        Network *network = data;
        int value, r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        r = sr_iov_new_static(network, filename, section_line, &sr_iov);
        if (r < 0)
                return r;

        /* Work out what to store first, then pick the destination by key name. */
        if (isempty(rvalue))
                value = -1;
        else {
                value = parse_boolean(rvalue);
                if (value < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, value, "Failed to parse '%s=', ignoring: %s", lvalue, rvalue);
                        return 0;
                }
        }

        if (streq(lvalue, "MACSpoofCheck"))
                sr_iov->vf_spoof_check_setting = value;
        else if (streq(lvalue, "QueryReceiveSideScaling"))
                sr_iov->query_rss = value;
        else if (streq(lvalue, "Trust"))
                sr_iov->trust = value;
        else
                assert_not_reached("Invalid lvalue");

        /* Success: keep the section object alive. */
        TAKE_PTR(sr_iov);
        return 0;
}

View File

@ -0,0 +1,42 @@
/* SPDX-License-Identifier: LGPL-2.1+
* Copyright © 2020 VMware, Inc. */
#pragma once
#include <linux/if_link.h>
#include "conf-parser.h"
#include "networkd-link.h"
#include "networkd-network.h"
#include "networkd-util.h"
/* Link state requested for a virtual function (LinkState= in the [SR-IOV] section).
 * The valid values are aliases of the kernel's IFLA_VF_LINK_STATE_* constants, so a value can be
 * copied into struct ifla_vf_link_state unchanged. */
typedef enum SRIOVLinkState {
/* LinkState=auto */
SR_IOV_LINK_STATE_AUTO = IFLA_VF_LINK_STATE_AUTO,
/* LinkState= set to a true boolean */
SR_IOV_LINK_STATE_ENABLE = IFLA_VF_LINK_STATE_ENABLE,
/* LinkState= set to a false boolean */
SR_IOV_LINK_STATE_DISABLE = IFLA_VF_LINK_STATE_DISABLE,
/* One past SR_IOV_LINK_STATE_DISABLE. NOTE(review): this is only "one past the largest valid
 * value" if DISABLE is the largest kernel constant - confirm against linux/if_link.h. */
_SR_IOV_LINK_STATE_MAX,
/* Marks the setting as unset; sr_iov_configure() skips negative link states. */
_SR_IOV_LINK_STATE_INVALID = -1,
} SRIOVLinkState;
/* Settings parsed from one [SR-IOV] section, i.e. the configuration of one virtual function. */
typedef struct SRIOV {
/* Section this object was parsed from; also its key in network->sr_iov_by_section. */
NetworkConfigSection *section;
/* Network whose sr_iov_by_section map holds this object. */
Network *network;
uint32_t vf; /* VirtualFunction=, 0 - 2147483646; (uint32_t) -1 means unset */
uint32_t vlan; /* VLANId=, 1 - 4095; 0 means unset, no VLAN attribute is sent then */
uint32_t qos; /* QualityOfService=; only sent together with a non-zero vlan */
/* The three settings below are tristates: negative means unset (nothing is sent), otherwise
 * the parsed boolean. */
int vf_spoof_check_setting; /* MACSpoofCheck= */
int query_rss; /* QueryReceiveSideScaling= */
int trust; /* Trust= */
/* LinkState=; _SR_IOV_LINK_STATE_INVALID means unset */
SRIOVLinkState link_state;
} SRIOV;
/* Unregisters the object from its network, frees it and its section key. Accepts NULL. */
SRIOV *sr_iov_free(SRIOV *sr_iov);
/* Sends an asynchronous RTM_SETLINK request applying one [SR-IOV] section to the link.
 * Returns 0 on success, a negative errno-style value on failure. */
int sr_iov_configure(Link *link, SRIOV *sr_iov);
/* Returns 0 if the section is usable, a negative errno-style value if it was flagged invalid or
 * lacks VirtualFunction=. */
int sr_iov_section_verify(SRIOV *sr_iov);
/* NOTE(review): presumably generates the sr_iov_freep and sr_iov_free_or_set_invalidp cleanup
 * helpers used in networkd-sriov.c - the macro definition is not visible here, confirm. */
DEFINE_NETWORK_SECTION_FUNCTIONS(SRIOV, sr_iov_free);
/* Parser for VirtualFunction=, VLANId= and QualityOfService=. */
CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_uint32);
/* Parser for MACSpoofCheck=, QueryReceiveSideScaling= and Trust=. */
CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_boolean);
/* Parser for LinkState=. */
CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_link_state);

View File

@ -38,6 +38,14 @@ MTUBytes=
Multicast=
MACAddress=
Group=
[SR-IOV]
VirtualFunction=
MACSpoofCheck=
VLANId=
QualityOfService=
QueryReceiveSideScaling=
Trust=
LinkState=
[BridgeFDB]
VLANId=
MACAddress=