Compare commits

...

5 Commits

Author SHA1 Message Date
Félix Baylac-Jacqué 5158dcab0a
Some notes 2022-10-04 11:34:43 +02:00
Aurelien Jarno 7e8283170c x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations
The AVX2 strrchr and wcsrchr implementations use the 'blsmsk'
instruction, which belongs to the BMI1 CPU feature, and the 'shrx'
instruction, which belongs to the BMI2 CPU feature.

Fixes: df7e295d18 ("x86: Optimize {str|wcs}rchr-avx2")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
2022-10-03 23:46:11 +02:00
Aurelien Jarno 3c0c78afab x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation
The AVX2 memrchr implementation uses the 'shlxl' instruction, which
belongs to the BMI2 CPU feature and uses the 'lzcnt' instruction, which
belongs to the LZCNT CPU feature.

Fixes: af5306a735 ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
2022-10-03 23:46:11 +02:00
Aurelien Jarno e3e7fab7fe x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations
The AVX2 memchr, rawmemchr and wmemchr implementations use the 'bzhi'
and 'sarx' instructions, which belong to the BMI2 CPU feature.

Fixes: acfd088a19 ("x86: Optimize memchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
2022-10-03 23:46:11 +02:00
Aurelien Jarno f31a5a884e x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations
The AVX2 wcs(n)cmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
if the CPU doesn't support TZCNT, and produces the same result for
non-zero input.

Partially fixes: b77b06e0e2 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
2022-10-03 23:46:11 +02:00
8 changed files with 49 additions and 10 deletions

View File

@ -1163,6 +1163,7 @@ request from '%s' [%ld] not handled due to missing permission"),
dbg_log ("\t%s", serv2str[req->type]);
}
// HERE: request handler
/* Handle the request. */
switch (req->type)
{

View File

@ -60,6 +60,7 @@ const struct iovec hst_iov_disabled =
};
// HERE: interesting test cases
/* This is the standard reply in case we haven't found the dataset. */
static const hst_response_header notfound =
{
@ -269,6 +270,7 @@ cache_addhst (struct database_dyn *db, int fd, request_header *req,
cp = dataset->strdata;
// Payload????? Not sure :/
cp = mempcpy (cp, hst->h_name, h_name_len);
cp = mempcpy (cp, h_aliases_len, h_aliases_cnt * sizeof (uint32_t));
@ -424,6 +426,7 @@ lookup (int type, void *key, struct hostent *resultbufp, char *buffer,
}
// HERE: top-level hst imp
static time_t
addhstbyX (struct database_dyn *db, int fd, request_header *req,
void *key, uid_t uid, struct hashentry *he, struct datahead *dh)

View File

@ -114,6 +114,7 @@ typedef struct
} gr_response_header;
// HERE: header struct
/* Structure sent in reply to host query. Note that this struct is
sent also if the service is disabled or there is no record found. */
typedef struct

View File

@ -129,6 +129,7 @@ __nscd_get_nl_timestamp (void)
int __nss_have_localdomain attribute_hidden;
//HERE impl. Client!?!? It reads from the socket to **result
static int
nscd_gethst_r (const char *key, size_t keylen, request_type type,
struct hostent *resultbuf, char *buffer, size_t buflen,
@ -214,6 +215,7 @@ nscd_gethst_r (const char *key, size_t keylen, request_type type,
if (h_name == NULL)
{
// Read here
sock = __nscd_open_socket (key, keylen, type, &hst_resp,
sizeof (hst_resp));
if (sock == -1)

View File

@ -94,6 +94,7 @@ extern const char *hstrerror (int __err_num) __THROW;
#endif
// HERE: payload???
/* Description of data base entry for a single host. */
struct hostent
{

View File

@ -79,7 +79,9 @@
/* ISA level >= 3 guaranteed includes. */
#define AVX_X86_ISA_LEVEL 3
#define AVX2_X86_ISA_LEVEL 3
#define BMI1_X86_ISA_LEVEL 3
#define BMI2_X86_ISA_LEVEL 3
#define LZCNT_X86_ISA_LEVEL 3
#define MOVBE_X86_ISA_LEVEL 3
/* ISA level >= 2 guaranteed includes. */

View File

@ -36,7 +36,9 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI1)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{

View File

@ -69,10 +69,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex_rtm)
X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__memchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@ -207,13 +209,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, memrchr,
X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (LZCNT)),
__memrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (LZCNT)),
__memrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (LZCNT)
&& CPU_FEATURE_USABLE (RTM)),
__memrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@ -335,10 +343,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex_rtm)
X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__rawmemchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@ -568,13 +578,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strrchr,
X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)),
__strrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)),
__strrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@ -787,13 +803,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI1)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@ -810,10 +831,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wcscmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcscmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcscmp_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@ -830,10 +853,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsncmp_avx2_rtm)
/* ISA V2 wrapper for GENERIC implementation because the
@ -923,10 +948,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex_rtm)
X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
CPU_FEATURE_USABLE (AVX2),
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX2)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wmemchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2