Skip to content

Commit 17ffb65

Browse files
author
Jeff Hammond
authored
detect AVX-512 FMA count (#125)
* add Ice Lake Server and Sapphire Rapids models

  The information contained in this commit was obtained from "Intel® Architecture Instruction Set Extensions and Future Features Programming Reference" document 319433-040 from https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

  Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>

* Tiger Lake; Ice Lake NNP-I; SPR string

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* second FMA features - incomplete and wrong

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* oops: use T/F not 2/1

  Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>

* implement SKX lookup

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* add Intel copyright

* cleanup AVX512 second FMA code

  1) remove debug stuff
  2) remove ICX - will add details when available

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* fix CPX detection

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* remove elses

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* remove curly braces from single-line conditional bodies

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* apply clang-format

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

Fixes #120
1 parent 76dafc7 commit 17ffb65

File tree

2 files changed

+63
-14
lines changed

2 files changed

+63
-14
lines changed

include/cpuinfo_x86.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,11 @@ typedef struct {
6969
int avx512bitalg : 1;
7070
int avx512vpopcntdq : 1;
7171
int avx512_4vnniw : 1;
72+
int avx512_4vbmi2 : 1;
73+
int avx512_second_fma : 1;
7274
int avx512_4fmaps : 1;
7375
int avx512_bf16 : 1;
7476
int avx512_vp2intersect : 1;
75-
7677
int amx_bf16 : 1;
7778
int amx_tile : 1;
7879
int amx_int8 : 1;
@@ -194,6 +195,8 @@ typedef enum {
194195
X86_AVX512BITALG,
195196
X86_AVX512VPOPCNTDQ,
196197
X86_AVX512_4VNNIW,
198+
X86_AVX512_4VBMI2,
199+
X86_AVX512_SECOND_FMA,
197200
X86_AVX512_4FMAPS,
198201
X86_AVX512_BF16,
199202
X86_AVX512_VP2INTERSECT,

src/cpuinfo_x86.c

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@
1414
// limitations under the License.
1515

1616
#include "cpuinfo_x86.h"
17-
#include "internal/bit_utils.h"
18-
#include "internal/cpuid_x86.h"
1917

2018
#include <stdbool.h>
2119
#include <string.h>
2220

21+
#include "internal/bit_utils.h"
22+
#include "internal/cpuid_x86.h"
23+
2324
#if !defined(CPU_FEATURES_ARCH_X86)
2425
#error "Cannot compile cpuinfo_x86 on a non x86 platform."
2526
#endif
@@ -125,6 +126,35 @@ static bool HasTmmOsXSave(uint32_t xcr0_eax) {
125126
MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA);
126127
}
127128

129+
// Decides whether the CPU is reported as having a second AVX-512 FMA unit.
//
// The answer is a lookup keyed on the CPU `model` number. For model 0x55 the
// brand string is additionally inspected at fixed character offsets to tell
// individual product lines apart. Any model that is not explicitly listed is
// reported as having the second unit.
//
// Returns true when a second FMA unit is assumed present, false otherwise.
static bool HasSecondFMA(uint32_t model) {
  switch (model) {
    case 0x66:  // Cannon Lake client
    case 0x7d:  // Ice Lake client
    case 0x7e:  // Ice Lake client
      return false;
    case 0x55:  // Skylake server: needs the brand-string lookup below.
      break;
    default:
      // This is the right default...
      return true;
  }

  // Zero-initialised so every fixed-offset read below sees a defined value
  // even when the brand string is shorter than the probed offsets.
  char brand[49] = {0};
  FillX86BrandString(brand);

  // Only brand strings with 'X' at offset 9 (Xeon) are refined further.
  if (brand[9] != 'X') return true;

  // Offset 17 holds the first letter of the product line.
  const char product_line = brand[17];
  switch (product_line) {
    case 'S':  // Silver
    case 'B':  // Bronze
      return false;
    case 'G':  // Gold
      // Gold 5xxx: only Gold 53__ and parts whose last two digits are "22"
      // are reported with the second unit. Other Gold parts keep the default.
      if (brand[22] == '5') {
        const bool is_53xx = (brand[23] == '3');
        const bool ends_in_22 = (brand[24] == '2' && brand[25] == '2');
        return is_53xx || ends_in_22;
      }
      return true;
    case 'W':  // Xeon W 210x has no second unit.
      return brand[21] != '0';
    case 'D':  // Xeon D 21xx has no second unit.
      return !(brand[19] == '2' && brand[20] == '1');
    default:
      return true;
  }
}
157+
128158
static void SetVendor(const Leaf leaf, char* const vendor) {
129159
*(uint32_t*)(vendor) = leaf.ebx;
130160
*(uint32_t*)(vendor + 4) = leaf.edx;
@@ -1059,7 +1089,8 @@ typedef struct {
10591089
} OsSupport;
10601090

10611091
// Reference https://en.wikipedia.org/wiki/CPUID.
1062-
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) {
1092+
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info,
1093+
OsSupport* os_support) {
10631094
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
10641095
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
10651096
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
@@ -1141,6 +1172,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
11411172
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
11421173
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
11431174
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
1175+
features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
1176+
features->avx512_second_fma = HasSecondFMA(info->model);
11441177
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
11451178
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
11461179
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
@@ -1153,7 +1186,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
11531186
}
11541187
}
11551188

1156-
// Reference https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
1189+
// Reference
1190+
// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
11571191
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) {
11581192
const Leaf leaf_80000000 = CpuId(0x80000000);
11591193
const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax;
@@ -1265,11 +1299,11 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
12651299
case CPUID(0x06, 0x66):
12661300
// https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture)
12671301
return INTEL_CNL;
1268-
case CPUID(0x06, 0x7D): // client
1269-
case CPUID(0x06, 0x7E): // client
1270-
case CPUID(0x06, 0x9D): // NNP-I
1271-
case CPUID(0x06, 0x6A): // server
1272-
case CPUID(0x06, 0x6C): // server
1302+
case CPUID(0x06, 0x7D): // client
1303+
case CPUID(0x06, 0x7E): // client
1304+
case CPUID(0x06, 0x9D): // NNP-I
1305+
case CPUID(0x06, 0x6A): // server
1306+
case CPUID(0x06, 0x6C): // server
12731307
// https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor)
12741308
return INTEL_ICL;
12751309
case CPUID(0x06, 0x8C):
@@ -1281,10 +1315,14 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
12811315
return INTEL_SPR;
12821316
case CPUID(0x06, 0x8E):
12831317
switch (info->stepping) {
1284-
case 9: return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
1285-
case 10: return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
1286-
case 11: return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
1287-
default: return X86_UNKNOWN;
1318+
case 9:
1319+
return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
1320+
case 10:
1321+
return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
1322+
case 11:
1323+
return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
1324+
default:
1325+
return X86_UNKNOWN;
12881326
}
12891327
case CPUID(0x06, 0x9E):
12901328
if (info->stepping > 9) {
@@ -1427,6 +1465,10 @@ int GetX86FeaturesEnumValue(const X86Features* features,
14271465
return features->avx512vpopcntdq;
14281466
case X86_AVX512_4VNNIW:
14291467
return features->avx512_4vnniw;
1468+
case X86_AVX512_4VBMI2:
1469+
return features->avx512_4vbmi2;
1470+
case X86_AVX512_SECOND_FMA:
1471+
return features->avx512_second_fma;
14301472
case X86_AVX512_4FMAPS:
14311473
return features->avx512_4fmaps;
14321474
case X86_AVX512_BF16:
@@ -1551,6 +1593,10 @@ const char* GetX86FeaturesEnumName(X86FeaturesEnum value) {
15511593
return "avx512vpopcntdq";
15521594
case X86_AVX512_4VNNIW:
15531595
return "avx512_4vnniw";
1596+
case X86_AVX512_4VBMI2:
1597+
return "avx512_4vbmi2";
1598+
case X86_AVX512_SECOND_FMA:
1599+
return "avx512_second_fma";
15541600
case X86_AVX512_4FMAPS:
15551601
return "avx512_4fmaps";
15561602
case X86_AVX512_BF16:

0 commit comments

Comments
 (0)