diff --git a/src/common/cpu.h b/src/common/cpu.h index 265a411f..ed08fc75 100644 --- a/src/common/cpu.h +++ b/src/common/cpu.h @@ -35,6 +35,12 @@ enum { HV_VENDOR_INVALID }; +enum { + CORE_TYPE_EFFICIENCY, + CORE_TYPE_PERFORMANCE, + CORE_TYPE_UNKNOWN +}; + #define UNKNOWN_DATA -1 #define CPU_NAME_MAX_LENGTH 64 @@ -78,6 +84,7 @@ struct topology { uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled) #ifdef ARCH_X86 uint32_t smt_available; // Number of SMT that is currently enabled + int32_t total_cores_module; // Total cores in the current module (only makes sense in hybrid archs, like ADL) struct apic* apic; #endif #endif @@ -131,6 +138,10 @@ struct cpuInfo { uint32_t maxExtendedLevels; // Topology Extensions (AMD only) bool topology_extensions; + // Hybrid Flag (Intel only) + bool hybrid_flag; + // Core Type (P/E) + uint32_t core_type; #elif ARCH_PPC uint32_t pvr; #elif ARCH_ARM @@ -140,11 +151,18 @@ struct cpuInfo { #ifdef ARCH_ARM struct system_on_chip* soc; +#endif + +#if defined(ARCH_X86) || defined(ARCH_ARM) // If SoC contains more than one CPU and they // are different, the others will be stored in // the next_cpu field - struct cpuInfo* next_cpu; + struct cpuInfo* next_cpu; uint8_t num_cpus; +#ifdef ARCH_X86 + // The index of the first core in the module + uint32_t first_core_id; +#endif #endif }; diff --git a/src/common/printer.c b/src/common/printer.c index bb0c6724..b0b8a0da 100644 --- a/src/common/printer.c +++ b/src/common/printer.c @@ -44,6 +44,8 @@ enum { ATTRIBUTE_NAME, #elif ARCH_ARM ATTRIBUTE_SOC, +#endif +#if defined(ARCH_X86) || defined(ARCH_ARM) ATTRIBUTE_CPU_NUM, #endif ATTRIBUTE_HYPERVISOR, @@ -75,6 +77,8 @@ static const char* ATTRIBUTE_FIELDS [] = { "Part Number:", #elif ARCH_ARM "SoC:", +#endif +#if defined(ARCH_X86) || defined(ARCH_ARM) "", #endif "Hypervisor:", @@ -106,6 +110,8 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = { "P/N:", #elif ARCH_ARM "SoC:", +#endif +#if defined(ARCH_X86) 
|| defined(ARCH_ARM) "", #endif "Hypervisor:", @@ -424,11 +430,12 @@ uint32_t longest_field_length(struct ascii* art, int la) { } #if defined(ARCH_X86) || defined(ARCH_PPC) -void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields) { +void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields, bool hybrid_architecture) { struct ascii_logo* logo = art->art; int attr_to_print = 0; int attr_type; char* attr_value; + int32_t beg_space; int32_t space_right; int32_t space_up = ((int)logo->height - (int)art->n_attributes_set)/2; int32_t space_down = (int)logo->height - (int)art->n_attributes_set - (int)space_up; @@ -439,6 +446,7 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch lbuf->buf = emalloc(sizeof(char) * LINE_BUFFER_SIZE); lbuf->pos = 0; lbuf->chars = 0; + bool add_space = false; printf("\n"); for(int32_t n=0; n < iters; n++) { @@ -473,9 +481,24 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch attr_value = art->attributes[attr_to_print]->value; attr_to_print++; - space_right = 1 + (la - strlen(attribute_fields[attr_type])); - printOut(lbuf, strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value), - "%s%s%s%*s%s%s%s", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset); + if(attr_type == ATTRIBUTE_L3) { + add_space = false; + } + if(attr_type == ATTRIBUTE_CPU_NUM) { + printOut(lbuf, strlen(attr_value), "%s%s%s", logo->color_text[0], attr_value, art->reset); + add_space = true; + } + else { + beg_space = 0; + space_right = 2 + 1 + (la - strlen(attribute_fields[attr_type])); + if(hybrid_architecture && add_space) { + beg_space = 2; + space_right -= 2; + } + + printOut(lbuf, beg_space + strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value), + "%*s%s%s%s%*s%s%s%s", beg_space, "", logo->color_text[0], 
attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset); + } } printOutLine(lbuf, art, termw); printf("\n"); @@ -501,57 +524,71 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct art->new_intel_logo = choose_new_intel_logo(cpu); - // Step 1. Retrieve attributes (if some structures are NULL, like topo - // or cache, do not try to retrieve them) uint32_t socket_num = 1; char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets; l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL; - char* uarch = get_str_uarch(cpu); - char* manufacturing_process = get_str_process(cpu); - char* max_frequency = get_str_freq(cpu->freq); char* cpu_name = get_str_cpu_name(cpu, fcpuname); - char* avx = get_str_avx(cpu); - char* fma = get_str_fma(cpu); + char* uarch = get_str_uarch(cpu); char* pp = get_str_peak_performance(cpu->peak_performance); - - if(cpu->topo != NULL) { - sockets = get_str_sockets(cpu->topo); - n_cores = get_str_topology(cpu, cpu->topo, false); - n_cores_dual = get_str_topology(cpu, cpu->topo, true); - } + char* manufacturing_process = get_str_process(cpu); + bool hybrid_architecture = cpu->next_cpu != NULL; if(cpu->cach != NULL) { - l1i = get_str_l1i(cpu->cach); - l1d = get_str_l1d(cpu->cach); - l2 = get_str_l2(cpu->cach); l3 = get_str_l3(cpu->cach); } - // Step 2. 
Set attributes setAttribute(art, ATTRIBUTE_NAME, cpu_name); if(cpu->hv->present) { setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name); } setAttribute(art, ATTRIBUTE_UARCH, uarch); setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); - setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); - if(cpu->topo != NULL) { - socket_num = get_nsockets(cpu->topo); - if (socket_num > 1) { - setAttribute(art, ATTRIBUTE_SOCKETS, sockets); - setAttribute(art, ATTRIBUTE_NCORES, n_cores); - setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual); + + struct cpuInfo* ptr = cpu; + for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { + char* max_frequency = get_str_freq(ptr->freq); + char* avx = get_str_avx(ptr); + char* fma = get_str_fma(ptr); + char* cpu_num = emalloc(sizeof(char) * 9); + + if(ptr->topo != NULL) { + sockets = get_str_sockets(ptr->topo); + n_cores = get_str_topology(ptr, ptr->topo, false); + n_cores_dual = get_str_topology(ptr, ptr->topo, true); } - else { - setAttribute(art, ATTRIBUTE_NCORES, n_cores); + + if(ptr->cach != NULL) { + l1i = get_str_l1i(ptr->cach); + l1d = get_str_l1d(ptr->cach); + l2 = get_str_l2(ptr->cach); + } + + if(hybrid_architecture) { + if(ptr->core_type == CORE_TYPE_EFFICIENCY) sprintf(cpu_num, "E-cores:"); + else if(ptr->core_type == CORE_TYPE_PERFORMANCE) sprintf(cpu_num, "P-cores:"); + else printBug("Found invalid core type!\n"); + + setAttribute(art, ATTRIBUTE_CPU_NUM, cpu_num); } + setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); + if(ptr->topo != NULL) { + socket_num = get_nsockets(ptr->topo); + if (socket_num > 1) { + setAttribute(art, ATTRIBUTE_SOCKETS, sockets); + setAttribute(art, ATTRIBUTE_NCORES, n_cores); + setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual); + } + else { + setAttribute(art, ATTRIBUTE_NCORES, n_cores); + } + } + setAttribute(art, ATTRIBUTE_AVX, avx); + setAttribute(art, ATTRIBUTE_FMA, fma); + if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i); + if(l1d != NULL) 
setAttribute(art, ATTRIBUTE_L1d, l1d); + if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2); } - setAttribute(art, ATTRIBUTE_AVX, avx); - setAttribute(art, ATTRIBUTE_FMA, fma); - if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i); - if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d); - if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2); if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3); setAttribute(art, ATTRIBUTE_PEAK, pp); @@ -568,15 +605,12 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct longest_attribute = longest_attribute_length(art, attribute_fields); } - print_ascii_generic(art, longest_attribute, term->w, attribute_fields); + print_ascii_generic(art, longest_attribute, term->w, attribute_fields, hybrid_architecture); free(manufacturing_process); - free(max_frequency); free(sockets); free(n_cores); free(n_cores_dual); - free(avx); - free(fma); free(l1i); free(l1d); free(l2); diff --git a/src/x86/apic.c b/src/x86/apic.c index 3e7a6d85..2d9ecf72 100644 --- a/src/x86/apic.c +++ b/src/x86/apic.c @@ -102,6 +102,59 @@ bool bind_to_cpu(int cpu_id) { } #endif +int get_total_cores_module(int total_cores, int module) { + int total_modules = 2; + int32_t current_module_idx = -1; + bool end = false; + int32_t* core_types = emalloc(sizeof(uint32_t) * total_modules); + for(int i=0; i < total_modules; i++) core_types[i] = -1; + int cores_in_module = 0; + int i = 0; + + // Get the original mask to restore it later + cpu_set_t original_mask; + if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) { + printWarn("sched_getaffinity: %s", strerror(errno)); + return -1; /* error sentinel, same as the bind_to_cpu failure path; false (0) would read as 0 cores */ + } + + while(!end) { + if(!bind_to_cpu(i)) { + return -1; + } + uint32_t eax = 0x0000001A; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + int32_t core_type = eax >> 24 & 0xFF; + bool found = false; + + for(int j=0; j < total_modules && !found; j++) { + if(core_types[j] == core_type) found = true; + } + 
if(!found) { + current_module_idx++; + core_types[current_module_idx] = core_type; + } + if(current_module_idx == module) { + cores_in_module++; + if(i+1 == total_cores) end = true; + } + else if(cores_in_module > 0) end = true; + i++; + } + + // Reset the original affinity + if (sched_setaffinity (0, sizeof(original_mask), &original_mask) == -1) { + printWarn("sched_setaffinity: %s", strerror(errno)); + return -1; /* error sentinel; this function returns a core count, not a bool */ + } + + free(core_types); + return cores_in_module; +} + bool fill_topo_masks_apic(struct topology* topo) { uint32_t eax = 0x00000001; uint32_t ebx = 0; @@ -197,14 +250,14 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) { uint32_t max = 0; for(int i=0; i < topo->cach->max_cache_level; i++) { - for(int j=0; j < topo->total_cores; j++) { + for(int j=0; j < topo->total_cores_module; j++) { if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i]; } } max++; - if(max > (uint32_t) topo->total_cores) return max; - return topo->total_cores; + if(max > (uint32_t) topo->total_cores_module) return max; + return topo->total_cores_module; } bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) { @@ -219,18 +272,18 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac memset(apic_id, 0, sizeof(uint32_t) * size); // System topology - for(int i=0; i < topo->total_cores; i++) { + for(int i=0; i < topo->total_cores_module; i++) { sockets[apic_pkg[i]] = 1; smt[apic_smt[i]] = 1; } - for(int i=0; i < topo->total_cores; i++) { + for(int i=0; i < topo->total_cores_module; i++) { if(sockets[i] != 0) topo->sockets++; if(smt[i] != 0) topo->smt_available++; } - topo->logical_cores = topo->total_cores / topo->sockets; + topo->logical_cores = topo->total_cores_module / topo->sockets; topo->physical_cores = topo->logical_cores / topo->smt_available; // Cache topology @@ -238,7 +291,7 @@ bool 
build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac num_caches = 0; memset(apic_id, 0, sizeof(uint32_t) * size); - for(int c=0; c < topo->total_cores; c++) { + for(int c=0; c < topo->total_cores_module; c++) { apic_id[cache_id_apic[c][i]]++; } for(uint32_t c=0; c < size; c++) { @@ -297,7 +350,7 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) { } } -bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) { +bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) { #ifdef __APPLE__ // macOS extremely dirty approach... printf("cpufetch is computing APIC IDs, please wait...\n"); @@ -322,12 +375,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) { } #endif - for(int i=0; i < n; i++) { + for(int i=first_core; i < first_core+n; i++) { if(!bind_to_cpu(i)) { printErr("Failed binding the process to CPU %d", i); return false; } - apic_ids[i] = get_apic_id(x2apic_id); + apic_ids[i-first_core] = get_apic_id(x2apic_id); } #ifdef __linux__ @@ -344,12 +397,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) { bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { uint32_t apic_id; - uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores); - uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores); - uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores); - uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores); - uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores); - uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores); + uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module); + uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module); + uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores_module); + uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores_module); + uint32_t** cache_smt_id_apic = 
emalloc(sizeof(uint32_t*) * topo->total_cores_module); + uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module); bool x2apic_id; if(cpu->maxLevels >= 0x0000000B) { @@ -367,7 +420,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { x2apic_id = false; } - for(int i=0; i < topo->total_cores; i++) { + for(int i=0; i < topo->total_cores_module; i++) { cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level)); cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level)); } @@ -385,10 +438,10 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { get_cache_topology_from_apic(topo); - if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id)) + if(!fill_apic_ids(apic_ids, cpu->first_core_id, topo->total_cores_module, x2apic_id)) return false; - for(int i=0; i < topo->total_cores; i++) { + for(int i=0; i < topo->total_cores_module; i++) { apic_id = apic_ids[i]; apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift; @@ -404,20 +457,19 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { /* DEBUG for(int i=0; i < topo->cach->max_cache_level; i++) { printf("[CACH %1d]", i); - for(int j=0; j < topo->total_cores; j++) + for(int j=0; j < topo->total_cores_module; j++) printf("[%03d]", cache_id_apic[j][i]); printf("\n"); } - for(int i=0; i < topo->total_cores; i++) + for(int i=0; i < topo->total_cores_module; i++) printf("[%2d] 0x%.8X\n", i, apic_pkg[i]); printf("\n"); - for(int i=0; i < topo->total_cores; i++) + for(int i=0; i < topo->total_cores_module; i++) printf("[%2d] 0x%.8X\n", i, apic_core[i]); printf("\n"); - for(int i=0; i < topo->total_cores; i++) + for(int i=0; i < topo->total_cores_module; i++) printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/ - bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo); // Assumption: If we cant get smt_available, we assume it is equal to smt_supported... 
@@ -429,7 +481,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { free(apic_pkg); free(apic_core); free(apic_smt); - for(int i=0; i < topo->total_cores; i++) { + for(int i=0; i < topo->total_cores_module; i++) { free(cache_smt_id_apic[i]); free(cache_id_apic[i]); } diff --git a/src/x86/apic.h b/src/x86/apic.h index 1b183b41..98b63371 100644 --- a/src/x86/apic.h +++ b/src/x86/apic.h @@ -21,4 +21,6 @@ uint32_t is_smt_enabled_amd(struct topology* topo); bool bind_to_cpu(int cpu_id); #endif +int get_total_cores_module(int total_cores, int module); + #endif diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index d0b7eb14..bdc397d0 100644 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) { return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping); } -int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) { +int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) { /* * PP = PeakPerformance * SP = SinglePrecision @@ -192,46 +192,56 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t * 16(If AVX512), 8(If AVX), 4(If SSE) * */ - int64_t freq; -#ifdef __linux__ - if(accurate_pp) - freq = measure_frequency(cpu); - else - freq = max_freq; -#else - // Silence compiler warning - (void)(accurate_pp); - freq = max_freq; -#endif + struct cpuInfo* ptr = cpu; + int64_t total_flops = 0; - //First, check we have consistent data - if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { - return -1; - } + for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { + struct topology* topo = ptr->topo; + int64_t max_freq = get_freq(ptr->freq); - struct features* feat = cpu->feat; - int vpus = get_number_of_vpus(cpu); - int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; - - if(feat->FMA3 || feat->FMA4) - flops = flops*2; + int64_t freq; + #ifdef 
__linux__ + if(accurate_pp) + freq = measure_frequency(ptr); + else + freq = max_freq; + #else + // Silence compiler warning + (void)(accurate_pp); + freq = max_freq; + #endif - // Ice Lake has AVX512, but it has 1 VPU for AVX512, while - // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing - // the peak performance supposing AVX2, not AVX512 - if(feat->AVX512 && vpus_are_AVX512(cpu)) - flops = flops*16; - else if(feat->AVX || feat->AVX2) - flops = flops*8; - else if(feat->SSE) - flops = flops*4; + //First, check we have consistent data + if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { + return -1; + } - // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- - // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ - if(is_knights_landing(cpu)) - flops = flops * 6 / 7; + struct features* feat = ptr->feat; + int vpus = get_number_of_vpus(ptr); + int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; + + if(feat->FMA3 || feat->FMA4) + flops = flops*2; + + // Ice Lake has AVX512, but it has 1 VPU for AVX512, while + // it has 2 for AVX2. 
If this is a Ice Lake CPU, we are computing + // the peak performance supposing AVX2, not AVX512 + if(feat->AVX512 && vpus_are_AVX512(ptr)) + flops = flops*16; + else if(feat->AVX || feat->AVX2) + flops = flops*8; + else if(feat->SSE) + flops = flops*4; + + // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- + // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ + if(is_knights_landing(ptr)) + flops = flops * 6 / 7; + + total_flops += flops; + } - return flops; + return total_flops; } struct hypervisor* get_hp_info(bool hv_present) { @@ -274,51 +284,19 @@ struct hypervisor* get_hp_info(bool hv_present) { return hv; } -struct cpuInfo* get_cpu_info() { - struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo)); - struct features* feat = emalloc(sizeof(struct features)); - cpu->feat = feat; - cpu->peak_performance = -1; - cpu->topo = NULL; - cpu->cach = NULL; - - bool *ptr = &(feat->AES); - for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) { - *ptr = false; - } - +struct features* get_features_info(struct cpuInfo* cpu) { uint32_t eax = 0; uint32_t ebx = 0; uint32_t ecx = 0; uint32_t edx = 0; - //Get max cpuid level - cpuid(&eax, &ebx, &ecx, &edx); - cpu->maxLevels = eax; - - //Fill vendor - char name[13]; - memset(name,0,13); - get_name_cpuid(name, ebx, edx, ecx); + struct features* feat = emalloc(sizeof(struct features)); - if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0) - cpu->cpu_vendor = CPU_VENDOR_INTEL; - else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0) - cpu->cpu_vendor = CPU_VENDOR_AMD; - else { - cpu->cpu_vendor = CPU_VENDOR_INVALID; - printErr("Unknown CPU vendor: %s", name); - return NULL; + bool *ptr = &(feat->AES); + for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) { + *ptr = false; } - //Get max extended level - eax = 0x80000000; - ebx = 0; - ecx = 0; - edx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - cpu->maxExtendedLevels = eax; - //Fill instructions support if (cpu->maxLevels >= 
0x00000001){ eax = 0x00000001; @@ -373,6 +351,116 @@ struct cpuInfo* get_cpu_info() { printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels); } + return feat; +} + +bool set_cpu_module(int m, int total_modules, int32_t* first_core) { + if(total_modules > 1) { + // We have a hybrid architecture. + // 1. Find the first core from module m + int32_t core_id = -1; + int32_t currrent_module_idx = -1; + int32_t* core_types = emalloc(sizeof(uint32_t) * total_modules); + for(int i=0; i < total_modules; i++) core_types[i] = -1; + int i = 0; + + while(core_id == -1) { + if(!bind_to_cpu(i)) { + return false; + } + uint32_t eax = 0x0000001A; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + int32_t core_type = eax >> 24 & 0xFF; + bool found = false; + + for(int j=0; j < total_modules && !found; j++) { + if(core_types[j] == core_type) found = true; + } + if(!found) { + currrent_module_idx++; + core_types[currrent_module_idx] = core_type; + if(currrent_module_idx == m) { + core_id = i; + } + } + + i++; + } + + *first_core = core_id; + + //printf("Module %d: Core %d\n", m, core_id); + // 2. 
Now bind to that core + if(!bind_to_cpu(core_id)) { + return false; + } + } + + return true; +} + +int32_t get_core_type() { + uint32_t eax = 0x0000001A; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + + eax = 0x0000001A; + cpuid(&eax, &ebx, &ecx, &edx); + + int32_t type = eax >> 24 & 0xFF; + if(type == 0x20) return CORE_TYPE_EFFICIENCY; + else if(type == 0x40) return CORE_TYPE_PERFORMANCE; + else { + printErr("Found invalid core type: 0x%.8X\n", type); + return CORE_TYPE_UNKNOWN; + } +} + +struct cpuInfo* get_cpu_info() { + struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo)); + cpu->peak_performance = -1; + cpu->next_cpu = NULL; + cpu->topo = NULL; + cpu->cach = NULL; + cpu->feat = NULL; + + uint32_t modules = 1; + uint32_t eax = 0; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + + //Get max cpuid level + cpuid(&eax, &ebx, &ecx, &edx); + cpu->maxLevels = eax; + + //Fill vendor + char name[13]; + memset(name,0,13); + get_name_cpuid(name, ebx, edx, ecx); + + if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0) + cpu->cpu_vendor = CPU_VENDOR_INTEL; + else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0) + cpu->cpu_vendor = CPU_VENDOR_AMD; + else { + cpu->cpu_vendor = CPU_VENDOR_INVALID; + printErr("Unknown CPU vendor: %s", name); + return NULL; + } + + //Get max extended level + eax = 0x80000000; + ebx = 0; + ecx = 0; + edx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + cpu->maxExtendedLevels = eax; + if (cpu->maxExtendedLevels >= 0x80000004){ cpu->cpu_name = get_str_cpu_name_internal(); } @@ -389,19 +477,66 @@ struct cpuInfo* get_cpu_info() { cpu->topology_extensions = (ecx >> 22) & 1; } - // If any field of the struct is NULL, - // return inmideately, as further functions - // require valid fields (cach, topo, etc) - cpu->arch = get_cpu_uarch(cpu); - cpu->freq = get_frequency_info(cpu); + cpu->hybrid_flag = false; + if(cpu->cpu_vendor == CPU_VENDOR_INTEL && cpu->maxLevels >= 0x00000007) { + eax = 0x00000007; + ecx = 0x00000000; + cpuid(&eax, &ebx, &ecx, 
&edx); + cpu->hybrid_flag = (edx >> 15) & 0x1; + } + + if(cpu->hybrid_flag) modules = 2; + + struct cpuInfo* ptr = cpu; + for(uint32_t i=0; i < modules; i++) { + int32_t first_core = 0; /* set_cpu_module only writes it when modules > 1 */ + set_cpu_module(i, modules, &first_core); + + if(i > 0) { + ptr->next_cpu = emalloc(sizeof(struct cpuInfo)); + ptr = ptr->next_cpu; + ptr->next_cpu = NULL; + ptr->peak_performance = -1; + ptr->topo = NULL; + ptr->cach = NULL; + ptr->feat = NULL; + // We assume that these cores have the + // same cpuid capabilities + ptr->cpu_vendor = cpu->cpu_vendor; + ptr->maxLevels = cpu->maxLevels; + ptr->maxExtendedLevels = cpu->maxExtendedLevels; + ptr->hybrid_flag = cpu->hybrid_flag; + } - cpu->cach = get_cache_info(cpu); - if(cpu->cach == NULL) return cpu; + if(cpu->hybrid_flag) { + // Detect core type + eax = 0x0000001A; + cpuid(&eax, &ebx, &ecx, &edx); + ptr->core_type = get_core_type(); + } + ptr->first_core_id = first_core; + ptr->feat = get_features_info(ptr); - cpu->topo = get_topology_info(cpu, cpu->cach); - if(cpu->topo == NULL) return cpu; + // If any field of the struct is NULL, + // return immediately, as further functions + // require valid fields (cach, topo, etc) + ptr->arch = get_cpu_uarch(ptr); + ptr->freq = get_frequency_info(ptr); - cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp()); + ptr->cach = get_cache_info(ptr); + if(ptr->cach == NULL) { cpu->num_cpus = i + 1; return cpu; } /* num_cpus must cover the nodes built so far */ + + if(cpu->hybrid_flag) { + ptr->topo = get_topology_info(ptr, ptr->cach, i); + } + else { + ptr->topo = get_topology_info(ptr, ptr->cach, -1); + } + if(ptr->topo == NULL) { cpu->num_cpus = i + 1; return cpu; } /* check the module just probed, not the list head */ + } + + cpu->num_cpus = modules; + cpu->peak_performance = get_peak_performance(cpu, accurate_pp()); return cpu; } @@ -492,7 +627,7 @@ void get_topology_from_udev(struct topology* topo) { // Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html // Very interesting resource: 
https://wiki.osdev.org/Detecting_CPU_Topology_(80x86) -struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { +struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module) { struct topology* topo = emalloc(sizeof(struct topology)); init_topology_struct(topo, cach); @@ -516,6 +651,13 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { } #endif + if(cpu->hybrid_flag) { + topo->total_cores_module = get_total_cores_module(topo->total_cores, module); + } + else { + topo->total_cores_module = topo->total_cores; + } + switch(cpu->cpu_vendor) { case CPU_VENDOR_INTEL: if (cpu->maxLevels >= 0x00000004) { @@ -919,6 +1061,9 @@ void print_debug(struct cpuInfo* cpu) { if(cpu->cpu_vendor == CPU_VENDOR_AMD) { printf("- AMD topology extensions: %d\n", cpu->topology_extensions); } + if(cpu->cpu_vendor == CPU_VENDOR_INTEL) { + printf("- Hybrid Flag: %d\n", cpu->hybrid_flag); + } printf("- CPUID dump: 0x%.8X\n", eax); free_cpuinfo_struct(cpu); diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h index d78517ab..3b0e21bd 100644 --- a/src/x86/cpuid.h +++ b/src/x86/cpuid.h @@ -6,7 +6,7 @@ struct cpuInfo* get_cpu_info(); struct cache* get_cache_info(struct cpuInfo* cpu); struct frequency* get_frequency_info(struct cpuInfo* cpu); -struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach); +struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module); char* get_str_avx(struct cpuInfo* cpu); char* get_str_sse(struct cpuInfo* cpu); diff --git a/src/x86/uarch.c b/src/x86/uarch.c index 491fca2a..70ba57c9 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -421,6 +421,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) { case UARCH_ICE_LAKE: case UARCH_TIGER_LAKE: + case UARCH_ALDER_LAKE: // AMD case UARCH_ZEN2: