diff --git a/src/common/cpu.h b/src/common/cpu.h
index 265a411f..ed08fc75 100644
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -35,6 +35,12 @@ enum {
   HV_VENDOR_INVALID
 };
 
+enum { // Core kinds of a hybrid x86 CPU (value stored in cpuInfo.core_type)
+  CORE_TYPE_EFFICIENCY, // CPUID 0x1A core type 0x20; printed as "E-cores:"
+  CORE_TYPE_PERFORMANCE, // CPUID 0x1A core type 0x40; printed as "P-cores:"
+  CORE_TYPE_UNKNOWN // Any other core type value
+};
+
 #define UNKNOWN_DATA -1
 #define CPU_NAME_MAX_LENGTH 64
 
@@ -78,6 +84,7 @@ struct topology {
   uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
 #ifdef ARCH_X86
   uint32_t smt_available; // Number of SMT that is currently enabled
+  int32_t total_cores_module; // Total cores in the current module (only makes sense in hybrid archs, like ADL)
   struct apic* apic;
 #endif
 #endif
@@ -131,6 +138,10 @@ struct cpuInfo {
   uint32_t maxExtendedLevels;
   // Topology Extensions (AMD only)
   bool topology_extensions;
+  // Hybrid Flag (Intel only)
+  bool hybrid_flag;
+  // Core Type (P/E)
+  uint32_t core_type;
 #elif ARCH_PPC
   uint32_t pvr;
 #elif ARCH_ARM
@@ -140,11 +151,18 @@ struct cpuInfo {
 
 #ifdef ARCH_ARM
   struct system_on_chip* soc;
+#endif
+
+#if defined(ARCH_X86) || defined(ARCH_ARM)
   // If SoC contains more than one CPU and they
   // are different, the others will be stored in
   // the next_cpu field
-  struct cpuInfo* next_cpu;  
+  struct cpuInfo* next_cpu;
   uint8_t num_cpus;
+#ifdef ARCH_X86
+  // The index of the first core in the module
+  uint32_t first_core_id;
+#endif
 #endif
 };
 
diff --git a/src/common/printer.c b/src/common/printer.c
index bb0c6724..b0b8a0da 100644
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -44,6 +44,8 @@ enum {
   ATTRIBUTE_NAME,
 #elif ARCH_ARM
   ATTRIBUTE_SOC,
+#endif
+#if defined(ARCH_X86) || defined(ARCH_ARM)
   ATTRIBUTE_CPU_NUM,
 #endif
   ATTRIBUTE_HYPERVISOR,
@@ -75,6 +77,8 @@ static const char* ATTRIBUTE_FIELDS [] = {
   "Part Number:",
 #elif ARCH_ARM
   "SoC:",
+#endif
+#if defined(ARCH_X86) || defined(ARCH_ARM)
   "",
 #endif
   "Hypervisor:",
@@ -106,6 +110,8 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
   "P/N:",
 #elif ARCH_ARM
   "SoC:",
+#endif
+#if defined(ARCH_X86) || defined(ARCH_ARM)
   "",
 #endif
   "Hypervisor:",
@@ -424,11 +430,12 @@ uint32_t longest_field_length(struct ascii* art, int la) {
 }
 
 #if defined(ARCH_X86) || defined(ARCH_PPC)
-void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields) {
+void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields, bool hybrid_architecture) {
   struct ascii_logo* logo = art->art;
   int attr_to_print = 0;
   int attr_type;
   char* attr_value;
+  int32_t beg_space;
   int32_t space_right;
   int32_t space_up = ((int)logo->height - (int)art->n_attributes_set)/2;
   int32_t space_down = (int)logo->height - (int)art->n_attributes_set - (int)space_up;
@@ -439,6 +446,7 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
   lbuf->buf = emalloc(sizeof(char) * LINE_BUFFER_SIZE);
   lbuf->pos = 0;
   lbuf->chars = 0;
+  bool add_space = false;
 
   printf("\n");
   for(int32_t n=0; n < iters; n++) {
@@ -473,9 +481,24 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
       attr_value = art->attributes[attr_to_print]->value;
       attr_to_print++;
 
-      space_right = 1 + (la - strlen(attribute_fields[attr_type]));
-      printOut(lbuf, strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
-               "%s%s%s%*s%s%s%s", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
+      if(attr_type == ATTRIBUTE_L3) {
+        add_space = false;
+      }
+      if(attr_type == ATTRIBUTE_CPU_NUM) {
+        printOut(lbuf, strlen(attr_value), "%s%s%s", logo->color_text[0], attr_value, art->reset);
+        add_space = true;
+      }
+      else {
+        beg_space = 0;
+        space_right = 2 + 1 + (la - strlen(attribute_fields[attr_type]));
+        if(hybrid_architecture && add_space) {
+          beg_space = 2;
+          space_right -= 2;
+        }
+
+        printOut(lbuf, beg_space + strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
+                 "%*s%s%s%s%*s%s%s%s", beg_space, "", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
+      }
     }
     printOutLine(lbuf, art, termw);
     printf("\n");
@@ -501,57 +524,71 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
 
   art->new_intel_logo = choose_new_intel_logo(cpu);
 
-  // Step 1. Retrieve attributes (if some structures are NULL, like topo
-  //         or cache, do not try to retrieve them)
   uint32_t socket_num = 1;
   char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets;
   l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL;
 
-  char* uarch = get_str_uarch(cpu);
-  char* manufacturing_process = get_str_process(cpu);
-  char* max_frequency = get_str_freq(cpu->freq);
   char* cpu_name = get_str_cpu_name(cpu, fcpuname);
-  char* avx = get_str_avx(cpu);
-  char* fma = get_str_fma(cpu);
+  char* uarch = get_str_uarch(cpu);
   char* pp = get_str_peak_performance(cpu->peak_performance);
-
-  if(cpu->topo != NULL) {
-    sockets = get_str_sockets(cpu->topo);
-    n_cores = get_str_topology(cpu, cpu->topo, false);
-    n_cores_dual = get_str_topology(cpu, cpu->topo, true);
-  }
+  char* manufacturing_process = get_str_process(cpu);
+  bool hybrid_architecture = cpu->next_cpu != NULL;
 
   if(cpu->cach != NULL) {
-    l1i = get_str_l1i(cpu->cach);
-    l1d = get_str_l1d(cpu->cach);
-    l2 = get_str_l2(cpu->cach);
     l3 = get_str_l3(cpu->cach);
   }
 
-  // Step 2. Set attributes
   setAttribute(art, ATTRIBUTE_NAME, cpu_name);
   if(cpu->hv->present) {
     setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name);
   }
   setAttribute(art, ATTRIBUTE_UARCH, uarch);
   setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
-  setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
-  if(cpu->topo != NULL) {
-    socket_num = get_nsockets(cpu->topo);
-    if (socket_num > 1) {
-      setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
-      setAttribute(art, ATTRIBUTE_NCORES, n_cores);
-      setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual);
+
+  struct cpuInfo* ptr = cpu;
+  for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
+    char* max_frequency = get_str_freq(ptr->freq);
+    char* avx = get_str_avx(ptr);
+    char* fma = get_str_fma(ptr);
+    char* cpu_num = emalloc(sizeof(char) * 9);
+
+    if(ptr->topo != NULL) {
+      sockets = get_str_sockets(ptr->topo);
+      n_cores = get_str_topology(ptr, ptr->topo, false);
+      n_cores_dual = get_str_topology(ptr, ptr->topo, true);
     }
-    else {
-      setAttribute(art, ATTRIBUTE_NCORES, n_cores);
+
+    if(ptr->cach != NULL) {
+      l1i = get_str_l1i(ptr->cach);
+      l1d = get_str_l1d(ptr->cach);
+      l2 = get_str_l2(ptr->cach);
+    }
+
+    if(hybrid_architecture) {
+      if(ptr->core_type == CORE_TYPE_EFFICIENCY) sprintf(cpu_num, "E-cores:");
+      else if(ptr->core_type == CORE_TYPE_PERFORMANCE) sprintf(cpu_num, "P-cores:");
+      else printBug("Found invalid core type!\n");
+
+      setAttribute(art, ATTRIBUTE_CPU_NUM, cpu_num);
     }
+    setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
+    if(ptr->topo != NULL) {
+      socket_num = get_nsockets(ptr->topo);
+      if (socket_num > 1) {
+        setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
+        setAttribute(art, ATTRIBUTE_NCORES, n_cores);
+        setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual);
+      }
+      else {
+        setAttribute(art, ATTRIBUTE_NCORES, n_cores);
+      }
+    }
+    setAttribute(art, ATTRIBUTE_AVX, avx);
+    setAttribute(art, ATTRIBUTE_FMA, fma);
+    if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
+    if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
+    if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
   }
-  setAttribute(art, ATTRIBUTE_AVX, avx);
-  setAttribute(art, ATTRIBUTE_FMA, fma);
-  if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
-  if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
-  if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
   if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3);
   setAttribute(art, ATTRIBUTE_PEAK, pp);
 
@@ -568,15 +605,12 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
     longest_attribute = longest_attribute_length(art, attribute_fields);
   }
 
-  print_ascii_generic(art, longest_attribute, term->w, attribute_fields);
+  print_ascii_generic(art, longest_attribute, term->w, attribute_fields, hybrid_architecture);
 
   free(manufacturing_process);
-  free(max_frequency);
   free(sockets);
   free(n_cores);
   free(n_cores_dual);
-  free(avx);
-  free(fma);
   free(l1i);
   free(l1d);
   free(l2);
diff --git a/src/x86/apic.c b/src/x86/apic.c
index 3e7a6d85..2d9ecf72 100644
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -102,6 +102,59 @@ bool bind_to_cpu(int cpu_id) {
 }
 #endif
 
+// Counts the cores of module `module`; modules are numbered by the order in which
+// each CPUID 0x1A core type is first seen scanning cores from 0. Returns -1 on error.
+int get_total_cores_module(int total_cores, int module) {
+  enum { TOTAL_MODULES = 2 };
+  int32_t core_types[TOTAL_MODULES] = { -1, -1 }; // On the stack: nothing to leak
+  int32_t current_module_idx = -1;
+  int cores_in_module = 0;
+  bool end = false;
+
+  // Get the original mask to restore it later
+  cpu_set_t original_mask;
+  if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) {
+    printWarn("sched_getaffinity: %s", strerror(errno));
+    return -1; // -1, not false (0): 0 would look like a valid core count
+  }
+
+  for(int i=0; !end; i++) {
+    if(!bind_to_cpu(i)) {
+      cores_in_module = -1; // Report the failure, but still restore the affinity below
+      break;
+    }
+    uint32_t eax = 0x0000001A;
+    uint32_t ebx = 0;
+    uint32_t ecx = 0;
+    uint32_t edx = 0;
+    cpuid(&eax, &ebx, &ecx, &edx);
+    int32_t core_type = eax >> 24 & 0xFF;
+    bool found = false;
+
+    for(int j=0; j < TOTAL_MODULES && !found; j++) {
+      if(core_types[j] == core_type) found = true;
+    }
+    if(!found) {
+      current_module_idx++;
+      // Never write past the table if an unexpected extra core type shows up
+      if(current_module_idx < TOTAL_MODULES) core_types[current_module_idx] = core_type;
+    }
+    if(current_module_idx == module) {
+      cores_in_module++;
+      if(i+1 == total_cores) end = true;
+    }
+    else if(cores_in_module > 0) end = true;
+  }
+
+  // Reset the original affinity
+  if (sched_setaffinity (0, sizeof(original_mask), &original_mask) == -1) {
+    printWarn("sched_setaffinity: %s", strerror(errno));
+    return -1;
+  }
+
+  return cores_in_module;
+}
+
 bool fill_topo_masks_apic(struct topology* topo) {
   uint32_t eax = 0x00000001;
   uint32_t ebx = 0;
@@ -197,14 +250,14 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
   uint32_t max = 0;
 
   for(int i=0; i < topo->cach->max_cache_level; i++) {
-    for(int j=0; j < topo->total_cores; j++) {
+    for(int j=0; j < topo->total_cores_module; j++) {
       if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i];
     }
   }
 
   max++;
-  if(max > (uint32_t) topo->total_cores) return max;
-  return topo->total_cores;
+  if(max > (uint32_t) topo->total_cores_module) return max;
+  return topo->total_cores_module;
 }
 
 bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
@@ -219,18 +272,18 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
   memset(apic_id, 0, sizeof(uint32_t) * size);
 
   // System topology
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     sockets[apic_pkg[i]] = 1;
     smt[apic_smt[i]] = 1;
   }
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     if(sockets[i] != 0)
       topo->sockets++;
     if(smt[i] != 0)
       topo->smt_available++;
   }
 
-  topo->logical_cores = topo->total_cores / topo->sockets;
+  topo->logical_cores = topo->total_cores_module / topo->sockets;
   topo->physical_cores = topo->logical_cores / topo->smt_available;
 
   // Cache topology
@@ -238,7 +291,7 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
     num_caches = 0;
     memset(apic_id, 0, sizeof(uint32_t) * size);
 
-    for(int c=0; c < topo->total_cores; c++) {
+    for(int c=0; c < topo->total_cores_module; c++) {
       apic_id[cache_id_apic[c][i]]++;
     }
     for(uint32_t c=0; c < size; c++) {
@@ -297,7 +350,7 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) {
   }
 }
 
-bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
+bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
 #ifdef __APPLE__
   // macOS extremely dirty approach...
   printf("cpufetch is computing APIC IDs, please wait...\n");
@@ -322,12 +375,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
   }
   #endif
 
-  for(int i=0; i < n; i++) {
+  for(int i=first_core; i < first_core+n; i++) {
     if(!bind_to_cpu(i)) {
       printErr("Failed binding the process to CPU %d", i);
       return false;
     }
-    apic_ids[i] = get_apic_id(x2apic_id);
+    apic_ids[i-first_core] = get_apic_id(x2apic_id);
   }
 
   #ifdef __linux__
@@ -344,12 +397,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
 
 bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
   uint32_t apic_id;
-  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
-  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
+  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
+  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
   bool x2apic_id;
 
   if(cpu->maxLevels >= 0x0000000B) {
@@ -367,7 +420,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
     x2apic_id = false;
   }
 
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
     cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
   }
@@ -385,10 +438,10 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
 
   get_cache_topology_from_apic(topo);
 
-  if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id))
+  if(!fill_apic_ids(apic_ids, cpu->first_core_id, topo->total_cores_module, x2apic_id))
     return false;
 
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     apic_id = apic_ids[i];
 
     apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
@@ -404,20 +457,19 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
   /* DEBUG
   for(int i=0; i < topo->cach->max_cache_level; i++) {
     printf("[CACH %1d]", i);
-    for(int j=0; j < topo->total_cores; j++)
+    for(int j=0; j < topo->total_cores_module; j++)
       printf("[%03d]", cache_id_apic[j][i]);
     printf("\n");
   }
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
     printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
   printf("\n");
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
     printf("[%2d] 0x%.8X\n", i, apic_core[i]);
   printf("\n");
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
     printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
 
-
   bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
 
   // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
@@ -429,7 +481,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
   free(apic_pkg);
   free(apic_core);
   free(apic_smt);
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     free(cache_smt_id_apic[i]);
     free(cache_id_apic[i]);
   }
diff --git a/src/x86/apic.h b/src/x86/apic.h
index 1b183b41..98b63371 100644
--- a/src/x86/apic.h
+++ b/src/x86/apic.h
@@ -21,4 +21,6 @@ uint32_t is_smt_enabled_amd(struct topology* topo);
 bool bind_to_cpu(int cpu_id);
 #endif
 
+int get_total_cores_module(int total_cores, int module);
+
 #endif
diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c
index d0b7eb14..bdc397d0 100644
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
   return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
 }
 
-int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) {
+int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
   /*
    * PP = PeakPerformance
    * SP = SinglePrecision
@@ -192,46 +192,56 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
    * 16(If AVX512), 8(If AVX), 4(If SSE) *
    */
 
-  int64_t freq;
-#ifdef __linux__
-  if(accurate_pp)
-    freq = measure_frequency(cpu);
-  else
-    freq = max_freq;
-#else
-  // Silence compiler warning
-  (void)(accurate_pp);
-  freq = max_freq;
-#endif
+  struct cpuInfo* ptr = cpu;
+  int64_t total_flops = 0;
 
-  //First, check we have consistent data
-  if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
-    return -1;
-  }
+  for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
+    struct topology* topo = ptr->topo;
+    int64_t max_freq = get_freq(ptr->freq);
 
-  struct features* feat = cpu->feat;
-  int vpus = get_number_of_vpus(cpu);
-  int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
-
-  if(feat->FMA3 || feat->FMA4)
-    flops = flops*2;
+    int64_t freq;
+  #ifdef __linux__
+    if(accurate_pp)
+      freq = measure_frequency(ptr);
+    else
+      freq = max_freq;
+  #else
+    // Silence compiler warning
+    (void)(accurate_pp);
+    freq = max_freq;
+  #endif
 
-  // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
-  // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
-  // the peak performance supposing AVX2, not AVX512
-  if(feat->AVX512 && vpus_are_AVX512(cpu))
-    flops = flops*16;
-  else if(feat->AVX || feat->AVX2)
-    flops = flops*8;
-  else if(feat->SSE)
-    flops = flops*4;
+    //First, check we have consistent data
+    if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
+      return -1;
+    }
 
-  // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
-  // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
-  if(is_knights_landing(cpu))
-    flops = flops * 6 / 7;
+    struct features* feat = ptr->feat;
+    int vpus = get_number_of_vpus(ptr);
+    int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
+
+    if(feat->FMA3 || feat->FMA4)
+      flops = flops*2;
+
+    // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
+    // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
+    // the peak performance supposing AVX2, not AVX512
+    if(feat->AVX512 && vpus_are_AVX512(ptr))
+      flops = flops*16;
+    else if(feat->AVX || feat->AVX2)
+      flops = flops*8;
+    else if(feat->SSE)
+      flops = flops*4;
+
+    // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
+    // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
+    if(is_knights_landing(ptr))
+      flops = flops * 6 / 7;
+
+    total_flops += flops;
+  }
 
-  return flops;
+  return total_flops;
 }
 
 struct hypervisor* get_hp_info(bool hv_present) {
@@ -274,51 +284,19 @@ struct hypervisor* get_hp_info(bool hv_present) {
   return hv;
 }
 
-struct cpuInfo* get_cpu_info() {
-  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
-  struct features* feat = emalloc(sizeof(struct features));
-  cpu->feat = feat;
-  cpu->peak_performance = -1;
-  cpu->topo = NULL;
-  cpu->cach = NULL;
-
-  bool *ptr = &(feat->AES);
-  for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
-    *ptr = false;
-  }
-
+struct features* get_features_info(struct cpuInfo* cpu) {
   uint32_t eax = 0;
   uint32_t ebx = 0;
   uint32_t ecx = 0;
   uint32_t edx = 0;
 
-  //Get max cpuid level
-  cpuid(&eax, &ebx, &ecx, &edx);
-  cpu->maxLevels = eax;
-
-  //Fill vendor
-  char name[13];
-  memset(name,0,13);
-  get_name_cpuid(name, ebx, edx, ecx);
+  struct features* feat = emalloc(sizeof(struct features));
 
-  if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
-    cpu->cpu_vendor = CPU_VENDOR_INTEL;
-  else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
-    cpu->cpu_vendor = CPU_VENDOR_AMD;
-  else {
-    cpu->cpu_vendor = CPU_VENDOR_INVALID;
-    printErr("Unknown CPU vendor: %s", name);
-    return NULL;
+  bool *ptr = &(feat->AES);
+  for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
+    *ptr = false;
   }
 
-  //Get max extended level
-  eax = 0x80000000;
-  ebx = 0;
-  ecx = 0;
-  edx = 0;
-  cpuid(&eax, &ebx, &ecx, &edx);
-  cpu->maxExtendedLevels = eax;
-
   //Fill instructions support
   if (cpu->maxLevels >= 0x00000001){
     eax = 0x00000001;
@@ -373,6 +351,116 @@ struct cpuInfo* get_cpu_info() {
     printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
   }
 
+  return feat;
+}
+
+// Binds the process to the first core of module m (modules are numbered in the
+// order their CPUID 0x1A core type is first seen, scanning cores from 0) and
+// stores that core index in *first_core. With a single module nothing is bound
+// and *first_core is 0. Returns false (leaving *first_core at 0) if a bind fails.
+bool set_cpu_module(int m, int total_modules, int32_t* first_core) {
+  // Always define the output: the caller reads it even for non-hybrid CPUs
+  *first_core = 0;
+  if(total_modules <= 1) return true;
+
+  // We have a hybrid architecture.
+  // 1. Find the first core from module m
+  int32_t core_id = -1;
+  int32_t current_module_idx = -1;
+  // Core types seen so far, in order of first appearance (-1 = unused slot)
+  int32_t* core_types = emalloc(sizeof(int32_t) * total_modules);
+  for(int i=0; i < total_modules; i++) core_types[i] = -1;
+
+  for(int i=0; core_id == -1; i++) {
+    if(!bind_to_cpu(i)) {
+      free(core_types);
+      return false;
+    }
+    uint32_t eax = 0x0000001A;
+    uint32_t ebx = 0;
+    uint32_t ecx = 0;
+    uint32_t edx = 0;
+    cpuid(&eax, &ebx, &ecx, &edx);
+    int32_t core_type = eax >> 24 & 0xFF;
+    bool found = false;
+
+    for(int j=0; j < total_modules && !found; j++) {
+      if(core_types[j] == core_type) found = true;
+    }
+    if(!found) {
+      current_module_idx++;
+      // Do not write past the table if more core types than modules show up
+      if(current_module_idx < total_modules) core_types[current_module_idx] = core_type;
+      if(current_module_idx == m) core_id = i;
+    }
+  }
+  free(core_types);
+
+  *first_core = core_id;
+
+  // 2. Now bind to that core
+  return bind_to_cpu(core_id);
+}
+
+// Queries CPUID leaf 0x1A and maps the core type field (EAX bits 31-24)
+// to one of the CORE_TYPE_* constants.
+int32_t get_core_type() {
+  uint32_t eax = 0x0000001A;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  int32_t type = eax >> 24 & 0xFF;
+  switch(type) {
+    case 0x20: return CORE_TYPE_EFFICIENCY;
+    case 0x40: return CORE_TYPE_PERFORMANCE;
+    default:
+      printErr("Found invalid core type: 0x%.8X\n", type);
+      return CORE_TYPE_UNKNOWN;
+  }
+}
+
+struct cpuInfo* get_cpu_info() {
+  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
+  cpu->peak_performance = -1;
+  cpu->next_cpu = NULL;
+  cpu->topo = NULL;
+  cpu->cach = NULL;
+  cpu->feat = NULL;
+
+  uint32_t modules = 1;
+  uint32_t eax = 0;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
+
+  //Get max cpuid level
+  cpuid(&eax, &ebx, &ecx, &edx);
+  cpu->maxLevels = eax;
+
+  //Fill vendor
+  char name[13];
+  memset(name,0,13);
+  get_name_cpuid(name, ebx, edx, ecx);
+
+  if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
+    cpu->cpu_vendor = CPU_VENDOR_INTEL;
+  else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
+    cpu->cpu_vendor = CPU_VENDOR_AMD;
+  else {
+    cpu->cpu_vendor = CPU_VENDOR_INVALID;
+    printErr("Unknown CPU vendor: %s", name);
+    return NULL;
+  }
+
+  //Get max extended level
+  eax = 0x80000000;
+  ebx = 0;
+  ecx = 0;
+  edx = 0;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  cpu->maxExtendedLevels = eax;
+
   if (cpu->maxExtendedLevels >= 0x80000004){
     cpu->cpu_name = get_str_cpu_name_internal();
   }
@@ -389,19 +477,66 @@ struct cpuInfo* get_cpu_info() {
     cpu->topology_extensions = (ecx >> 22) & 1;
   }
 
-  // If any field of the struct is NULL,
-  // return inmideately, as further functions
-  // require valid fields (cach, topo, etc)
-  cpu->arch = get_cpu_uarch(cpu);
-  cpu->freq = get_frequency_info(cpu);
+  cpu->hybrid_flag = false;
+  if(cpu->cpu_vendor == CPU_VENDOR_INTEL && cpu->maxLevels >= 0x00000007) {
+    eax = 0x00000007;
+    ecx = 0x00000000;
+    cpuid(&eax, &ebx, &ecx, &edx);
+    cpu->hybrid_flag = (edx >> 15) & 0x1;
+  }
+
+  if(cpu->hybrid_flag) modules = 2;
+
+  struct cpuInfo* ptr = cpu;
+  for(uint32_t i=0; i < modules; i++) {
+    int32_t first_core;
+    set_cpu_module(i, modules, &first_core);
+
+    if(i > 0) {
+      ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
+      ptr = ptr->next_cpu;
+      ptr->next_cpu = NULL;
+      ptr->peak_performance = -1;
+      ptr->topo = NULL;
+      ptr->cach = NULL;
+      ptr->feat = NULL;
+      // We assume that these cores have the
+      // same cpuid capabilities
+      ptr->cpu_vendor = cpu->cpu_vendor;
+      ptr->maxLevels = cpu->maxLevels;
+      ptr->maxExtendedLevels = cpu->maxExtendedLevels;
+      ptr->hybrid_flag = cpu->hybrid_flag;
+    }
 
-  cpu->cach = get_cache_info(cpu);
-  if(cpu->cach == NULL) return cpu;
+    if(cpu->hybrid_flag) {
+      // Detect core type
+      eax = 0x0000001A;
+      cpuid(&eax, &ebx, &ecx, &edx);
+      ptr->core_type = get_core_type();
+    }
+    ptr->first_core_id = first_core;
+    ptr->feat = get_features_info(ptr);
 
-  cpu->topo = get_topology_info(cpu, cpu->cach);
-  if(cpu->topo == NULL) return cpu;
+    // If any field of the struct is NULL,
+    // return immediately, as further functions
+    // require valid fields (cach, topo, etc)
+    ptr->arch = get_cpu_uarch(ptr);
+    ptr->freq = get_frequency_info(ptr);
 
-  cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
+    ptr->cach = get_cache_info(ptr);
+    if(ptr->cach == NULL) return cpu;
+
+    if(cpu->hybrid_flag) {
+      ptr->topo = get_topology_info(ptr, ptr->cach, i);
+    }
+    else {
+      ptr->topo = get_topology_info(ptr, ptr->cach, -1);
+    }
+    if(cpu->topo == NULL) return cpu;
+  }
+
+  cpu->num_cpus = modules;
+  cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
 
   return cpu;
 }
@@ -492,7 +627,7 @@ void get_topology_from_udev(struct topology* topo) {
 
 // Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
 // Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
-struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module) {
   struct topology* topo = emalloc(sizeof(struct topology));
   init_topology_struct(topo, cach);
 
@@ -516,6 +651,13 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
     }
   #endif
 
+  if(cpu->hybrid_flag) {
+    topo->total_cores_module = get_total_cores_module(topo->total_cores, module);
+  }
+  else {
+    topo->total_cores_module = topo->total_cores;
+  }
+
   switch(cpu->cpu_vendor) {
     case CPU_VENDOR_INTEL:
       if (cpu->maxLevels >= 0x00000004) {
@@ -919,6 +1061,9 @@ void print_debug(struct cpuInfo* cpu) {
   if(cpu->cpu_vendor == CPU_VENDOR_AMD) {
     printf("- AMD topology extensions: %d\n", cpu->topology_extensions);
   }
+  if(cpu->cpu_vendor == CPU_VENDOR_INTEL) {
+    printf("- Hybrid Flag: %d\n", cpu->hybrid_flag);
+  }
   printf("- CPUID dump: 0x%.8X\n", eax);
 
   free_cpuinfo_struct(cpu);
diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h
index d78517ab..3b0e21bd 100644
--- a/src/x86/cpuid.h
+++ b/src/x86/cpuid.h
@@ -6,7 +6,7 @@
 struct cpuInfo* get_cpu_info();
 struct cache* get_cache_info(struct cpuInfo* cpu);
 struct frequency* get_frequency_info(struct cpuInfo* cpu);
-struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach);
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module);
 
 char* get_str_avx(struct cpuInfo* cpu);
 char* get_str_sse(struct cpuInfo* cpu);
diff --git a/src/x86/uarch.c b/src/x86/uarch.c
index 491fca2a..70ba57c9 100644
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -421,6 +421,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
 
       case UARCH_ICE_LAKE:
       case UARCH_TIGER_LAKE:
+      case UARCH_ALDER_LAKE:
 
       // AMD
       case UARCH_ZEN2: