diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 92921f4e..4c2e712e 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -92,8 +92,12 @@ endif() # Add compile flags for Cheetah-runtime compilation that should be # excluded from bitcode compilation -if (CHEETAH_HAS_MAVX_FLAG) - list(APPEND CHEETAH_COMPILE_FLAGS -mavx) +if (DEFINED CHEETAH_ARCH_FLAGS) + list(APPEND CHEETAH_COMPILE_FLAGS ${CHEETAH_ARCH_FLAGS}) +else() + if (CHEETAH_HAS_MAVX_FLAG) + list(APPEND CHEETAH_COMPILE_FLAGS -mavx) + endif() endif() if (APPLE) diff --git a/runtime/cilk2c_inlined.c b/runtime/cilk2c_inlined.c index fd30717e..14db7519 100644 --- a/runtime/cilk2c_inlined.c +++ b/runtime/cilk2c_inlined.c @@ -47,12 +47,17 @@ unsigned __cilkrts_get_worker_number(void) { return 0; } -void *__cilkrts_reducer_lookup(void *key, size_t size, - void *identity_ptr, void *reduce_ptr) { +void *__cilkrts_reducer_lookup_in_frame(struct __cilkrts_stack_frame *frame, + void *key, size_t size, + void *identity_ptr, void *reduce_ptr) { // If we're outside a cilkified region, then the key is the view. - if (__cilkrts_need_to_cilkify) - return key; - struct local_hyper_table *table = get_hyper_table(); + // The null test will normally be optimized out. + __cilkrts_worker *w; + if (frame) + w = frame->fh->worker; // Never null + else if (!(w = __cilkrts_get_tls_worker())) + return key; + struct local_hyper_table *table = get_local_hyper_table(w); struct bucket *b = find_hyperobject(table, (uintptr_t)key); if (__builtin_expect(!!b, true)) { // Return the existing view. diff --git a/runtime/local-reducer-api.h b/runtime/local-reducer-api.h index a4db25c8..c8e67b71 100644 --- a/runtime/local-reducer-api.h +++ b/runtime/local-reducer-api.h @@ -7,7 +7,7 @@ static inline struct local_hyper_table * get_local_hyper_table(__cilkrts_worker *w) { - if (NULL == w->hyper_table) { + if (__builtin_expect(NULL == w->hyper_table, 0)) { w->hyper_table = __cilkrts_local_hyper_table_alloc(); } return w->hyper_table;