Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Page-agnostic block trampolines #317

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ jobs:
run: |
sudo apt update
sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build
# Copied from Microsoft's snmalloc CI configuration
- name: Reconfigure for powerpc64le
  if: startsWith(matrix.arch.triple, 'powerpc64le')
  # The default PowerPC qemu configuration uses the wrong page size.
  # Replace the binfmt_misc handler with a wrapper script that passes
  # `-p 65536` to qemu-ppc64le.
  run: |
    sudo update-binfmts --disable qemu-ppc64le
    sudo sh -c 'echo ":qemu-ppc64le:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:`pwd`/ppc64.sh:" > /proc/sys/fs/binfmt_misc/register'
    echo '#!/bin/sh' > ppc64.sh
    # "$@" must be quoted inside the generated script, otherwise arguments
    # containing whitespace are split again before they reach qemu.
    echo '/usr/bin/qemu-ppc64le -p 65536 "$@"' >> ppc64.sh
    chmod +x ppc64.sh
- name: Configure CMake
run: |
export LDFLAGS="-L/usr/lib/llvm-${{ matrix.llvm-version }}/lib/ -fuse-ld=lld-${{ matrix.llvm-version}} -Wl,--dynamic-linker=/usr/${{ matrix.arch.triple }}/lib/${{ matrix.arch.rtld }},-rpath,/usr/${{ matrix.arch.triple }}/lib"
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set(libobjc_C_SRCS
runtime.c
sarray2.c
sendmsg2.c
shims.c
fast_paths.m
)
set(libobjc_HDRS
Expand Down
8 changes: 0 additions & 8 deletions asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,3 @@
#define SLOT_OFFSET 0
#endif
#define SMALLOBJ_MASK ((1<<SMALLOBJ_BITS) - 1)

// Page size configuration
#if defined(__powerpc64__)
# define PAGE_SHIFT 16
#else
# define PAGE_SHIFT 12
#endif
#define PAGE_SIZE (1<<PAGE_SHIFT)
131 changes: 90 additions & 41 deletions block_to_imp.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "blocks_runtime.h"
#include "lock.h"
#include "visibility.h"
#include "asmconstants.h" // For PAGE_SIZE
#include "shims.h"

#ifndef __has_builtin
#define __has_builtin(x) 0
Expand Down Expand Up @@ -115,39 +115,54 @@ struct block_header
*/
#if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64)) || (defined(__powerpc__) && !defined(__powerpc64__))
uint64_t padding[3];
#elif defined(__mips__) || defined(__powerpc64__)
#elif defined(__mips__) || defined(__ARM_ARCH_ISA_A64) || defined(__powerpc64__)
uint64_t padding[2];
#elif defined(__arm__)
uint64_t padding;
#endif
};

#define HEADERS_PER_PAGE (PAGE_SIZE/sizeof(struct block_header))

/**
* Structure containing two pages of block trampolines. Each trampoline
* loads its block and target method address from the corresponding
* block_header (one page before the start of the block structure).
*/
struct trampoline_buffers
{
struct block_header headers[HEADERS_PER_PAGE];
char rx_buffer[PAGE_SIZE];
};
_Static_assert(__builtin_offsetof(struct trampoline_buffers, rx_buffer) == PAGE_SIZE,
"Incorrect offset for read-execute buffer");
_Static_assert(sizeof(struct trampoline_buffers) == 2*PAGE_SIZE,
"Incorrect size for trampoline buffers");

struct trampoline_set
{
struct trampoline_buffers *buffers;
struct trampoline_set *next;
/**
* Memory region containing two pages of block trampolines. Each trampoline
* loads its block and target method address from the corresponding
* block_header (one page before the start of the block structure).
*
* Page | Description
* 1 | Page filled with block_header's
* 2 | RX buffer page
*/
uint8_t *region;
struct trampoline_set *next;
int first_free;
};


/**
 * Current page size of the system in bytes.
 * Set in init_trampolines from the value returned by getpagesize().
 */
static int trampoline_page_size;
/**
 * Number of block_header entries that fit in one page.
 * Calculated in init_trampolines, after the page size has been retrieved, as
 * trampoline_page_size / sizeof(struct block_header).
 */
static size_t trampoline_header_per_page;
/**
 * Size of a trampoline region in bytes.
 * Set in init_trampolines to trampoline_page_size * TRAMPOLINE_REGION_PAGES.
 */
static size_t trampoline_region_size;
/**
 * Lock initialised at the end of init_trampolines.
 * NOTE(review): presumably serialises access to the trampoline sets; the
 * lock/unlock sites are not visible in this hunk, so confirm against callers.
 */
static mutex_t trampoline_lock;

/**
 * Size of the trampoline region (in pages)
 */
#define TRAMPOLINE_REGION_PAGES 2

/**
 * Start of the block_header page of a trampoline set: the set's `region`
 * pointer viewed as an array of struct block_header.
 *
 * The argument is parenthesised so that any pointer-valued expression
 * (for example `sets + 1`) can be passed, not only a plain identifier.
 */
#define REGION_HEADERS_START(metadata) ((struct block_header *)(metadata)->region)
/**
 * Start of the read-execute buffer of a trampoline set, which lies
 * trampoline_page_size bytes after the start of `region`.
 */
#define REGION_RX_BUFFER_START(metadata) ((metadata)->region + trampoline_page_size)

struct wx_buffer
{
void *w;
Expand All @@ -160,8 +175,26 @@ extern char __objc_block_trampoline_end_sret;

/**
 * Set-up routine for the block trampoline allocator.
 *
 * Reads the page size with getpagesize(), derives the region size and the
 * number of block_header entries per page from it, asserts the layout
 * assumptions the trampolines rely on, and finally initialises
 * trampoline_lock.
 */
PRIVATE void init_trampolines(void)
{
// Retrieve the page size
trampoline_page_size = getpagesize();
trampoline_region_size = trampoline_page_size * TRAMPOLINE_REGION_PAGES;
trampoline_header_per_page = trampoline_page_size / sizeof(struct block_header);

// Check that sizeof(struct block_header) is a divisor of the current page size
assert(trampoline_header_per_page * sizeof(struct block_header) == trampoline_page_size);

// Check that the assumptions of all fixed page size implementations
// (currently everything except AArch64) are met
#if defined(__powerpc64__)
assert(trampoline_page_size == 0x10000);
#elif !defined(__ARM_ARCH_ISA_A64)
assert(trampoline_page_size == 0x1000);
#endif

// Check that we can fit the body of the trampoline function inside a block_header.
// The difference between the end and start symbols is the size of the
// trampoline code in bytes (both symbols are declared as char).
assert(&__objc_block_trampoline_end - &__objc_block_trampoline <= sizeof(struct block_header));
assert(&__objc_block_trampoline_end_sret - &__objc_block_trampoline_sret <= sizeof(struct block_header));

INIT_LOCK(trampoline_lock);
}

Expand All @@ -176,20 +209,31 @@ static struct trampoline_set *alloc_trampolines(char *start, char *end)
{
struct trampoline_set *metadata = calloc(1, sizeof(struct trampoline_set));
#if _WIN32
metadata->buffers = VirtualAlloc(NULL, sizeof(struct trampoline_buffers), MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
metadata->region = VirtualAlloc(NULL, trampoline_region_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
#else
metadata->buffers = mmap(NULL, sizeof(struct trampoline_buffers), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
metadata->region = mmap(NULL, trampoline_region_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
#endif
for (int i=0 ; i<HEADERS_PER_PAGE ; i++)
struct block_header *headers_start = REGION_HEADERS_START(metadata);
uint8_t *rx_buffer_start = REGION_RX_BUFFER_START(metadata);
for (int i=0 ; i<trampoline_header_per_page ; i++)
{
metadata->buffers->headers[i].fnptr = (void(*)(void))invalid;
metadata->buffers->headers[i].block = &metadata->buffers->headers[i+1].block;
char *block = metadata->buffers->rx_buffer + (i * sizeof(struct block_header));
headers_start[i].fnptr = (void(*)(void))invalid;
headers_start[i].block = &headers_start[i+1].block;
uint8_t *block = rx_buffer_start + (i * sizeof(struct block_header));

memcpy(block, start, end-start);
#if defined(__ARM_ARCH_ISA_A64)
// Fix-up the trampoline with the address to its block header.
// We reserved 8 bytes after the branch for the block header address.
// See block_trampolines.S for more information.
uintptr_t ptr = (uintptr_t)&headers_start[i];
memcpy(block + (end-start) -8, &ptr, 8);
#endif
}
metadata->buffers->headers[HEADERS_PER_PAGE-1].block = NULL;
mprotect(metadata->buffers->rx_buffer, PAGE_SIZE, PROT_READ | PROT_EXEC);
clear_cache(metadata->buffers->rx_buffer, &metadata->buffers->rx_buffer[PAGE_SIZE]);
headers_start[trampoline_header_per_page-1].block = NULL;
mprotect(rx_buffer_start, trampoline_page_size, PROT_READ | PROT_EXEC);
// Intrinsic expects char pointers, which assumes that sizeof(char) == 1
clear_cache((char *)rx_buffer_start, (char *)rx_buffer_start + trampoline_page_size);

return metadata;
}
Expand Down Expand Up @@ -232,14 +276,16 @@ IMP imp_implementationWithBlock(id block)
if (set->first_free != -1)
{
int i = set->first_free;
struct block_header *h = &set->buffers->headers[i];
struct block_header *headers_start = REGION_HEADERS_START(set);
uint8_t *rx_buffer_start = REGION_RX_BUFFER_START(set);
struct block_header *h = &headers_start[i];
struct block_header *next = h->block;
set->first_free = next ? (next - set->buffers->headers) : -1;
assert(set->first_free < HEADERS_PER_PAGE);
set->first_free = next ? (next - headers_start) : -1;
assert(set->first_free < trampoline_header_per_page);
assert(set->first_free >= -1);
h->fnptr = (void(*)(void))b->invoke;
h->block = b;
uintptr_t addr = (uintptr_t)&set->buffers->rx_buffer[i*sizeof(struct block_header)];
uintptr_t addr = (uintptr_t)&rx_buffer_start[i*sizeof(struct block_header)];
#if (__ARM_ARCH_ISA_THUMB == 2)
// If the trampoline is Thumb-2 code, then we must set the low bit
// to 1 so that b[l]x instructions put the CPU in the correct mode.
Expand All @@ -255,11 +301,13 @@ static int indexForIMP(IMP anIMP, struct trampoline_set **setptr)
{
for (struct trampoline_set *set=*setptr ; set!=NULL ; set=set->next)
{
if (((char*)anIMP >= set->buffers->rx_buffer) &&
((char*)anIMP < &set->buffers->rx_buffer[PAGE_SIZE]))
struct block_header *headers_start = REGION_HEADERS_START(set);
uint8_t *rx_buffer_start = REGION_RX_BUFFER_START(set);
if (((uint8_t *)anIMP >= rx_buffer_start) &&
((uint8_t *)anIMP < &rx_buffer_start[trampoline_page_size]))
{
*setptr = set;
ptrdiff_t offset = (char*)anIMP - set->buffers->rx_buffer;
ptrdiff_t offset = (uint8_t *)anIMP - rx_buffer_start;
return offset / sizeof(struct block_header);
}
}
Expand All @@ -280,7 +328,7 @@ id imp_getBlock(IMP anImp)
{
return NULL;
}
return set->buffers->headers[idx].block;
return REGION_HEADERS_START(set)[idx].block;
}

BOOL imp_removeBlock(IMP anImp)
Expand All @@ -297,11 +345,12 @@ BOOL imp_removeBlock(IMP anImp)
{
return NO;
}
struct block_header *h = &set->buffers->headers[idx];
struct block_header *header_start = REGION_HEADERS_START(set);
struct block_header *h = &header_start[idx];
Block_release(h->block);
h->fnptr = (void(*)(void))invalid;
h->block = set->first_free == -1 ? NULL : &set->buffers->headers[set->first_free];
set->first_free = h - set->buffers->headers;
h->block = set->first_free == -1 ? NULL : &header_start[set->first_free];
set->first_free = h - header_start;
return YES;
}

Expand Down
20 changes: 15 additions & 5 deletions block_trampolines.S
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

.file "block_trampolines.S"


#if __x86_64
////////////////////////////////////////////////////////////////////////////////
// x86-64 trampoline
Expand Down Expand Up @@ -107,9 +106,11 @@
// Word-size dependent constants for the PowerPC trampoline.
// LOAD is the load mnemonic and OFFSET is 8 for 64-bit and 4 for 32-bit
// (presumably the pointer size; confirm against the trampoline macro).
// NOTE(review): the 64-bit branch defines PAGE_SHIFT (1 << 16 = 64 KiB) while
// the 32-bit branch defines PAGE_SIZE (0x1000 = 4 KiB). The macro that
// consumes them is not visible in this hunk; confirm it handles both
// spellings.
#if defined(__powerpc64__)
#define LOAD ld
#define OFFSET 8
#define PAGE_SHIFT 16
#else
#define LOAD lwz
#define OFFSET 4
#define PAGE_SIZE 0x1000
#endif

.macro trampoline arg0, arg1
Expand Down Expand Up @@ -161,10 +162,19 @@
// AArch64 (ARM64) trampoline
////////////////////////////////////////////////////////////////////////////////
.macro trampoline arg0, arg1
adr x17, #-4096
mov \arg1, \arg0
ldp \arg0, x17, [x17]
br x17
// Retrieve the pointer to the block_header from pc + 20
adr x17, #20
ldr x17, [x17]
mov \arg1, \arg0
ldp \arg0, x17, [x17]
br x17
// In AArch64 a nop "advance[s] the value of the program counter by 4".
// We reserve 8 bytes for the block_header address.
// When the trampoline is copied into the rx_buffer of a trampoline region,
// the two nops are overwritten with the address of the block_header
// associated with this trampoline. This happens in `alloc_trampolines`.
nop
nop
.endm
#define ARG0 x0
#define ARG1 x1
Expand Down
20 changes: 20 additions & 0 deletions shims.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <assert.h>
#include <limits.h>
#include <stdint.h>

#include "shims.h"

#if defined(_WIN32)

#include "safewindows.h"

/**
 * Windows replacement for the POSIX getpagesize() call.
 *
 * Queries the system information block with GetSystemInfo() and returns its
 * dwPageSize field converted to int.
 *
 * @return The page size reported by the operating system.
 */
int getpagesize(void) {
	SYSTEM_INFO si;
	GetSystemInfo(&si);

	// dwPageSize is a DWORD (unsigned); make sure it is representable in the
	// int return type before narrowing. INT_MAX comes from <limits.h>.
	DWORD page_size = si.dwPageSize;
	assert(page_size <= INT_MAX);

	return (int)page_size;
}

#endif // defined(_WIN32)
21 changes: 21 additions & 0 deletions shims.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* Shims around non-standardized platform-specific APIs
*/

#ifndef SHIMS_H_INCLUDED
#define SHIMS_H_INCLUDED

#if defined(_WIN32)

/**
 * getpagesize() returns the current page size of the system in bytes.
 *
 * Declared here only when building for _WIN32, where shims.c provides the
 * implementation; on other platforms the declaration comes from <unistd.h>.
 */
int getpagesize(void);

#else // Assume that the platform implements the X/Open System Interface

#include <unistd.h>

#endif // defined(_WIN32)

#endif // SHIMS_H_INCLUDED
Loading