From 4c7e5586d109eba46c4f067699ae652d722270a0 Mon Sep 17 00:00:00 2001 From: "jeffery.wsj" Date: Fri, 8 May 2020 18:06:37 +0800 Subject: [PATCH 1/2] adapt golang to occlum libos: 1. hook heap malloc 2. hook all syscall based on amd64 linux platform 3. hook vdso call --- src/runtime/malloc.go | 76 ++++++++++----------- src/runtime/os_linux.go | 4 ++ src/runtime/sys_linux_amd64.go | 3 + src/runtime/sys_linux_amd64.s | 116 +++++++++++++++++++++------------ src/runtime/textflag.h | 11 ++++ src/syscall/asm_linux_amd64.s | 24 +++++-- 6 files changed, 150 insertions(+), 84 deletions(-) create mode 100644 src/runtime/sys_linux_amd64.go diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index d768054198..0d7166397d 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -610,43 +610,45 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { goto mapped } - // Try to grow the heap at a hint address. - for h.arenaHints != nil { - hint := h.arenaHints - p := hint.addr - if hint.down { - p -= n - } - if p+n < p { - // We can't use this, so don't ask. - v = nil - } else if arenaIndex(p+n-1) >= 1<= 1<procid to Linux tid MOVL $SYS_gettid, AX - SYSCALL + SYSCALL_ENHANCE MOVQ AX, m_procid(R8) // Set FS to point at m->tls. @@ -589,14 +624,14 @@ nog: // It shouldn't return. If it does, exit that thread. 
MOVL $111, DI MOVL $SYS_exit, AX - SYSCALL + SYSCALL_ENHANCE JMP -3(PC) // keep exiting TEXT runtime·sigaltstack(SB),NOSPLIT,$-8 MOVQ new+0(FP), DI MOVQ old+8(FP), SI MOVQ $SYS_sigaltstack, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $0xf1, 0xf1 // crash @@ -613,7 +648,7 @@ TEXT runtime·settls(SB),NOSPLIT,$32 MOVQ DI, SI MOVQ $0x1002, DI // ARCH_SET_FS MOVQ $SYS_arch_prctl, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $0xf1, 0xf1 // crash @@ -621,7 +656,7 @@ TEXT runtime·settls(SB),NOSPLIT,$32 TEXT runtime·osyield(SB),NOSPLIT,$0 MOVL $SYS_sched_yield, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 @@ -629,7 +664,7 @@ TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 MOVQ len+8(FP), SI MOVQ buf+16(FP), DX MOVL $SYS_sched_getaffinity, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -637,7 +672,7 @@ TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 TEXT runtime·epollcreate(SB),NOSPLIT,$0 MOVL size+0(FP), DI MOVL $SYS_epoll_create, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+8(FP) RET @@ -645,7 +680,7 @@ TEXT runtime·epollcreate(SB),NOSPLIT,$0 TEXT runtime·epollcreate1(SB),NOSPLIT,$0 MOVL flags+0(FP), DI MOVL $SYS_epoll_create1, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+8(FP) RET @@ -656,7 +691,7 @@ TEXT runtime·epollctl(SB),NOSPLIT,$0 MOVL fd+8(FP), DX MOVQ ev+16(FP), R10 MOVL $SYS_epoll_ctl, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -669,7 +704,7 @@ TEXT runtime·epollwait(SB),NOSPLIT,$0 MOVL timeout+20(FP), R10 MOVQ $0, R8 MOVL $SYS_epoll_pwait, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -679,10 +714,9 @@ TEXT runtime·closeonexec(SB),NOSPLIT,$0 MOVQ $2, SI // F_SETFD MOVQ $1, DX // FD_CLOEXEC MOVL $SYS_fcntl, AX - SYSCALL + SYSCALL_ENHANCE RET - // int access(const char *name, int mode) TEXT runtime·access(SB),NOSPLIT,$0 // This uses faccessat instead of access, because Android O blocks access. 
@@ -691,7 +725,7 @@ TEXT runtime·access(SB),NOSPLIT,$0 MOVL mode+8(FP), DX MOVL $0, R10 MOVL $SYS_faccessat, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+16(FP) RET @@ -701,7 +735,7 @@ TEXT runtime·connect(SB),NOSPLIT,$0-28 MOVQ addr+8(FP), SI MOVL len+16(FP), DX MOVL $SYS_connect, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -711,7 +745,7 @@ TEXT runtime·socket(SB),NOSPLIT,$0-20 MOVL typ+4(FP), SI MOVL prot+8(FP), DX MOVL $SYS_socket, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+16(FP) RET @@ -720,6 +754,6 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 // Implemented as brk(NULL). MOVQ $0, DI MOVL $SYS_brk, AX - SYSCALL + SYSCALL_ENHANCE MOVQ AX, ret+0(FP) RET diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h index daca36d948..824f069076 100644 --- a/src/runtime/textflag.h +++ b/src/runtime/textflag.h @@ -35,3 +35,14 @@ // Function is the top of the call stack. Call stack unwinders should stop // at this function. #define TOPFRAME 2048 + +#define SYSCALL_ENHANCE \ + CMPQ runtime·occlumentry(SB), $0x0 \ + JBE 4(PC) \ + CALL *runtime·occlumentry(SB) \ + CMPQ runtime·occlumentry(SB), $0x0 \ + JNE 2(PC) \ + SYSCALL + +#define OCCLUM_GET_TIME_OF_DAY \ + CALL *runtime·occlumentry(SB) diff --git a/src/syscall/asm_linux_amd64.s b/src/syscall/asm_linux_amd64.s index 364815df18..4417768e8a 100644 --- a/src/syscall/asm_linux_amd64.s +++ b/src/syscall/asm_linux_amd64.s @@ -5,6 +5,8 @@ #include "textflag.h" #include "funcdata.h" +#define SYS_gettimeofday 96 + // // System calls for AMD64, Linux // @@ -23,7 +25,7 @@ TEXT ·Syscall(SB),NOSPLIT,$0-56 MOVQ $0, R8 MOVQ $0, R9 MOVQ trap+0(FP), AX // syscall entry - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS ok MOVQ $-1, r1+32(FP) @@ -49,7 +51,7 @@ TEXT ·Syscall6(SB),NOSPLIT,$0-80 MOVQ a5+40(FP), R8 MOVQ a6+48(FP), R9 MOVQ trap+0(FP), AX // syscall entry - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS ok6 MOVQ $-1, r1+56(FP) @@ -74,7 +76,7 @@ TEXT ·RawSyscall(SB),NOSPLIT,$0-56 MOVQ $0, R8 
MOVQ $0, R9 MOVQ trap+0(FP), AX // syscall entry - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS ok1 MOVQ $-1, r1+32(FP) @@ -97,7 +99,7 @@ TEXT ·RawSyscall6(SB),NOSPLIT,$0-80 MOVQ a5+40(FP), R8 MOVQ a6+48(FP), R9 MOVQ trap+0(FP), AX // syscall entry - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS ok2 MOVQ $-1, r1+56(FP) @@ -121,7 +123,7 @@ TEXT ·rawVforkSyscall(SB),NOSPLIT,$0-32 MOVQ $0, R9 MOVQ trap+0(FP), AX // syscall entry POPQ R12 // preserve return address - SYSCALL + SYSCALL_ENHANCE PUSHQ R12 CMPQ AX, $0xfffffffffffff001 JLS ok2 @@ -143,18 +145,28 @@ TEXT ·rawSyscallNoError(SB),NOSPLIT,$0-48 MOVQ $0, R8 MOVQ $0, R9 MOVQ trap+0(FP), AX // syscall entry - SYSCALL + SYSCALL_ENHANCE MOVQ AX, r1+32(FP) MOVQ DX, r2+40(FP) RET // func gettimeofday(tv *Timeval) (err uintptr) TEXT ·gettimeofday(SB),NOSPLIT,$0-16 + CMPQ runtime·occlumentry(SB), $0x0 + JBE start + MOVQ tv+0(FP), DI + MOVQ $0, SI + MOVQ $SYS_gettimeofday, AX + OCCLUM_GET_TIME_OF_DAY + JMP result + +start: MOVQ tv+0(FP), DI MOVQ $0, SI MOVQ runtime·vdsoGettimeofdaySym(SB), AX CALL AX +result: CMPQ AX, $0xfffffffffffff001 JLS ok7 NEGQ AX -- 2.17.1 From 4e78cdfbfbe4382a4dc9f2d4d7048f3c20bb1d74 Mon Sep 17 00:00:00 2001 From: "jeffery.wsj" Date: Fri, 8 May 2020 18:06:37 +0800 Subject: [PATCH 2/2] refactor syscall ABI for occlum 0.15.0 --- src/runtime/sys_linux_amd64.s | 4 ++-- src/runtime/textflag.h | 23 +++++++++++++++-------- src/syscall/asm_linux_amd64.s | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index db9c9ab55b..6416170c41 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -187,7 +187,7 @@ TEXT runtime·walltime(SB),NOSPLIT,$0-12 MOVQ $SYS_clock_gettime, AX MOVQ $0, DI LEAQ ret+0(SP), SI - CALL *runtime·occlumentry(SB) + OCCLUM_SYSCALL MOVQ 0(SP), AX MOVQ 8(SP), DX MOVQ BP, SP @@ -261,7 +261,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$0-8 MOVQ $SYS_clock_gettime, 
AX MOVQ $1, DI LEAQ ret+0(SP), SI - CALL *runtime·occlumentry(SB) + OCCLUM_SYSCALL MOVQ 0(SP), AX // sec MOVQ 8(SP), DX // nsec IMULQ $1000000000, AX diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h index 824f069076..e51f98849a 100644 --- a/src/runtime/textflag.h +++ b/src/runtime/textflag.h @@ -36,13 +36,20 @@ // at this function. #define TOPFRAME 2048 -#define SYSCALL_ENHANCE \ - CMPQ runtime·occlumentry(SB), $0x0 \ - JBE 4(PC) \ - CALL *runtime·occlumentry(SB) \ - CMPQ runtime·occlumentry(SB), $0x0 \ - JNE 2(PC) \ +// Syscall ABI refactored to follow occlum v0.15.0. +// Step one: load the syscall return address into register rcx; occlum uses +// rcx to decide where to return. +// Step two: jump to the syscall interface address that occlum provides when +// the Go .bin file is loaded. +// +// The BYTE sequence below encodes the instruction: lea 0xc(%rip),%rcx +#define OCCLUM_SYSCALL \ + BYTE $0x48; BYTE $0x8d; BYTE $0x0d; BYTE $0x0c; BYTE $0x00; BYTE $0x00; BYTE $0x00 \ + MOVQ runtime·occlumentry(SB), R11 \ + JMP R11 \ SYSCALL -#define OCCLUM_GET_TIME_OF_DAY \ - CALL *runtime·occlumentry(SB) +#define SYSCALL_ENHANCE \ + CMPQ runtime·occlumentry(SB), $0x0 \ + JBE 10(PC) \ + OCCLUM_SYSCALL diff --git a/src/syscall/asm_linux_amd64.s b/src/syscall/asm_linux_amd64.s index 4417768e8a..06f2eb09c0 100644 --- a/src/syscall/asm_linux_amd64.s +++ b/src/syscall/asm_linux_amd64.s @@ -157,7 +157,7 @@ TEXT ·gettimeofday(SB),NOSPLIT,$0-16 MOVQ tv+0(FP), DI MOVQ $0, SI MOVQ $SYS_gettimeofday, AX - OCCLUM_GET_TIME_OF_DAY + OCCLUM_SYSCALL JMP result start: -- 2.17.1