From 311c6ae76cf8548af278894999434f8be604500a Mon Sep 17 00:00:00 2001
From: lizzie
Date: Sat, 30 Aug 2025 19:12:02 +0000
Subject: [PATCH] [nce, dynarmic] macOS port

Signed-off-by: lizzie
---
Review notes (this section is ignored by git-am):
 * The Linux code paths are left byte-for-byte untouched; every macOS path lives in an
   `#ifdef __APPLE__` branch next to its Linux counterpart.
 * RestoreGuestContext on macOS now loads sp from guest sp and pc from guest pc (they were
   swapped), and saves host x19-x30 into host_saved_regs, which SaveGuestContext restores and
   uses (index 11, i.e. x30) as the return address.
 * Darwin's thread state names its registers __x[29]/__fp/__lr/__sp/__pc/__cpsr and mcontext_t
   is a pointer type there; x0-x30 are addressed as one flat array, guarded by static_asserts.
 * MatchAndExecuteOneInstruction keeps a single body; only the register views are per-platform.
 * The blob ids on the `index` lines are the author's originals and no longer describe the
   post-image; regenerate them if a 3-way apply is needed.

 src/core/arm/nce/arm_nce.cpp               | 70 ++++++++++++++++++++++
 src/core/arm/nce/arm_nce_asm_definitions.h |  9 ++-
 src/core/arm/nce/interpreter_visitor.cpp   | 14 ++++-
 src/core/arm/nce/interpreter_visitor.h     |  9 ++-
 4 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
index 0e0d72fc8a..0633d54ea3 100644
--- a/src/core/arm/nce/arm_nce.cpp
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -33,6 +33,18 @@ static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0Nat
 static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
 static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
 
+#ifdef __APPLE__
+// Darwin keeps x0-x28 in __x[29], directly followed by __fp (x29) and __lr (x30). The macOS
+// paths below address x0-x30 as one flat array, so pin that layout at compile time.
+static_assert(offsetof(_STRUCT_ARM_THREAD_STATE64, __fp) == 29 * sizeof(u64));
+static_assert(offsetof(_STRUCT_ARM_THREAD_STATE64, __lr) == 30 * sizeof(u64));
+constexpr size_t HostGprCount = 31;
+
+// Darwin's mcontext_t is a pointer to the machine context; the NEON state is a plain member.
+_STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {
+    return &host_ctx->__ns;
+}
+#else
 fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
     _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
     while (header->magic != FPSIMD_MAGIC) {
@@ -40,6 +52,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
     }
     return reinterpret_cast<fpsimd_context*>(header);
 }
+#endif
 
 using namespace Common::Literals;
 constexpr u32 StackSize = 128_KiB;
@@ -50,6 +63,35 @@ void* ArmNce::RestoreGuestContext(void* raw_context) {
     // Retrieve the host context.
     auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
 
+#ifdef __APPLE__
+    // Address x0-x30 as one flat array; the layout is pinned by the static_asserts above.
+    auto* host_regs = reinterpret_cast<u64*>(&host_ctx->__ss);
+
+    // Thread-local parameters will be located in x9.
+    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_regs[9]);
+    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
+
+    // Retrieve the host floating point state.
+    auto* fpctx = GetFloatingPointState(host_ctx);
+
+    // Save host callee-saved registers.
+    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->__v[8],
+                sizeof(guest_ctx->host_ctx.host_saved_vregs));
+    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_regs[19],
+                sizeof(guest_ctx->host_ctx.host_saved_regs));
+
+    // Save stack pointer.
+    guest_ctx->host_ctx.host_sp = host_ctx->__ss.__sp;
+
+    // Restore all guest state except tpidr_el0.
+    host_ctx->__ss.__sp = guest_ctx->sp;
+    host_ctx->__ss.__pc = guest_ctx->pc;
+    host_ctx->__ss.__cpsr = guest_ctx->pstate;
+    fpctx->__fpcr = guest_ctx->fpcr;
+    fpctx->__fpsr = guest_ctx->fpsr;
+    std::memcpy(host_regs, guest_ctx->cpu_registers.data(), HostGprCount * sizeof(u64));
+    std::memcpy(fpctx->__v, guest_ctx->vector_registers.data(), sizeof(fpctx->__v));
+#else
     // Thread-local parameters will be located in x9.
     auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
     auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
@@ -74,6 +116,7 @@ void* ArmNce::RestoreGuestContext(void* raw_context) {
     fpctx->fpsr = guest_ctx->fpsr;
     std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
     std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));
+#endif
 
     // Return the new thread-local storage pointer.
     return tpidr;
@@ -87,6 +130,32 @@ void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
     auto* fpctx = GetFloatingPointState(host_ctx);
 
     // Save all guest registers except tpidr_el0.
+#ifdef __APPLE__
+    // Address x0-x30 as one flat array; the layout is pinned by the static_asserts above.
+    auto* host_regs = reinterpret_cast<u64*>(&host_ctx->__ss);
+    std::memcpy(guest_ctx->cpu_registers.data(), host_regs, HostGprCount * sizeof(u64));
+    std::memcpy(guest_ctx->vector_registers.data(), fpctx->__v, sizeof(fpctx->__v));
+    guest_ctx->fpsr = fpctx->__fpsr;
+    guest_ctx->fpcr = fpctx->__fpcr;
+    guest_ctx->pstate = host_ctx->__ss.__cpsr;
+    guest_ctx->pc = host_ctx->__ss.__pc;
+    guest_ctx->sp = host_ctx->__ss.__sp;
+
+    // Restore stack pointer.
+    host_ctx->__ss.__sp = guest_ctx->host_ctx.host_sp;
+
+    // Restore host callee-saved registers.
+    std::memcpy(&host_regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
+                sizeof(guest_ctx->host_ctx.host_saved_regs));
+    std::memcpy(&fpctx->__v[8], guest_ctx->host_ctx.host_saved_vregs.data(),
+                sizeof(guest_ctx->host_ctx.host_saved_vregs));
+
+    // Return from the call on exit by setting pc to x30.
+    host_ctx->__ss.__pc = guest_ctx->host_ctx.host_saved_regs[11];
+
+    // Clear esr_el1 and return it.
+    host_regs[0] = guest_ctx->esr_el1.exchange(0);
+#else
     std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
     std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
     guest_ctx->fpsr = fpctx->fpsr;
@@ -109,6 +178,7 @@ void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
 
     // Clear esr_el1 and return it.
     host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
+#endif
 }
 
 bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
diff --git a/src/core/arm/nce/arm_nce_asm_definitions.h b/src/core/arm/nce/arm_nce_asm_definitions.h
index 8ea4383f73..88ddcb3594 100644
--- a/src/core/arm/nce/arm_nce_asm_definitions.h
+++ b/src/core/arm/nce/arm_nce_asm_definitions.h
@@ -5,13 +5,20 @@
 
 #define __ASSEMBLY__
 
+#ifdef __APPLE__
+/* https://cpip.readthedocs.io/en/stable/_static/dictobject.c/signal.h_bbe000f9714f274340a28e000a369354.html */
+#define ReturnToRunCodeByExceptionLevelChangeSignal 31 /* SIGUSR2 */
+#define BreakFromRunCodeSignal 16 /* SIGURG */
+#define GuestAccessFaultSignal 11 /* SIGSEGV */
+#define GuestAlignmentFaultSignal 10 /* SIGBUS */
+#else
 #include <asm-generic/signal.h>
 #include <asm-generic/unistd.h>
-
 #define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
 #define BreakFromRunCodeSignal SIGURG
 #define GuestAccessFaultSignal SIGSEGV
 #define GuestAlignmentFaultSignal SIGBUS
+#endif
 
 #define GuestContextSp 0xF8
 #define GuestContextHostContext 0x320
diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp
index bbe0289f8e..93a710aead 100644
--- a/src/core/arm/nce/interpreter_visitor.cpp
+++ b/src/core/arm/nce/interpreter_visitor.cpp
@@ -791,11 +791,21 @@ bool InterpreterVisitor::LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3
 }
 
 std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
-                                                 fpsimd_context* fpsimd_context) {
+                                                 HostFpContext* fpctx) {
+#ifdef __APPLE__
+    // Darwin's mcontext_t is already a pointer, so `context` is doubly indirect here. x0-x28, fp
+    // and lr sit back to back in the thread state (asserted in arm_nce.cpp): one 31-entry span.
+    auto& thread_state = (*context)->__ss;
+    std::span<u64, 31> regs(reinterpret_cast<u64*>(&thread_state), 31);
+    std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpctx->__v), 32);
+    u64& sp = *reinterpret_cast<u64*>(&thread_state.__sp);
+    const u64& pc = *reinterpret_cast<u64*>(&thread_state.__pc);
+#else
     std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
-    std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
+    std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpctx->vregs), 32);
     u64& sp = *reinterpret_cast<u64*>(&context->sp);
     const u64& pc = *reinterpret_cast<u64*>(&context->pc);
+#endif
 
     InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
     u32 instruction = memory.Read32(pc);
diff --git a/src/core/arm/nce/interpreter_visitor.h b/src/core/arm/nce/interpreter_visitor.h
index daae204310..051005c467 100644
--- a/src/core/arm/nce/interpreter_visitor.h
+++ b/src/core/arm/nce/interpreter_visitor.h
@@ -105,7 +105,14 @@ private:
     const u64& m_pc;
 };
 
+// Host FP/SIMD register state type found in the platform's signal context.
+#ifdef __APPLE__
+using HostFpContext = _STRUCT_ARM_NEON_STATE64;
+#else
+using HostFpContext = fpsimd_context;
+#endif
+
 std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
-                                                 fpsimd_context* fpsimd_context);
+                                                 HostFpContext* fpctx);
 
 } // namespace Core