diff --git a/CMakeLists.txt b/CMakeLists.txt
index b61bb49359..80f162a3ae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -183,6 +183,8 @@ set(YUZU_QT_MIRROR "" CACHE STRING "What mirror to use for downloading the bundl
 option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
 
+# See https://github.com/llvm/llvm-project/issues/123946
+# OpenBSD va_list doesn't play nice with precompiled headers
 set(EXT_DEFAULT OFF)
 if (MSVC OR ANDROID)
     set(EXT_DEFAULT ON)
@@ -357,6 +359,7 @@ if (YUZU_LEGACY)
     add_compile_definitions(YUZU_LEGACY)
 endif()
 
+# TODO: APPLE
 if (ARCHITECTURE_arm64 AND (ANDROID OR PLATFORM_LINUX))
     set(HAS_NCE 1)
     add_compile_definitions(HAS_NCE=1)
@@ -502,7 +505,6 @@ if (YUZU_USE_CPM)
 
     # Opus
     AddJsonPackage(opus)
-
     if (Opus_ADDED)
         if (MSVC AND CXX_CLANG)
             target_compile_options(opus PRIVATE
diff --git a/CMakeModules/CPMUtil.cmake b/CMakeModules/CPMUtil.cmake
index 3d7b84c029..6419e69511 100644
--- a/CMakeModules/CPMUtil.cmake
+++ b/CMakeModules/CPMUtil.cmake
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: Copyright 2025 crueter
 # SPDX-License-Identifier: GPL-3.0-or-later
 
+cmake_minimum_required(VERSION 3.22)
+
 if (MSVC OR ANDROID)
     set(BUNDLED_DEFAULT ON)
 else()
@@ -13,7 +15,6 @@ option(CPMUTIL_FORCE_BUNDLED
 option(CPMUTIL_FORCE_SYSTEM
     "Force system packages for all CPM dependencies (NOT RECOMMENDED)" OFF)
 
-cmake_minimum_required(VERSION 3.22)
 include(CPM)
 
 # cpmfile parsing
diff --git a/docs/CrossCompileARM64.md b/docs/CrossCompileARM64.md
new file mode 100644
index 0000000000..003c2aa826
--- /dev/null
+++ b/docs/CrossCompileARM64.md
@@ -0,0 +1,8 @@
+# Cross compile ARM64
+
+A painless guide to cross-compiling (or testing NCE) from an x86_64 system without polluting your main install.
+
+- Install QEMU: `sudo pkg install qemu`
+- Download Debian 13: `wget https://cdimage.debian.org/debian-cd/current/arm64/iso-cd/debian-13.0.0-arm64-netinst.iso`
+- Create a system disk: `qemu-img create -f qcow2 debian-13-arm64-ci.qcow2 30G`
+- Run the VM: `qemu-system-aarch64 -M virt -m 2G -cpu max -bios /usr/local/share/qemu/edk2-aarch64-code.fd -device virtio-scsi-pci -drive if=none,file=debian-13.0.0-arm64-netinst.iso,format=raw,id=cdrom -device scsi-cd,drive=cdrom -drive if=none,file=debian-13-arm64-ci.qcow2,id=hd0,format=qcow2 -device virtio-blk-device,drive=hd0 -device virtio-gpu-pci -device usb-ehci -device usb-kbd -device intel-hda -device hda-output -nic user,model=virtio-net-pci`
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2bd74503f4..988e8a0a91 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -275,4 +275,13 @@ if(YUZU_USE_PRECOMPILED_HEADERS)
     target_precompile_headers(common PRIVATE precompiled_headers.h)
 endif()
 
+# IOPowerSources (needed for power state) requires linking against IOKit
+if (APPLE)
+    find_library(IOKIT_LIBRARY IOKit)
+    if(NOT IOKIT_LIBRARY)
+        message(FATAL_ERROR "IOKit not found, did you install the Xcode command line tools?")
+    endif()
+    target_link_libraries(common PRIVATE ${IOKIT_LIBRARY})
+endif()
+
 create_target_directory_groups(common)
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 3838c12903..c2fdd2f194 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #endif // FreeBSD
 
@@ -396,21 +397,68 @@ private:
 #elif defined(__linux__) || defined(__FreeBSD__) || defined(__sun__) || defined(__APPLE__)
 // ^^^ Windows ^^^ vvv POSIX vvv
 
-#ifdef ARCHITECTURE_arm64
-
+// Use NCE-friendly mapping techniques
+#if defined(ARCHITECTURE_arm64) && defined(HAS_NCE)
 static void* ChooseVirtualBase(size_t virtual_size) {
-    constexpr uintptr_t Map39BitSize = (1ULL << 39);
     constexpr uintptr_t Map36BitSize = (1ULL << 36);
-
-    // This is not a cryptographic application, we just want something random.
-    std::mt19937_64 rng;
-
+#ifdef __APPLE__
+    // TODO: Fatal flaw: new mappings may appear between this walk and the mmap below.
+    // Very rare, but it CAN happen!
+    // (A standalone sketch of this region walk appears after this hunk.)
+    std::map<u64, u64> aspace_list;
+    kern_return_t krc = KERN_SUCCESS;
+    vm_address_t address = 0;
+    vm_size_t r_size = 0;
+    uint32_t depth = 1;
+    do {
+        struct vm_region_submap_info_64 info;
+        mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
+        krc = vm_region_recurse_64(mach_task_self(), &address, &r_size, &depth, (vm_region_info_64_t)&info, &count);
+        if (krc == KERN_INVALID_ADDRESS)
+            break;
+        if (info.is_submap) {
+            depth++;
+        } else {
+            aspace_list.insert({ u64(address), u64(r_size) });
+            address += r_size;
+        }
+    } while (1);
+    for (auto it = aspace_list.begin(); it != aspace_list.end(); it++) {
+        auto const next = std::next(it);
+        // properties of the hole between this region and the next
+        auto const addr = it->first + it->second;
+        auto const size = (next != aspace_list.end()) ? next->first - addr : std::numeric_limits<u64>::max() - addr;
+        ASSERT(next == aspace_list.end() || it->first < next->first);
+        if (size > virtual_size && uintptr_t(addr + size) >= Map36BitSize) {
+            // valid address for NCE
+            if (uintptr_t(addr) >= Map36BitSize) { // common case: hole entirely above the 36-bit boundary
+                if (size >= virtual_size) {
+                    void* p = mmap(reinterpret_cast<void*>(addr), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+                    if (p == MAP_FAILED)
+                        continue;
+                    return p;
+                }
+                // skip
+            } else { // edge case: hole straddles the 36-bit boundary
+                auto const rem_size = size - (Map36BitSize - uintptr_t(addr));
+                if (rem_size >= virtual_size) {
+                    void* p = mmap(reinterpret_cast<void*>(Map36BitSize), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+                    if (p == MAP_FAILED)
+                        continue;
+                    return p;
+                }
+                // skip
+            }
+        }
+    }
+    UNREACHABLE();
+#else
+    constexpr uintptr_t Map39BitSize = (1ULL << 39);
     // We want to ensure we are allocating at an address aligned to the L2 block size.
     // For Qualcomm devices, we must also allocate memory above 36 bits.
     const size_t lower = Map36BitSize / HugePageSize;
     const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
     const size_t range = upper - lower;
-
+    // This is not a cryptographic application, we just want something random.
+    std::mt19937_64 rng;
     // Try up to 64 times to allocate memory at random addresses in the range.
     for (int i = 0; i < 64; i++) {
         // Calculate a possible location.
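For reference, the region walk in the Apple branch can be exercised on its own. The sketch below is not part of the patch; it assumes a macOS host and `clang++ -std=c++17`, and simply enumerates the task's top-level VM regions the same way ChooseVirtualBase does before it picks a hole:

```cpp
// Standalone demo of the vm_region_recurse_64 walk used by ChooseVirtualBase.
// macOS only; prints every top-level VM region of the current task.
#include <cstdio>
#include <mach/mach.h>

int main() {
    vm_address_t address = 0;
    vm_size_t size = 0;
    natural_t depth = 1;
    while (true) {
        vm_region_submap_info_data_64_t info;
        mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
        const kern_return_t krc =
            vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                                 reinterpret_cast<vm_region_recurse_info_t>(&info), &count);
        if (krc != KERN_SUCCESS) {
            break; // KERN_INVALID_ADDRESS marks the end of the address space
        }
        if (info.is_submap) {
            ++depth; // descend into submaps instead of treating them as one region
        } else {
            std::printf("region: %#018lx..%#018lx\n",
                        static_cast<unsigned long>(address),
                        static_cast<unsigned long>(address + size));
            address += size; // continue scanning after this region
        }
    }
    return 0;
}
```

The gaps between consecutive regions printed here are exactly the "holes" the allocator considers; anything ending above 1 << 36 is a candidate base for NCE.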
@@ -434,10 +482,9 @@ static void* ChooseVirtualBase(size_t virtual_size) {
     }
 
     return MAP_FAILED;
+#endif
 }
-
 #else
-
 static void* ChooseVirtualBase(size_t virtual_size) {
 #if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__sun__) || defined(__HAIKU__) || defined(__managarm__) || defined(__AIX__)
     void* virtual_base = mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_ALIGNED_SUPER, -1, 0);
@@ -446,7 +493,6 @@ static void* ChooseVirtualBase(size_t virtual_size) {
 #endif
     return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
 }
-
 #endif
 
 #if defined(__sun__) || defined(__HAIKU__) || defined(__NetBSD__) || defined(__DragonFly__)
@@ -500,9 +546,10 @@ class HostMemory::Impl {
 public:
     explicit Impl(size_t backing_size_, size_t virtual_size_)
         : backing_size{backing_size_}, virtual_size{virtual_size_} {
-        long page_size = sysconf(_SC_PAGESIZE);
-        ASSERT_MSG(page_size == 0x1000, "page size {:#x} is incompatible with 4K paging",
-                   page_size);
+        // TODO: Solve all 4k paging issues
+        //long page_size = sysconf(_SC_PAGESIZE);
+        //ASSERT_MSG(page_size == 0x1000, "page size {:#x} is incompatible with 4K paging",
+        //           page_size);
 
         // Backing memory initialization
 #if defined(__sun__) || defined(__HAIKU__) || defined(__NetBSD__) || defined(__DragonFly__)
         fd = shm_open_anon(O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600);
@@ -559,14 +606,19 @@ public:
         if (True(perms & MemoryPermission::Write)) {
             flags |= PROT_WRITE;
         }
-#ifdef ARCHITECTURE_arm64
+        // W^X handling is only supported with NCE
+#if defined(ARCHITECTURE_arm64) && defined(HAS_NCE)
         if (True(perms & MemoryPermission::Execute)) {
             flags |= PROT_EXEC;
         }
 #endif
-        void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd,
-                         host_offset);
+#if defined(ARCHITECTURE_arm64) && defined(__APPLE__)
+        // Round up to the host's 16 KiB pages; the base is aligned down to match.
+        length = ((length / 16384) + 1) * 16384;
+        void* ret = mmap(virtual_base + (virtual_offset & ~16383), length, flags, MAP_SHARED | MAP_FIXED, fd, host_offset);
+#else
+        void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd, host_offset);
+#endif
         ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
     }
 
diff --git a/src/common/signal_chain.cpp b/src/common/signal_chain.cpp
index 2e4fecc482..261e02c341 100644
--- a/src/common/signal_chain.cpp
+++ b/src/common/signal_chain.cpp
@@ -10,27 +10,16 @@
 
 namespace Common {
 
+#ifdef __ANDROID__
 template <typename T>
 T* LookupLibcSymbol(const char* name) {
-#if defined(__BIONIC__)
     Common::DynamicLibrary provider("libc.so");
-    if (!provider.IsOpen()) {
-        UNREACHABLE_MSG("Failed to open libc!");
-    }
-#else
-    // For other operating environments, we assume the symbol is not overridden.
-    const char* base = nullptr;
-    Common::DynamicLibrary provider(base);
-#endif
-
+    ASSERT_MSG(provider.IsOpen(), "Failed to open libc!");
     void* sym = provider.GetSymbolAddress(name);
     if (sym == nullptr) {
         sym = dlsym(RTLD_DEFAULT, name);
     }
-    if (sym == nullptr) {
-        UNREACHABLE_MSG("Unable to find symbol {}!", name);
-    }
-
+    ASSERT_MSG(sym != nullptr, "Unable to find symbol {}!", name);
     return reinterpret_cast<T*>(sym);
 }
 
@@ -38,5 +27,10 @@
 int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
     static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
     return libc_sigaction(signum, act, oldact);
 }
+#else
+int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
+    return sigaction(signum, act, oldact);
+}
+#endif
 
 } // namespace Common
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
index 0e0d72fc8a..531bcc8a33 100644
--- a/src/core/arm/nce/arm_nce.cpp
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -13,12 +13,14 @@
 #include "core/arm/nce/patcher.h"
 #include "core/core.h"
 #include "core/memory.h"
-
 #include "core/hle/kernel/k_process.h"
 
+#include "dynarmic/common/context.h"
+
 #include
 #include
 #include
+#include
 
 namespace Core {
 
@@ -33,95 +35,67 @@ static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext);
 static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
 static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
 
-fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
-    _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
-    while (header->magic != FPSIMD_MAGIC) {
-        header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
-    }
-    return reinterpret_cast<fpsimd_context*>(header);
-}
-
 using namespace Common::Literals;
 
 constexpr u32 StackSize = 128_KiB;
 
 } // namespace
 
 void* ArmNce::RestoreGuestContext(void* raw_context) {
-    // Retrieve the host context.
-    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
-
-    // Thread-local parameters will be located in x9.
-    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
-    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
-
-    // Retrieve the host floating point state.
-    auto* fpctx = GetFloatingPointState(host_ctx);
-
-    // Save host callee-saved registers.
-    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
-                sizeof(guest_ctx->host_ctx.host_saved_vregs));
-    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
-                sizeof(guest_ctx->host_ctx.host_saved_regs));
-
-    // Save stack pointer.
-    guest_ctx->host_ctx.host_sp = host_ctx.sp;
-
+    CTX_DECLARE(raw_context);
     // Restore all guest state except tpidr_el0.
-    host_ctx.sp = guest_ctx->sp;
-    host_ctx.pc = guest_ctx->pc;
-    host_ctx.pstate = guest_ctx->pstate;
-    fpctx->fpcr = guest_ctx->fpcr;
-    fpctx->fpsr = guest_ctx->fpsr;
-    std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
-    std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));
-
+    // Thread-local parameters will be located in x9.
+    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(CTX_X(9));
+    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
+    // Save host callee-saved registers.
+    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &CTX_Q(8),
+                sizeof(guest_ctx->host_ctx.host_saved_vregs));
+    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &CTX_X(19),
+                sizeof(guest_ctx->host_ctx.host_saved_regs));
+    // Save stack pointer.
+    guest_ctx->host_ctx.host_sp = CTX_SP;
+    CTX_SP = guest_ctx->sp;
+    CTX_PC = guest_ctx->pc;
+    CTX_PSTATE = guest_ctx->pstate;
+    CTX_FPCR = guest_ctx->fpcr;
+    CTX_FPSR = guest_ctx->fpsr;
+    std::memcpy(&CTX_X(0), guest_ctx->cpu_registers.data(), sizeof(guest_ctx->cpu_registers));
+    std::memcpy(&CTX_Q(0), guest_ctx->vector_registers.data(), sizeof(guest_ctx->vector_registers));
     // Return the new thread-local storage pointer.
     return tpidr;
 }
 
 void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
-    // Retrieve the host context.
-    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
-
-    // Retrieve the host floating point state.
-    auto* fpctx = GetFloatingPointState(host_ctx);
-
+    CTX_DECLARE(raw_context);
     // Save all guest registers except tpidr_el0.
-    std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
-    std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
-    guest_ctx->fpsr = fpctx->fpsr;
-    guest_ctx->fpcr = fpctx->fpcr;
-    guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
-    guest_ctx->pc = host_ctx.pc;
-    guest_ctx->sp = host_ctx.sp;
-
+    std::memcpy(guest_ctx->cpu_registers.data(), &CTX_X(0), sizeof(guest_ctx->cpu_registers));
+    std::memcpy(guest_ctx->vector_registers.data(), &CTX_Q(0), sizeof(guest_ctx->vector_registers));
+    guest_ctx->fpsr = CTX_FPSR;
+    guest_ctx->fpcr = CTX_FPCR;
+    guest_ctx->pc = CTX_PC;
+    guest_ctx->sp = CTX_SP;
+    guest_ctx->pstate = u32(CTX_PSTATE);
     // Restore stack pointer.
-    host_ctx.sp = guest_ctx->host_ctx.host_sp;
+    CTX_SP = guest_ctx->host_ctx.host_sp;
 
     // Restore host callee-saved registers.
-    std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
+    std::memcpy(&CTX_X(19), guest_ctx->host_ctx.host_saved_regs.data(),
                 sizeof(guest_ctx->host_ctx.host_saved_regs));
-    std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
+    std::memcpy(&CTX_Q(8), guest_ctx->host_ctx.host_saved_vregs.data(),
                 sizeof(guest_ctx->host_ctx.host_saved_vregs));
 
-    // Return from the call on exit by setting pc to x30.
-    host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];
-
+    // Return from the call on exit by setting pc to x30.
+    CTX_PC = guest_ctx->host_ctx.host_saved_regs[11];
     // Clear esr_el1 and return it.
-    host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
+    CTX_X(0) = guest_ctx->esr_el1.exchange(0);
 }
 
 bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
-    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
+    CTX_DECLARE(raw_context);
     auto* info = static_cast<siginfo_t*>(raw_info);
 
     // We can't handle the access, so determine why we crashed.
-    const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
-
+    auto const is_prefetch_abort = CTX_PC == reinterpret_cast<u64>(info->si_addr);
     // For data aborts, skip the instruction and return to guest code.
     // This will allow games to continue in many scenarios where they would otherwise crash.
     if (!is_prefetch_abort) {
-        host_ctx.pc += 4;
+        CTX_PC += 4;
         return true;
     }
 
@@ -142,17 +116,13 @@ bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
 }
 
 bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
-    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
-    auto* fpctx = GetFloatingPointState(host_ctx);
+    CTX_DECLARE(raw_context);
     auto& memory = guest_ctx->system->ApplicationMemory();
 
-    // Match and execute an instruction.
-    auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx);
-    if (next_pc) {
-        host_ctx.pc = *next_pc;
+    if (auto next_pc = MatchAndExecuteOneInstruction(memory, raw_context); next_pc) {
+        CTX_PC = *next_pc;
         return true;
     }
-
     // We couldn't handle the access.
     return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
 }
 
@@ -275,9 +245,51 @@ ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
 
 ArmNce::~ArmNce() = default;
 
+// Borrowed from libusb
+static unsigned int posix_gettid(void) {
+    static thread_local unsigned int tl_tid;
+    int tid;
+    if (tl_tid)
+        return tl_tid;
+#if defined(__ANDROID__)
+    tid = gettid();
+#elif defined(__APPLE__)
+#ifdef HAVE_PTHREAD_THREADID_NP
+    uint64_t thread_id;
+    if (pthread_threadid_np(NULL, &thread_id) == 0)
+        tid = (int)thread_id;
+    else
+        tid = -1;
+#else
+    tid = (int)pthread_mach_thread_np(pthread_self());
+#endif
+#elif defined(__HAIKU__)
+    tid = get_pthread_thread_id(pthread_self());
+#elif defined(__linux__)
+    tid = (int)syscall(SYS_gettid);
+#elif defined(__NetBSD__)
+    tid = _lwp_self();
+#elif defined(__OpenBSD__)
+    /* The following only works with OpenBSD > 5.1 as it requires
+     * real thread support. For 5.1 and earlier, -1 is returned. */
+    tid = syscall(SYS_getthrid);
+#elif defined(__sun__)
+    tid = _lwp_self();
+#else
+    tid = -1;
+#endif
+    if (tid == -1) {
+        /* If we don't have a thread ID, at least return a unique
+         * value that can be used to distinguish individual
+         * threads. */
+        tid = (int)(intptr_t)pthread_self();
+    }
+    return tl_tid = (unsigned int)tid;
+}
+
 void ArmNce::Initialize() {
     if (m_thread_id == -1) {
-        m_thread_id = gettid();
+        m_thread_id = posix_gettid();
     }
 
     // Configure signal stack.
@@ -308,7 +320,7 @@
         &ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
     return_to_run_code_action.sa_mask = signal_mask;
     Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
-                    nullptr);
+                      nullptr);
 
     struct sigaction break_from_run_code_action {};
     break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
@@ -378,7 +390,11 @@ void ArmNce::SignalInterrupt(Kernel::KThread* thread) {
     if (params->is_running) {
         // We should signal to the running thread.
         // The running thread will unlock the thread context.
+#ifdef __linux__
         syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal);
+#else
+        pthread_kill(pthread_t(m_thread_id), int(BreakFromRunCodeSignal));
+#endif
     } else {
         // If the thread is no longer running, we have nothing to do.
         UnlockThreadParameters(params);
diff --git a/src/core/arm/nce/arm_nce.s b/src/core/arm/nce/arm_nce.s
index c68c059491..33e7c93c43 100644
--- a/src/core/arm/nce/arm_nce.s
+++ b/src/core/arm/nce/arm_nce.s
@@ -9,10 +9,15 @@
 
 /* static HaltReason Core::ArmNce::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEy
+__ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEy:
+#else
 .section .text._ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
-.global _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
 .type _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
+.global _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
 _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
+#endif
     /* Back up host sp to x3. */
    /* Back up host tpidr_el0 to x4. */
     mov x3, sp
@@ -50,38 +55,52 @@ _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
 
 /* static HaltReason Core::ArmNce::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
+__ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
+#else
 .section .text._ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
-.global _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
 .type _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
+.global _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
 _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
+#endif
     /* This jumps to the signal handler, which will restore the entire context. */
-    /* On entry, x0 = thread id, which is already in the right place. */
-
-    /* Move tpidr to x9 so it is not trampled. */
-    mov x9, x1
-
-    /* Set up arguments. */
-    mov x8, #(__NR_tkill)
+    /* On entry, x0 = thread id, which is already in the right place. Even on macOS. */
+    mov x9, x1 /* Move tpidr to x9 so it is not trampled. */
     mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
-
-    /* Tail call the signal handler. */
-    svc #0
-
-    /* Block execution from flowing here. */
-    brk #1000
-
+#ifdef __APPLE__
+    /* The Mach kernel has no tkill, so invoke __pthread_kill directly. */
+    /* Signature: 328 AUE_PTHREADKILL ALL { int __pthread_kill(int thread_port, int sig); } */
+    mov x16, #(328)
+    svc #0x80 /* Tail call the signal handler. */
+    brk #0xF000 /* See: https://discourse.llvm.org/t/stepping-over-a-brk-instruction-on-arm64/69766/7 */
+#else
+    /* Signature: int tkill(pid_t tid, int sig); */
+    mov x8, #(__NR_tkill)
+    svc #0 /* Tail call the signal handler. */
+    brk #1000 /* Block execution from flowing here. */
+#endif
 
 /* static void Core::ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
+__ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
+#else
 .section .text._ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
-.global _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
 .type _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
+.global _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
 _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
+#endif
     stp x29, x30, [sp, #-0x10]!
     mov x29, sp
 
     /* Call the context restorer with the raw context. */
     mov x0, x2
+#ifdef __APPLE__
+    bl __ZN4Core6ArmNce19RestoreGuestContextEPv
+#else
     bl _ZN4Core6ArmNce19RestoreGuestContextEPv
+#endif
 
     /* Save the old value of tpidr_el0. */
     mrs x8, tpidr_el0
@@ -92,7 +111,11 @@ _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
     msr tpidr_el0, x0
 
     /* Unlock the context. */
+#ifdef __APPLE__
+    bl __ZN4Core6ArmNce22UnlockThreadParametersEPv
+#else
     bl _ZN4Core6ArmNce22UnlockThreadParametersEPv
+#endif
 
     /* Returning from here will enter the guest. */
     ldp x29, x30, [sp], #0x10
     ret
 
 /* static void Core::ArmNce::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
+__ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
+#else
 .section .text._ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
-.global _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
 .type _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, %function
+.global _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
 _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
+#endif
     /* Check to see if we have the correct TLS magic. */
     mrs x8, tpidr_el0
     ldr w9, [x8, #(TpidrEl0TlsMagic)]
@@ -121,7 +149,11 @@ _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
 
     /* Tail call the restorer. */
     mov x1, x2
+#ifdef __APPLE__
+    b __ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
+#else
     b _ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
+#endif
 
     /* Returning from here will enter host code. */
 
@@ -131,10 +163,15 @@
 
 /* static void Core::ArmNce::GuestAlignmentFaultSignalHandler(int sig, void* info, void* raw_context) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
+__ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
+#else
 .section .text._ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, "ax", %progbits
-.global _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
 .type _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, %function
+.global _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
 _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
+#endif
     /* Check to see if we have the correct TLS magic. */
     mrs x8, tpidr_el0
     ldr w9, [x8, #(TpidrEl0TlsMagic)]
@@ -146,7 +183,11 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
 
     /* Incorrect TLS magic, so this is a host fault. */
     /* Tail call the handler. */
+#ifdef __APPLE__
+    b __ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
+#else
     b _ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
+#endif
 
 1:
     /* Correct TLS magic, so this is a guest fault. */
@@ -163,7 +204,11 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
     msr tpidr_el0, x3
 
     /* Call the handler. */
+#ifdef __APPLE__
+    bl __ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
+#else
     bl _ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
+#endif
 
     /* If the handler returned false, we want to preserve the host tpidr_el0. */
     cbz x0, 2f
@@ -177,10 +222,15 @@
     ret
 
 /* static void Core::ArmNce::GuestAccessFaultSignalHandler(int sig, void* info, void* raw_context) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
+__ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
+#else
 .section .text._ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, "ax", %progbits
-.global _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
 .type _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, %function
+.global _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
 _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
+#endif
     /* Check to see if we have the correct TLS magic. */
     mrs x8, tpidr_el0
     ldr w9, [x8, #(TpidrEl0TlsMagic)]
@@ -192,7 +242,11 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
 
     /* Incorrect TLS magic, so this is a host fault. */
     /* Tail call the handler. */
+#ifdef __APPLE__
+    b __ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
+#else
     b _ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
+#endif
 
 1:
     /* Correct TLS magic, so this is a guest fault. */
@@ -209,7 +263,11 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
     msr tpidr_el0, x3
 
     /* Call the handler. */
+#ifdef __APPLE__
+    bl __ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
+#else
     bl _ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
+#endif
 
     /* If the handler returned false, we want to preserve the host tpidr_el0. */
     cbz x0, 2f
@@ -224,10 +282,15 @@
 
 /* static void Core::ArmNce::LockThreadParameters(void* tpidr) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce20LockThreadParametersEPv
+__ZN4Core6ArmNce20LockThreadParametersEPv:
+#else
 .section .text._ZN4Core6ArmNce20LockThreadParametersEPv, "ax", %progbits
-.global _ZN4Core6ArmNce20LockThreadParametersEPv
 .type _ZN4Core6ArmNce20LockThreadParametersEPv, %function
+.global _ZN4Core6ArmNce20LockThreadParametersEPv
 _ZN4Core6ArmNce20LockThreadParametersEPv:
+#endif
     /* Offset to lock member. */
     add x0, x0, #(TpidrEl0Lock)
 
@@ -252,10 +315,15 @@ _ZN4Core6ArmNce20LockThreadParametersEPv:
 
 /* static void Core::ArmNce::UnlockThreadParameters(void* tpidr) */
+#ifdef __APPLE__
+.global __ZN4Core6ArmNce22UnlockThreadParametersEPv
+__ZN4Core6ArmNce22UnlockThreadParametersEPv:
+#else
 .section .text._ZN4Core6ArmNce22UnlockThreadParametersEPv, "ax", %progbits
-.global _ZN4Core6ArmNce22UnlockThreadParametersEPv
 .type _ZN4Core6ArmNce22UnlockThreadParametersEPv, %function
+.global _ZN4Core6ArmNce22UnlockThreadParametersEPv
 _ZN4Core6ArmNce22UnlockThreadParametersEPv:
+#endif
     /* Offset to lock member. */
     add x0, x0, #(TpidrEl0Lock)
diff --git a/src/core/arm/nce/arm_nce_asm_definitions.h b/src/core/arm/nce/arm_nce_asm_definitions.h
index 8ea4383f73..120a3539fc 100644
--- a/src/core/arm/nce/arm_nce_asm_definitions.h
+++ b/src/core/arm/nce/arm_nce_asm_definitions.h
@@ -5,22 +5,24 @@
 
 #define __ASSEMBLY__
 
+#ifdef __APPLE__
+/* https://cpip.readthedocs.io/en/stable/_static/dictobject.c/signal.h_bbe000f9714f274340a28e000a369354.html */
+#define ReturnToRunCodeByExceptionLevelChangeSignal 31 /* SIGUSR2 */
+#define BreakFromRunCodeSignal 16 /* SIGURG */
+#define GuestAccessFaultSignal 11 /* SIGSEGV */
+#define GuestAlignmentFaultSignal 10 /* SIGBUS */
+#else
 #include
 #include
-
 #define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
 #define BreakFromRunCodeSignal SIGURG
 #define GuestAccessFaultSignal SIGSEGV
 #define GuestAlignmentFaultSignal SIGBUS
+#endif
 
 #define GuestContextSp 0xF8
 #define GuestContextHostContext 0x320
 
-#define HostContextSpTpidrEl0 0xE0
-#define HostContextTpidrEl0 0xE8
-#define HostContextRegs 0x0
-#define HostContextVregs 0x60
-
 #define TpidrEl0NativeContext 0x10
 #define TpidrEl0Lock 0x18
 #define TpidrEl0TlsMagic 0x20
@@ -28,3 +30,8 @@
 
 #define SpinLockLocked 0
 #define SpinLockUnlocked 1
+
+#define HostContextSpTpidrEl0 0xE0
+#define HostContextTpidrEl0 0xE8
+#define HostContextRegs 0x0
+#define HostContextVregs 0x60
diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp
index bbe0289f8e..369b5ad37f 100644
--- a/src/core/arm/nce/interpreter_visitor.cpp
+++ b/src/core/arm/nce/interpreter_visitor.cpp
@@ -2,8 +2,9 @@
 // SPDX-FileCopyrightText: Copyright 2023 merryhime
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include "common/bit_cast.h"
 #include "core/arm/nce/interpreter_visitor.h"
+#include "core/memory.h"
+#include "dynarmic/common/context.h"
 
 namespace Core {
 
@@ -790,24 +791,25 @@ bool InterpreterVisitor::LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3
     return this->SIMDOffset(scale, shift, opc_0, Rm, option, Rn, Vt);
 }
 
-std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
-                                                 fpsimd_context* fpsimd_context) {
-    std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
-    std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
-    u64& sp = *reinterpret_cast<u64*>(&context->sp);
-    const u64& pc = *reinterpret_cast<u64*>(&context->pc);
+std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, void* raw_context) {
+    CTX_DECLARE(raw_context);
+    std::span<u64, 31> regs(reinterpret_cast<u64*>(&CTX_X(0)), 31);
+    std::span<u128, 32> vregs(reinterpret_cast<u128*>(&CTX_Q(0)), 32);
 
-    InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
-    u32 instruction = memory.Read32(pc);
+    // Stage sp/pc through temporaries so we don't break aliasing rules :)
+    u64 tmp_sp = CTX_SP;
+    u64 tmp_pc = CTX_PC;
+    u32 instruction = memory.Read32(tmp_pc);
     bool was_executed = false;
-
+    InterpreterVisitor visitor(memory, regs, vregs, tmp_sp, tmp_pc);
     if (auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction)) {
         was_executed = decoder->get().call(visitor, instruction);
     } else {
         LOG_ERROR(Core_ARM, "Unallocated encoding: {:#x}", instruction);
     }
-
-    return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
+    CTX_SP = tmp_sp;
+    CTX_PC = tmp_pc;
+    return was_executed ? std::optional<u64>(tmp_pc + 4) : std::nullopt;
 }
 
 } // namespace Core
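The Darwin signal numbers hardcoded in arm_nce_asm_definitions.h above can be cross-checked against the platform headers. A minimal standalone sanity check (not part of the patch; macOS only) would be:

```cpp
// Compile-time check that the hardcoded Darwin signal numbers match
// <sys/signal.h>; the assembly-shared header defines them as raw integers.
#include <csignal>

static_assert(SIGUSR2 == 31, "ReturnToRunCodeByExceptionLevelChangeSignal mismatch");
static_assert(SIGURG == 16, "BreakFromRunCodeSignal mismatch");
static_assert(SIGSEGV == 11, "GuestAccessFaultSignal mismatch");
static_assert(SIGBUS == 10, "GuestAlignmentFaultSignal mismatch");
```

Placed in any C++ translation unit built on macOS, this turns a silent signal-number drift into a compile error instead of a misrouted signal at runtime.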
diff --git a/src/core/arm/nce/interpreter_visitor.h b/src/core/arm/nce/interpreter_visitor.h
index daae204310..bdb27c26cb 100644
--- a/src/core/arm/nce/interpreter_visitor.h
+++ b/src/core/arm/nce/interpreter_visitor.h
@@ -9,6 +9,7 @@
 
 #include
 #include
+#include
 #include
 #include
 
@@ -105,7 +106,6 @@ private:
     const u64& m_pc;
 };
 
-std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
-                                                 fpsimd_context* fpsimd_context);
+std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, void* raw_context);
 
 } // namespace Core
diff --git a/src/core/arm/nce/lru_cache.h b/src/core/arm/nce/lru_cache.h
index 1bc00c8f14..4085aae28c 100644
--- a/src/core/arm/nce/lru_cache.h
+++ b/src/core/arm/nce/lru_cache.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 
 #include "common/logging/log.h"
 
diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp
index 9321258ae9..88bccf98ca 100644
--- a/src/core/arm/nce/patcher.cpp
+++ b/src/core/arm/nce/patcher.cpp
@@ -4,15 +4,14 @@
 #include "common/arm64/native_clock.h"
 #include "common/bit_cast.h"
 #include "common/literals.h"
-#include "core/arm/nce/arm_nce.h"
 #include "core/arm/nce/guest_context.h"
 #include "core/arm/nce/instructions.h"
 #include "core/arm/nce/patcher.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/svc.h"
-#include "core/memory.h"
 #include "core/hle/kernel/k_thread.h"
+#include "core/memory.h"
 
 namespace Core::NCE {
 
diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
index f1f208179f..4566cea0b2 100644
--- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
+++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
@@ -6,6 +6,8 @@
  * SPDX-License-Identifier: 0BSD
  */
 
+#include "dynarmic/backend/exception_handler.h"
+
 #include
 #include
 #include
 
@@ -118,8 +120,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
     CTX_DECLARE(raw_context);
 #if defined(ARCHITECTURE_x86_64)
     {
-        std::shared_lock guard(sig_handler->code_block_infos_mutex);
-        if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) {
+        std::shared_lock guard(sig_handler->code_block_infos_mutex);
+        const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP);
+        if (iter != sig_handler->code_block_infos.end()) {
             FakeCall fc = iter->second.cb(CTX_RIP);
             CTX_RSP -= sizeof(u64);
             *mcl::bit_cast<u64*>(CTX_RSP) = fc.ret_rip;
@@ -130,8 +133,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
     fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
"SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { + std::shared_lock guard(sig_handler->code_block_infos_mutex); + const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); + if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->second.cb(CTX_PC); CTX_PC = fc.call_pc; return; @@ -187,11 +191,11 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h index ea2c1ef251..0403d19f60 100644 --- a/src/dynarmic/src/dynarmic/common/context.h +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -119,6 +119,7 @@ # error "unimplemented" #endif +// TODO: FreeBSD/OpenBSD #ifdef ARCHITECTURE_arm64 #ifdef __APPLE__ inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) { diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp index e45f8e43fb..cad4ecbe21 100644 --- a/src/video_core/vulkan_common/vulkan_surface.cpp +++ b/src/video_core/vulkan_common/vulkan_surface.cpp @@ -29,14 +29,32 @@ vk::SurfaceKHR CreateSurface( } #elif defined(__APPLE__) if (window_info.type == Core::Frontend::WindowSystemType::Cocoa) { - const VkMetalSurfaceCreateInfoEXT macos_ci = { + const VkMetalSurfaceCreateInfoEXT metal_ci = { + .sType = VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, .pLayer = static_cast(window_info.render_surface), }; - const auto vkCreateMetalSurfaceEXT = reinterpret_cast( - dld.vkGetInstanceProcAddr(*instance, "vkCreateMetalSurfaceEXT")); - if (!vkCreateMetalSurfaceEXT || - vkCreateMetalSurfaceEXT(*instance, &macos_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { - LOG_ERROR(Render_Vulkan, "Failed to initialize Metal surface"); + const auto vkCreateMetalSurfaceEXT = reinterpret_cast(dld.vkGetInstanceProcAddr(*instance, "vkCreateMetalSurfaceEXT")); + if (!vkCreateMetalSurfaceEXT || vkCreateMetalSurfaceEXT(*instance, &metal_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { +// TODO: Way to fallback? - where's my vulkan headers +#if 0 + // Attempt to make a macOS surface instead then... 
+            // This is the deprecated VkMacOSSurfaceCreateInfoMVK(3) path, but it should work if the above fails.
+            // https://registry.khronos.org/vulkan/specs/latest/man/html/VkMacOSSurfaceCreateInfoMVK.html
+            const VkMacOSSurfaceCreateInfoMVK macos_legacy_ci = {
+                .sType = VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK,
+                .pNext = nullptr,
+                .flags = 0,
+                .pView = static_cast<const void*>(window_info.render_surface),
+            };
+            const auto vkCreateMacOSSurfaceMVK = reinterpret_cast<PFN_vkCreateMacOSSurfaceMVK>(dld.vkGetInstanceProcAddr(*instance, "vkCreateMacOSSurfaceMVK"));
+            if (!vkCreateMacOSSurfaceMVK || vkCreateMacOSSurfaceMVK(*instance, &macos_legacy_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+                LOG_ERROR(Render_Vulkan, "Failed to initialize Metal/macOS surface");
+                throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+            }
+#endif
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Metal/macOS surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
         }
     }
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index ee45b2a79a..8e49c75a9d 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -371,7 +371,7 @@ if (APPLE)
 
     if (YUZU_USE_BUNDLED_MOLTENVK)
         set(MOLTENVK_PLATFORM "macOS")
-        set(MOLTENVK_VERSION "v1.3.0")
+        set(MOLTENVK_VERSION "v1.4.0")
         download_moltenvk(${MOLTENVK_PLATFORM} ${MOLTENVK_VERSION})
     endif()
     find_library(MOLTENVK_LIBRARY MoltenVK REQUIRED)
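For context, the Metal-surface path taken in vulkan_surface.cpp above reduces to a small free-standing helper. This is only an illustrative sketch, not eden code: the helper name is hypothetical, it calls the loader's vkGetInstanceProcAddr directly instead of going through the dispatch table `dld`, and it assumes the instance was created with the VK_EXT_metal_surface extension enabled and is linked against a Vulkan loader (e.g. MoltenVK).

```cpp
// Sketch: create a VkSurfaceKHR from a CAMetalLayer via VK_EXT_metal_surface.
#define VK_USE_PLATFORM_METAL_EXT
#include <vulkan/vulkan.h>

VkSurfaceKHR CreateMetalSurface(VkInstance instance, const CAMetalLayer* layer) {
    const VkMetalSurfaceCreateInfoEXT ci{
        .sType = VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT,
        .pNext = nullptr,
        .flags = 0,
        .pLayer = layer, // the layer backing the window's render surface
    };
    // Resolve the entry point at runtime; it is only available when the
    // VK_EXT_metal_surface instance extension was enabled at vkCreateInstance time.
    const auto create = reinterpret_cast<PFN_vkCreateMetalSurfaceEXT>(
        vkGetInstanceProcAddr(instance, "vkCreateMetalSurfaceEXT"));
    VkSurfaceKHR surface = VK_NULL_HANDLE;
    if (!create || create(instance, &ci, nullptr, &surface) != VK_SUCCESS) {
        return VK_NULL_HANDLE; // caller decides how to report the failure
    }
    return surface;
}
```

Resolving the function pointer dynamically, as the patch does, keeps the binary loadable on hosts whose loader lacks the extension; only the surface-creation call itself then fails, which is exactly the error path that throws VK_ERROR_INITIALIZATION_FAILED above.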