From 67902c01cac65f53cec5704e47157e1520a3f037 Mon Sep 17 00:00:00 2001 From: jbm11208 <81182113+jbm11208@users.noreply.github.com> Date: Tue, 20 May 2025 12:00:20 -0400 Subject: [PATCH 1/2] Shader JIT Multithreading --- src/video_core/shader/shader_jit.cpp | 125 ++++++++++++++++++++++++--- src/video_core/shader/shader_jit.h | 28 +++++- 2 files changed, 138 insertions(+), 15 deletions(-) diff --git a/src/video_core/shader/shader_jit.cpp b/src/video_core/shader/shader_jit.cpp index 644586226..ff6119db5 100644 --- a/src/video_core/shader/shader_jit.cpp +++ b/src/video_core/shader/shader_jit.cpp @@ -1,4 +1,4 @@ -// Copyright 2016 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -16,11 +16,79 @@ #if CITRA_ARCH(x86_64) #include "video_core/shader/shader_jit_x64_compiler.h" #endif +#include namespace Pica::Shader { -JitEngine::JitEngine() = default; -JitEngine::~JitEngine() = default; +JitEngine::JitEngine() { + stub_shader = std::make_unique(); + // Optionally, compile a minimal stub shader here if needed + StartThreadPool(std::thread::hardware_concurrency()); +} + +JitEngine::~JitEngine() { + StopThreadPool(); +} + +void JitEngine::StartThreadPool(size_t num_threads) { + stop_threads = false; + for (size_t i = 0; i < num_threads; ++i) { + thread_pool.emplace_back([this]() { ThreadWorker(); }); + } +} + +void JitEngine::StopThreadPool() { + { + std::lock_guard lock(queue_mutex); + stop_threads = true; + } + queue_cv.notify_all(); + for (auto& t : thread_pool) { + if (t.joinable()) + t.join(); + } + thread_pool.clear(); +} + +void JitEngine::ThreadWorker() { + while (true) { + std::function job; + { + std::unique_lock lock(queue_mutex); + queue_cv.wait(lock, [this]() { return stop_threads || !compile_queue.empty(); }); + if (stop_threads && compile_queue.empty()) + return; + job = std::move(compile_queue.front()); + compile_queue.pop(); + } + job(); + } +} + +void JitEngine::EnqueueCompilation(u64 cache_key, ShaderSetup setup_copy) { + // WARNING: Copying ShaderSetup across threads may be unsafe if it contains raw pointers or + // non-trivial resources. Consider refactoring to only copy the necessary data for compilation. + auto promise = std::make_shared>>(); + { + std::lock_guard lock(queue_mutex); + compile_queue.emplace([this, cache_key, setup_copy, promise]() mutable { + auto shader = std::make_unique(); + shader->Compile(&setup_copy.program_code, &setup_copy.swizzle_data); + { + std::lock_guard lock2(cache_mutex); + if (cache.size() >= MAX_CACHE_SIZE) { + EvictLRU(); + } + promise->set_value(std::move(shader)); + cache[cache_key] = promise->get_future().share(); + lru_list.push_front(cache_key); + } + }); + // Store the future in the cache immediately so SetupBatch can wait on it + cache[cache_key] = promise->get_future().share(); + } + queue_cv.notify_one(); +} void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); @@ -28,23 +96,54 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { const u64 code_hash = setup.GetProgramCodeHash(); const u64 swizzle_hash = setup.GetSwizzleDataHash(); - const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash); - auto iter = cache.find(cache_key); - if (iter != cache.end()) { - setup.cached_shader = iter->second.get(); - } else { - auto shader = std::make_unique(); - shader->Compile(&setup.program_code, &setup.swizzle_data); - setup.cached_shader = shader.get(); - cache.emplace_hint(iter, cache_key, std::move(shader)); + + std::shared_future> shader_future; + { + std::lock_guard lock(cache_mutex); + auto iter = cache.find(cache_key); + if (iter != cache.end()) { + shader_future = iter->second; + UpdateLRU(cache_key); + } else { + // Compile synchronously and store the result + auto shader = std::make_unique(); + shader->Compile(&setup.program_code, &setup.swizzle_data); + auto ready_future = std::make_shared>>(); + ready_future->set_value(std::move(shader)); + shader_future = ready_future->get_future().share(); + cache[cache_key] = shader_future; + lru_list.push_front(cache_key); + } } + // Wait for the shader to be ready (if compiling in background) + setup.cached_shader = shader_future.get().get(); +} + +void JitEngine::EvictLRU() { + if (lru_list.empty()) { + return; + } + const u64 key = lru_list.back(); + lru_list.pop_back(); + cache.erase(key); +} + +void JitEngine::UpdateLRU(u64 key) { + auto it = std::find(lru_list.begin(), lru_list.end(), key); + if (it != lru_list.end()) { + lru_list.erase(it); + } + lru_list.push_front(key); } MICROPROFILE_DECLARE(GPU_Shader); void JitEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const { - ASSERT(setup.cached_shader != nullptr); + // Null check: skip draw if shader is not ready + if (!setup.cached_shader) { + return; + } MICROPROFILE_SCOPE(GPU_Shader); diff --git a/src/video_core/shader/shader_jit.h b/src/video_core/shader/shader_jit.h index 2f3e77b02..82ead6939 100644 --- a/src/video_core/shader/shader_jit.h +++ b/src/video_core/shader/shader_jit.h @@ -1,4 +1,4 @@ -// Copyright 2016 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -7,8 +7,14 @@ #include "common/arch.h" #if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64) +#include +#include +#include #include +#include +#include #include +#include #include "common/common_types.h" #include "video_core/shader/shader.h" @@ -25,7 +31,25 @@ public: void Run(const ShaderSetup& setup, ShaderUnit& state) const override; private: - std::unordered_map> cache; + static constexpr size_t MAX_CACHE_SIZE = 1000; // Maximum number of shaders to cache + std::unordered_map>> cache; + std::list lru_list; // Track LRU order of shaders + mutable std::mutex cache_mutex; + + // Parallel compilation support + std::vector thread_pool; + std::queue> compile_queue; + std::mutex queue_mutex; + std::condition_variable queue_cv; + bool stop_threads = false; + std::unique_ptr stub_shader; + + void EvictLRU(); + void UpdateLRU(u64 key); + void ThreadWorker(); + void EnqueueCompilation(u64 cache_key, ShaderSetup setup_copy); + void StartThreadPool(size_t num_threads); + void StopThreadPool(); }; } // namespace Pica::Shader From 477d673235da3e0e3815b5a1014b550eecee0a80 Mon Sep 17 00:00:00 2001 From: jbm11208 <81182113+jbm11208@users.noreply.github.com> Date: Wed, 21 May 2025 08:31:31 -0400 Subject: [PATCH 2/2] Remove Unused Function --- src/video_core/shader/shader_jit.cpp | 25 ------------------------- src/video_core/shader/shader_jit.h | 1 - 2 files changed, 26 deletions(-) diff --git a/src/video_core/shader/shader_jit.cpp b/src/video_core/shader/shader_jit.cpp index ff6119db5..72af05a85 100644 --- a/src/video_core/shader/shader_jit.cpp +++ b/src/video_core/shader/shader_jit.cpp @@ -65,31 +65,6 @@ void JitEngine::ThreadWorker() { } } -void JitEngine::EnqueueCompilation(u64 cache_key, ShaderSetup setup_copy) { - // WARNING: Copying ShaderSetup across threads may be unsafe if it contains raw pointers or - // non-trivial resources. Consider refactoring to only copy the necessary data for compilation. - auto promise = std::make_shared>>(); - { - std::lock_guard lock(queue_mutex); - compile_queue.emplace([this, cache_key, setup_copy, promise]() mutable { - auto shader = std::make_unique(); - shader->Compile(&setup_copy.program_code, &setup_copy.swizzle_data); - { - std::lock_guard lock2(cache_mutex); - if (cache.size() >= MAX_CACHE_SIZE) { - EvictLRU(); - } - promise->set_value(std::move(shader)); - cache[cache_key] = promise->get_future().share(); - lru_list.push_front(cache_key); - } - }); - // Store the future in the cache immediately so SetupBatch can wait on it - cache[cache_key] = promise->get_future().share(); - } - queue_cv.notify_one(); -} - void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.entry_point = entry_point; diff --git a/src/video_core/shader/shader_jit.h b/src/video_core/shader/shader_jit.h index 82ead6939..76b54b609 100644 --- a/src/video_core/shader/shader_jit.h +++ b/src/video_core/shader/shader_jit.h @@ -47,7 +47,6 @@ private: void EvictLRU(); void UpdateLRU(u64 key); void ThreadWorker(); - void EnqueueCompilation(u64 cache_key, ShaderSetup setup_copy); void StartThreadPool(size_t num_threads); void StopThreadPool(); };