Move Shader JIT Multithreading to a Separate Branch
This commit is contained in:
parent
6e090f428c
commit
f831d9ed47
2 changed files with 15 additions and 138 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright Citra Emulator Project / Azahar Emulator Project
|
// Copyright 2016 Citra Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
|
@ -16,79 +16,11 @@
|
||||||
#if CITRA_ARCH(x86_64)
|
#if CITRA_ARCH(x86_64)
|
||||||
#include "video_core/shader/shader_jit_x64_compiler.h"
|
#include "video_core/shader/shader_jit_x64_compiler.h"
|
||||||
#endif
|
#endif
|
||||||
#include <future>
|
|
||||||
|
|
||||||
namespace Pica::Shader {
|
namespace Pica::Shader {
|
||||||
|
|
||||||
JitEngine::JitEngine() {
|
JitEngine::JitEngine() = default;
|
||||||
stub_shader = std::make_unique<JitShader>();
|
JitEngine::~JitEngine() = default;
|
||||||
// Optionally, compile a minimal stub shader here if needed
|
|
||||||
StartThreadPool(std::thread::hardware_concurrency());
|
|
||||||
}
|
|
||||||
|
|
||||||
JitEngine::~JitEngine() {
|
|
||||||
StopThreadPool();
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::StartThreadPool(size_t num_threads) {
|
|
||||||
stop_threads = false;
|
|
||||||
for (size_t i = 0; i < num_threads; ++i) {
|
|
||||||
thread_pool.emplace_back([this]() { ThreadWorker(); });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::StopThreadPool() {
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(queue_mutex);
|
|
||||||
stop_threads = true;
|
|
||||||
}
|
|
||||||
queue_cv.notify_all();
|
|
||||||
for (auto& t : thread_pool) {
|
|
||||||
if (t.joinable())
|
|
||||||
t.join();
|
|
||||||
}
|
|
||||||
thread_pool.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::ThreadWorker() {
|
|
||||||
while (true) {
|
|
||||||
std::function<void()> job;
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
|
||||||
queue_cv.wait(lock, [this]() { return stop_threads || !compile_queue.empty(); });
|
|
||||||
if (stop_threads && compile_queue.empty())
|
|
||||||
return;
|
|
||||||
job = std::move(compile_queue.front());
|
|
||||||
compile_queue.pop();
|
|
||||||
}
|
|
||||||
job();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::EnqueueCompilation(u64 cache_key, ShaderSetup setup_copy) {
|
|
||||||
// WARNING: Copying ShaderSetup across threads may be unsafe if it contains raw pointers or
|
|
||||||
// non-trivial resources. Consider refactoring to only copy the necessary data for compilation.
|
|
||||||
auto promise = std::make_shared<std::promise<std::unique_ptr<JitShader>>>();
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(queue_mutex);
|
|
||||||
compile_queue.emplace([this, cache_key, setup_copy, promise]() mutable {
|
|
||||||
auto shader = std::make_unique<JitShader>();
|
|
||||||
shader->Compile(&setup_copy.program_code, &setup_copy.swizzle_data);
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock2(cache_mutex);
|
|
||||||
if (cache.size() >= MAX_CACHE_SIZE) {
|
|
||||||
EvictLRU();
|
|
||||||
}
|
|
||||||
promise->set_value(std::move(shader));
|
|
||||||
cache[cache_key] = promise->get_future().share();
|
|
||||||
lru_list.push_front(cache_key);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
// Store the future in the cache immediately so SetupBatch can wait on it
|
|
||||||
cache[cache_key] = promise->get_future().share();
|
|
||||||
}
|
|
||||||
queue_cv.notify_one();
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
|
void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
|
||||||
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
||||||
|
|
@ -96,54 +28,23 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
|
||||||
|
|
||||||
const u64 code_hash = setup.GetProgramCodeHash();
|
const u64 code_hash = setup.GetProgramCodeHash();
|
||||||
const u64 swizzle_hash = setup.GetSwizzleDataHash();
|
const u64 swizzle_hash = setup.GetSwizzleDataHash();
|
||||||
const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash);
|
|
||||||
|
|
||||||
std::shared_future<std::unique_ptr<JitShader>> shader_future;
|
const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash);
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
|
||||||
auto iter = cache.find(cache_key);
|
auto iter = cache.find(cache_key);
|
||||||
if (iter != cache.end()) {
|
if (iter != cache.end()) {
|
||||||
shader_future = iter->second;
|
setup.cached_shader = iter->second.get();
|
||||||
UpdateLRU(cache_key);
|
|
||||||
} else {
|
} else {
|
||||||
// Compile synchronously and store the result
|
|
||||||
auto shader = std::make_unique<JitShader>();
|
auto shader = std::make_unique<JitShader>();
|
||||||
shader->Compile(&setup.program_code, &setup.swizzle_data);
|
shader->Compile(&setup.program_code, &setup.swizzle_data);
|
||||||
auto ready_future = std::make_shared<std::promise<std::unique_ptr<JitShader>>>();
|
setup.cached_shader = shader.get();
|
||||||
ready_future->set_value(std::move(shader));
|
cache.emplace_hint(iter, cache_key, std::move(shader));
|
||||||
shader_future = ready_future->get_future().share();
|
|
||||||
cache[cache_key] = shader_future;
|
|
||||||
lru_list.push_front(cache_key);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
// Wait for the shader to be ready (if compiling in background)
|
|
||||||
setup.cached_shader = shader_future.get().get();
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::EvictLRU() {
|
|
||||||
if (lru_list.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const u64 key = lru_list.back();
|
|
||||||
lru_list.pop_back();
|
|
||||||
cache.erase(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitEngine::UpdateLRU(u64 key) {
|
|
||||||
auto it = std::find(lru_list.begin(), lru_list.end(), key);
|
|
||||||
if (it != lru_list.end()) {
|
|
||||||
lru_list.erase(it);
|
|
||||||
}
|
|
||||||
lru_list.push_front(key);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MICROPROFILE_DECLARE(GPU_Shader);
|
MICROPROFILE_DECLARE(GPU_Shader);
|
||||||
|
|
||||||
void JitEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const {
|
void JitEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const {
|
||||||
// Null check: skip draw if shader is not ready
|
ASSERT(setup.cached_shader != nullptr);
|
||||||
if (!setup.cached_shader) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(GPU_Shader);
|
MICROPROFILE_SCOPE(GPU_Shader);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright Citra Emulator Project / Azahar Emulator Project
|
// Copyright 2016 Citra Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
|
@ -7,14 +7,8 @@
|
||||||
#include "common/arch.h"
|
#include "common/arch.h"
|
||||||
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
|
||||||
|
|
||||||
#include <functional>
|
|
||||||
#include <future>
|
|
||||||
#include <list>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
|
||||||
#include <thread>
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <queue>
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
|
|
||||||
|
|
@ -31,25 +25,7 @@ public:
|
||||||
void Run(const ShaderSetup& setup, ShaderUnit& state) const override;
|
void Run(const ShaderSetup& setup, ShaderUnit& state) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr size_t MAX_CACHE_SIZE = 1000; // Maximum number of shaders to cache
|
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
|
||||||
std::unordered_map<u64, std::shared_future<std::unique_ptr<JitShader>>> cache;
|
|
||||||
std::list<u64> lru_list; // Track LRU order of shaders
|
|
||||||
mutable std::mutex cache_mutex;
|
|
||||||
|
|
||||||
// Parallel compilation support
|
|
||||||
std::vector<std::thread> thread_pool;
|
|
||||||
std::queue<std::function<void()>> compile_queue;
|
|
||||||
std::mutex queue_mutex;
|
|
||||||
std::condition_variable queue_cv;
|
|
||||||
bool stop_threads = false;
|
|
||||||
std::unique_ptr<JitShader> stub_shader;
|
|
||||||
|
|
||||||
void EvictLRU();
|
|
||||||
void UpdateLRU(u64 key);
|
|
||||||
void ThreadWorker();
|
|
||||||
void EnqueueCompilation(u64 cache_key, ShaderSetup setup_copy);
|
|
||||||
void StartThreadPool(size_t num_threads);
|
|
||||||
void StopThreadPool();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
} // namespace Pica::Shader
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue