Vulkan: bind depth-compare views and compare-free samplers for shadow lookups

Review notes (this copy of the patch was re-flowed from a whitespace-collapsed extract):
* Header names and template arguments that the extract had lost were reconstructed from
  context (including the size of candidate_formats) and must be verified against the
  original commit before applying.
* Hunk headers were recomputed for the edited files; the "index" blob ids are the original
  ones and no longer describe the post-image of those files.
* Sampler::HandleWithoutCompareDefaultAnisotropy fell through to Handle() when compare was
  disabled, handing back the full-anisotropy sampler on the fallback path; it now returns
  the default-anisotropy sampler in that case.
* Device::SupportsDepthCompare: dropped the #ifdef block keyed on a Vulkan feature-bit name.
  Feature bits are enumerators, not macros, so the block was never compiled.
* TextureCacheRuntime: dropped the depth-only add_pixel_format pass; it repeated the exact
  compatibility test of the loop above it and could never add a format.
* ImageView::AcquireDepthCompareView: dropped the D16_UNORM case, which only re-queued the
  image's own (already queued) format.
* NOTE(review): the IsKhrImageFormatListSupported() early return is removed by this patch;
  confirm image creation copes with populated view-format lists when the extension is absent.
* NOTE(review): PushImageDescriptors now calls LOG_DEBUG for every compare binding; confirm
  the cost is acceptable on the draw path.

diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 910e07a606..79a4a609c8 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <optional>
 
 #include "common/common_types.h"
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
@@ -189,13 +190,40 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
             const VideoCommon::ImageViewId image_view_id{(views++)->id};
             const VideoCommon::SamplerId sampler_id{*(samplers++)};
             ImageView& image_view{texture_cache.GetImageView(image_view_id)};
-            const VkImageView vk_image_view{image_view.Handle(desc.type)};
             const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
+            const bool wants_compare{sampler.IsCompareEnabled() || desc.is_depth};
+            const bool image_mutable{image_view.HasMutableImageFormat()};
+            const auto depth_view =
+                wants_compare && image_mutable ? image_view.AcquireDepthCompareView(desc.type)
+                                               : std::optional<ImageView::DepthSampledView>{};
+            const bool use_depth_view = depth_view.has_value();
+            const VkImageView vk_image_view{
+                use_depth_view ? depth_view->view : image_view.Handle(desc.type)};
             const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
                                             !image_view.SupportsAnisotropy()};
-            const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
-                                                            : sampler.Handle()};
+            VkSampler vk_sampler{};
+            if (use_fallback_sampler) {
+                if (wants_compare && !use_depth_view) {
+                    vk_sampler = sampler.HandleWithoutCompareDefaultAnisotropy();
+                } else {
+                    vk_sampler = sampler.HandleWithDefaultAnisotropy();
+                }
+            } else if (wants_compare && !use_depth_view) {
+                vk_sampler = sampler.HandleWithoutCompare();
+            } else {
+                vk_sampler = sampler.Handle();
+            }
             guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
+            if (wants_compare) {
+                if (use_depth_view) {
+                    LOG_DEBUG(Render_Vulkan, "Shadow binding uses view format {}",
+                              static_cast<u32>(depth_view->format));
+                } else {
+                    LOG_DEBUG(Render_Vulkan,
+                              "Shadow binding falling back to manual compare (mutable={} compare_supported={})",
+                              image_mutable ? 1 : 0, sampler.IsCompareEnabled() ? 1 : 0);
+                }
+            }
             rescaling.PushTexture(texture_cache.IsRescaling(image_view));
         }
     }
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 5b52e88727..d488345e0e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -6,6 +6,7 @@
 
 #include <algorithm>
 #include <array>
+#include <optional>
 #include <span>
 #include <vector>
 #include <boost/container/small_vector.hpp>
@@ -861,21 +862,29 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
         msaa_copy_pass = std::make_unique<MSAACopyPass>(
             device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
     }
-    if (!device.IsKhrImageFormatListSupported()) {
-        return;
-    }
+    // Keep every list duplicate-free: Image derives has_mutable_format from size() > 1.
+    const auto add_unique = [](std::vector<VkFormat>& formats, VkFormat format) {
+        if (format == VK_FORMAT_UNDEFINED) {
+            return;
+        }
+        if (std::ranges::find(formats, format) != formats.end()) {
+            return;
+        }
+        formats.push_back(format);
+    };
     for (size_t index_a = 0; index_a < VideoCore::Surface::MaxPixelFormat; index_a++) {
         const auto image_format = static_cast<PixelFormat>(index_a);
         if (IsPixelFormatASTC(image_format) && !device.IsOptimalAstcSupported()) {
-            view_formats[index_a].push_back(VK_FORMAT_A8B8G8R8_UNORM_PACK32);
+            add_unique(view_formats[index_a], VK_FORMAT_A8B8G8R8_UNORM_PACK32);
         }
         for (size_t index_b = 0; index_b < VideoCore::Surface::MaxPixelFormat; index_b++) {
             const auto view_format = static_cast<PixelFormat>(index_b);
-            if (VideoCore::Surface::IsViewCompatible(image_format, view_format, false, true)) {
-                const auto view_info =
-                    MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, true, view_format);
-                view_formats[index_a].push_back(view_info.format);
+            if (!VideoCore::Surface::IsViewCompatible(image_format, view_format, false, true)) {
+                continue;
             }
+            const auto vk_format =
+                MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, true, view_format).format;
+            add_unique(view_formats[index_a], vk_format);
         }
     }
 }
@@ -1511,6 +1520,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
       runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info,
                                                    runtime->ViewFormats(info.format))),
       aspect_mask(ImageAspectMask(info.format)) {
+    has_mutable_format = runtime->ViewFormats(info.format).size() > 1;
     if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
         switch (Settings::values.accelerate_astc.GetValue()) {
         case Settings::AstcDecodeMode::Gpu:
@@ -2008,7 +2018,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
                      ImageId image_id_, Image& image)
     : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
       device{&runtime.device}, image_handle{image.Handle()},
-      samples(ConvertSampleCount(image.info.num_samples)) {
+      samples(ConvertSampleCount(image.info.num_samples)),
+      image_format{image.info.format}, image_has_mutable_format{image.HasMutableFormat()} {
     using Shader::TextureType;
 
     const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -2102,7 +2113,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info,
                      const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
     : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
-      buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+      buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)}, image_format{info.format} {}
 
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
     : VideoCommon::ImageViewBase{params}, device{&runtime.device} {
@@ -2123,6 +2134,105 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageV
 
 ImageView::~ImageView() = default;
 
+std::optional<ImageView::DepthSampledView> ImageView::AcquireDepthCompareView(
+    Shader::TextureType texture_type) {
+    if (!image_handle || !image_has_mutable_format) {
+        return std::nullopt;
+    }
+    const auto surface_type = VideoCore::Surface::GetFormatType(image_format);
+    if (surface_type != VideoCore::Surface::SurfaceType::Depth &&
+        surface_type != VideoCore::Surface::SurfaceType::DepthStencil) {
+        return std::nullopt;
+    }
+    const size_t index = static_cast<size_t>(texture_type);
+    if (depth_compare_views[index]) {
+        return DepthSampledView{*depth_compare_views[index], depth_compare_formats[index]};
+    }
+
+    std::array<PixelFormat, 8> candidate_formats{};
+    size_t candidate_count = 0;
+    const auto enqueue_candidate = [&](PixelFormat candidate) {
+        if (candidate == PixelFormat::Invalid) {
+            return;
+        }
+        if (std::ranges::find(candidate_formats.begin(),
+                              candidate_formats.begin() + candidate_count,
+                              candidate) != candidate_formats.begin() + candidate_count) {
+            return;
+        }
+        candidate_formats[candidate_count++] = candidate;
+    };
+
+    // Candidates are tried in order: the image's own format first, then related depth
+    // formats, with D32_FLOAT as the last resort. D16_UNORM needs no case of its own because
+    // the image format is always queued up front and duplicates are dropped.
+    enqueue_candidate(image_format);
+    switch (image_format) {
+    case PixelFormat::D32_FLOAT_S8_UINT:
+        enqueue_candidate(PixelFormat::D32_FLOAT);
+        break;
+    case PixelFormat::D24_UNORM_S8_UINT:
+    case PixelFormat::S8_UINT_D24_UNORM:
+    case PixelFormat::X8_D24_UNORM:
+        enqueue_candidate(PixelFormat::D24_UNORM_S8_UINT);
+        enqueue_candidate(PixelFormat::X8_D24_UNORM);
+        enqueue_candidate(PixelFormat::D32_FLOAT);
+        enqueue_candidate(PixelFormat::D32_FLOAT_S8_UINT);
+        break;
+    default:
+        break;
+    }
+    enqueue_candidate(PixelFormat::D32_FLOAT);
+
+    const auto try_candidate = [&](PixelFormat candidate)
+        -> std::optional<DepthSampledView> {
+        if (!VideoCore::Surface::IsViewCompatible(image_format, candidate, false, true)) {
+            return std::nullopt;
+        }
+        const auto format_info =
+            MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, candidate);
+        if (!device->SupportsDepthCompare(format_info.format)) {
+            return std::nullopt;
+        }
+        auto subresource_range = MakeSubresourceRange(this);
+        subresource_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+        const VkImageViewUsageCreateInfo image_view_usage{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .usage = ImageUsageFlags(format_info, candidate),
+        };
+        VkImageViewCreateInfo ci{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+            .pNext = &image_view_usage,
+            .flags = 0,
+            .image = image_handle,
+            .viewType = ImageViewType(texture_type),
+            .format = format_info.format,
+            .components{
+                .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+            },
+            .subresourceRange = subresource_range,
+        };
+        vk::ImageView view = device->GetLogical().CreateImageView(ci);
+        if (device->HasDebuggingToolAttached()) {
+            view.SetObjectNameEXT(VideoCommon::Name(*this, gpu_addr).c_str());
+        }
+        depth_compare_views[index] = std::move(view);
+        depth_compare_formats[index] = format_info.format;
+        return DepthSampledView{*depth_compare_views[index], depth_compare_formats[index]};
+    };
+
+    for (size_t i = 0; i < candidate_count; ++i) {
+        if (auto sampled_view = try_candidate(candidate_formats[i])) {
+            return sampled_view;
+        }
+    }
+    return std::nullopt;
+}
+
 VkImageView ImageView::DepthView() {
     if (!image_handle) {
         return VK_NULL_HANDLE;
@@ -2236,7 +2346,9 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
     // Some games have samplers with garbage. Sanitize them here.
     const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
 
-    const auto create_sampler = [&](const f32 anisotropy) {
+    compare_enabled = tsc.depth_compare_enabled != 0;
+
+    const auto create_sampler = [&](const f32 anisotropy, VkBool32 compare_enable) {
         return device.GetLogical().CreateSampler(VkSamplerCreateInfo{
             .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
             .pNext = pnext,
@@ -2250,7 +2362,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
             .mipLodBias = tsc.LodBias(),
             .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
             .maxAnisotropy = anisotropy,
-            .compareEnable = tsc.depth_compare_enabled,
+            .compareEnable = compare_enable,
             .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
             .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
             .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
@@ -2260,11 +2372,19 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
         });
     };
 
-    sampler = create_sampler(max_anisotropy);
+    sampler = create_sampler(max_anisotropy, compare_enabled ? VK_TRUE : VK_FALSE);
+    if (compare_enabled) {
+        sampler_no_compare = create_sampler(max_anisotropy, VK_FALSE);
+    }
 
     const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy);
     if (max_anisotropy > max_anisotropy_default) {
-        sampler_default_anisotropy = create_sampler(max_anisotropy_default);
+        sampler_default_anisotropy =
+            create_sampler(max_anisotropy_default, compare_enabled ? VK_TRUE : VK_FALSE);
+        if (compare_enabled) {
+            sampler_default_anisotropy_no_compare =
+                create_sampler(max_anisotropy_default, VK_FALSE);
+        }
     }
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index cd11cc8fc7..777311b69e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <array>
+#include <optional>
 #include <span>
 
 #include "video_core/texture_cache/texture_cache_base.h"
@@ -171,6 +173,11 @@ public:
         return (this->*current_image).UsageFlags();
     }
 
+    /// Returns true when more than one view format is registered for this image's format
+    [[nodiscard]] bool HasMutableFormat() const noexcept {
+        return has_mutable_format;
+    }
+
     /// Returns true when the image is already initialized and mark it as initialized
     [[nodiscard]] bool ExchangeInitialization() noexcept {
         return std::exchange(initialized, true);
@@ -202,6 +209,7 @@ private:
     std::vector<vk::ImageView> storage_image_views;
     VkImageAspectFlags aspect_mask = 0;
     bool initialized = false;
+    bool has_mutable_format = false;
 
     std::unique_ptr<Framebuffer> scale_framebuffer;
     std::unique_ptr<ImageView> scale_view;
@@ -236,12 +244,27 @@ public:
     [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
                                           Shader::ImageFormat image_format);
 
+    /// Depth-aspect view handle paired with the Vulkan format it was created with
+    struct DepthSampledView {
+        VkImageView view;
+        VkFormat format;
+    };
+
+    /// Returns a cached depth-aspect view for the given texture type, creating it on first use.
+    /// Yields std::nullopt for non-mutable or non-depth images, or when no candidate format fits.
+    [[nodiscard]] std::optional<DepthSampledView> AcquireDepthCompareView(
+        Shader::TextureType texture_type);
+
     [[nodiscard]] bool IsRescaled() const noexcept;
 
     [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
         return *image_views[static_cast<size_t>(texture_type)];
     }
 
+    [[nodiscard]] bool HasMutableImageFormat() const noexcept {
+        return image_has_mutable_format;
+    }
+
     [[nodiscard]] VkImage ImageHandle() const noexcept {
         return image_handle;
     }
@@ -275,6 +298,8 @@ private:
 
     std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
     std::unique_ptr<StorageViews> storage_views;
+    std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> depth_compare_views;
+    std::array<VkFormat, Shader::NUM_TEXTURE_TYPES> depth_compare_formats{};
     vk::ImageView depth_view;
     vk::ImageView stencil_view;
     vk::ImageView color_view;
@@ -283,6 +308,8 @@ private:
     VkImageView render_target = VK_NULL_HANDLE;
     VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
     u32 buffer_size = 0;
+    PixelFormat image_format{};
+    bool image_has_mutable_format = false;
 };
 
 class ImageAlloc : public VideoCommon::ImageAllocBase {};
@@ -296,16 +323,40 @@ public:
     }
 
     [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept {
-        return *sampler_default_anisotropy;
+        return sampler_default_anisotropy ? *sampler_default_anisotropy : *sampler;
     }
 
     [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
         return static_cast<bool>(sampler_default_anisotropy);
     }
 
+    /// Returns true when the TSC entry this sampler was built from enables depth comparison
+    [[nodiscard]] bool IsCompareEnabled() const noexcept {
+        return compare_enabled;
+    }
+
+    /// Returns the sampler with compareEnable forced off and the same anisotropy as Handle()
+    [[nodiscard]] VkSampler HandleWithoutCompare() const noexcept {
+        return sampler_no_compare ? *sampler_no_compare : Handle();
+    }
+
+    /// Returns the default-anisotropy sampler with compareEnable forced off
+    [[nodiscard]] VkSampler HandleWithoutCompareDefaultAnisotropy() const noexcept {
+        if (sampler_default_anisotropy_no_compare) {
+            return *sampler_default_anisotropy_no_compare;
+        }
+        // The compare-free variants only exist when compare is enabled. Without compare the
+        // default-anisotropy sampler is already the right one; falling back to Handle() here
+        // would hand back the added anisotropy the caller is trying to avoid.
+        return compare_enabled ? HandleWithoutCompare() : HandleWithDefaultAnisotropy();
+    }
+
 private:
     vk::Sampler sampler;
     vk::Sampler sampler_default_anisotropy;
+    vk::Sampler sampler_no_compare;
+    vk::Sampler sampler_default_anisotropy_no_compare;
+    bool compare_enabled = false;
 };
 
 class Framebuffer {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 230348f21a..71c2a0db8e 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -895,6 +895,40 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
+bool Device::SupportsDepthCompare(VkFormat format, FormatType format_type) const {
+    if (format_properties.find(format) == format_properties.end()) {
+        UNIMPLEMENTED_MSG("Unimplemented depth compare format query={}", format);
+        return false;
+    }
+
+    // The depth-comparison capability is only reported through the 64-bit
+    // VkFormatFeatureFlags2 masks of VkFormatProperties3; the legacy 32-bit feature flags
+    // have no such bit, so the driver is queried directly.
+    VkFormatProperties3 props3{
+        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
+        .pNext = nullptr,
+    };
+    VkFormatProperties2 props2{
+        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+        .pNext = &props3,
+    };
+    physical.GetFormatProperties2(format, props2);
+
+    const auto select_features = [&]() -> VkFormatFeatureFlags2 {
+        switch (format_type) {
+        case FormatType::Linear:
+            return props3.linearTilingFeatures;
+        case FormatType::Optimal:
+            return props3.optimalTilingFeatures;
+        case FormatType::Buffer:
+            return props3.bufferFeatures;
+        }
+        UNREACHABLE_MSG("Invalid format type={}", static_cast<int>(format_type));
+    };
+    const VkFormatFeatureFlags2 features2 = select_features();
+    return (features2 & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT) != 0;
+}
+
 std::string Device::GetDriverName() const {
     switch (properties.driver.driverID) {
     case VK_DRIVER_ID_AMD_PROPRIETARY:
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index cb13f28523..f9009dd6ec 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -210,6 +210,10 @@ public:
     VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                                 FormatType format_type) const;
 
+    /// Returns true if the format reports the sampled-image depth-comparison feature.
+    [[nodiscard]] bool SupportsDepthCompare(
+        VkFormat format, FormatType format_type = FormatType::Optimal) const;
+
     /// Returns true if a format is supported.
     bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                            FormatType format_type) const;
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index b77d01711a..4b3a4c0d8b 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -289,6 +289,7 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
     X(vkDestroySurfaceKHR);
     X(vkGetPhysicalDeviceFeatures2);
     X(vkGetPhysicalDeviceProperties2);
+    X(vkGetPhysicalDeviceFormatProperties2);
     X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
     X(vkGetPhysicalDeviceSurfaceFormatsKHR);
     X(vkGetPhysicalDeviceSurfacePresentModesKHR);
@@ -898,6 +899,11 @@ VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const no
     return properties;
 }
 
+void PhysicalDevice::GetFormatProperties2(VkFormat format,
+                                          VkFormatProperties2& properties) const noexcept {
+    dld->vkGetPhysicalDeviceFormatProperties2(physical_device, format, &properties);
+}
+
 std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const {
     u32 num;
     dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 39396b3279..dbd4dbddbe 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -165,6 +165,7 @@ struct InstanceDispatch {
     PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr{};
     PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2{};
     PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
+    PFN_vkGetPhysicalDeviceFormatProperties2 vkGetPhysicalDeviceFormatProperties2{};
     PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
     PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{};
     PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
@@ -1104,6 +1105,8 @@ public:
 
     VkFormatProperties GetFormatProperties(VkFormat) const noexcept;
 
+    void GetFormatProperties2(VkFormat, VkFormatProperties2&) const noexcept;
+
     std::vector<VkExtensionProperties> EnumerateDeviceExtensionProperties() const;
 
     std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const;