mirror of
https://git.h3cjp.net/H3cJP/citra.git
synced 2024-11-24 08:02:46 +00:00
Texture Cache: Fix downscaling and correct memory consumption.
This commit is contained in:
parent
b60966041c
commit
425ab9ef4b
|
@ -876,7 +876,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
|
|||
}
|
||||
}
|
||||
|
||||
bool Image::Scale() {
|
||||
bool Image::Scale(bool up_scale) {
|
||||
const auto format_type = GetFormatType(info.format);
|
||||
const GLenum attachment = [format_type] {
|
||||
switch (format_type) {
|
||||
|
@ -944,14 +944,25 @@ bool Image::Scale() {
|
|||
const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
|
||||
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||
const u32 src_level_width = std::max(1u, original_width >> level);
|
||||
const u32 src_level_height = std::max(1u, original_height >> level);
|
||||
const u32 dst_level_width = std::max(1u, scaled_width >> level);
|
||||
const u32 dst_level_height = std::max(1u, scaled_height >> level);
|
||||
const u32 src_level_width =
|
||||
std::max(1u, (up_scale ? original_width : scaled_width) >> level);
|
||||
const u32 src_level_height =
|
||||
std::max(1u, (up_scale ? original_height : scaled_height) >> level);
|
||||
const u32 dst_level_width =
|
||||
std::max(1u, (up_scale ? scaled_width : original_width) >> level);
|
||||
const u32 dst_level_height =
|
||||
std::max(1u, (up_scale ? scaled_height : original_height) >> level);
|
||||
|
||||
if (up_scale) {
|
||||
glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
|
||||
glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
|
||||
layer);
|
||||
} else {
|
||||
glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level,
|
||||
layer);
|
||||
glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer);
|
||||
}
|
||||
|
||||
glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
|
||||
glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
|
||||
layer);
|
||||
glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
|
||||
0, dst_level_width, dst_level_height, mask, filter);
|
||||
}
|
||||
|
@ -959,7 +970,12 @@ bool Image::Scale() {
|
|||
if (scissor_test != GL_FALSE) {
|
||||
glEnablei(GL_SCISSOR_TEST, 0);
|
||||
}
|
||||
current_texture = upscaled_backup.handle;
|
||||
if (up_scale) {
|
||||
current_texture = upscaled_backup.handle;
|
||||
} else {
|
||||
current_texture = texture.handle;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -981,6 +997,7 @@ bool Image::ScaleUp() {
|
|||
flags &= ~ImageFlagBits::Rescaled;
|
||||
return false;
|
||||
}
|
||||
scale_count++;
|
||||
if (!Scale()) {
|
||||
flags &= ~ImageFlagBits::Rescaled;
|
||||
return false;
|
||||
|
@ -996,7 +1013,11 @@ bool Image::ScaleDown() {
|
|||
if (!runtime->resolution.active) {
|
||||
return false;
|
||||
}
|
||||
current_texture = texture.handle;
|
||||
scale_count++;
|
||||
if (!Scale(false)) {
|
||||
flags &= ~ImageFlagBits::Rescaled;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -205,7 +205,7 @@ private:
|
|||
|
||||
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
bool Scale();
|
||||
bool Scale(bool up_scale = true);
|
||||
|
||||
OGLTexture texture;
|
||||
OGLTexture upscaled_backup;
|
||||
|
|
|
@ -592,7 +592,8 @@ struct RangedBarrierRange {
|
|||
}
|
||||
|
||||
void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
|
||||
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) {
|
||||
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
|
||||
bool up_scaling = true) {
|
||||
const bool is_2d = info.type == ImageType::e2D;
|
||||
const auto resources = info.resources;
|
||||
const VkExtent2D extent{
|
||||
|
@ -605,14 +606,16 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con
|
|||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
|
||||
vk_filter](vk::CommandBuffer cmdbuf) {
|
||||
vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
|
||||
const VkOffset2D src_size{
|
||||
.x = static_cast<s32>(extent.width),
|
||||
.y = static_cast<s32>(extent.height),
|
||||
.x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
|
||||
.y = static_cast<s32>(is_2d && up_scaling ? extent.height
|
||||
: resolution.ScaleUp(extent.height)),
|
||||
};
|
||||
const VkOffset2D dst_size{
|
||||
.x = static_cast<s32>(resolution.ScaleUp(extent.width)),
|
||||
.y = static_cast<s32>(is_2d ? resolution.ScaleUp(extent.height) : extent.height),
|
||||
.x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
|
||||
.y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
|
||||
: extent.height),
|
||||
};
|
||||
boost::container::small_vector<VkImageBlit, 4> regions;
|
||||
regions.reserve(resources.levels);
|
||||
|
@ -1134,6 +1137,7 @@ bool Image::ScaleUp() {
|
|||
if (!resolution.active) {
|
||||
return false;
|
||||
}
|
||||
scale_count++;
|
||||
const auto& device = runtime->device;
|
||||
const bool is_2d = info.type == ImageType::e2D;
|
||||
const u32 scaled_width = resolution.ScaleUp(info.size.width);
|
||||
|
@ -1161,8 +1165,10 @@ bool Image::ScaleUp() {
|
|||
using namespace VideoCommon;
|
||||
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
|
||||
|
||||
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
|
||||
scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
|
||||
if (!scale_view) {
|
||||
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
|
||||
scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
|
||||
}
|
||||
auto* view_ptr = scale_view.get();
|
||||
|
||||
const Region2D src_region{
|
||||
|
@ -1178,7 +1184,10 @@ bool Image::ScaleUp() {
|
|||
.height = scaled_height,
|
||||
};
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
scale_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
|
||||
if (!scale_framebuffer) {
|
||||
scale_framebuffer =
|
||||
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
|
||||
}
|
||||
const auto color_view = scale_view->Handle(Shader::TextureType::Color2D);
|
||||
|
||||
runtime->blit_image_helper.BlitColor(
|
||||
|
@ -1186,7 +1195,10 @@ bool Image::ScaleUp() {
|
|||
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
|
||||
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
|
||||
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||
scale_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
|
||||
if (!scale_framebuffer) {
|
||||
scale_framebuffer =
|
||||
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
|
||||
}
|
||||
runtime->blit_image_helper.BlitDepthStencil(
|
||||
scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(),
|
||||
dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
|
||||
|
@ -1209,6 +1221,67 @@ bool Image::ScaleDown() {
|
|||
if (!resolution.active) {
|
||||
return false;
|
||||
}
|
||||
const auto& device = runtime->device;
|
||||
const bool is_2d = info.type == ImageType::e2D;
|
||||
const u32 scaled_width = resolution.ScaleUp(info.size.width);
|
||||
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
|
||||
if (aspect_mask == 0) {
|
||||
aspect_mask = ImageAspectMask(info.format);
|
||||
}
|
||||
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
|
||||
const PixelFormat format = StorageFormat(info.format);
|
||||
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
|
||||
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
|
||||
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
|
||||
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
|
||||
} else {
|
||||
using namespace VideoCommon;
|
||||
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
|
||||
|
||||
if (!normal_view) {
|
||||
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
|
||||
normal_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
|
||||
}
|
||||
auto* view_ptr = normal_view.get();
|
||||
|
||||
const Region2D src_region{
|
||||
.start = {0, 0},
|
||||
.end = {static_cast<s32>(scaled_width), static_cast<s32>(scaled_height)},
|
||||
};
|
||||
const Region2D dst_region{
|
||||
.start = {0, 0},
|
||||
.end = {static_cast<s32>(info.size.width), static_cast<s32>(info.size.height)},
|
||||
};
|
||||
const VkExtent2D extent{
|
||||
.width = scaled_width,
|
||||
.height = scaled_height,
|
||||
};
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
if (!normal_framebuffer) {
|
||||
normal_framebuffer =
|
||||
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
|
||||
}
|
||||
const auto color_view = normal_view->Handle(Shader::TextureType::Color2D);
|
||||
|
||||
runtime->blit_image_helper.BlitColor(
|
||||
normal_framebuffer.get(), color_view, dst_region, src_region,
|
||||
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
|
||||
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
|
||||
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||
if (!normal_framebuffer) {
|
||||
normal_framebuffer =
|
||||
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
|
||||
}
|
||||
runtime->blit_image_helper.BlitDepthStencil(
|
||||
normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(),
|
||||
dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
|
||||
} else {
|
||||
// TODO: Use helper blits where applicable
|
||||
flags &= ~ImageFlagBits::Rescaled;
|
||||
LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
ASSERT(info.type != ImageType::Linear);
|
||||
current_image = *original_image;
|
||||
return true;
|
||||
|
|
|
@ -148,6 +148,9 @@ private:
|
|||
|
||||
std::unique_ptr<Framebuffer> scale_framebuffer;
|
||||
std::unique_ptr<ImageView> scale_view;
|
||||
|
||||
std::unique_ptr<Framebuffer> normal_framebuffer;
|
||||
std::unique_ptr<ImageView> normal_view;
|
||||
};
|
||||
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
|
|
|
@ -60,8 +60,8 @@ namespace {
|
|||
ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
|
||||
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
|
||||
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
|
||||
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{},
|
||||
scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
|
||||
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
|
||||
scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
|
||||
cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
|
||||
if (info.type == ImageType::e3D) {
|
||||
slice_offsets = CalculateSliceOffsets(info);
|
||||
|
|
|
@ -77,6 +77,10 @@ struct ImageBase {
|
|||
void CheckBadOverlapState();
|
||||
void CheckAliasState();
|
||||
|
||||
bool HasScaled() {
|
||||
return scale_count > 0;
|
||||
}
|
||||
|
||||
ImageInfo info;
|
||||
|
||||
u32 guest_size_bytes = 0;
|
||||
|
@ -84,6 +88,7 @@ struct ImageBase {
|
|||
u32 converted_size_bytes = 0;
|
||||
u32 scale_rating = 0;
|
||||
u64 scale_tick = 0;
|
||||
u32 scale_count = 0;
|
||||
ImageFlagBits flags = ImageFlagBits::CpuModified;
|
||||
|
||||
GPUVAddr gpu_addr = 0;
|
||||
|
|
|
@ -854,8 +854,8 @@ void TextureCache<P>::InvalidateScale(Image& image) {
|
|||
}
|
||||
|
||||
template <class P>
|
||||
u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {
|
||||
const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f;
|
||||
u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
|
||||
const f32 add_to_size = Settings::values.resolution_info.up_factor;
|
||||
const bool sign = std::signbit(add_to_size);
|
||||
const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||
const u64 tentative_size = image_size_bytes * static_cast<u64>(std::abs(add_to_size));
|
||||
|
@ -865,11 +865,14 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {
|
|||
|
||||
template <class P>
|
||||
bool TextureCache<P>::ScaleUp(Image& image) {
|
||||
const bool has_copy = image.HasScaled();
|
||||
const bool rescaled = image.ScaleUp();
|
||||
if (!rescaled) {
|
||||
return false;
|
||||
}
|
||||
total_used_memory += GetScaledImageSizeBytes(image);
|
||||
if (!has_copy) {
|
||||
total_used_memory += GetScaledImageSizeBytes(image);
|
||||
}
|
||||
InvalidateScale(image);
|
||||
return true;
|
||||
}
|
||||
|
@ -880,7 +883,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
|
|||
if (!rescaled) {
|
||||
return false;
|
||||
}
|
||||
total_used_memory -= GetScaledImageSizeBytes(image);
|
||||
const bool has_copy = image.HasScaled();
|
||||
if (!has_copy) {
|
||||
total_used_memory -= GetScaledImageSizeBytes(image);
|
||||
}
|
||||
InvalidateScale(image);
|
||||
return true;
|
||||
}
|
||||
|
@ -1391,13 +1397,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
|||
"Trying to unregister an already registered image");
|
||||
image.flags &= ~ImageFlagBits::Registered;
|
||||
image.flags &= ~ImageFlagBits::BadOverlap;
|
||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||
if ((IsPixelFormatASTC(image.info.format) &&
|
||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||
True(image.flags & ImageFlagBits::Converted)) {
|
||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||
}
|
||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||
lru_cache.Free(image.lru_index);
|
||||
const auto& clear_page_table =
|
||||
[this, image_id](
|
||||
|
@ -1478,6 +1477,16 @@ template <class P>
|
|||
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
|
||||
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
||||
image.flags |= ImageFlagBits::Tracked;
|
||||
if (image.HasScaled()) {
|
||||
total_used_memory -= GetScaledImageSizeBytes(image);
|
||||
}
|
||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||
if ((IsPixelFormatASTC(image.info.format) &&
|
||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||
True(image.flags & ImageFlagBits::Converted)) {
|
||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||
}
|
||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
||||
return;
|
||||
|
|
|
@ -331,7 +331,7 @@ private:
|
|||
void InvalidateScale(Image& image);
|
||||
bool ScaleUp(Image& image);
|
||||
bool ScaleDown(Image& image);
|
||||
u64 GetScaledImageSizeBytes(Image& image);
|
||||
u64 GetScaledImageSizeBytes(ImageBase& image);
|
||||
|
||||
Runtime& runtime;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
|
Loading…
Reference in a new issue