mirror of
https://git.h3cjp.net/H3cJP/citra.git
synced 2024-12-27 21:56:42 +00:00
Revamp Kepler Memory to use a subengine to manage uploads
This commit is contained in:
parent
b5889cbd6f
commit
a91d3fc639
|
@ -3,6 +3,8 @@ add_library(video_core STATIC
|
||||||
dma_pusher.h
|
dma_pusher.h
|
||||||
debug_utils/debug_utils.cpp
|
debug_utils/debug_utils.cpp
|
||||||
debug_utils/debug_utils.h
|
debug_utils/debug_utils.h
|
||||||
|
engines/engine_upload.cpp
|
||||||
|
engines/engine_upload.h
|
||||||
engines/fermi_2d.cpp
|
engines/fermi_2d.cpp
|
||||||
engines/fermi_2d.h
|
engines/fermi_2d.h
|
||||||
engines/kepler_compute.cpp
|
engines/kepler_compute.cpp
|
||||||
|
|
44
src/video_core/engines/engine_upload.cpp
Normal file
44
src/video_core/engines/engine_upload.cpp
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
#include <cstring>

#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
|
namespace Tegra::Engines::Upload {
|
||||||
|
|
||||||
|
void State::ProcessExec(const bool is_linear) {
|
||||||
|
write_offset = 0;
|
||||||
|
copy_size = regs.line_length_in * regs.line_count;
|
||||||
|
inner_buffer.resize(copy_size);
|
||||||
|
linear = is_linear;
|
||||||
|
}
|
||||||
|
|
||||||
|
void State::ProcessData(const u32 data, const bool is_last_call) {
|
||||||
|
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
|
||||||
|
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
|
||||||
|
write_offset += sub_copy_size;
|
||||||
|
if (is_last_call) {
|
||||||
|
const GPUVAddr address{regs.dest.Address()};
|
||||||
|
if (linear) {
|
||||||
|
memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
|
||||||
|
} else {
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
|
||||||
|
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||||
|
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
||||||
|
std::vector<u8> tmp_buffer(dst_size);
|
||||||
|
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||||
|
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
|
||||||
|
regs.dest.y, regs.dest.BlockHeight(), copy_size,
|
||||||
|
inner_buffer.data(), tmp_buffer.data());
|
||||||
|
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Tegra::Engines::Upload
|
74
src/video_core/engines/engine_upload.h
Normal file
74
src/video_core/engines/engine_upload.h
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <vector>
|
||||||
|
#include "common/bit_field.h"
|
||||||
|
#include "common/common_funcs.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
class MemoryManager;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Tegra::Engines::Upload {
|
||||||
|
|
||||||
|
struct Data {
|
||||||
|
u32 line_length_in;
|
||||||
|
u32 line_count;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 address_high;
|
||||||
|
u32 address_low;
|
||||||
|
u32 pitch;
|
||||||
|
union {
|
||||||
|
BitField<0, 4, u32> block_width;
|
||||||
|
BitField<4, 4, u32> block_height;
|
||||||
|
BitField<8, 4, u32> block_depth;
|
||||||
|
};
|
||||||
|
u32 width;
|
||||||
|
u32 height;
|
||||||
|
u32 depth;
|
||||||
|
u32 z;
|
||||||
|
u32 x;
|
||||||
|
u32 y;
|
||||||
|
|
||||||
|
GPUVAddr Address() const {
|
||||||
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 BlockWidth() const {
|
||||||
|
return 1U << block_width.Value();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 BlockHeight() const {
|
||||||
|
return 1U << block_height.Value();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 BlockDepth() const {
|
||||||
|
return 1U << block_depth.Value();
|
||||||
|
}
|
||||||
|
} dest;
|
||||||
|
};
|
||||||
|
|
||||||
|
class State {
|
||||||
|
public:
|
||||||
|
State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {}
|
||||||
|
~State() = default;
|
||||||
|
|
||||||
|
void ProcessExec(const bool is_linear);
|
||||||
|
void ProcessData(const u32 data, const bool is_last_call);
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 write_offset = 0;
|
||||||
|
u32 copy_size = 0;
|
||||||
|
std::vector<u8> inner_buffer;
|
||||||
|
bool linear;
|
||||||
|
Data& regs;
|
||||||
|
MemoryManager& memory_manager;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Tegra::Engines::Upload
|
|
@ -14,9 +14,8 @@
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
|
||||||
MemoryManager& memory_manager)
|
: system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
|
||||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
|
|
||||||
|
|
||||||
KeplerMemory::~KeplerMemory() = default;
|
KeplerMemory::~KeplerMemory() = default;
|
||||||
|
|
||||||
|
@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
|
||||||
|
|
||||||
switch (method_call.method) {
|
switch (method_call.method) {
|
||||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||||
ProcessExec();
|
upload_state.ProcessExec(regs.exec.linear != 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case KEPLERMEMORY_REG_INDEX(data): {
|
case KEPLERMEMORY_REG_INDEX(data): {
|
||||||
ProcessData(method_call.argument, method_call.IsLastCall());
|
bool is_last_call = method_call.IsLastCall();
|
||||||
|
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||||
|
if (is_last_call) {
|
||||||
|
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void KeplerMemory::ProcessExec() {
|
|
||||||
state.write_offset = 0;
|
|
||||||
state.copy_size = regs.line_length_in * regs.line_count;
|
|
||||||
state.inner_buffer.resize(state.copy_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
|
|
||||||
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
|
|
||||||
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
|
|
||||||
state.write_offset += sub_copy_size;
|
|
||||||
if (is_last_call) {
|
|
||||||
const GPUVAddr address{regs.dest.Address()};
|
|
||||||
if (regs.exec.linear != 0) {
|
|
||||||
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
|
|
||||||
} else {
|
|
||||||
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
|
||||||
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
|
||||||
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
|
|
||||||
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
|
|
||||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
|
||||||
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
|
||||||
std::vector<u8> tmp_buffer(dst_size);
|
|
||||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
|
||||||
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
|
|
||||||
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
|
|
||||||
state.inner_buffer.data(), tmp_buffer.data());
|
|
||||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
|
||||||
}
|
|
||||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/engines/engine_upload.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
@ -20,10 +21,6 @@ namespace Tegra {
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace VideoCore {
|
|
||||||
class RasterizerInterface;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
#define KEPLERMEMORY_REG_INDEX(field_name) \
|
#define KEPLERMEMORY_REG_INDEX(field_name) \
|
||||||
|
@ -31,8 +28,7 @@ namespace Tegra::Engines {
|
||||||
|
|
||||||
class KeplerMemory final {
|
class KeplerMemory final {
|
||||||
public:
|
public:
|
||||||
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
KeplerMemory(Core::System& system, MemoryManager& memory_manager);
|
||||||
MemoryManager& memory_manager);
|
|
||||||
~KeplerMemory();
|
~KeplerMemory();
|
||||||
|
|
||||||
/// Write the value to the register identified by method.
|
/// Write the value to the register identified by method.
|
||||||
|
@ -45,42 +41,7 @@ public:
|
||||||
struct {
|
struct {
|
||||||
INSERT_PADDING_WORDS(0x60);
|
INSERT_PADDING_WORDS(0x60);
|
||||||
|
|
||||||
u32 line_length_in;
|
Upload::Data upload;
|
||||||
u32 line_count;
|
|
||||||
|
|
||||||
struct {
|
|
||||||
u32 address_high;
|
|
||||||
u32 address_low;
|
|
||||||
u32 pitch;
|
|
||||||
union {
|
|
||||||
BitField<0, 4, u32> block_width;
|
|
||||||
BitField<4, 4, u32> block_height;
|
|
||||||
BitField<8, 4, u32> block_depth;
|
|
||||||
};
|
|
||||||
u32 width;
|
|
||||||
u32 height;
|
|
||||||
u32 depth;
|
|
||||||
u32 z;
|
|
||||||
u32 x;
|
|
||||||
u32 y;
|
|
||||||
|
|
||||||
GPUVAddr Address() const {
|
|
||||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
|
||||||
address_low);
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 BlockWidth() const {
|
|
||||||
return 1U << block_width.Value();
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 BlockHeight() const {
|
|
||||||
return 1U << block_height.Value();
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 BlockDepth() const {
|
|
||||||
return 1U << block_depth.Value();
|
|
||||||
}
|
|
||||||
} dest;
|
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
union {
|
union {
|
||||||
|
@ -96,28 +57,17 @@ public:
|
||||||
};
|
};
|
||||||
} regs{};
|
} regs{};
|
||||||
|
|
||||||
struct {
|
|
||||||
u32 write_offset = 0;
|
|
||||||
u32 copy_size = 0;
|
|
||||||
std::vector<u8> inner_buffer;
|
|
||||||
} state{};
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
|
||||||
MemoryManager& memory_manager;
|
MemoryManager& memory_manager;
|
||||||
|
Upload::State upload_state;
|
||||||
void ProcessExec();
|
|
||||||
void ProcessData(u32 data, bool is_last_call);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ASSERT_REG_POSITION(field_name, position) \
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
|
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
|
||||||
"Field " #field_name " has invalid position")
|
"Field " #field_name " has invalid position")
|
||||||
|
|
||||||
ASSERT_REG_POSITION(line_length_in, 0x60);
|
ASSERT_REG_POSITION(upload, 0x60);
|
||||||
ASSERT_REG_POSITION(line_count, 0x61);
|
|
||||||
ASSERT_REG_POSITION(dest, 0x62);
|
|
||||||
ASSERT_REG_POSITION(exec, 0x6C);
|
ASSERT_REG_POSITION(exec, 0x6C);
|
||||||
ASSERT_REG_POSITION(data, 0x6D);
|
ASSERT_REG_POSITION(data, 0x6D);
|
||||||
#undef ASSERT_REG_POSITION
|
#undef ASSERT_REG_POSITION
|
||||||
|
|
|
@ -37,7 +37,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
|
||||||
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
|
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
|
||||||
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
|
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
|
||||||
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
|
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
|
||||||
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
|
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU::~GPU() = default;
|
GPU::~GPU() = default;
|
||||||
|
|
Loading…
Reference in a new issue