Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions pcsx2/GS/Renderers/DX11/GSDevice11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2864,20 +2864,17 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
{
// Requires a copy of the RT.
// Also serves as the "bind rt" flag when texture barriers are unsupported and the texture is the framebuffer (tex == rt).
draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true);

if (!draw_rt_clone)
Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
}

if (draw_ds && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy)) &&
config.ps.IsFeedbackLoopDepth())
m_features.depth_feedback == GSDevice::DepthFeedbackSupport::Depth && config.ps.IsFeedbackLoopDepth())
{
// Requires a copy of the DS.
// Also serves as the "bind ds" flag when texture barriers are unsupported and the texture is the framebuffer.
draw_ds_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true);
if (!draw_rt_clone)
if (!draw_ds_clone)
Console.Warning("D3D11: Failed to allocate temp texture for DS copy.");
}

Expand All @@ -2889,7 +2886,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);

SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.require_one_barrier, config.require_full_barrier, false);
config.require_one_barrier, config.require_full_barrier);

if (config.blend_multi_pass.enable)
{
Expand All @@ -2916,7 +2913,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)

SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
false, config.alpha_second_pass.require_full_barrier);
}

if (colclip_rt)
Expand All @@ -2938,7 +2935,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)

void GSDevice11::SendHWDraw(const GSHWDrawConfig& config,
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
const bool one_barrier, const bool full_barrier)
{
if (draw_rt_clone || draw_ds_clone)
{
Expand All @@ -2949,20 +2946,23 @@ void GSDevice11::SendHWDraw(const GSHWDrawConfig& config,

auto CopyAndBind = [&](GSVector4i drawarea) {
if (draw_rt_clone)
{
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
if ((one_barrier || full_barrier))
PSSetShaderResource(2, draw_rt_clone);
if (config.tex && config.tex == draw_rt)
PSSetShaderResource(0, draw_rt_clone);
}
if (draw_ds_clone)
{
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
if ((one_barrier || full_barrier) && draw_rt_clone)
PSSetShaderResource(2, draw_rt_clone);
if ((one_barrier || full_barrier) && draw_ds_clone)
PSSetShaderResource(4, draw_ds_clone);
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, draw_rt_clone);
}
};

const GSVector4i rtsize(0, 0, draw_rt->GetWidth(), draw_rt->GetHeight());
const GSVector4i rtsize(0, 0, (draw_rt ? draw_rt : draw_ds)->GetWidth(), (draw_rt ? draw_rt : draw_ds)->GetHeight());

if (m_features.multidraw_fb_copy && full_barrier)
if (full_barrier)
{
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const u32 indices_per_prim = config.indices_per_prim;
Expand All @@ -2985,8 +2985,7 @@ void GSDevice11::SendHWDraw(const GSHWDrawConfig& config,
}

// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
if (!skip_first_barrier)
CopyAndBind(ProcessCopyArea(rtsize, config.drawarea));
CopyAndBind(ProcessCopyArea(rtsize, config.drawarea));
}

DrawIndexedPrimitive();
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/GS/Renderers/DX11/GSDevice11.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ class GSDevice11 final : public GSDevice
void RenderHW(GSHWDrawConfig& config) override;
void SendHWDraw(const GSHWDrawConfig& config,
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
const bool one_barrier, const bool full_barrier, const bool skip_first_barrier);
const bool one_barrier, const bool full_barrier);

void ClearSamplerCache() override;

Expand Down
167 changes: 103 additions & 64 deletions pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,10 @@ bool GSDeviceOGL::CheckFeatures()
if (!GLAD_GL_ARB_texture_barrier)
{
glTextureBarrier = ReplaceGL::TextureBarrier;
// Switch to fallback.
m_features.multidraw_fb_copy = true;
Host::AddOSDMessage(
"GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION);
"GL_ARB_texture_barrier is not supported, blending will be slower.", Host::OSD_ERROR_DURATION);
}

if (!GLAD_GL_ARB_direct_state_access)
Expand Down Expand Up @@ -787,18 +789,20 @@ bool GSDeviceOGL::CheckFeatures()
}

if (GSConfig.OverrideTextureBarriers == 0)
{
m_features.texture_barrier = m_features.framebuffer_fetch; // Force Disabled
m_features.multidraw_fb_copy = false;
Host::AddOSDMessage(
"Texture Barrier is disabled, blending will not be accurate.", Host::OSD_ERROR_DURATION);
}
else if (GSConfig.OverrideTextureBarriers == 1)
{
m_features.texture_barrier = true; // Force Enabled
m_features.multidraw_fb_copy = false;
}
else
m_features.texture_barrier = m_features.framebuffer_fetch || GLAD_GL_ARB_texture_barrier;
if (!m_features.texture_barrier)
{
Host::AddOSDMessage(
"GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION);
}

m_features.multidraw_fb_copy = false;
m_features.provoking_vertex_last = true;
m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc;
m_features.bptc_textures =
Expand All @@ -823,6 +827,18 @@ bool GSDeviceOGL::CheckFeatures()
m_features.depth_feedback = GSDevice::DepthFeedbackSupport::None;
}
}
else if (m_features.multidraw_fb_copy)
{
if (GSConfig.DepthFeedbackMode == GSDepthFeedbackMode::Depth ||
GSConfig.DepthFeedbackMode == GSDepthFeedbackMode::Auto)
{
m_features.depth_feedback = GSDevice::DepthFeedbackSupport::Depth;
}
else
{
m_features.depth_feedback = GSDevice::DepthFeedbackSupport::None;
}
}
else
{
m_features.depth_feedback = GSDevice::DepthFeedbackSupport::None;
Expand Down Expand Up @@ -2588,11 +2604,20 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
if (config.pal)
CommitClear(config.pal, true);

GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();

GSTexture* primid_texture = nullptr;
GSTexture* draw_rt_clone = nullptr;
const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();
GSTexture* draw_rt_clone = nullptr;
GSTexture* draw_ds_clone = nullptr;
GSTexture* primid_texture = nullptr;

ScopedGuard recycle_temp_textures([&]() {
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (draw_ds_clone)
Recycle(draw_ds_clone);
if (primid_texture)
Recycle(primid_texture);
});

if (colclip_rt)
{
Expand Down Expand Up @@ -2640,6 +2665,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
}

// Destination Alpha Setup
const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
switch (config.destination_alpha)
{
case GSHWDrawConfig::DestinationAlphaMode::Off:
Expand All @@ -2654,7 +2680,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
}
break;
case GSHWDrawConfig::DestinationAlphaMode::StencilOne:
if (m_features.texture_barrier)
if (m_features.texture_barrier || multidraw_fb_copy)
{
// Cleared after RT bind.
break;
Expand Down Expand Up @@ -2695,10 +2721,10 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
PSSetShaderResource(1, config.pal);
if (m_features.texture_barrier && (config.require_one_barrier || config.require_full_barrier))
PSSetShaderResource(2, colclip_rt ? colclip_rt : config.rt);
if (m_features.texture_barrier && (config.require_one_barrier || config.require_full_barrier) && config.ps.IsFeedbackLoopDepth())
PSSetShaderResource(4, m_features.depth_feedback == GSDevice::DepthFeedbackSupport::DepthAsRT ? config.ds_as_rt :
m_features.depth_feedback == GSDevice::DepthFeedbackSupport::Depth ? config.ds : nullptr);

const bool depth_feedback = m_features.depth_feedback == GSDevice::DepthFeedbackSupport::Depth;
if (m_features.texture_barrier && (config.require_one_barrier || config.require_full_barrier) && config.ps.IsFeedbackLoopDepth() &&
(depth_feedback || m_features.depth_feedback == GSDevice::DepthFeedbackSupport::DepthAsRT))
PSSetShaderResource(4, depth_feedback ? config.ds : config.ds_as_rt);
SetupSampler(config.sampler);

if (m_vs_cb_cache.Update(config.cb_vs))
Expand Down Expand Up @@ -2833,38 +2859,37 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
glTextureBarrier();
}

if (draw_rt && (config.require_one_barrier || (config.tex && config.tex == config.rt)) && !m_features.texture_barrier)
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)) &&
!m_features.texture_barrier)
{
// Requires a copy of the RT.
draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true);
if (draw_rt_clone)
{
GL_PUSH("GL: Copy RT to temp texture {%d,%d %dx%d}",
config.drawarea.left, config.drawarea.top,
config.drawarea.width(), config.drawarea.height());
const GSVector4i snapped_drawarea = ProcessCopyArea(GSVector4i(0, 0, rtsize.x, rtsize.y), config.drawarea);
CopyRect(draw_rt, draw_rt_clone, snapped_drawarea, snapped_drawarea.left, snapped_drawarea.top);
if (config.require_one_barrier)
PSSetShaderResource(2, draw_rt_clone);
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, draw_rt_clone);
}
else
if (!draw_rt_clone)
Console.Warning("GL: Failed to allocate temp texture for RT copy.");
}

if (draw_ds && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy)) &&
!m_features.texture_barrier && depth_feedback && config.ps.IsFeedbackLoopDepth())
{
// Requires a copy of the DS.
draw_ds_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_ds->GetFormat(), true);
if (!draw_ds_clone)
Console.Warning("GL: Failed to allocate temp texture for DS copy.");
}

OMSetRenderTargets(draw_rt, draw_ds_as_rt, draw_ds, &config.scissor);
OMSetColorMaskState(config.colormask);
SetupOM(config.depth);

// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && m_features.texture_barrier)
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && (m_features.texture_barrier || multidraw_fb_copy))
{
constexpr GLint clear_color = 1;
glClearBufferiv(GL_STENCIL, 0, &clear_color);
}

SendHWDraw(config, config.require_one_barrier, config.require_full_barrier);
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
config.require_one_barrier, config.require_full_barrier);

if (config.blend_multi_pass.enable)
{
Expand Down Expand Up @@ -2911,14 +2936,10 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
OMSetBlendState();
}
SetupOM(config.alpha_second_pass.depth);
SendHWDraw(config, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier);
SendHWDraw(config, draw_rt_clone, draw_rt, draw_ds_clone, draw_ds,
m_features.texture_barrier ? config.alpha_second_pass.require_one_barrier : false, config.alpha_second_pass.require_full_barrier);
}

if (primid_texture)
Recycle(primid_texture);
if (draw_rt_clone)
Recycle(draw_rt_clone);

if (colclip_rt)
{
config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);
Expand All @@ -2936,47 +2957,57 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
}
}

void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, bool full_barrier)
void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config,
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
const bool one_barrier, const bool full_barrier)
{
if (!m_features.texture_barrier) [[unlikely]]
{
DrawIndexedPrimitive();
return;
}

#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
Console.Warning("OpenGL: Possible unnecessary barrier detected.");
#endif

// Refreshes the temporary RT/DS clones over `drawarea` and binds them as pixel shader inputs.
// A null clone means no copy of that target was created, so that target is left untouched.
auto CopyAndBind = [&](GSVector4i drawarea) {
if (draw_rt_clone)
{
// Snapshot the current render target contents; drawarea.left/top are passed as the
// trailing arguments (presumably the destination offset, so the region keeps its position).
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
// Slot 2 is the slot RenderHW binds the RT to when a barrier is required.
if ((one_barrier || full_barrier))
PSSetShaderResource(2, draw_rt_clone);
// The draw samples the render target as its texture: read from the clone instead.
if (config.tex && config.tex == draw_rt)
PSSetShaderResource(0, draw_rt_clone);
}
if (draw_ds_clone)
{
// Same snapshot for the depth target; slot 4 is the slot RenderHW uses for depth feedback.
CopyRect(draw_ds, draw_ds_clone, drawarea, drawarea.left, drawarea.top);
PSSetShaderResource(4, draw_ds_clone);
}
};

const GSVector4i rtsize(0, 0, (draw_rt ? draw_rt : draw_ds)->GetWidth(), (draw_rt ? draw_rt : draw_ds)->GetHeight());

if (full_barrier)
{
pxAssert(config.drawlist && !config.drawlist->empty());

GL_PUSH("Split the draw");
#if defined(_DEBUG)
// Check how draw call is split.
std::map<size_t, size_t> frequency;
for (const auto& it : *config.drawlist)
++frequency[it];

std::string message;
for (const auto& it : frequency)
message += " " + std::to_string(it.first) + "(" + std::to_string(it.second) + ")";

GL_PERF("Split single draw (%d primitives) into %zu draws: consecutive draws(frequency):%s",
config.nindices / config.indices_per_prim, config.drawlist->size(), message.c_str());
#endif

const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());

g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(draw_list_size));
if (m_features.texture_barrier)
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(draw_list_size));
else
pxAssert(config.drawlist_bbox && static_cast<u32>(config.drawlist_bbox->size()) == draw_list_size);

for (u32 n = 0, p = 0; n < draw_list_size; n++)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
glTextureBarrier();

if (m_features.texture_barrier)
glTextureBarrier();
else
{
const GSVector4i original_bbox = (*config.drawlist_bbox)[n].rintersect(config.drawarea);
CopyAndBind(ProcessCopyArea(rtsize, original_bbox));
}

DrawIndexedPrimitive(p, count);
p += count;
}
Expand All @@ -2986,8 +3017,16 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, boo

if (one_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
glTextureBarrier();
if (m_features.texture_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
glTextureBarrier();
}
else
{
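// Snapshot the draw area once before the draw. Optimization: the alpha second pass reuses this
// snapshot from the first pass, so RenderHW passes one_barrier = false for it and no new copy is made.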
CopyAndBind(ProcessCopyArea(rtsize, config.drawarea));
}
}

DrawIndexedPrimitive();
Expand Down
5 changes: 3 additions & 2 deletions pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,9 @@ class GSDeviceOGL final : public GSDevice
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds);

void RenderHW(GSHWDrawConfig& config) override;
void SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, bool full_barrier);

// Issues the draw for `config`, handling feedback-loop barriers.
// full_barrier: the draw is split, one DrawIndexedPrimitive per config.drawlist entry.
// one_barrier: a single barrier (or copy) precedes one draw.
// When texture barriers are unavailable, draw_rt/draw_ds are copied into
// draw_rt_clone/draw_ds_clone instead and the clones are bound; a null clone is skipped.
void SendHWDraw(const GSHWDrawConfig& config,
GSTexture* draw_rt_clone, GSTexture* draw_rt, GSTexture* draw_ds_clone, GSTexture* draw_ds,
const bool one_barrier, const bool full_barrier);
void SetupDATE(GSTexture* rt, GSTexture* ds, SetDATM datm, const GSVector4i& bbox);

void IASetVAO(GLuint vao);
Expand Down
Loading