Skip to content

Commit a5a63f9

Browse files
committed
GS/DX: Add SW AF
1 parent ac69bb2 commit a5a63f9

File tree

11 files changed

+283
-54
lines changed

11 files changed

+283
-54
lines changed

bin/resources/shaders/dx11/tfx.fx

Lines changed: 146 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,148 @@ cbuffer cb1
219219
float RcpScaleFactor;
220220
};
221221

222+
#if (PS_AUTOMATIC_LOD != 1) && (PS_MANUAL_LOD == 1)
// Derives the mip level to sample from uv_w when LOD is selected manually.
// LODParams is read as: x = K, y = L (scale applied to log2|uv_w|),
// z = bias, w = upper bound applied before the bias is subtracted.
// The result is intentionally not clamped to zero (see the FIXME below).
float manual_lod(float uv_w)
{
	// FIXME add LOD: K - ( LOG2(Q) * (1 << L))
	float log_q = log2(abs(uv_w));
	float unclamped_lod = LODParams.x - log_q * LODParams.y;

	// FIXME max useful ?
	//return max(min(unclamped_lod, LODParams.w) - LODParams.z, 0.0f);
	float capped_lod = min(unclamped_lod, LODParams.w);
	return capped_lod - LODParams.z;
}
#endif
237+
238+
#if PS_ANISOTROPIC_FILTERING > 1
239+
// Shader-side anisotropic texture fetch.
// Builds the sampling footprint from the screen-space derivatives of uv (in texel units),
// derives an anisotropy ratio clamped to PS_ANISOTROPIC_FILTERING, and averages that many
// SampleLevel taps spread along the footprint's major axis.
//   uv   - texture coordinate to sample at.
//   uv_w - only consumed by manual_lod() when PS_MANUAL_LOD is active.
// Returns the averaged colour (a single tap when the ratio resolves to 1).
float4 sample_c_af(float2 uv, float uv_w)
240+
{
241+
// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
242+
// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
243+
float2 sz;
244+
Texture.GetDimensions(sz.x, sz.y);
245+
// Per-pixel uv derivatives scaled by the texture dimensions, i.e. expressed in texels.
float2 dX = ddx(uv) * sz;
246+
float2 dY = ddy(uv) * sz;
247+
248+
// Calculate Ellipse Transform
249+
// The transform is skipped for degenerate inputs: a zero-length derivative,
// parallel derivatives (zero cross product), already perpendicular derivatives
// (zero dot product), or any non-finite component.
bool d_zero = length(dX) == 0 || length(dY) == 0;
250+
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
251+
bool d_per = dot(dX, dY) == 0;
252+
bool d_inf_nan = any(isinf(dX) | isinf(dY) | isnan(dX) | isnan(dY));
253+
// TODO: check if we might cause inf/nan
254+
if (!(d_zero || d_par || d_per || d_inf_nan))
255+
{
256+
// A, B, C, F follow the ellipse coefficients of the referenced spec section
// (presumably the implicit form A*u^2 + B*u*v + C*v^2 = F - confirm against the spec).
float A = dX.y * dX.y + dY.y * dY.y;
257+
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
258+
float C = dX.x * dX.x + dY.x * dY.x;
259+
float f = (dX.x * dY.y - dY.x * dX.y);
260+
float F = f * f;
261+
262+
float p = A - C;
263+
float q = A + C;
264+
float t = sqrt(p * p + B * B);
265+
266+
// Replacement derivative vectors; t == 0 or q == t would divide by zero here,
// which is why the result is re-validated below before being adopted.
float2 new_dX = float2(
267+
sqrt(F * (t + p) / (t * (q + t))),
268+
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
269+
);
270+
271+
float2 new_dY = float2(
272+
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
273+
sqrt(F * (t + p) / (t * (q - t)))
274+
);
275+
276+
// Only adopt the transformed vectors when every component is finite;
// otherwise the original derivatives are kept.
d_inf_nan = any(isinf(new_dX) | isinf(new_dY) | isnan(new_dX) | isnan(new_dY));
277+
if (!d_inf_nan)
278+
{
279+
dX = new_dX;
280+
dY = new_dY;
281+
}
282+
}
283+
284+
// Compute AF values
285+
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
286+
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
287+
// Absolute cross product of the two derivative vectors (area of the parallelogram they span).
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
288+
// The longer of the two derivative vectors is treated as the major axis.
bool is_major_x = squared_length_x > squared_length_y;
289+
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
290+
float length_major = sqrt(squared_length_major);
291+
292+
float aniso_ratio;
293+
float length_lod;
294+
float2 aniso_line;
295+
// NOTE(review): if dX/dY still hold NaN here, this comparison is false and the
// anisotropic branch runs with NaN values (the tap loop then executes zero times
// and the result is 0 / NaN) - confirm whether non-finite derivatives can occur.
if (length_major <= 1.0f)
296+
{
297+
// A zero length_major would result in NaN Lod and break sampling.
298+
// A small length_major would result in aniso_ratio getting clamped to 1.
299+
// Perform isotropic filtering instead.
300+
aniso_ratio = 1.0f;
301+
length_lod = length_major;
302+
aniso_line = float2(0, 0);
303+
}
304+
else
305+
{
306+
float norm_major = 1.0f / length_major;
307+
308+
// Unit vector along the major axis (in texel space).
float2 aniso_line_dir = float2(
309+
(is_major_x ? dX.x : dY.x) * norm_major,
310+
(is_major_x ? dX.y : dY.y) * norm_major
311+
);
312+
313+
// major^2 / area == major / minor. A zero determinant yields +inf under IEEE
// division, which the clamp below reduces to PS_ANISOTROPIC_FILTERING.
aniso_ratio = squared_length_major / determinant;
314+
315+
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
316+
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
317+
{
318+
// ratio is clamped - Lod is based on ratio (preserves area)
319+
aniso_ratio = PS_ANISOTROPIC_FILTERING;
320+
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
321+
}
322+
else
323+
{
324+
// ratio not clamped - Lod is based on area
325+
length_lod = determinant / length_major;
326+
}
327+
328+
// clamp to top Lod
329+
// When the minor length is below one texel, the tap count is scaled down by it
// (never below 1) rather than asking for a level finer than the top mip.
if (length_lod < 1.0f)
330+
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
331+
332+
// The ratio doubles as the tap count, so it is rounded to a whole number.
aniso_ratio = round(aniso_ratio);
333+
// Half of the major axis, converted from texels back to uv units by dividing by sz.
aniso_line = aniso_line_dir * 0.5 * length_major * (1.0f / sz);
334+
}
335+
336+
// LOD source: footprint-derived, manual_lod(uv_w), or fixed level 0, depending on the macros.
#if PS_AUTOMATIC_LOD == 1
337+
float lod = log2(length_lod);
338+
#elif PS_MANUAL_LOD == 1
339+
float lod = manual_lod(uv_w);
340+
#else
341+
float lod = 0; // No Lod
342+
#endif
343+
344+
float4 colour;
345+
// A ratio of exactly 1 means a single tap at uv is sufficient.
if (aniso_ratio == 1.0f)
346+
colour = Texture.SampleLevel(TextureSampler, uv, lod);
347+
else
348+
{
349+
float4 num = float4(0, 0, 0, 0);
350+
for (int i = 0; i < aniso_ratio; i++)
351+
{
352+
// Tap i sits at the centre of the i-th of aniso_ratio equal segments
// of the line running from uv - aniso_line to uv + aniso_line.
float2 d = -aniso_line + (0.5 + i) * (2.0 * aniso_line) / aniso_ratio;
353+
float2 uv_sample = uv + d;
354+
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
355+
num += sample_colour;
356+
}
357+
358+
// Unweighted average of all taps.
colour = num / aniso_ratio;
359+
}
360+
return colour;
361+
}
362+
#endif
363+
222364
float4 sample_c(float2 uv, float uv_w, int2 xy)
223365
{
224366
#if PS_TEX_IS_FB == 1
@@ -251,21 +393,12 @@ float4 sample_c(float2 uv, float uv_w, int2 xy)
251393
#endif
252394
#endif
253395

254-
#if PS_AUTOMATIC_LOD == 1
396+
#if PS_ANISOTROPIC_FILTERING > 1
397+
return sample_c_af(uv, uv_w);
398+
#elif PS_AUTOMATIC_LOD == 1
255399
return Texture.Sample(TextureSampler, uv);
256400
#elif PS_MANUAL_LOD == 1
257-
// FIXME add LOD: K - ( LOG2(Q) * (1 << L))
258-
float K = LODParams.x;
259-
float L = LODParams.y;
260-
float bias = LODParams.z;
261-
float max_lod = LODParams.w;
262-
263-
float gs_lod = K - log2(abs(uv_w)) * L;
264-
// FIXME max useful ?
265-
//float lod = max(min(gs_lod, max_lod) - bias, 0.0f);
266-
float lod = min(gs_lod, max_lod) - bias;
267-
268-
return Texture.SampleLevel(TextureSampler, uv, lod);
401+
return Texture.SampleLevel(TextureSampler, uv, manual_lod(uv_w));
269402
#else
270403
return Texture.SampleLevel(TextureSampler, uv, 0); // No lod
271404
#endif

pcsx2-qt/Settings/GraphicsHardwareRenderingSettingsTab.ui

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@
6565
</widget>
6666
</item>
6767
<item row="1" column="0">
68+
<!-- Checkbox toggling shader-based anisotropic filtering; the object name is referenced from code, so only the label text is corrected. -->
<widget class="QCheckBox" name="swAnsio">
 <property name="text">
  <string>Shader Anisotropic Filtering</string>
 </property>
</widget>
73+
</item>
74+
<item row="1" column="1">
6875
<widget class="QCheckBox" name="enableHWFixes">
6976
<property name="text">
7077
<string>Manual Hardware Renderer Fixes</string>

pcsx2-qt/Settings/GraphicsSettingsWidget.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* settings_dialog,
120120
connect(m_hw.trilinearFiltering, &QComboBox::currentIndexChanged, this,
121121
&GraphicsSettingsWidget::onTrilinearFilteringChanged);
122122
onTrilinearFilteringChanged();
123+
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_hw.swAnsio, "EmuCore/GS", "SWAnisotropy", true);
123124

124125
//////////////////////////////////////////////////////////////////////////
125126
// SW Settings

pcsx2/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,7 @@ struct Pcsx2Config
864864
GSCASMode CASMode = DEFAULT_CAS_MODE;
865865
u8 Dithering = 2;
866866
u8 MaxAnisotropy = 0;
867+
bool SWAnisotropy = true;
867868
u8 TVShader = 0;
868869
s16 GetSkipCountFunctionId = -1;
869870
s16 BeforeDrawFunctionId = -1;

pcsx2/GS/Renderers/DX11/D3D.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -496,8 +496,8 @@ wil::com_ptr_nothrow<ID3DBlob> D3D::CompileShader(D3D::ShaderType type, D3D_FEAT
496496
break;
497497
}
498498

499-
static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3;
500-
static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG | D3DCOMPILE_DEBUG_NAME_FOR_SOURCE;
499+
static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_IEEE_STRICTNESS;
500+
static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG | D3DCOMPILE_DEBUG_NAME_FOR_SOURCE | D3DCOMPILE_IEEE_STRICTNESS;
501501

502502
wil::com_ptr_nothrow<ID3DBlob> blob;
503503
wil::com_ptr_nothrow<ID3DBlob> error_blob;

pcsx2/GS/Renderers/DX11/GSDevice11.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,9 +1765,11 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
17651765
IASetInputLayout(i->second.il.get());
17661766
}
17671767

1768-
void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
1768+
void GSDevice11::SetupPS(const PixelShaderSelector& ps_sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
17691769
{
1770-
auto i = std::as_const(m_ps).find(sel);
1770+
const GSHWDrawConfig::PSSelector& sel = ps_sel.ps;
1771+
1772+
auto i = std::as_const(m_ps).find(ps_sel);
17711773

17721774
if (i == m_ps.end())
17731775
{
@@ -1837,8 +1839,10 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
18371839
sm.AddMacro("PS_COLOR_FEEDBACK", sel.color_feedback);
18381840
sm.AddMacro("PS_DEPTH_FEEDBACK", sel.depth_feedback);
18391841

1842+
sm.AddMacro("PS_ANISOTROPIC_FILTERING", ps_sel.sw_ansio ? ps_sel.sw_ansio_level : 0);
1843+
18401844
wil::com_ptr_nothrow<ID3D11PixelShader> ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
1841-
i = m_ps.try_emplace(sel, std::move(ps)).first;
1845+
i = m_ps.try_emplace(ps_sel, std::move(ps)).first;
18421846
}
18431847

18441848
if (cb && m_ps_cb_cache.Update(*cb))
@@ -1855,6 +1859,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
18551859
pxAssert(ssel.biln == 0);
18561860
}
18571861

1862+
if (ps_sel.sw_ansio)
1863+
ssel.aniso = false;
1864+
18581865
auto i = std::as_const(m_ps_ss).find(ssel.key);
18591866

18601867
if (i != m_ps_ss.end())
@@ -2825,8 +2832,13 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
28252832
if (config.pal)
28262833
PSSetShaderResource(1, config.pal);
28272834

2835+
PixelShaderSelector pss;
2836+
pss.ps = config.ps;
2837+
pss.sw_ansio = (GSConfig.SWAnisotropy & config.sampler.aniso);
2838+
pss.sw_ansio_level = pss.sw_ansio ? GSConfig.MaxAnisotropy : 0;
2839+
28282840
SetupVS(config.vs, &config.cb_vs);
2829-
SetupPS(config.ps, &config.cb_ps, config.sampler);
2841+
SetupPS(pss, &config.cb_ps, config.sampler);
28302842

28312843
if (primid_texture)
28322844
{
@@ -2840,7 +2852,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
28402852

28412853
config.ps.date = 3;
28422854
config.alpha_second_pass.ps.date = 3;
2843-
SetupPS(config.ps, nullptr, config.sampler);
2855+
SetupPS(pss, nullptr, config.sampler);
28442856
PSSetShaderResource(3, primid_texture);
28452857
}
28462858

@@ -2896,22 +2908,23 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
28962908
config.ps.no_color1 = config.blend_multi_pass.no_color1;
28972909
config.ps.blend_hw = config.blend_multi_pass.blend_hw;
28982910
config.ps.dither = config.blend_multi_pass.dither;
2899-
SetupPS(config.ps, &config.cb_ps, config.sampler);
2911+
SetupPS(pss, &config.cb_ps, config.sampler);
29002912
SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend_multi_pass.blend), config.blend_multi_pass.blend.constant);
29012913
DrawIndexedPrimitive();
29022914
}
29032915

29042916
if (config.alpha_second_pass.enable)
29052917
{
2918+
pss.ps = config.alpha_second_pass.ps;
29062919
if (config.cb_ps.FogColor_AREF.a != config.alpha_second_pass.ps_aref)
29072920
{
29082921
config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref;
2909-
SetupPS(config.alpha_second_pass.ps, &config.cb_ps, config.sampler);
2922+
SetupPS(pss, &config.cb_ps, config.sampler);
29102923
}
29112924
else
29122925
{
29132926
// ps cbuffer hasn't changed, so don't bother checking
2914-
SetupPS(config.alpha_second_pass.ps, nullptr, config.sampler);
2927+
SetupPS(pss, nullptr, config.sampler);
29152928
}
29162929

29172930
SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);

pcsx2/GS/Renderers/DX11/GSDevice11.h

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ class GSDevice11 final : public GSDevice
2525
{
2626
public:
2727
using VSSelector = GSHWDrawConfig::VSSelector;
28-
using PSSelector = GSHWDrawConfig::PSSelector;
2928
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
3029
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
3130

@@ -49,6 +48,38 @@ class GSDevice11 final : public GSDevice
4948
};
5049
static_assert(sizeof(OMBlendSelector) == sizeof(u64));
5150

51+
struct alignas(8) PixelShaderSelector
52+
{
53+
GSHWDrawConfig::PSSelector ps;
54+
55+
union
56+
{
57+
struct
58+
{
59+
u32 sw_ansio : 1;
60+
u32 sw_ansio_level : 5;
61+
};
62+
63+
u32 key;
64+
};
65+
66+
__fi bool operator==(const PixelShaderSelector& p) const { return BitEqual(*this, p); }
67+
__fi bool operator!=(const PixelShaderSelector& p) const { return !BitEqual(*this, p); }
68+
69+
__fi PixelShaderSelector() { std::memset(this, 0, sizeof(*this)); }
70+
};
71+
static_assert(sizeof(PixelShaderSelector) == 16, "Pixel shader selector is 24 bytes");
72+
73+
struct PixelShaderSelectorHash
74+
{
75+
std::size_t operator()(const PixelShaderSelector& e) const noexcept
76+
{
77+
std::size_t hash = 0;
78+
HashCombine(hash, e.ps.key_hi, e.ps.key_lo, e.key);
79+
return hash;
80+
}
81+
};
82+
5283
class ShaderMacro
5384
{
5485
struct mcstr
@@ -247,7 +278,7 @@ class GSDevice11 final : public GSDevice
247278
std::unordered_map<u32, GSVertexShader11> m_vs;
248279
wil::com_ptr_nothrow<ID3D11Buffer> m_vs_cb;
249280
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11GeometryShader>> m_gs;
250-
std::unordered_map<PSSelector, wil::com_ptr_nothrow<ID3D11PixelShader>, GSHWDrawConfig::PSSelectorHash> m_ps;
281+
std::unordered_map<PixelShaderSelector, wil::com_ptr_nothrow<ID3D11PixelShader>, PixelShaderSelectorHash> m_ps;
251282
wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb;
252283
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss;
253284
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss;
@@ -350,7 +381,7 @@ class GSDevice11 final : public GSDevice
350381
void UpdateSubresource(ID3D11Buffer* buffer, const void* cb_uniforms, void* cached_cb_uniforms, size_t cb_uniforms_size);
351382

352383
void SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb);
353-
void SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel);
384+
void SetupPS(const PixelShaderSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel);
354385
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
355386

356387
void RenderHW(GSHWDrawConfig& config) override;

0 commit comments

Comments
 (0)