@@ -286,3 +286,66 @@ def test_exits_nonzero_when_issues_found(self, tmp_path, monkeypatch, capsys):
286286 assert "similar_name" in out
287287 # Silence unused import warning.
288288 _ = (MagicMock , patch , fact_checker )
289+
def test_reconfigures_stdio_to_utf8_on_windows(self):
    """On win32 the helper must switch all three standard streams to UTF-8.

    Otherwise Python would default stdio to the system ANSI codepage
    (cp1252/cp1251/cp950), and non-ASCII text piped into
    ``fact_checker --stdin`` would turn into mojibake before pattern
    parsing ever sees it.
    """
    import io
    import sys

    from mempalace.fact_checker import _reconfigure_stdio_utf8_on_windows

    class _RecordingStream(io.StringIO):
        """In-memory text stream that logs the kwargs of each ``reconfigure`` call."""

        def __init__(self, initial_value=""):
            super().__init__(initial_value)
            self.reconfigure_calls = []

        def reconfigure(self, **kwargs):
            self.reconfigure_calls.append(kwargs)

    fake_in = _RecordingStream()
    fake_out = _RecordingStream()
    fake_err = _RecordingStream()

    # Pretend to be Windows and swap in the recording streams; patches are
    # entered in the order platform, stdin, stdout, stderr.
    with patch.object(sys, "platform", "win32"), patch.object(sys, "stdin", fake_in):
        with patch.object(sys, "stdout", fake_out), patch.object(sys, "stderr", fake_err):
            _reconfigure_stdio_utf8_on_windows()

    # The errors policy differs per stream:
    #   * stdin  -> surrogateescape, so one stray malformed byte coming from
    #     a redirected file cannot crash the read;
    #   * stdout/stderr -> replace, so an extracted fact that carries a
    #     surrogate half cannot crash in the middle of a print.
    output_kwargs = {"encoding": "utf-8", "errors": "replace"}
    assert fake_in.reconfigure_calls == [{"encoding": "utf-8", "errors": "surrogateescape"}]
    assert fake_out.reconfigure_calls == [output_kwargs]
    assert fake_err.reconfigure_calls == [output_kwargs]
328+
def test_reconfigure_stdio_is_noop_off_windows(self):
    """Off Windows the helper must leave every standard stream untouched.

    Linux/macOS already default to UTF-8 stdio, so no ``reconfigure`` call
    is expected on stdin, stdout, or stderr. All three streams are replaced
    with recording fakes, so a regression that reconfigures stdout or
    stderr is caught by this test rather than being applied to the real
    streams of the test run.
    """
    import io
    import sys

    from mempalace.fact_checker import _reconfigure_stdio_utf8_on_windows

    class _ReconfigurableStringIO(io.StringIO):
        """``StringIO`` that records the kwargs of each ``reconfigure`` call."""

        def __init__(self):
            super().__init__()
            self.reconfigure_calls = []

        def reconfigure(self, **kwargs):
            self.reconfigure_calls.append(kwargs)

    stdin = _ReconfigurableStringIO()
    stdout = _ReconfigurableStringIO()
    stderr = _ReconfigurableStringIO()
    with (
        patch.object(sys, "platform", "linux"),
        patch.object(sys, "stdin", stdin),
        patch.object(sys, "stdout", stdout),
        patch.object(sys, "stderr", stderr),
    ):
        _reconfigure_stdio_utf8_on_windows()

    # Check each stream separately so a failure names the stream that was
    # touched.
    assert stdin.reconfigure_calls == []
    assert stdout.reconfigure_calls == []
    assert stderr.reconfigure_calls == []
0 commit comments