Azure · slister1001 · Apr 10, 2025 · Apr 10, 2025
@@ -241,7 +241,7 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
         p = p / DEFAULT_EVALUATION_RESULTS_FILE_NAME
 
     with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-        json.dump(data_dict, f)
+        json.dump(data_dict, f, ensure_ascii=False)
 
     print(f'Evaluation results saved to "{p.resolve()}".\n')
 

@@ -0,0 +1 @@
+{"query":"태양에서 가장 가까운 행성은?","response":"태양에서 가장 가까운 행성은 수성입니다.","ground_truth":"태양에서 가장 가까운 행성은 수성(Mercury)입니다. 수성은 태양계의 첫 번째 행성으로, 태양에 가장 가까운 궤도를 돌고 있습니다"}
@@ -90,6 +90,10 @@ def questions_answers_file():
 def questions_answers_basic_file():
     return _get_file("questions_answers_basic.jsonl")
 
+
+@pytest.fixture
+def questions_answers_korean_file():
+    """Return the Korean question/answer JSONL test data file via ``_get_file``."""
+    return _get_file("questions_answers_korean.jsonl")
+
 
 def _target_fn(query):
     """An example target function."""
@@ -851,3 +855,22 @@ def test_target_failure_error_message(self, questions_file):
             )
 
         assert "Evaluation target failed to produce any results. Please check the logs at " in str(exc_info.value)
+
+    def test_evaluate_korean_characters_result(self, questions_answers_korean_file):
+        """Check that Korean text survives ``evaluate`` in memory and in the output file.
+
+        Runs ``evaluate`` with an ``F1ScoreEvaluator`` over the Korean fixture,
+        compares the first result row's query with the first record of the input
+        file, and then checks that the same text appears unescaped in the file
+        written to ``output_path``.
+        """
+        output_path = "eval_test_results_korean.jsonl"
+
+        try:
+            result = evaluate(
+                data=questions_answers_korean_file,
+                evaluators={"g": F1ScoreEvaluator()},
+                output_path=output_path,
+            )
+
+            assert result is not None
+
+            with open(questions_answers_korean_file, "r", encoding="utf-8") as f:
+                data_from_file = json.loads(f.readline())
+
+            expected_query = data_from_file["query"]
+            assert result["rows"][0]["inputs.query"] == expected_query
+
+            # The in-memory comparison above passes even if the writer escapes
+            # non-ASCII characters as \uXXXX, so also inspect the raw file text.
+            # NOTE(review): assumes the output file is UTF-8 and contains the
+            # result rows -- confirm against the writer.
+            with open(output_path, "r", encoding="utf-8") as f:
+                assert expected_query in f.read()
+        finally:
+            # Clean up even when an assertion fails, so a failing run does not
+            # leave a stray results file in the working directory.
+            if os.path.exists(output_path):
+                os.remove(output_path)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"query":"태양에서 가장 가까운 행성은?","response":"태양에서 가장 가까운 행성은 수성입니다.","ground_truth":"태양에서 가장 가까운 행성은 수성(Mercury)입니다. 수성은 태양계의 첫 번째 행성으로, 태양에 가장 가까운 궤도를 돌고 있습니다"}