[Fix] Add a dedicated HLE dataset for text-only samples (#1515)

TianhaoLiang2000 · web-flow · commit 2d5b16cab1be · 2026-04-29T12:34:30.000+08:00
* Add dedicated HLE dataset for text-only samples

* Sort dataset imports with isort
diff --git a/vlmeval/dataset/__init__.py b/vlmeval/dataset/__init__.py
@@ -44,6 +44,7 @@
 from .GUI.vbgd import VBGD
 from .GUI.venusbench import VenusBench_GD
 from .hipho import HiPhODataset
+from .hle import HLEDataset
 from .image_base import ImageBaseDataset, img_root_map
 from .image_caption import ImageCaptionDataset
 from .image_ccocr import CCOCRDataset
@@ -275,7 +276,7 @@ def evaluate(self, eval_file, **judge_kwargs):
     HRBenchDataset, CRPE, MathVerse, NaturalBenchDataset, MIABench,
     OlympiadBench, SeePhys, WildVision, MMMath, QSpatial, Dynamath, GSM8KVDataset, MMGenBench, VizWiz,  # noqa: E501
     MMNIAH, CMMMU, VLRewardBench, WeMath, LogicVista, MMMUProDataset,
-    CreationMMBenchDataset, ImageShortQADataset, MMAlignBench, OmniDocBench,
+    CreationMMBenchDataset, HLEDataset, ImageShortQADataset, MMAlignBench, OmniDocBench,
     VLM2Bench, VMCBenchDataset, EMMADataset, MME_CoT, MOAT, MedXpertQA_MM_test,
     LEGO, MMSci_Captioning, Physics_yale, ScreenSpot_Pro, ScreenSpot, VenusBench_GD,
     ScreenSpotV2, OSWorld_G, VBGD, MMIFEval, Spatial457, VisuLogic, CVBench, PathVQA_VAL,
diff --git a/vlmeval/dataset/hle.py b/vlmeval/dataset/hle.py
@@ -0,0 +1,47 @@
+import hashlib
+import io
+from base64 import b64decode
+
+from PIL import Image
+
+from .image_shortqa import ImageShortQADataset
+
+
+class HLEDataset(ImageShortQADataset):  # 'hle' dataset; sends a text-only prompt when the image is blank
+    HLE_BLANK_IMAGE_MD5 = 'b8718c65c71b998e9132229ac4b7c8a4'  # MD5 of the image string treated as blank
+
+    DATASET_URL = {
+        'hle': 'https://opencompass.openxlab.space/utils/VLMEval/hle.tsv',
+    }
+
+    DATASET_MD5 = {
+        'hle': 'a83cbdbea89f27c2aa5b8f34a8894b72',
+    }
+
+    def _is_hle_blank_image(self, image):  # True if `image` is a base64 string of the blank white image
+        if not isinstance(image, str):  # non-string values are never considered blank
+            return False
+
+        if hashlib.md5(image.encode('utf-8')).hexdigest() == self.HLE_BLANK_IMAGE_MD5:
+            return True  # fast path: the string itself hashes to the known blank-image MD5
+
+        try:  # fallback: decode the base64 payload and inspect the pixels
+            img = Image.open(io.BytesIO(b64decode(image))).convert('RGB')
+        except Exception:  # any failure to decode/open means "not blank" (best effort, no raise)
+            return False
+
+        colors = img.getcolors(maxcolors=8)  # PIL returns None if there are more than `maxcolors` colors
+        if colors is None or len(colors) != 1:  # blank requires exactly one distinct color
+            return False
+
+        _, color = colors[0]  # getcolors entries are (count, color); the count is not needed
+        return img.size == (101, 93) and color == (255, 255, 255)  # all-white image of size 101x93
+
+    def build_prompt(self, line):  # returns a list of message dicts for one sample
+        if isinstance(line, int):  # an int is used as a positional row index into self.data
+            line = self.data.iloc[line]
+
+        if 'image' in line and self._is_hle_blank_image(line['image']):
+            return [dict(type='text', value=line['question'])]  # blank image: question text only, no image
+        else:  # any other sample goes through the parent class's prompt builder unchanged
+            return super().build_prompt(line)
diff --git a/vlmeval/dataset/image_shortqa.py b/vlmeval/dataset/image_shortqa.py
@@ -75,11 +75,6 @@ class ImageShortQADataset(ImageBaseDataset):
         'LiveMMBench_Reasoning': '',
         'LiveMMBench_Reasoning_circular': '',
         'LiveMMBench_Spatial': '',
-        'hle': 'https://opencompass.openxlab.space/utils/VLMEval/hle.tsv',
-    }
-
-    DATASET_MD5 = {
-        'hle': 'a83cbdbea89f27c2aa5b8f34a8894b72',
     }
 
     def build_prompt(self, line):