Skip to content

Commit 2d5b16c

Browse files
[Fix] Add a dedicated HLE dataset for text-only samples (#1515)
* Add dedicated HLE dataset for text-only samples
* Sort dataset imports with isort
1 parent e7d64cf commit 2d5b16c

3 files changed

Lines changed: 49 additions & 6 deletions

File tree

vlmeval/dataset/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
from .GUI.vbgd import VBGD
4545
from .GUI.venusbench import VenusBench_GD
4646
from .hipho import HiPhODataset
47+
from .hle import HLEDataset
4748
from .image_base import ImageBaseDataset, img_root_map
4849
from .image_caption import ImageCaptionDataset
4950
from .image_ccocr import CCOCRDataset
@@ -275,7 +276,7 @@ def evaluate(self, eval_file, **judge_kwargs):
275276
HRBenchDataset, CRPE, MathVerse, NaturalBenchDataset, MIABench,
276277
OlympiadBench, SeePhys, WildVision, MMMath, QSpatial, Dynamath, GSM8KVDataset, MMGenBench, VizWiz, # noqa: E501
277278
MMNIAH, CMMMU, VLRewardBench, WeMath, LogicVista, MMMUProDataset,
278-
CreationMMBenchDataset, ImageShortQADataset, MMAlignBench, OmniDocBench,
279+
CreationMMBenchDataset, HLEDataset, ImageShortQADataset, MMAlignBench, OmniDocBench,
279280
VLM2Bench, VMCBenchDataset, EMMADataset, MME_CoT, MOAT, MedXpertQA_MM_test,
280281
LEGO, MMSci_Captioning, Physics_yale, ScreenSpot_Pro, ScreenSpot, VenusBench_GD,
281282
ScreenSpotV2, OSWorld_G, VBGD, MMIFEval, Spatial457, VisuLogic, CVBench, PathVQA_VAL,

vlmeval/dataset/hle.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import hashlib
2+
import io
3+
from base64 import b64decode
4+
5+
from PIL import Image
6+
7+
from .image_shortqa import ImageShortQADataset
8+
9+
10+
class HLEDataset(ImageShortQADataset):
    """Short-QA dataset for ``hle`` that sends blank-image samples as text only.

    Behaves like ``ImageShortQADataset`` except in ``build_prompt``: when a
    sample's ``image`` field is recognised as the blank placeholder (an
    all-white 101x93 image), the prompt contains only the question text.
    """

    # Hex MD5 of the base64 *string* (UTF-8 encoded) of the known blank image;
    # used as a fast path before falling back to pixel inspection.
    HLE_BLANK_IMAGE_MD5 = 'b8718c65c71b998e9132229ac4b7c8a4'

    DATASET_URL = {
        'hle': 'https://opencompass.openxlab.space/utils/VLMEval/hle.tsv',
    }

    DATASET_MD5 = {
        'hle': 'a83cbdbea89f27c2aa5b8f34a8894b72',
    }

    def _is_hle_blank_image(self, image):
        """Return True if ``image`` is the blank HLE placeholder image.

        Args:
            image: Value of a sample's ``image`` field. Only ``str`` values
                (treated as base64-encoded image bytes) can match; anything
                else returns False.

        Returns:
            True when the string's MD5 equals ``HLE_BLANK_IMAGE_MD5``, or when
            it decodes to a 101x93 image whose RGB pixels are all
            (255, 255, 255). False otherwise, including when the string cannot
            be decoded or opened as an image.
        """
        if not isinstance(image, str):
            return False

        # Fast path: exact match against the known placeholder encoding.
        if hashlib.md5(image.encode('utf-8')).hexdigest() == self.HLE_BLANK_IMAGE_MD5:
            return True

        try:
            with Image.open(io.BytesIO(b64decode(image))) as raw:
                # Image.open is lazy (pixel data is not decoded yet), so
                # rejecting on size first avoids fully decoding every real,
                # possibly large, image just to discard it. convert() does not
                # change the size, so the outcome matches checking it later.
                if raw.size != (101, 93):
                    return False
                img = raw.convert('RGB')
        except Exception:
            # Best effort: undecodable or unreadable data is simply "not blank".
            return False

        # getcolors returns None when the image has more than `maxcolors`
        # distinct colors; a blank image must have exactly one.
        colors = img.getcolors(maxcolors=8)
        if colors is None or len(colors) != 1:
            return False

        _, color = colors[0]
        return color == (255, 255, 255)

    def build_prompt(self, line):
        """Build the prompt for one sample, omitting the blank placeholder image.

        Args:
            line: Either an integer position into ``self.data`` (resolved with
                ``.iloc``) or the sample record itself.

        Returns:
            ``[dict(type='text', value=<question>)]`` when the sample has an
            ``image`` entry recognised as the blank placeholder; otherwise
            whatever the parent class's ``build_prompt`` returns.
        """
        if isinstance(line, int):
            line = self.data.iloc[line]

        if 'image' in line and self._is_hle_blank_image(line['image']):
            return [dict(type='text', value=line['question'])]
        return super().build_prompt(line)

vlmeval/dataset/image_shortqa.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,6 @@ class ImageShortQADataset(ImageBaseDataset):
7575
'LiveMMBench_Reasoning': '',
7676
'LiveMMBench_Reasoning_circular': '',
7777
'LiveMMBench_Spatial': '',
78-
'hle': 'https://opencompass.openxlab.space/utils/VLMEval/hle.tsv',
79-
}
80-
81-
DATASET_MD5 = {
82-
'hle': 'a83cbdbea89f27c2aa5b8f34a8894b72',
8378
}
8479

8580
def build_prompt(self, line):

0 commit comments

Comments
 (0)