{
  "lesson": "12-video-understanding-pipeline",
  "title": "毕业项目 12 —— 视频理解流水线（场景、问答、搜索）",
  "questions": [
    {
      "stage": "pre",
      "question": "即便长上下文 VLM 能原生读取一段 2 小时的视频，为什么仍然需要场景级索引？",
      "options": [
        "长上下文 VLM 无法流式输出",
        "向量索引不支持视频文件",
        "把 100 小时视频摄入为可查询语料需要场景级检索，即便单个视频能被整段读取",
        "场景切换是 VLM 唯一能理解的东西"
      ],
      "correct": 2,
      "explanation": ""
    },
    {
      "stage": "pre",
      "question": "在多向量索引中，每个场景获得哪三种向量类型？",
      "options": [
        "字幕嵌入、关键帧嵌入和转写嵌入",
        "音频波形、梅尔频谱图和 MFCC",
        "哈希、gzip 和 CRC",
        "姿态、深度和光流"
      ],
      "correct": 0,
      "explanation": ""
    },
    {
      "stage": "check",
      "question": "查询时，来自三条检索流的结果是如何合并的？",
      "options": [
        "只挑字幕命中",
        "对余弦相似度取平均",
        "对三个排序列表做倒数排名融合（reciprocal rank fusion）",
        "取并集而不打分"
      ],
      "correct": 2,
      "explanation": ""
    },
    {
      "stage": "check",
      "question": "时间定位（temporal grounding）步骤在排名最高的场景内部进一步细化什么？",
      "options": [
        "来自 Whisper 的转写词级时间戳",
        "存储在 Qdrant 中的向量数量",
        "包含答案的 (start, end) 时间戳窗口",
        "关键帧嵌入的维度"
      ],
      "correct": 2,
      "explanation": ""
    },
    {
      "stage": "check",
      "question": "哪一类问题因属于已知的幻觉高发区而被单独报告？",
      "options": [
        "关于场景的描述性问题",
        "说话人识别问题",
        "翻译问题",
        "计数和动作类型问题，VLM 在此会数错或把事件顺序弄错"
      ],
      "correct": 3,
      "explanation": ""
    },
    {
      "stage": "post",
      "question": "评分细则在留出集上衡量哪个定位指标？",
      "options": [
        "时间定位的交并比（IoU）",
        "接受率",
        "困惑度",
        "MRR@10"
      ],
      "correct": 0,
      "explanation": ""
    },
    {
      "stage": "post",
      "question": "为什么流水线要求答案中带有引用的时间戳？",
      "options": [
        "它们能降低嵌入维度",
        "Whisper 输出要求时间戳",
        "Qdrant 载荷要求时间戳",
        "它们让观看者跳到精确的 (video_id, start, end)，以便用户核验论断"
      ],
      "correct": 3,
      "explanation": ""
    }
  ]
}