// questions.js (2414 lines, 151 KB)
window.questions = [
{
"id": 1,
"question": "Which of the following statements provides the BEST example of the 'AI Effect'?",
"options": [
"a) People lose their jobs as AI-based systems perform their roles cheaper and better",
"b) Competitive computer games lose popularity as AI-based systems always win",
"c) Rule-based expert systems for medical diagnosis are no longer considered to be AI",
"d) People believe AI will take over the world, as shown in films"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. People in many occupations may lose their jobs to AI based systems, but this is simply progress, not the 'AI Effect'",
"b": "Is not correct. For some computer games, AI-based systems can outplay humans, but there is little evidence of a drop-off in the popularity of such games",
"c": "Is correct. The 'AI Effect' is defined as the change in the definition of AI as technology advances. Rule-based systems for medical diagnosis were popular examples of AI in the 1970s and 1980s but are often not considered AI today",
"d": "Is not correct. The gullibility of cinema goers believing killer robots will take over the world is not the 'AI Effect'"
}
},
{
"id": 2,
"question": "Which of the following options is NOT a technology used to implement AI?",
"options": [
"a) Support vector machine",
"b) Decision tree",
"c) Evolutionary reasoning",
"d) Bayesian optimization"
],
"correct_answer": "C",
"k_level": "K1",
"points": "1",
"justification": {
"a": "Is not correct. Support vector machines are a form of machine learning",
"b": "Is not correct. Decision trees are a form of machine learning",
"c": "Is correct. There is no such AI technology as evolutionary reasoning. Such a term is occasionally used in discussions related to biological evolution-based rules were considered AI a few decades ago",
"d": "Is not correct. Bayesian optimization is a form of machine learning"
}
},
{
"id": 3,
"question": "Which of the following statements about the hardware used to implement AI-based systems is MOST likely to be CORRECT?",
"options": [
"a) The processors used to train a mobile recommendation system must be the same as the processors on the mobile phone",
"b) Graphical processing units (GPUs) are a reasonable choice to implement an AI-based computer vision system",
"c) Deep learning systems need to be trained, evaluated, and tested using AI-specific chips",
"d) It is always best to choose processors with more bits to achieve sufficient accuracy for AI based systems"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. The two activities of training a ML model and inference from that model are quite different so there is normally no reason that they should be performed on the same processors",
"b": "Is correct. GPUs are designed for the parallel processing of images using thousands of cores, which is close to what is required for an AI based computer vision system that would most likely be implemented as a neural network",
"c": "Is not correct. It is still possible to train, evaluate and test a simple deep-learning system on a PC with limited GPU support – so specific chips for AI are not needed, but they would be far faster",
"d": "Is not correct. Many AI-based systems are not focused on exact calculations, but rather on probabilistic determinations and so the accuracy of processors with many bits is often unnecessary"
}
},
{
"id": 4,
"question": "There are a number of good quality pre-trained models available in the market and you want to use one of them for an image-based classifier. You have decided to ask the provider of the model about the data used for training the model and its format. Which of the following statements is the BEST example of a risk that you are trying to mitigate by asking these questions?",
"options": [
"a) Bad classification accuracy of the pre-trained models",
"b) Differences in the data used to train the model and the operational data",
"c) Performance efficiency issues of the pre-trained model",
"d) Lack of explainability of the pre-trained model compared to that of a model trained by you"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. The question mentions the good quality of the pre-trained model, so this risk should be negligible",
"b": "Is correct. The data used to train the model should be similar to the data used for making the predictions",
"c": "Is not correct. Performance does not appear to be an issue in this situation",
"d": "Is not correct. Explainability does not appear to be an issue in this situation, nor can it be achieved by looking at the training data and its format"
}
},
{
"id": 5,
"question": "Which of the following statements is MOST likely to be specifying a requirement for autonomy in an AI-based system?",
"options": [
"a) The system shall maintain a safe distance to other vehicles until the brake or accelerator is pushed by the driver",
"b) The system shall learn the preferred style of response to emails by remotely monitoring the email traffic",
"c) The system shall compare its predictions of house prices with actual selling prices to determine if it needs to be retrained",
"d) It shall be possible to modify the system's behavior to work with different types of users in less than a day"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. This requirement defines the human interventions that define the end of the system working autonomously",
"b": "Is not correct. This requirement is specifying a required function for how the system shall perform self-learning",
"c": "Is not correct. This requirement is specifying how the system will manage concept drift, in this case most likely caused by the house market changing",
"d": "Is not correct. This is specifying an adaptability requirement – the maximum time it should take to make a change to the system"
}
},
{
"id": 6,
"question": "Which of the following statements about bias in AI-based systems is NOT correct?",
"options": [
"a) Bias may be caused by users of a book recommendation system making choices that deliberately cause the system to make poor suggestions",
"b) Bias may be caused in the employee age of death prediction system by collecting the training data from a dataset of patients who are all retired",
"c) Bias may be caused in the creditworthiness system by using training data obtained from those who own and use a credit card",
"d) Bias may be caused in the navigation system by using a route planning algorithm that is too complex to be explained to typical users"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Bias can be caused by users deliberately poisoning the self-learning of an AI-based system",
"b": "Is not correct. Bias can be caused when the training data does not correctly match those who the system will be applied to. For instance, employees will typically be younger than retired patients",
"c": "Is not correct. Bias can be caused when the training data does not correctly match those who the system will be applied to. For instance, most people using credit cards are already considered creditworthy, which is a typical example of sample bias",
"d": "Is correct. If the algorithm cannot be explained, then it lacks explainability, but that does not mean it is biased nor unbiased"
}
},
{
"id": 7,
"question": "Which of the following is MOST likely to be an example of reward hacking?",
"options": [
"a) The programmer's assistant tool optimizes the code to provide reduced response times, while still ensuring that functional requirements are met",
"b) An anesthetic supply device with a goal of keeping patients stable during surgery supplies too many doses and patients do not wake up as quickly as expected",
"c) The third-party development organization paid their AI programmers based on the number of lines of code they write",
"d) A type of AI used to play competitive computer games against humans that is focused on getting the highest score"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. It appears that the tool is achieving its two goals with there being no detrimental effects, so this is unlikely to be 'reward hacking'",
"b": "Is correct. This could be 'reward hacking' if the system achieves one goal to the detriment of others, in this case the need for patients to wake up",
"c": "Is not correct. Reward hacking is not a form of paying AI developers",
"d": "Is not correct. Some game-playing AI-based systems are driven by a reward function, but this is not known as 'reward hacking'"
}
},
{
"id": 9,
"question": "Which of the following statements BEST describes classification and regression as part of supervised learning?",
"options": [
"a) Regression is checking that the ML model test results do not change when the same test data is executed",
"b) Classification is the grouping of unlabeled data into separate classes",
"c) Classification is the labelling of the data for training the ML model",
"d) Regression is predicting the number of classes that are output by the ML model"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Regression in the context of supervised learning is generally when the ML model outputs a numeric result",
"b": "Is correct. Classification is when input data to a ML model is classified into one of a few predefined classes",
"c": "Is not correct. Training data needs to be labelled for training in supervised learning, but this activity is not known as classification. It is simply labelling",
"d": "Is not correct. Regression is when the output from the ML model is numeric, but the output is not a number of classes"
}
},
{
"id": 10,
"question": "Which of the following options BEST describes an example of reinforcement learning?",
"options": [
"a) The mobile game app updates its feedback, response timing and the number of user options it provides based on how much the players spend",
"b) The language translation app searches the internet to find text provided in multiple languages to improve its translation function",
"c) The factory quality control system uses video cameras and audio analysis to identify manufactured items that are faulty based on monitoring a human quality control operative",
"d) The software component test prediction system uses a range of quality measures to identify which components are likely to contain the most defects"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. The amount spent can be considered the reward function for this system, with the system changing its behavior to increase the amount spent",
"b": "Is not correct. The app is using text in what can be considered a source language and a 'correct' translation of this source. Therefore, it is relying on a form of supervised learning with no reward function mentioned",
"c": "Is not correct. The system is using the human quality control operative as a form of 'gold' standard and so is relying on a form of supervised learning",
"d": "Is not correct. There is no suggestion that any reward function is used, instead it is most likely that the prediction system bases its determination of defects on past experience. Therefore, it is probably also relying on a supervised learning system"
}
},
{
"id": 11,
"question": "You have been asked for your opinion on the ML approach to be used for a new system that is part of the traffic management for a SMART city. The idea is that the new system will control the traffic lights in the city to ensure traffic flows easily through and around the city. Which of the following approaches do you expect MOST likely to succeed?",
"options": [
"a) Unsupervised learning that is based on identifying clusters around the city where the traffic density is higher than average",
"b) A supervised learning regression solution based on thousands of journeys labelled with both journey length and duration",
"c) Reinforcement learning that is based on a reward function that penalizes solutions that result in higher levels of traffic congestion",
"d) A supervised learning classification solution that is based on drivers and passengers submitting their favorite routes for traversing the city"
],
"correct_answer": "C",
"k_level": "K3",
"points": "2",
"justification": {
"a": "Is not correct. It should be possible for the unsupervised learning system to identify areas that are congested, but this alone will not provide the solution",
"b": "Is not correct. A regression solution is unlikely to provide us with what we want as the predicted speed of individual journeys will not provide an overall solution to citywide congestion",
"c": "Is correct. A continually improving reinforcement learning system with a reward function based on lower levels of congestion as a measure of success is valid for this type of system",
"d": "Is not correct. This solution is dependent on volunteers submitting subjective opinions that will most likely result in a solution that changes back and forth as the system adopts favorite routes that then become congested"
}
},
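// Reference note (illustrative sketch only, not part of the question data):
// a reward function of the kind described in option (c) of the question
// above might score each traffic-light control action as, hypothetically,
//   reward = -(total_congestion_after_action)
// so that actions which reduce citywide congestion are reinforced.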
{
"id": 12,
"question": "When performing testing of a trained model, an ML engineer found that the model was highly accurate when evaluated with validation data but that it performed poorly with independent test data. Which of the following options is MOST likely to cause this situation?",
"options": [
"a) Underfitting",
"b) Concept drift",
"c) Overfitting",
"d) Poor acceptance criteria"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. The model performs well on validation data, so it is not a case of underfitting",
"b": "Is not correct. Concept drift refers to changes after the model training and validation stage",
"c": "Is correct. The bad performance on test data and good on validation data suggests overfitting",
"d": "Is not correct. Poor acceptance criteria should be consistent with different sets of data, so are unlikely to lead to a difference between the test results with validation data and independent test data"
}
},
{
"id": 13,
"question": "Which of the following is an example of a challenge that is likely to be encountered in the course of developing and testing an ML solution?",
"options": [
"a) Data anonymization operations typically require knowledge of various ML algorithms",
"b) The data used might be unstructured data",
"c) A large percentage of the budget gets spent just in data preparation",
"d) The data pipeline scalability is a challenge when training the model"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Data anonymization operations do not require knowledge of ML algorithms",
"b": "Is not correct. Unstructured data is not a challenge. Images, audio, free-flowing text are all examples of unstructured data",
"c": "Is correct. Up to 36% of ML workflow effort may be spent in data preparation",
"d": "Is not correct. Scalability typically is a requirement at deployment, rather than when training"
}
},
{
"id": 14,
"question": "The data scientist has complained that the model cannot be trained with one particular algorithm, although other algorithms work with the same training data. Which of the following options is the MOST likely reason for this?",
"options": [
"a) Wrong data",
"b) Missing data",
"c) Badly labelled data",
"d) Insufficient data"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Since models based on some learning algorithms can be trained with the data but not one particular algorithm, it indicates that the data is correct",
"b": "Is not correct. Since models based on some learning algorithms can be trained with the data but not one particular algorithm, it indicates that there is no missing data",
"c": "Is not correct. Since models based on some learning algorithms can be trained with the data but not one particular algorithm, it indicates that the data is correctly labelled",
"d": "Is correct. Since models based on some learning algorithms can be trained with the data. However, if it does not work for one particular algorithm, it is MOST likely to be that the quantity of the data that is not sufficient for that particular algorithm"
}
},
{
"id": 15,
"question": "DataSure is a start-up with a product that promises to improve the quality of ML models. DataSure claim that this improvement comes from checking if the data has been labeled correctly. Which of the following defects is MOST likely to have been prevented by using this product?",
"options": [
"a) The model will have security vulnerabilities",
"b) The model will have poor accuracy",
"c) The model will not fulfill its intended function",
"d) The model will produce biased outputs"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Data privacy and security issues are not being handled. Hence the product is not going to prevent security issues",
"b": "Is correct. Mislabeled data results in reduced accuracy of the ML model",
"c": "Is not correct. A model not being fit for purpose arises from incorrect or unfair data, not mislabeled data",
"d": "Is not correct. A biased model results from incomplete data, unbalanced data, unfair data, data lacking diversity, or duplicate data, rather than from mislabeled data"
}
},
{
"id": 16,
"question": "An ML engineer, upon finding insufficient training data, is rotating labeled images to create additional training data. Which of the following approaches to labeling is being applied in this above example?",
"options": [
"a) Crowdsourcing",
"b) Augmentation",
"c) AI-based labeling",
"d) Outsourcing"
],
"correct_answer": "B",
"k_level": "K1",
"points": "1",
"justification": {
"a": "Is not correct. Crowdsourcing is when you use a large number of people to provide some work. In this case only one person is performing the task",
"b": "Is correct. Augmentation is being performed here by transforming existing labelled data",
"c": "Is not correct. AI is not being used for labeling of the data",
"d": "Is not correct. The ML engineer has not outsourced the task to a third party"
}
},
{
"id": 18,
"question": "ThermalSpace is a solution provider that helps thermal power plants to optimize their power output. Their solution is based on an ML model created using past data with clearly marked output. The model helps determine the amount of electricity to be generated at a given time of the day. To determine the quality of the model using ML functional performance metrics, which of the following metrics is MOST likely to be used?",
"options": [
"a) R-squared",
"b) Precision",
"c) Recall",
"d) False Positives"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. It is a supervised regression problem because the model outputs a continuous value, the amount of electricity to be generated, which uses the R-Squared or MSE/RMSE metric",
"b": "Is not correct. This is a metric for classification",
"c": "Is not correct. This is a metric for classification",
"d": "Is not correct. This is a metric for classification"
}
},
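// Reference note (sketch): R-squared, the regression metric named in the
// question above, measures the proportion of variance explained:
//   R² = 1 − Σ(y_i − ŷ_i)² / Σ(y_i − ȳ)²
// where ŷ_i is the model's prediction for sample i and ȳ is the mean of
// the observed outputs; values closer to 1 indicate a better fit.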
{
"id": 19,
"question": "KnowYourPet is an app utilizing ML to determine whether a pet is hungry or not. It is understood that a dog is likely to be not hungry most of the time, as reflected in the training data. If the dog is mis-diagnosed as hungry then it may lead to overfeeding of the dog and this could lead to serious health issues. Which of the following metrics would you choose for determining the suitability of the model under test?",
"options": [
"a) Accuracy",
"b) Precision",
"c) Recall",
"d) F1-score"
],
"correct_answer": "B",
"k_level": "K4",
"points": "2",
"justification": {
"a": "Is not correct. Accuracy is not useful when there is an imbalance in the expected classes and the not hungry class dominates in this case",
"b": "Is correct. Precision should be used because the cost of false-positives (overfeeding the dog) is high (serious health issues)",
"c": "Is not correct. Recall is useful when the positives should not be missed. In this case, precision is also important (see b) and hence recall alone is not very useful. F1-score is a better choice",
"d": "Is not correct. F1-score is useful when there is an imbalance in the expected classes and when precision and recall are similarly important, but in this case precision appears to be far more important than recall"
}
},
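// Reference note (sketch): using TP/FP/FN counts from the confusion
// matrix, the metrics discussed in the question above are
//   precision = TP / (TP + FP)   // penalizes false "hungry" diagnoses
//   recall    = TP / (TP + FN)   // penalizes missed "hungry" cases
//   F1        = 2 * precision * recall / (precision + recall)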
{
"id": 20,
"question": "Which of the following options BEST describes a deep neural net?",
"options": [
"a) It is comprised of a hierarchical structure of neurons with the lowest (deepest) neurons making most of the decisions",
"b) It is comprised of connected neurons where each neuron has an associated bias and each connection has an associated weight",
"c) It is made up several layers with each layer (except input and output layers) connected to each other layer and errors are propagated backwards through the network",
"d) It is made up of layers of neurons, each of which generates an activation value based on the other neurons in the same layer"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. A neural network does not have a hierarchical structure",
"b": "Is correct. As with the human brain, an artificial neural network is comprised of connected neurons. To perform its calculation of an activation value, each neuron is assigned a bias and each connection is assigned a weight",
"c": "Is not correct. A neural network is made up of several layers and errors are propagated backwards through the network, but the layers of a neural network are only connected to the next layers (not each other layer)",
"d": "Is not correct. A neural network is made up of layers of neurons, but the activation value is based on the neurons in the preceding layer (not the same layer)"
}
},
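// Reference note (sketch): per the correct option above, each neuron
// computes its activation from the previous layer's outputs x, the
// connection weights w, and the neuron's bias b:
//   activation = f(Σ_i w_i · x_i + b)   // f is an activation function, e.g. ReLU or sigmoid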
{
"id": 21,
"question": "Which of the following statements CORRECTLY describes a test coverage measure for neural networks?",
"options": [
"a) Value change coverage is based on individual neurons being seen to affect the overall output of the neural network",
"b) Threshold coverage is based on neurons outputting an activation value greater than a pre set value between zero and one",
"c) Neuron coverage is a measure of the proportion of neurons that are activated at any time during the testing",
"d) Sign change coverage measures the coverage of neurons that output both positive, negative and zero activation values"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Value change coverage is a measure of the proportion of neurons activated where their activation values differ by more than a preset change amount. It is not concerned with the overall output of the neural network",
"b": "Is correct. Threshold coverage measures the proportion of neurons activated during testing with a value greater than a preset threshold value",
"c": "Is not correct. All neurons are potentially 'activated' each time a neural network is 'run', however the values output by the neurons change, which is what is measured by neuron coverage (coverage achieved by a value greater than zero)",
"d": "Is not correct. Sign change coverage is a measure of the proportion of neurons activated with both positive and negative activation values, but not zero activation values"
}
},
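// Reference note (sketch, following the definitions in the justifications
// above): for observed activation values a_n of each neuron n during a
// test run,
//   neuron coverage    = |{n : a_n > 0}| / |neurons|
//   threshold coverage = |{n : a_n > t}| / |neurons|, for a preset t in (0, 1)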
{
"id": 22,
"question": "Which of the following requirements for an AI-based system is MOST likely to cause a significant challenge in testing?",
"options": [
"a) The system shall be more accurate than the system it is replacing",
"b) The AI component in the system shall have 100% accuracy",
"c) A human operator should be able to override the system in 1 second",
"d) The system shall mimic the human emotions of a typical game player"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. This is a specific requirement with a test oracle, so should not usually cause a testing challenge",
"b": "Is not correct. This may be a difficult requirement to achieve but should not create a testing challenge",
"c": "Is not correct. This is a testable requirement",
"d": "Is correct. This requirement is extremely complex to test without defining all human emotions and how the system might mimic them"
}
},
{
"id": 23,
"question": "Which of the following is a factor associated with the test data that can make the testing of AI based systems difficult?",
"options": [
"a) Sourcing big data with high velocity",
"b) Sourcing data from a single source",
"c) Sourcing data separately from the data scientists",
"d) Sourcing data from public websites"
],
"correct_answer": "A",
"k_level": "K1",
"points": "1",
"justification": {
"a": "Is correct. Sourcing data for AI systems that use large quantities of high-velocity data can be difficult",
"b": "Is not correct. Sourcing consistent data from multiple sources can be difficult",
"c": "Is not correct. Sourcing data separately is good practice as it prevents common failures with the data scientists",
"d": "Is not correct. Sourcing data from public websites is straightforward"
}
},
{
"id": 24,
"question": "Why would the accuracy of human decisions be considered in testing as well as the accuracy of AI-based systems?",
"options": [
"a) Intuitive human decisions can be made faster than a corresponding AI-based system in some situations",
"b) Unethical decisions can be made by humans as well as AI-based systems",
"c) The accuracy of human decisions is not relevant to testing AI-based systems",
"d) Human decisions may be of lower quality when they have been recommended by an AI based system"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Speed of decision making is not related to accuracy",
"b": "Is not correct. The ethical choices made by humans are not related to testing AI-based systems",
"c": "Is not correct. The accuracy of human decisions is relevant as systems may make recommendations that humans approve or review",
"d": "Is correct. Human decisions supported by recommendations by AI-based systems may be of lower quality than human decisions without recommendations from a system, and this should be considered in testing"
}
},
{
"id": 26,
"question": "Which of the following statements BEST describes a testing challenge that specifically applies to a self-learning system?",
"options": [
"a) The system requires regular retraining and therefore requires regular testing",
"b) The system is regularly released which means regression testing is required",
"c) The system changes in such a way that tests that previously passed can fail",
"d) The system requires a human operator, who is also required for testing"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. A system that requires regular retraining cannot be described as self-learning",
"b": "Is not correct. A system that needs to be regularly released cannot be described as self-learning as it requires frequent releases to adapt to change",
"c": "Is correct. Tests on a system that makes changes to itself may start to fail, even if they previously passed",
"d": "Is not correct. A system that requires a human operator is unlikely to be self-learning"
}
},
{
"id": 27,
"question": "Which of the following is NOT likely to be required to test a system for bias?",
"options": [
"a) Involving selected users that are known to be biased",
"b) Measuring how changes in test inputs change test outputs",
"c) Observing how production outputs correlate to production inputs",
"d) Obtaining additional data from other sources"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. Users that exhibit bias are not required to test a system for bias because they do not help to determine whether the behavior of the system is biased",
"b": "Is not correct. Measuring how test inputs change test outputs is important when testing for bias as it can show how the system is biased towards or against particular inputs",
"c": "Is not correct. Measuring how production inputs change production outputs is important when testing for bias because different results might be seen in production",
"d": "Is not correct. Obtaining external data sources can be essential when testing for bias in case the bias is based on 'hidden' variables"
}
},
{
"id": 28,
"question": "Which of the following statements BEST describes how system complexity can create challenges when testing an AI-based system?",
"options": [
"a) Testing for bias may require data that the team does not have",
"b) Manual generation of white-box tests can be difficult",
"c) Determining whether a system is ethical can be subjective",
"d) It can be difficult to find representative data to train a model"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Bias does not usually relate to system complexity",
"b": "Is correct. Understanding how the system works and creating enough tests to achieve effective coverage are challenges caused by the complexity of AI-based systems",
"c": "Is not correct. Ethics is not usually related to AI-based system complexity",
"d": "Is not correct. Difficulty finding representative data to train a model is not related to testing or AI-based system complexity"
}
},
{
"id": 30,
"question": "An ML engineer is trying to find exploitable inputs and then use these inputs to retrain the models to make them immune to these inputs. Which of the following options BEST describes the approach being used by the ML engineer?",
"options": [
"a) Validation",
"b) Adversarial testing",
"c) Data pipeline testing",
"d) Scalability testing"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. This is not validation as the exploitable inputs are being found and retraining is being done using those inputs",
"b": "Is correct. This is an example of adversarial testing",
"c": "Is not correct. There are not any data pipelines that are being tested in this situation",
"d": "Is not correct. No scalability tests are being performed in this example"
}
},
{
"id": 31,
"question": "A test manager has to select test techniques to be used for testing autonomous vehicle software. There are a large number of environmental conditions (>50) that need to be considered for seven vehicle functions. Which of the following test techniques is MOST likely to be used when testing the variety of vehicle functions (VF) in different environmental conditions (EC)?",
"options": [
"a) A/B testing based on the VF and EC parameters",
"b) Combination testing of all the parameters of VF and EC",
"c) Pairwise testing of the relevant values of VF and EC",
"d) Back-to-back testing of relevant VF and EC values"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. A/B testing is not useful for combinatorial testing",
"b": "Is not correct. All combinations would be almost impossible to do in practice, resulting from the near infinite number of potential combinations",
"c": "Is correct. Pairwise testing is best suited to reduce the number of combinations without sacrificing defect detection too much",
"d": "Is not correct. Back-to-back testing is not useful for combinatorial testing"
}
},
{
"id": 32,
"question": "A test manager decides to have a non-AI system with similar functionality to the AI based system under test (SUT) built to support system testing. Which of the following statements is most likely to be CORRECT?",
"options": [
"a) The test manager has chosen back-to-back testing because it helps solve the test oracle problem by using a pseudo-oracle",
"b) The test manager has chosen A/B testing because it helps solve the test oracle problem by using a pseudo-oracle",
"c) The test manager has chosen back-to-back testing because the non-functional requirements of the SUT can be verified against the pseudo-oracle",
"d) The test manager has chosen A/B testing because the non-functional requirements of the SUT can be verified against the pseudo-oracle"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. It is an example of back-to-back testing where the non-AI system is used as a pseudo-oracle",
"b": "Is not correct. With A/B testing, we use a variant of the SUT to compare with the SUT",
"c": "Is not correct. The resources and non-functional characteristics of the pseudo-oracle and the SUT are likely to be different, hence the alternate system cannot be used for non-functional testing",
"d": "Is not correct. It is an example of back-to-back testing and also the resources and non-functional characteristics of the pseudo-oracle and the SUT are likely to be different"
}
},
{
"id": 34,
"question": "System testing of an AI-based system is being planned. It has been suggested that exploratory testing is used in addition to scripted test techniques. Which of the following scenarios is MOST likely to be an example of exploratory testing being performed?",
"options": [
"a) Training data is visualized using tools to look at various aspects of the data",
"b) Tests written using equivalence partitioning during the previous test cycle are being run",
"c) The Google 'ML test checklist' is being used",
"d) ML functional performance metrics are being calculated"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. This is Exploratory Data Analysis which is an exploratory method",
"b": "Is not correct. This is scripted testing",
"c": "Is not correct. This is checklist-based testing",
"d": "Is not correct. Calculating ML functional performance metrics is not exploratory testing"
}
},
{
"id": 36,
"question": "Which one of the following statements is an example of a difference between a test environment for AI-based systems and a test environment for conventional systems?",
"options": [
"a) Test environments for AI-based systems may require some mechanism to determine how a particular decision is made",
"b) Test environments for AI-based systems need simulators and virtual environments whereas conventional systems do not need these",
"c) Test environments for AI-based systems need large amounts of data, whereas conventional systems do not need large amount of data",
"d) GPUs are required for test environments for AI-based systems whereas conventional systems do not need these"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. Explainability mechanism may need to be provided for AI environments",
"b": "Is not correct. Simulators and virtual environments are often required for conventional systems",
"c": "Is not correct. Large amount of data may be required for conventional systems, as well",
"d": "Is not correct. GPUs may be required for many other systems as well, for example, games"
}
},
{
"id": 37,
"question": "In which of the following situations would AI be MOST useful when categorizing new defects?",
"options": [
"a) A small number of defects requires categorization on a new application",
"b) A large number of defects is reported on a small application",
"c) Minimal data is provided in typical defect reports",
"d) A new development team needs to know the most appropriate developer to fix a defect"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Where a small number of defects requires categorization and there is no historical data, AI would not have training data to be used. Genetic algorithms and neural networks can be used for test generation, and even be combined. Clustering produces results applicable to test generation",
"b": "Is correct. Where a large number of defects is reported on a small application there is most likely to be benefit and opportunity to identify duplicates",
"c": "Is not correct. Where minimal data is provided in the defect reports, the usefulness of the tool will be lower, as less data will be available to the algorithm",
"d": "Is not correct. For AI to recommend developers to fix defects it would need to be based on historical data. However, because a new development team is taking over, any recommendations would be inaccurate until historical data is available"
}
},
{
"id": 38,
"question": "Which of the following is an AI tool MOST likely to use as the basis for generating functional test cases?",
"options": [
"a) A test charter",
"b) A picture of the system as a flow chart",
"c) Web server logs",
"d) Crash reports"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. A test charter provides a focus for exploratory testing, and it rarely leads to the generation of test cases, even if an AI-based tool could interpret it",
"b": "Is not correct. A flow chart could be used to generate tests, but it needs to be machine readable, rather than simply a picture",
"c": "Is correct. Web server logs may reflect production use of the system and provide a way for AI to generate tests",
"d": "Is not correct. Crash reports are unlikely to be used as they would describe unexpected failures rather than the functions performed by the application"
}
},
{
"id": 39,
"question": "Which of the following options CORRECTLY states how an AI-based tool can perform optimization of regression test suites?",
"options": [
"a) By analyzing false positive test results",
"b) By analyzing information from previous testing activities",
"c) By using genetic algorithms to create new test cases",
"d) By updating the expected results to counter concept drift"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. The goal of regression testing optimization is to reduce the size, prioritize or augment a test suite, not to reduce false positives",
"b": "Is correct. Optimization of regression test suites is performed by analyzing information on previous test executions",
"c": "Is not correct. Per section 11.4 of the syllabus, regression test optimization is typically performed using previous test execution data. Using genetic algorithms to create new tests is unlikely to achieve the goal of optimizing the regression test suite",
"d": "Is not correct. It is important to consider regression testing and concept drift together, however per section 11.4 concept drift is not related to regression test optimization using AI"
}
},
{
"id": 40,
"question": "Which of the following options CORRECTLY states how an AI-based tool can perform defect prediction?",
"options": [
"a) Using natural language to ask developers where they predict defects will occur",
"b) By analyzing the causes of defects raised on a similar code base",
"c) By analyzing false positive defects",
"d) Scanning code to identify defects using rules."
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. While natural language processing is an AI application, it is not used for defect prediction",
"b": "Is correct. Defect prediction is performed by looking for correlations between code/process/people measures and defects on the same or a similar code base",
"c": "Is not correct. The goal of defect prediction is not to identify defects with a false positive result. To analyze them would have little value",
"d": "Is not correct. Defect prediction does not involve scanning of code using rules. This is static analysis"
}
},
{
"id": 41,
"question": "Which of the following statements about AI is MOST likely to be CORRECT?",
"options": [
"a) An autonomous robot that can act as a worker in a house, shop or office is an example of general AI",
"b) A robot exhibiting similar skill levels as a human is considered to have achieved singularity",
"c) AI-based systems that support a range of test management functions are considered to possess general AI",
"d) An AI-based system that cannot access the internet is said to exhibit narrow AI"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. General AI exhibits intelligent behavior comparable to a human and an autonomous robot that has a wide enough range of skills to perform as a worker in quite different environments is likely to be general AI",
"b": "Is not correct. Singularity is that point when AI has surpassed humans – not when they are at similar levels",
"c": "Is not correct. Performing test management would be considered by most as a single specialized set of tasks and would be classed as narrow AI",
"d": "Is not correct. Narrow AI is being able to perform a single specialized task; it is not relevant whether the AI has access to the internet or not"
}
},
{
"id": 42,
"question": "Which of the following statements is MOST likely to be describing a conventional system (as opposed to an AI-based system)?",
"options": [
"a) This system assigns customers into groups, based on their historical buying patterns",
"b) This system controls the braking of the car dependent on its speed",
"c) This system taught itself to recognize different words by listening to recordings",
"d) This system detects anomalies from its experience of seeing anomalies in many X-rays"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. This system appears to be describing clustering of customers as would be performed by an unsupervised learning system; therefore, it is likely to be AI-based",
"b": "Is correct. There is no evidence that this system implements AI",
"c": "Is not correct. This system appears to have programmed itself by listening to recordings; therefore, it is likely to be AI-based",
"d": "Is not correct. This system appears to be describing a form of supervised learning based on anomalies in X-rays; therefore, it is likely to be AI-based"
}
},
{
"id": 43,
"question": "Which of the following options is NOT a framework used to develop AI-based software?",
"options": [
"a) scikit-learn",
"b) CNTK",
"c) MxNet",
"d) EZPy-AI"
],
"correct_answer": "D",
"k_level": "K1",
"points": "1",
"justification": {
"a": "Is not correct. This is a free software machine learning library for the Python programming language",
"b": "Is not correct. This is the Microsoft Cognitive Toolkit (CNTK), an open-source deep-learning toolkit",
"c": "Is not correct. This is a deep-learning open-source framework used by Amazon for AWS",
"d": "Is correct. There is currently no AI development framework with this name"
}
},
{
"id": 44,
"question": "Which of the following statements is MOST likely to be describing a system that includes the use of AI as a Service (AIaaS)?",
"options": [
"a) The image classifier identifies defects in the gyroscope casings produced by the company and was built using a transfer learning approach so that it is of high accuracy",
"b) The underwater AI-based vehicle steering system uses a third-party obstacle avoidance component based on decision trees and Bayesian optimization",
"c) The contract checker uses an exclusive algorithm for determining levels of legal liability, but the pricing part of the contract is separately checked by a generic contract pricing AI component",
"d) The car rental pricing system is built using AI to support a demand-based algorithm and is hosted in the cloud and made available to all of the company's car rental offices"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. The image classifier seems to be built for a highly specific problem area and is unlikely to be made widely available as AIaaS",
"b": "Is not correct. Although the obstacle avoidance system is provided by a third party, it is highly likely to be embedded within the vehicle and it also appears to be a specialist component, so it is unlikely to be AIaaS",
"c": "Is correct. The exclusive algorithm cannot be AIaaS, however the pricing part is generic and could well be provided as AIaaS",
"d": "Is not correct. The described situation suggests that the system is made available across the web, but only internally to their own car rental offices, so it does not appear to be AIaaS"
}
},
{
"id": 45,
"question": "Which of the following options is the MOST likely use of a regulatory standard/regulation for an AI based system?",
"options": [
"a) Use of ISO/PAS 21448 (SOTIF) for an unmanned autonomous submarine",
"b) Use of GDPR for a bank loan decision-making system",
"c) Use of ISO 26262 for a fully self-driving car",
"d) Use of GDPR for a drone collision-avoidance system"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. SOTIF is for road vehicles, not submarines",
"b": "Is correct. A bank loan decision-making system will work with personal data and this is covered by GDPR",
"c": "Is not correct. A fully self-driving car is likely to include non-deterministic systems, which are not allowed by ISO 26262",
"d": "Is not correct. Drone collision systems are unlikely to include any personal data, which is the focus of GDPR"
}
},
{
"id": 46,
"question": "Which of the following statements about flexibility and adaptability is MOST likely to be CORRECT?",
"options": [
"a) Adaptability is important in unsupervised learning as it allows the ML model to learn from data without labels",
"b) Flexibility is important in supervised learning as it allows the ML model to recognize meaning even when data is poorly labelled",
"c) Adaptability is important in reinforcement learning systems as such systems must adapt themselves to optimize their reward function",
"d) Flexibility is important in self-learning systems as it allows them to adapt themselves to unexpected changes in their environment"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. In unsupervised learning the system is expected to learn from unlabeled data and adaptability as a characteristic is associated with a system being changed rather than changing itself",
"b": "Is not correct. Supervised learning is dependent on labelled data, but flexibility is not a characteristic associated with reading poorly labelled data",
"c": "Is not correct. Reinforcement learning systems are expected to optimize the reward function, and adaptability as a characteristic is associated with a system that can be changed rather than a system that changes itself",
"d": "Is correct. Flexibility is a characteristic associated with the ability of systems to be used in contexts outside the original requirements"
}
},
{
"id": 47,
"question": "Which of the following statements about the evolution of AI-based systems is CORRECT?",
"options": [
"a) Self-learning AI-based systems that continue to work in the same operational environment are not expected to change their behavior",
"b) Side effects are not a concern for AI-based systems that change themselves to cope with changes in their environment",
"c) AI-based systems must change themselves to cope with changes in system requirements during development",
"d) Self-learning systems that physically interact with people, need to be managed to ensure system changes are not dangerous"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Self-learning systems that work in the same operational environment could still optimize themselves and so change their behavior",
"b": "Is not correct. AI-based systems that change themselves to adapt to changes in their environment could still cause negative side-effects",
"c": "Is not correct. Evolution as a characteristic is not about system development, but about how an AI-based system changes after deployment in its operational environment",
"d": "Is correct. If the self-learning system physically interacts with people, then any changes it makes to itself could potentially harm people"
}
},
{
"id": 49,
"question": "Which of the following statements about the transparency, interpretability and explainability for AI based systems is MOST likely to be CORRECT?",
"options": [
"a) The search engine algorithm used for training students on search engine technology was selected as it was considered to be the most explainable",
"b) The loan system was considered transparent as for each loan application it was clear to users how it decided whether to approve agree to the loan or not",
"c) The doctors were happy with the level of interpretability of the rule-based oncology system as they could understand how the given rules were implemented in the system",
"d) The drone operators were happy with the transparency of the control system as they felt that the system responded correctly to their instructions"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Understanding the underlying technology is considered to be interpretability rather than explainability",
"b": "Is not correct. Understanding how the loan system made a decision is more likely to be explainability",
"c": "Is correct. Understanding the technology underlying the oncology system is likely to be interpretability",
"d": "Is not correct. The system responding correctly is simply referring to functional correctness – not transparency, interpretability or explainability"
}
},
{
"id": 50,
"question": "Which of the following BEST describes the unsupervised approach to machine learning?",
"options": [
"a) Data and labels are analyzed to group them into clusters",
"b) A system automatically learns by satisfying a fitness function",
"c) A system teaches itself to meet objectives based on rewards",
"d) Data are analyzed to identify patterns in the data"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. With unsupervised machine learning there are no labels",
"b": "Is not correct. A reinforcement learning system uses a fitness function to drive its learning",
"c": "Is not correct. A self-learning reinforcement system uses rewards to teach itself",
"d": "Is correct. With unsupervised machine learning there are no labels and patterns are determined from the data itself"
}
},
{
"id": 52,
"question": "Which of the following statements is LEAST likely to be used as a rationale for selecting an ML algorithm?",
"options": [
"a) The amount of memory available for training the translation system in the mobile device",
"b) The maximum time allowed for retraining the embedded health monitoring system",
"c) The number of measured characteristics used as the basis for a sports prediction system",
"d) The number of expected clusters of customer types for a retail marketing system"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. It is unlikely that the ML algorithm will be trained on the mobile device",
"b": "Is not correct. If the health monitoring system is embedded and needs to be retrained, we would likely set a maximum duration for this as the patient must either attend a facility for it to happen or be unmonitored while it is being done remotely",
"c": "Is not correct. The number of features expected to be used by the model will affect the choice of model",
"d": "Is not correct. Knowing the number of classes for clustering is a useful input when choosing the ML model"
}
},
{
"id": 53,
"question": "Which of the following statements about the test dataset is CORRECT?",
"options": [
"a) The test dataset comes from a source totally different from the validation dataset",
"b) The format of the test dataset is different from the format of the validation dataset",
"c) The test dataset can be used as the validation dataset but not as the training dataset",
"d) The test dataset should not be exposed to the model during the training process"
],
"correct_answer": "D",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. The test dataset typically comes from the same source as the validation dataset",
"b": "Is not correct. The test dataset and the validation dataset should both have the same format",
"c": "Is not correct. The test dataset is used for neither training nor for validation",
"d": "Is correct. Incorporation of the test dataset in training will lead to bias in the evaluation of model"
}
},
{
"id": 54,
"question": "Which of the following options would MOST likely be a reason for poor labeling of data?",
"options": [
"a) Insufficient data",
"b) Synthetic data",
"c) Translation errors",
"d) Algorithm chosen for the ML model"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Having insufficient data doesn't determine the likelihood of data mislabeling",
"b": "Is not correct. Synthetic data does not lead to poor labeling",
"c": "Is correct. Translation errors may lead to correctly labeled data in one language being mislabeled in a second language",
"d": "Is not correct. Data labeling is not related to the choice of ML algorithm"
}
},
{
"id": 55,
"question": "An ML team asserts that the ML functional performance metrics based on validation data collected as part of training an ML model are sufficient for determining the quality of the system. Which of the following statements is a valid reason to show that this may be INCORRECT?",
"options": [
"a) The ML functional performance metrics may not work well if the ground truth is not correct",
"b) The ML functional performance metrics cannot be used for measuring quality as these are tool dependent",
"c) Validation data is biased resulting in skewed functional performance measurements",
"d) Data may need to be transformed prior to training the model, so the functional performance measurements do not reflect the quality of the model"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. The model quality depends on labeling quality. Wrong labeling leads to wrong ground truth. For incorrectly labeled data the functional performance measurements may indicate a good quality model but it would be producing wrong outputs",
"b": "Is not correct. Values of the ML functional performance metrics are not dependent on the tool used to measure them",
"c": "Is not correct. Validation data may or may not have bias in it",
"d": "Is not correct. Data transformation is often performed and it does not necessarily impact the quality of the model. Wrong transformations may result in data quality issues and subsequent model quality issues, but a general statement relating data transformation to poor model quality cannot be made"
}
},
{
"id": 56,
"question": "Which of the following options regarding benchmark suites, BEST completes the following statement?",
"options": [
"a) ML benchmark suites help choose a particular model by indicating the time it takes to train",
"b) ML benchmark suites help choose a particular model by indicating the time it takes to test",
"c) ML benchmark suites help choose a particular model by indicating the time it takes to validate",
"d) ML benchmark suites help choose a particular model by indicating the time it takes to deploy"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. Benchmark suites indicate the training time",
"b": "Is not correct. Benchmark suites do not indicate the test time",
"c": "Is not correct. Benchmark suites do not indicate the validation time",
"d": "Is not correct. Benchmark suites do not indicate the deployment time"
}
},
{
"id": 57,
"question": "Which of the following test levels provides the BEST choice for performing bias-related testing?",
"options": [
"a) Component testing",
"b) Input data testing",
"c) System testing",
"d) Model testing"
],
"correct_answer": "B",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct. Component testing is applied to non-model components and is conventional testing",
"b": "Is correct. Input data testing is performed for testing bias, among other things",
"c": "Is not correct. While independent bias testing can be also done as part of system testing, ideally it should be done before training the model",
"d": "Is not correct. Model testing is used to check that the model alone meets any specified requirements, e.g., ML functional performance criteria and non-functional criteria"
}
},
{
"id": 58,
"question": "Which of the following statements about the documentation of AI components is CORRECT?",
"options": [
"a) Because non-functional requirements are not a part of the documentation of an AI component, non-functional testing cannot be performed",
"b) White-box testing of the interaction of AI and non-AI components is not possible if the interfaces are a part of the documentation",
"c) Checking for bias in the data is made possible by including the source of the data in the documentation",
"d) Self-adapting AI systems require each change made by the system to be fully documented"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct because non-functional requirements are a part of the documentation",
"b": "Is not correct because the documentation of interfaces supports the white-box testing of component interactions",
"c": "Is correct because bias testing on the data on the basis of its source and meta-data is possible",
"d": "Is not correct because self-adapting AI systems rarely provide the documentation of the changes they make to themselves"
}
},
{
"id": 59,
"question": "An ecommerce application recommends products to the user on the basis of their purchase history and purchases being made by other people using the site, among other factors. As the tester, you have been asked to measure the current conversion rate of recommendations to compare with the original required conversion rate. Which of the following is the MOST likely underlying reason for this request?",
"options": [
"a) AI effect",
"b) Adversarial attacks",
"c) Concept drift",
"d) Lack of fairness"
],
"correct_answer": "C",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is not correct as AI effect refers to change in the perception of what is AI over time and not the accuracy of a given solution",
"b": "Is not correct. An adversarial attack is where an attacker subtly perturbs valid inputs that are passed to the trained model to cause it to provide incorrect predictions. In this case the data is being gathered from the purchase history and not direct inputs from the users. Hence the chances of this being an adversarial attack are low",
"c": "Is correct. This is an example of a model providing reduced accuracy because of changes in customer behavior",
"d": "Is not correct. Fairness is using positively biased data for training which is not true in this case as we are dealing with a live system which was performing well in the past"
}
},
{
"id": 60,
"question": "Which of the following options is MOST likely to be relevant when testing a system's autonomy?",
"options": [
"a) Testing over a sustained period of time",
"b) Testing the accuracy of system predictions",
"c) Testing how quickly the system can adapt",
"d) Static analysis of training data"
],
"correct_answer": "A",
"k_level": "K2",
"points": "1",
"justification": {
"a": "Is correct. This is relevant to testing a system's autonomy because it may be required to test over a specific period of time to check how often the system requires intervention",
"b": "Is not correct. A system can be autonomous and very inaccurate",
"c": "Is not correct. A system can be autonomous and not adaptable",
"d": "Is not correct. Static analysis of training data is unlikely to be related to testing for autonomy"
}
},
{
"id": 61,
"question": "Which of the following statements demonstrates how non-deterministic systems can create challenges in testing?",
"options": [
"a) Non-deterministic systems produce a different result each time, normally preventing the generation of expected results",
"b) Non-deterministic systems are difficult to test because they are not explainable, which hinders the investigation and fixing of defects",
"c) A system that is given the same inputs and initial state may produce different outputs, so can require multiple test executions",
"d) Non-deterministic systems are usually biased and require additional tests to allow this bias to be excluded from the results"
],
"correct_answer": "C",
"k_level": "K2",