ITS-MOS-Agreement/mos_agreement/model.py at main · NTIA/ITS-MOS-Agreement · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import os
import yaml

import numpy as np
import pandas as pd
import scipy.integrate as integrate
import scipy.stats as stats

from scipy.special import binom


def mos_data_bounds(mos_var, average_vote_var, n_v, s_L=1, s_H=5, n_s=5):
    """
    mos_data_bounds

    Data-driven RMSE and Correlation bounds for datasets that provide a MOS value
    and an associated vote variance for each file.

    The variance of the quality distribution is estimated as
    ``mos_var - average_vote_var / n_v`` and the bounds are then computed by
    ``quality_distribution_bounds``.

    Parameters
    ----------
    mos_var : float
        Variance estimate of MOS distribution.
    average_vote_var : float
        Estimate of the average vote variance across the dataset.
    n_v : int, float
        Average number of votes per file in the dataset.
    s_L : int, optional
        Lower value of rating scale, by default 1. Not used by this function.
    s_H : int, optional
        Highest value of the rating scale, by default 5. Not used by this function.
    n_s : int, optional
        Number of values in the rating scale, by default 5. Not used by this
        function.

    Returns
    -------
    rmse: float
        Estimate of RMSE bound between MOS and true quality.
    corr: float
        Estimate of Correlation bound between MOS and true quality.
    """
    # Portion of the MOS variance that is attributed to voting noise.
    vote_noise_var = average_vote_var / n_v
    rmse_bound, corr_bound = quality_distribution_bounds(
        quality_var=mos_var - vote_noise_var,
        expected_vote_var=average_vote_var,
        n_v=n_v,
    )
    return rmse_bound, corr_bound


def mos_data_binovotes_bounds(mos_mean, mos_var, n_v, s_L=1, s_H=5, n_s=5):
    """
    mos_data_binovotes_bounds

    Data-driven RMSE and Correlation bounds for datasets that contain only MOS
    values, with no per-file vote variance.

    The missing vote variance is filled in by assuming a BinoVotes voting model:
    ``mos_data_binovotes_average_vote_var`` supplies the average vote variance,
    which is then handed to ``mos_data_bounds`` together with the MOS variance.

    Parameters
    ----------
    mos_mean : float
        Mean estimate of MOS distribution.
    mos_var : float
        Variance estimate of MOS distribution.
    n_v : int, float
        Average number of votes per file in the dataset.
    s_L : int, optional
        Lower value of rating scale, by default 1.
    s_H : int, optional
        Highest value of the rating scale, by default 5.
    n_s : int, optional
        Number of values in the rating scale, by default 5.

    Returns
    -------
    rmse: float
        Estimate of RMSE bound between MOS and true quality.
    corr: float
        Estimate of Correlation bound between MOS and true quality.
    """
    # Step 1: model-based stand-in for the unavailable vote variance.
    modeled_vote_var = mos_data_binovotes_average_vote_var(
        mos_mean=mos_mean,
        mos_var=mos_var,
        n_v=n_v,
        s_L=s_L,
        s_H=s_H,
        n_s=n_s,
    )
    # Step 2: reuse the generic MOS-data bounds with that variance.
    rmse_bound, corr_bound = mos_data_bounds(
        mos_var=mos_var,
        average_vote_var=modeled_vote_var,
        n_v=n_v,
    )
    return rmse_bound, corr_bound


def mos_data_binovotes_average_vote_var(mos_mean, mos_var, n_v, s_L=1, s_H=5, n_s=5):
    """
    mos_data_binovotes_average_vote_var

    Average vote variance implied by MOS statistics under the BinoVotes voting
    model.

    Computes ``n_v / (n_v * (n_s - 1) - 1)`` times
    ``(mos_mean - s_L) * (s_H - mos_mean) - mos_var``.

    Parameters
    ----------
    mos_mean : float
        Mean estimate of MOS distribution.
    mos_var : float
        Variance estimate of MOS distribution.
    n_v : int, float
        Average number of votes per file in the dataset.
    s_L : int, optional
        Lower value of rating scale, by default 1.
    s_H : int, optional
        Highest value of the rating scale, by default 5.
    n_s : int, optional
        Number of values in the rating scale, by default 5.

    Returns
    -------
    average_vote_var: float
        Estimate of average vote variance across the dataset.

    Notes
    -----
    The expression divides by ``n_v * (n_s - 1) - 1``, so inputs for which that
    quantity is zero (e.g. ``n_v=1`` with ``n_s=2``) are not supported.
    """
    # Spread term built from the distance of the MOS mean to both scale ends.
    spread = (mos_mean - s_L) * (s_H - mos_mean) - mos_var
    # Correction factor relating MOS-level statistics to vote-level variance.
    correction = n_v / (n_v * (n_s - 1) - 1)
    return correction * spread


def quality_distribution_bounds(
    quality_var,
    expected_vote_var,
    n_v,
):
    """
    quality_distribution_bounds

    RMSE and Correlation bounds computed from the variance of the quality
    distribution and an expected vote variance.

    Note that this function requires knowledge of the true quality distribution,
    which is not available in real subjective experiments. It is primarily useful
    for simulations where the true quality distribution is known, or when
    estimates of the quality distribution can be made through the MOS
    distribution.

    Parameters
    ----------
    quality_var : float
        Variance value of quality distribution.
    expected_vote_var : float
        Expected value of voting variance under a voting model across the entire
        voting scale.
    n_v : int, float
        Average number of votes per file in the dataset.

    Returns
    -------
    rmse: float
        RMSE bound between MOS and true quality,
        ``sqrt(expected_vote_var / n_v)``.
    corr: float
        Correlation bound between MOS and true quality,
        ``sqrt(quality_var / (quality_var + expected_vote_var / n_v))``.
    """
    # Vote variance averaged over n_v votes; both bounds are built on it.
    mos_noise_var = expected_vote_var / n_v
    rmse = np.sqrt(mos_noise_var)
    corr = np.sqrt(quality_var / (quality_var + mos_noise_var))
    return rmse, corr


def quality_distribution_binovotes_bounds(
    quality_mean, quality_var, n_v, s_L=1, s_H=5, n_s=5
):
    """
    quality_distribution_binovotes_bounds

    RMSE and Correlation bounds computed from the mean and variance of the
    quality distribution under the BinoVotes voting model.

    Note that this function requires knowledge of the true quality distribution,
    which is not available in real subjective experiments. It is primarily useful
    for simulations where the true quality distribution is known, or when
    estimates of the quality distribution can be made through the MOS
    distribution.

    Parameters
    ----------
    quality_mean : float
        Mean value of quality distribution.
    quality_var : float
        Variance value of quality distribution.
    n_v : int, float
        Average number of votes per file in the dataset.
    s_L : int, optional
        Lower value of rating scale, by default 1.
    s_H : int, optional
        Highest value of rating scale, by default 5.
    n_s : int, optional
        Number of values in the rating scale, by default 5.

    Returns
    -------
    rmse: float
        RMSE bound between MOS and true quality.
    corr: float
        Correlation bound between MOS and true quality.
    """
    # Mean squared error between MOS and quality: the spread term
    # (quality_mean - s_L)(s_H - quality_mean) - quality_var divided by
    # n_v * (n_s - 1).
    mse = ((quality_mean - s_L) * (s_H - quality_mean) - quality_var) / (
        n_v * (n_s - 1)
    )
    return np.sqrt(mse), np.sqrt(quality_var / (quality_var + mse))


# ---------------------
# BinoVotes Simulations
# ---------------------
def binovotes(quality, n_v, step=1, s_L=1, s_H=5, seed=None):
    """
    binovotes

    Generate votes according to BinoVotes model.

    Each vote is ``s_L + step * B`` where ``B`` is a binomial draw whose number
    of trials is one less than the number of points on the rating scale and whose
    success probability is the quality value mapped linearly from
    ``[s_L, s_H]`` onto ``[0, 1]``.

    Parameters
    ----------
    quality : float or array_like
        Quality value(s) the votes converge to. Scalars and sequences are
        converted with ``np.asarray``. Multi-dimensional input is not handled
        specially; one-dimensional input is the expected case.
    n_v : int
        Number of votes per file.
    step : int, optional
        Step size of rating scale, by default 1.
    s_L : int, optional
        Lower value of rating scale, by default 1.
    s_H : int, optional
        Highest value of the rating scale, by default 5.
    seed : int, optional
        Seed for random number generation, by default None. When given, the
        same seed and arguments always produce the same votes. NumPy's global
        legacy random state is not modified.

    Returns
    -------
    np.ndarray
        Array of votes with shape ``(n_v, quality.size)``; row ``i`` holds the
        ``i``-th vote for every quality value.
    """
    # Seed the generator that actually draws the votes. Seeding the legacy
    # global state via np.random.seed does not affect np.random.default_rng(),
    # so the seed must be passed here for results to be reproducible.
    rng = np.random.default_rng(seed)

    # Accept plain Python scalars / lists as well as arrays (``.size`` is needed
    # below to shape the output).
    quality = np.asarray(quality)

    # Define the binomial n value based off of the given scale
    scale = np.arange(s_L, (s_H + step), step)
    n_bino = len(scale) - 1

    # Convert from quality scale to probability of successful trial scale
    p_bino = (quality - s_L) / (s_H - s_L)
    # BinoVotes
    votes = s_L + step * rng.binomial(n_bino, p_bino, (n_v, quality.size))
    return votes


def binomos(mos=True, *args, **kwargs):
    """
    binomos

    Convenience wrapper that turns BinoVotes output into MOS scores.

    All remaining positional and keyword arguments are forwarded unchanged to
    ``binovotes``. Note that ``mos`` is the first positional parameter, so any
    arguments intended for ``binovotes`` that are passed positionally must come
    after it.

    Parameters
    ----------
    mos : bool, optional
        When true, average the votes along axis 0 and return the resulting MOS
        scores; otherwise return the individual votes, by default True.
    *args
        Positional arguments forwarded to ``binovotes``.
    **kwargs
        Keyword arguments forwarded to ``binovotes``.

    Returns
    -------
    np.array
        Generated MOS scores or individual votes.
    """
    raw_votes = binovotes(*args, **kwargs)
    if not mos:
        return raw_votes
    # Collapse the votes axis (axis 0) to obtain one mean score per column.
    return np.mean(raw_votes, 0)