Skip to content

Commit 6926321

Browse files
committed
Fixed confusion: missing match & multi-pos
1 parent b1754b0 commit 6926321

10 files changed

Lines changed: 76 additions & 243 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
<template>
22
<GButton title="Inspect">
3-
<i class="fa fa-search"></i>
3+
<i class="fa fa-info"></i>
44
</GButton>
55
</template>

server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/LayerComparison.kt

Lines changed: 15 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -3,32 +3,7 @@ package org.ivdnt.galahad.evaluation.comparison
33
import org.ivdnt.galahad.annotations.Layer
44
import org.ivdnt.galahad.annotations.Term
55
import org.ivdnt.galahad.export.DocumentExport
6-
7-
/** An iterator that saves the current item. */
8-
class TermIterator : Iterator<Term?> {
9-
var current: Term? = null
10-
private set
11-
var chars: Int = 0
12-
private set
13-
14-
private val iter: Iterator<Term?>
15-
16-
constructor(iter: Iterator<Term?>) {
17-
this.iter = iter
18-
next()
19-
}
20-
21-
override fun hasNext(): Boolean = iter.hasNext()
22-
23-
override fun next(): Term? {
24-
// add length of now previous term
25-
if (current != null) {
26-
chars += (current as Term).token.length
27-
}
28-
current = if (iter.hasNext()) iter.next() else null
29-
return current
30-
}
31-
}
6+
import org.ivdnt.galahad.util.TermIterator
327

338
/**
349
* Match the [Layer.terms] of two layers based on their [WordForm] position (offset and length)
@@ -37,85 +12,44 @@ class TermIterator : Iterator<Term?> {
3712
* (Still, aggregating these matches is up to the (corpus/documents) evaluation classes)
3813
*/
3914
open class LayerComparison(
40-
private val hypothesis: Layer,
41-
private val reference: Layer,
15+
hypothesis: Layer,
16+
reference: Layer,
4217
private val filter: LayerFilter? = null,
4318
) {
4419
constructor(export: DocumentExport) : this(export.layer, export.sourceLayer)
4520

46-
val matches: MutableList<TermComparison> = ArrayList()
47-
private val hypoIter: TermIterator = TermIterator(hypothesis.terms.iterator())
48-
private val refIter: TermIterator = TermIterator(reference.terms.iterator())
21+
val matches: MutableList<TermComparison> = mutableListOf()
22+
private val hypoIter = TermIterator(hypothesis.terms.iterator())
23+
private val refIter = TermIterator(reference.terms.iterator())
4924

50-
/** Iterate through the terms of both layers simultaneously and compare them. */
5125
init {
52-
// While non null, compare
26+
// Iterate through the terms of both layers simultaneously while non-null and compare them.
5327
while (hypoIter.current != null && refIter.current != null) {
5428
if (hypoIter.chars == refIter.chars) {
5529
match(TermComparison(hypoIter.current!!, refIter.current!!))
5630
hypoIter.next()
5731
refIter.next()
5832
} else {
33+
// Mismatch: advance the iterator that is behind.
5934
if (refIter.chars < hypoIter.chars) {
60-
noMatch(refIter.current!!)
35+
match(TermComparison(Term.EMPTY, refIter.current!!))
6136
refIter.next()
6237
} else {
38+
match(TermComparison(hypoIter.current!!, Term.EMPTY))
6339
hypoIter.next()
6440
}
6541
}
6642
}
67-
// Only one or both are null. So refIter.current can be non-null.
68-
refIter.current?.let { noMatch(it) }
69-
// And add any remaining terms
70-
refIter.forEachRemaining { t -> noMatch(t!!) }
43+
// Either one, or both are null. Add any remaining terms.
44+
refIter.current?.let { match(TermComparison(Term.EMPTY, it)) }
45+
hypoIter.current?.let { match(TermComparison(it, Term.EMPTY)) }
46+
refIter.forEachRemaining { match(TermComparison(Term.EMPTY, it!!)) }
47+
hypoIter.forEachRemaining { match(TermComparison(it!!, Term.EMPTY)) }
7148
}
7249

7350
private fun match(comp: TermComparison) {
7451
if (filter?.filter(comp) != false) {
7552
matches.add(comp)
7653
}
7754
}
78-
79-
protected open fun noMatch(t: Term) {
80-
if (filter?.refTermFilter?.filter(t) != false) {
81-
matches.add(TermComparison(Term.EMPTY, t))
82-
}
83-
}
84-
85-
companion object {
86-
// It will mostly be a single punctuation mark, but this is more generic.
87-
val PUNCT: Regex = Regex("""\W$""")
88-
89-
private fun truncatedPCMatch(comp: TermComparison): Boolean {
90-
return truncatedPCMatch(comp.hyp, comp.ref)
91-
}
92-
93-
private fun truncatedPCMatch(t1: Term, t2: Term): Boolean {
94-
val a: String = t1.token
95-
val b: String = t2.token
96-
return truncatePC(a) == b || truncatePC(b) == a
97-
}
98-
99-
/** Truncate any punctuation at the end of the string. */
100-
fun truncatePC(str: String): String {
101-
return PUNCT.replace(str, "")
102-
}
103-
}
10455
}
105-
106-
class DocumentLayerComparison(
107-
private val hypothesisLayer: Layer,
108-
private val referenceLayer: Layer,
109-
private val layerFilter: LayerFilter? = null,
110-
) : LayerComparison(hypothesisLayer, referenceLayer, layerFilter) {
111-
112-
override fun noMatch(t: Term) {
113-
if (layerFilter?.refTermFilter?.filter(t) != false) {
114-
matches.add(TermComparison(hyp = Term.EMPTY, ref = t))
115-
}
116-
}
117-
118-
init {
119-
// matches.addAll(referenceTermsWithoutMatches.map { TermComparison(hypoTerm = Term.EMPTY, refTerm = it) })
120-
}
121-
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermComparison.kt

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,7 @@ data class TermComparison(
77
val hyp: Term, // Hypothesis
88
val ref: Term, // True reference
99
) {
10-
/**
11-
* Apply a removal regex transformation to the annotation before comparing. E.g. removing _ from lemmas.
12-
*/
10+
/** Apply a removal regex transformation to the annotation before comparing. E.g. removing _ from lemmas. */
1311
fun equal(annotation: Annotation, regex: Regex): Boolean {
1412
var refAnnot: String? = ref.annotations[annotation]
1513
var hypAnnot: String? = hyp.annotations[annotation]
@@ -31,7 +29,7 @@ data class TermComparison(
3129
}
3230

3331
companion object {
34-
const val MISSING_MATCH: String = "Missing match"
32+
const val MISSING_MATCH: String = "MISSING_MATCH"
3533

3634
private fun equal(ref: String?, hyp: String?): Boolean {
3735
if (ref == null) return true

server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermFilter.kt

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,6 @@ package org.ivdnt.galahad.evaluation.comparison
22

33
import org.ivdnt.galahad.annotations.Annotation
44
import org.ivdnt.galahad.annotations.Term
5-
import org.ivdnt.galahad.evaluation.confusion.MULTIPLE_POS
6-
import org.ivdnt.galahad.evaluation.confusion.OTHER_POS
7-
import org.ivdnt.galahad.evaluation.confusion.OTHER_POS_REGEX
85

96
interface TermFilter {
107
fun filter(term: Term): Boolean
@@ -30,13 +27,11 @@ class BasicTermFilter(private val annotation: Annotation, private val value: Str
3027
class HeadGroupTermFilter(private val annotation: Annotation, private val value: String) : TermFilter {
3128
// Filter methods
3229
private val multiFilter = { t: Term -> t.isMulti(annotation) }
33-
private val otherFilter = { t: Term -> t.annotations[annotation]?.contains(Regex(OTHER_POS_REGEX)) == true }
3430
private val singleFilter = { t: Term -> t.annotationHeadOrMissing(annotation) == value }
3531

3632
// Decide which filter to use on class initialization
3733
private val filterFunc: (Term) -> Boolean = when {
38-
(value.uppercase() == MULTIPLE_POS) -> multiFilter
39-
(value.uppercase() == OTHER_POS) -> otherFilter
34+
(value.uppercase() == "MULTIPLE") -> multiFilter
4035
else -> singleFilter
4136
}
4237

server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt

Lines changed: 0 additions & 119 deletions
This file was deleted.

server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/ConfusionTypes.kt

Lines changed: 0 additions & 6 deletions
This file was deleted.

server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/DocumentConfusion.kt

Lines changed: 24 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2,40 +2,45 @@ package org.ivdnt.galahad.evaluation.confusion
22

33
import com.fasterxml.jackson.annotation.JsonValue
44
import org.ivdnt.galahad.annotations.Annotation
5+
import org.ivdnt.galahad.annotations.Term
56
import org.ivdnt.galahad.evaluation.EvaluationEntry
67
import org.ivdnt.galahad.evaluation.comparison.LayerComparison
8+
import org.ivdnt.galahad.evaluation.comparison.TermComparison
79

810
/** Annotation confusion table of a document for two different tagger layers. */
911
class DocumentConfusion(
10-
@JsonValue
11-
val confusion: Map<Annotation, Map<String, Map<String, EvaluationEntry>>>
12+
@JsonValue val confusion: Map<Annotation, Map<String, Map<String, EvaluationEntry>>>
1213
) {
1314
companion object {
1415
private val ANNOTATIONS = arrayOf(Annotation.POS, Annotation.UPOS, Annotation.NER, Annotation.DEPREL)
1516

16-
fun create(layerComparison: LayerComparison): DocumentConfusion = DocumentConfusion(
17-
buildMap<Annotation, MutableMap<String, MutableMap<String, EvaluationEntry>>> {
18-
layerComparison.matches.forEach { termComparison ->
19-
ANNOTATIONS.forEach { annotation ->
20-
val reference = termComparison.ref.annotationHeadOrMissing(annotation)
21-
val hypothesis = termComparison.hyp.annotationHeadOrMissing(annotation)
22-
val entry = EvaluationEntry(1, mutableListOf(termComparison))
17+
fun create(layerComparison: LayerComparison): DocumentConfusion =
18+
DocumentConfusion(buildMap<Annotation, MutableMap<String, MutableMap<String, EvaluationEntry>>> {
19+
layerComparison.matches.forEach { comparison ->
20+
ANNOTATIONS.forEach { annotation ->
21+
val reference = comparison.ref.format(annotation)
22+
val hypothesis = comparison.hyp.format(annotation)
23+
val entry = EvaluationEntry(1, mutableListOf(comparison))
2324

24-
val entryMap = mutableMapOf(hypothesis to entry)
25-
val groupedMap = mutableMapOf(reference to entryMap)
25+
val entryMap = mutableMapOf(hypothesis to entry)
26+
val groupedMap = mutableMapOf(reference to entryMap)
2627

27-
merge(annotation, groupedMap) { oldAnnotationMap, _ ->
28-
oldAnnotationMap.apply {
29-
merge(reference, entryMap) { oldEntry, _ ->
30-
oldEntry.apply {
31-
merge(hypothesis, entry) { e1, e2 -> EvaluationEntry.add(e1, e2) }
32-
}
28+
merge(annotation, groupedMap) { oldAnnotationMap, _ ->
29+
oldAnnotationMap.apply {
30+
merge(reference, entryMap) { oldEntry, _ ->
31+
oldEntry.apply {
32+
merge(hypothesis, entry) { e1, e2 -> EvaluationEntry.add(e1, e2) }
3333
}
3434
}
3535
}
3636
}
3737
}
38-
}
39-
)
38+
}
39+
})
40+
41+
/** Format annotation for confusion matrix. Handle empty terms, empty annotations, and multiple analyses. */
42+
private fun Term.format(annotation: Annotation): String = if (this == Term.EMPTY) TermComparison.MISSING_MATCH
43+
else if (isMulti(annotation)) "MULTIPLE"
44+
else annotationHeadOrMissing(annotation)
4045
}
4146
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/metrics/MetricsSettings.kt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore
44
import com.fasterxml.jackson.annotation.JsonProperty
55
import org.ivdnt.galahad.annotations.Annotation
66
import org.ivdnt.galahad.annotations.Term
7-
import org.ivdnt.galahad.evaluation.comparison.LayerComparison.Companion.truncatePC
87
import org.ivdnt.galahad.evaluation.comparison.TermComparison
98
import org.ivdnt.galahad.evaluation.frequency.TokenFrequency
109

@@ -207,7 +206,7 @@ class FrequencyMetricsSettings(
207206

208207
override fun groupBy(term: Term): String {
209208
val freq = tokenFrequency.getFrequency(term.token.lowercase())
210-
val truncatedFreq = tokenFrequency.getFrequency(truncatePC(term.token.lowercase()))
209+
val truncatedFreq = tokenFrequency.getFrequency(term.token.lowercase())
211210
return if (freq == 0) {
212211
truncatedFreq.toString()
213212
} else {

0 commit comments

Comments
 (0)