Skip to content

Commit dfb2ff5

Browse files
committed
LayerComparison without offset
1 parent 49b70a6 commit dfb2ff5

9 files changed

Lines changed: 88 additions & 196 deletions

File tree

client/src/api/evaluation.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ export function getDistribution(corpus: UUID, hypothesis: string): Promise<Distr
4444
* @param reference Tagger job name as reference layer.
4545
*/
4646
export function getConfusion(corpus: UUID, hypothesis: string, reference: string): Promise<ConfusionResponse> {
47-
return axios.get(confusionPath(corpus, hypothesis), { params: { reference } })
47+
return axios.get(confusionPath(corpus), { params: { hypothesis, reference } })
4848
}
4949

5050
/**
@@ -54,7 +54,7 @@ export function getConfusion(corpus: UUID, hypothesis: string, reference: string
5454
* @param reference Tagger job name as reference layer.
5555
*/
5656
export function getMetrics(corpus: UUID, hypothesis: string, reference: string): Promise<MetricsResponse> {
57-
return axios.get(metricsPath(corpus, hypothesis), { params: { reference } })
57+
return axios.get(metricsPath(corpus), { params: { hypothesis, reference } })
5858
}
5959

6060
/**

server/src/main/kotlin/org/ivdnt/galahad/annotations/LayerAligner.kt

Lines changed: 0 additions & 28 deletions
This file was deleted.
Lines changed: 70 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,29 @@
11
package org.ivdnt.galahad.evaluation.comparison
22

3-
import com.fasterxml.jackson.annotation.JsonIgnore
3+
import org.ivdnt.galahad.annotations.Annotation
44
import org.ivdnt.galahad.annotations.Layer
55
import org.ivdnt.galahad.annotations.Term
66
import org.ivdnt.galahad.export.DocumentExport
77

8-
fun Iterator<Term>.nextOrNull(): Term? = if (hasNext()) next() else null
8+
/** An iterator that saves the current item. */
9+
class SmartIterator<T> : Iterator<T?> {
10+
var current: T? = null
11+
private set
912

10-
// Some hardcoded punctuation
11-
val PUNCTUATION: Array<Char> = arrayOf(',', '.', '?', '!', ':', ';', ')', '(', '\'', '"')
13+
private val iter: Iterator<T>
14+
15+
constructor(iter: Iterator<T>) {
16+
this.iter = iter
17+
next()
18+
}
19+
20+
override fun hasNext(): Boolean = iter.hasNext()
21+
22+
override fun next(): T? {
23+
current = if (iter.hasNext()) iter.next() else null
24+
return current
25+
}
26+
}
1227

1328
/**
1429
* Match the [Layer.terms] of two layers based on their [WordForm] position (offset and length)
@@ -17,142 +32,88 @@ val PUNCTUATION: Array<Char> = arrayOf(',', '.', '?', '!', ':', ';', ')', '(', '
1732
* (Still, aggregating these matches is up to the (corpus/documents) evaluation classes)
1833
*/
1934
open class LayerComparison(
20-
hypothesisLayer: Layer,
21-
referenceLayer: Layer,
22-
private val layerFilter: LayerFilter? = null,
35+
private val hypothesis: Layer,
36+
private val reference: Layer,
37+
private val filter: LayerFilter? = null,
2338
) {
2439
constructor(export: DocumentExport) : this(export.layer, export.sourceLayer)
2540

26-
@JsonIgnore
2741
val matches: MutableList<TermComparison> = ArrayList()
28-
29-
@JsonIgnore
30-
val referenceTermsWithoutMatches: MutableList<Term> = ArrayList()
31-
32-
@JsonIgnore
33-
val hypothesisTermsWithoutMatches: MutableList<Term> = ArrayList()
34-
35-
@JsonIgnore
36-
private val hypoIter: Iterator<Term> = hypothesisLayer.terms.iterator()
37-
38-
@JsonIgnore
39-
private val refIter: Iterator<Term> = referenceLayer.terms.iterator()
40-
41-
@JsonIgnore
42-
private var currentHypoTerm: Term? = Term.EMPTY
43-
44-
@JsonIgnore
45-
private var currentRefTerm: Term? = Term.EMPTY
46-
47-
init {
48-
if (refIter.hasNext() && hypoIter.hasNext()) {
49-
compare()
50-
} else {
51-
hypothesisTermsWithoutMatches.addAll(hypothesisLayer.terms)
52-
referenceTermsWithoutMatches.addAll(referenceLayer.terms)
53-
}
54-
}
42+
private val hypoIter: SmartIterator<Term> = SmartIterator(hypothesis.terms.iterator())
43+
private val refIter: SmartIterator<Term> = SmartIterator(reference.terms.iterator())
5544

5645
/** Iterate through the terms of both layers simultaneously and compare them. */
57-
private fun compare() {
58-
// First terms
59-
nextHypo()
60-
nextRef()
61-
// While there are next terms
62-
while (currentHypoTerm != null && currentRefTerm != null) {
63-
val comp = TermComparison(hypoTerm = currentHypoTerm!!, refTerm = currentRefTerm!!)
64-
compareTerm(comp)
46+
init {
47+
// While both current terms are non-null, compare them
48+
while (hypoIter.current != null && refIter.current != null) {
49+
compareTerm(TermComparison(hypoIter.current!!, refIter.current!!))
6550
}
66-
// One of the two could be non-null. These are not included in the remaining refIter.
67-
currentHypoTerm?.let(::hypoNoMatch)
68-
currentRefTerm?.let(::refNoMatch)
69-
// The remaining terms have no matches
70-
hypoIter.forEachRemaining(::hypoNoMatch)
71-
refIter.forEachRemaining(::refNoMatch)
51+
// At least one of the two current terms is null here, so refIter.current may still be non-null.
52+
refIter.current?.let { noMatch(it) }
53+
// And add any remaining terms
54+
refIter.forEachRemaining { t -> noMatch(t!!) }
7255
}
7356

7457
private fun compareTerm(comp: TermComparison) {
7558
// Act on the comparison
76-
if (comp.overlap) {
77-
fullMatch(comp)
59+
if (comp.equalAnnotation(Annotation.TOKEN)) {
60+
match(comp)
61+
hypoIter.next()
62+
refIter.next()
7863
} else {
79-
// Unequal first offset
80-
if (comp.hypoTerm.offset < comp.refTerm.offset) {
81-
hypoNoMatch()
82-
} else if (comp.hypoTerm.offset > comp.refTerm.offset) {
83-
refNoMatch()
84-
}
85-
// Equal first offset but no match.
86-
// Try to truncate either terms to see if the last char is punctuation.
87-
else if (symmetricTruncatedPcMatch(comp)) {
64+
if (truncatedPCMatch(comp)) {
8865
// If so, still match it.
89-
fullMatch(comp)
66+
match(comp)
67+
// and fix iterators for the next terms
68+
fixIter(comp)
9069
} else {
91-
hypoNoMatch()
92-
refNoMatch()
70+
noMatch(comp.refTerm)
9371
}
9472
}
9573
}
9674

97-
private fun fullMatch(comp: TermComparison) {
98-
if (layerFilter?.filter(comp) != false) {
99-
matches.add(comp)
75+
private fun fixIter(comp: TermComparison) {
76+
hypoIter.next()
77+
refIter.next()
78+
// Now, the iterator whose compared term had the shorter token needs to be advanced until it matches.
79+
val hypoShorter: Boolean = comp.hypoTerm.token.length < comp.refTerm.token.length
80+
val shorterIter = if (hypoShorter) hypoIter else refIter
81+
val termToMatch = if (hypoShorter) refIter.current else hypoIter.current
82+
while (termToMatch != null && shorterIter.current != null && !truncatedPCMatch(termToMatch, shorterIter.current!!)) {
83+
// Advance
84+
shorterIter.next()
10085
}
101-
nextHypo()
102-
nextRef()
10386
}
10487

105-
private fun hypoNoMatch() {
106-
hypoNoMatch(currentHypoTerm!!)
107-
nextHypo()
108-
}
109-
110-
protected open fun hypoNoMatch(t: Term) {
111-
// Note how layerFilter can be null, and both null and true != false.
112-
if (layerFilter?.hypoTermFilter?.filter(t) != false) {
113-
hypothesisTermsWithoutMatches.add(t)
88+
private fun match(comp: TermComparison) {
89+
if (filter?.filter(comp) != false) {
90+
matches.add(comp)
11491
}
11592
}
11693

117-
private fun refNoMatch() {
118-
refNoMatch(currentRefTerm!!)
119-
nextRef()
120-
}
121-
122-
protected open fun refNoMatch(t: Term) {
123-
if (layerFilter?.refTermFilter?.filter(t) != false) {
124-
referenceTermsWithoutMatches.add(t)
94+
protected open fun noMatch(t: Term) {
95+
if (filter?.refTermFilter?.filter(t) != false) {
96+
matches.add(TermComparison(Term.EMPTY, t))
12597
}
12698
}
12799

128-
private fun nextHypo() {
129-
currentHypoTerm = hypoIter.nextOrNull()
130-
}
131-
132-
private fun nextRef() {
133-
currentRefTerm = refIter.nextOrNull()
134-
}
135-
136100
companion object {
137-
fun symmetricTruncatedPcMatch(comp: TermComparison): Boolean {
138-
val aStr: String = comp.hypoTerm.token
139-
val bStr: String = comp.refTerm.token
140-
return truncatedPcMatch(aStr, bStr) || truncatedPcMatch(bStr, aStr)
101+
// It will mostly be a single punctuation mark, but this is more generic.
102+
val PUNCT: Regex = Regex("""\W$""")
103+
104+
private fun truncatedPCMatch(comp: TermComparison): Boolean {
105+
return truncatedPCMatch(comp.hypoTerm, comp.refTerm)
141106
}
142107

143-
private fun truncatedPcMatch(aStr: String, bStr: String): Boolean {
144-
if (aStr.isEmpty() || bStr.isEmpty()) {
145-
return false
146-
}
147-
return truncatePC(aStr) == bStr
108+
private fun truncatedPCMatch(t1: Term, t2: Term): Boolean {
109+
val a: String = t1.token
110+
val b: String = t2.token
111+
return truncatePC(a) == b || truncatePC(b) == a
148112
}
149113

114+
/** Truncate any punctuation at the end of the string. */
150115
fun truncatePC(str: String): String {
151-
return if (str.isNotEmpty() && str.last() in PUNCTUATION) {
152-
str.slice(0 until str.lastIndex)
153-
} else {
154-
str
155-
}
116+
return PUNCT.replace(str, "")
156117
}
157118
}
158119
}
@@ -163,17 +124,13 @@ class DocumentLayerComparison(
163124
private val layerFilter: LayerFilter? = null,
164125
) : LayerComparison(hypothesisLayer, referenceLayer, layerFilter) {
165126

166-
override fun hypoNoMatch(t: Term) {
167-
// ignore
168-
}
169-
170-
override fun refNoMatch(t: Term) {
127+
override fun noMatch(t: Term) {
171128
if (layerFilter?.refTermFilter?.filter(t) != false) {
172129
matches.add(TermComparison(hypoTerm = Term.EMPTY, refTerm = t))
173130
}
174131
}
175132

176133
init {
177-
matches.addAll(referenceTermsWithoutMatches.map { TermComparison(hypoTerm = Term.EMPTY, refTerm = it) })
134+
// matches.addAll(referenceTermsWithoutMatches.map { TermComparison(hypoTerm = Term.EMPTY, refTerm = it) })
178135
}
179136
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/comparison/TermComparison.kt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
11
package org.ivdnt.galahad.evaluation.comparison
22

3-
import com.fasterxml.jackson.annotation.JsonIgnore
43
import org.ivdnt.galahad.annotations.Annotation
54
import org.ivdnt.galahad.annotations.Term
65

76
data class TermComparison(
87
val hypoTerm: Term, // Hypothesis
98
val refTerm: Term, // True reference
109
) {
11-
/** Full overlap dependent on the word forms. Overlap of position, not lemma/pos. */
12-
@JsonIgnore
13-
val overlap: Boolean = hypoTerm.offset == refTerm.offset && hypoTerm.token == refTerm.token
14-
1510
/**
1611
* Apply a removal regex transformation to the annotation before comparing. E.g. removing _ from lemmas.
1712
*/

server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/Confusion.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.ivdnt.galahad.evaluation.confusion
22

33
import com.fasterxml.jackson.annotation.JsonIgnore
44
import org.ivdnt.galahad.annotations.Annotation
5+
import org.ivdnt.galahad.annotations.Term
56
import org.ivdnt.galahad.evaluation.EvaluationEntry
67
import org.ivdnt.galahad.evaluation.comparison.TermComparison
78
import org.ivdnt.galahad.export.csv.CSVFile
@@ -76,8 +77,8 @@ open class Confusion(private val truncate: Boolean = true, val annotation: Annot
7677

7778
fun add(termComp: TermComparison) {
7879
add(
79-
termComp.hypoTerm.annotationHeadOrMissing(annotation),
80-
termComp.refTerm.annotationHeadOrMissing(annotation),
80+
termComp.hypoTerm.takeUnless { it == Term.EMPTY }?.annotationHeadOrMissing(annotation) ?: TermComparison.MISSING_MATCH,
81+
termComp.refTerm.takeUnless { it == Term.EMPTY }?.annotationHeadOrMissing(annotation) ?: TermComparison.MISSING_MATCH,
8182
termComp
8283
)
8384
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/DocumentConfusion.kt

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,13 @@ import org.ivdnt.galahad.evaluation.comparison.TermComparison
1313
class DocumentConfusion(
1414
hypothesis: Layer,
1515
reference: Layer,
16-
layerFilter: LayerFilter? = null,
16+
filter: LayerFilter? = null,
1717
annotation: Annotation = Annotation.POS,
18-
) : Confusion(truncate = layerFilter == null, annotation) {
18+
) : Confusion(truncate = filter == null, annotation) {
1919

2020
init {
21-
val layerComparison = LayerComparison(
22-
hypothesisLayer = hypothesis,
23-
referenceLayer = reference,
24-
layerFilter = layerFilter
25-
)
21+
val layerComparison = LayerComparison(hypothesis, reference, filter)
2622

2723
layerComparison.matches.forEach(::add)
28-
29-
layerComparison.hypothesisTermsWithoutMatches.forEach {
30-
add(
31-
hypoPos = it.annotationHeadOrMissing(annotation),
32-
refPos = TermComparison.MISSING_MATCH,
33-
sample = TermComparison(hypoTerm = it, refTerm = Term.EMPTY)
34-
)
35-
}
36-
37-
layerComparison.referenceTermsWithoutMatches.forEach {
38-
add(
39-
hypoPos = TermComparison.MISSING_MATCH,
40-
refPos = it.annotationHeadOrMissing(annotation),
41-
sample = TermComparison(hypoTerm = Term.EMPTY, refTerm = it)
42-
)
43-
}
4424
}
4525
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/metrics/DocumentMetrics.kt

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,6 @@ class DocumentMetrics(
2424

2525
init {
2626
val layerComparison = LayerComparison(hypothesisJob.getLayer(doc), referenceJob.getLayer(doc), layerFilter)
27-
2827
layerComparison.matches.forEach(this::add)
29-
30-
layerComparison.referenceTermsWithoutMatches.forEach {
31-
add(
32-
TermComparison(hypoTerm = Term.EMPTY, refTerm = it)
33-
)
34-
}
3528
}
3629
}

server/src/main/kotlin/org/ivdnt/galahad/export/LayerMerger.kt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,7 @@ abstract class LayerMerger protected constructor(protected val export: DocumentE
1515
private val layerComparison: LayerComparison = LayerComparison(export)
1616

1717
// TODO move to LayerComparison
18-
protected val sourceTermComparisons: List<TermComparison> =
19-
(layerComparison.matches + layerComparison.referenceTermsWithoutMatches.map {
20-
TermComparison(
21-
Term.EMPTY,
22-
it
23-
)
24-
}).sortedBy { it.refTerm.offset }
18+
protected val sourceTermComparisons: List<TermComparison> = layerComparison.matches.sortedBy { it.refTerm.offset }
2519

2620
abstract fun merge(out: OutputStream)
2721

0 commit comments

Comments
 (0)