11package org.ivdnt.galahad.evaluation.comparison
22
3- import com.fasterxml.jackson.annotation.JsonIgnore
3+ import org.ivdnt.galahad.annotations.Annotation
44import org.ivdnt.galahad.annotations.Layer
55import org.ivdnt.galahad.annotations.Term
66import org.ivdnt.galahad.export.DocumentExport
77
8- fun Iterator<Term>.nextOrNull (): Term ? = if (hasNext()) next() else null
8+ /* * An iterator that saves the current item. */
9+ class SmartIterator <T > : Iterator <T ?> {
10+ var current: T ? = null
11+ private set
912
10- // Some hardcoded punctuation
11- val PUNCTUATION : Array <Char > = arrayOf(' ,' , ' .' , ' ?' , ' !' , ' :' , ' ;' , ' )' , ' (' , ' \' ' , ' "' )
13+ private val iter: Iterator <T >
14+
15+ constructor (iter: Iterator <T >) {
16+ this .iter = iter
17+ next()
18+ }
19+
20+ override fun hasNext (): Boolean = iter.hasNext()
21+
22+ override fun next (): T ? {
23+ current = if (iter.hasNext()) iter.next() else null
24+ return current
25+ }
26+ }
1227
1328/* *
1429 * Match the [Layer.terms] of two layers based on their [WordForm] position (offset and length)
@@ -17,142 +32,88 @@ val PUNCTUATION: Array<Char> = arrayOf(',', '.', '?', '!', ':', ';', ')', '(', '
1732 * (Still, aggregating these matches is up to the (corpus/documents) evaluation classes)
1833 */
1934open class LayerComparison (
20- hypothesisLayer : Layer ,
21- referenceLayer : Layer ,
22- private val layerFilter : LayerFilter ? = null ,
35+ private val hypothesis : Layer ,
36+ private val reference : Layer ,
37+ private val filter : LayerFilter ? = null ,
2338) {
2439 constructor (export: DocumentExport ) : this (export.layer, export.sourceLayer)
2540
26- @JsonIgnore
2741 val matches: MutableList <TermComparison > = ArrayList ()
28-
29- @JsonIgnore
30- val referenceTermsWithoutMatches: MutableList <Term > = ArrayList ()
31-
32- @JsonIgnore
33- val hypothesisTermsWithoutMatches: MutableList <Term > = ArrayList ()
34-
35- @JsonIgnore
36- private val hypoIter: Iterator <Term > = hypothesisLayer.terms.iterator()
37-
38- @JsonIgnore
39- private val refIter: Iterator <Term > = referenceLayer.terms.iterator()
40-
41- @JsonIgnore
42- private var currentHypoTerm: Term ? = Term .EMPTY
43-
44- @JsonIgnore
45- private var currentRefTerm: Term ? = Term .EMPTY
46-
47- init {
48- if (refIter.hasNext() && hypoIter.hasNext()) {
49- compare()
50- } else {
51- hypothesisTermsWithoutMatches.addAll(hypothesisLayer.terms)
52- referenceTermsWithoutMatches.addAll(referenceLayer.terms)
53- }
54- }
42+ private val hypoIter: SmartIterator <Term > = SmartIterator (hypothesis.terms.iterator())
43+ private val refIter: SmartIterator <Term > = SmartIterator (reference.terms.iterator())
5544
5645 /* * Iterate through the terms of both layers simultaneously and compare them. */
57- private fun compare () {
58- // First terms
59- nextHypo()
60- nextRef()
61- // While there are next terms
62- while (currentHypoTerm != null && currentRefTerm != null ) {
63- val comp = TermComparison (hypoTerm = currentHypoTerm!! , refTerm = currentRefTerm!! )
64- compareTerm(comp)
46+ init {
47+ // While non null, compare
48+ while (hypoIter.current != null && refIter.current != null ) {
49+ compareTerm(TermComparison (hypoIter.current!! , refIter.current!! ))
6550 }
66- // One of the two could be non-null. These are not included in the remaining refIter.
67- currentHypoTerm?.let (::hypoNoMatch)
68- currentRefTerm?.let (::refNoMatch)
69- // The remaining terms have no matches
70- hypoIter.forEachRemaining(::hypoNoMatch)
71- refIter.forEachRemaining(::refNoMatch)
51+ // Only one or both are null. So refIter.current can be non-null.
52+ refIter.current?.let { noMatch(it) }
53+ // And add any remaining terms
54+ refIter.forEachRemaining { t -> noMatch(t!! ) }
7255 }
7356
7457 private fun compareTerm (comp : TermComparison ) {
7558 // Act on the comparison
76- if (comp.overlap) {
77- fullMatch(comp)
59+ if (comp.equalAnnotation(Annotation .TOKEN )) {
60+ match(comp)
61+ hypoIter.next()
62+ refIter.next()
7863 } else {
79- // Unequal first offset
80- if (comp.hypoTerm.offset < comp.refTerm.offset) {
81- hypoNoMatch()
82- } else if (comp.hypoTerm.offset > comp.refTerm.offset) {
83- refNoMatch()
84- }
85- // Equal first offset but no match.
86- // Try to truncate either terms to see if the last char is punctuation.
87- else if (symmetricTruncatedPcMatch(comp)) {
64+ if (truncatedPCMatch(comp)) {
8865 // If so, still match it.
89- fullMatch(comp)
66+ match(comp)
67+ // and fix iterators for the next terms
68+ fixIter(comp)
9069 } else {
91- hypoNoMatch()
92- refNoMatch()
70+ noMatch(comp.refTerm)
9371 }
9472 }
9573 }
9674
97- private fun fullMatch (comp : TermComparison ) {
98- if (layerFilter?.filter(comp) != false ) {
99- matches.add(comp)
75+ private fun fixIter (comp : TermComparison ) {
76+ hypoIter.next()
77+ refIter.next()
78+ // Now, the shorter iterator needs to be advanced until it matches.
79+ val hypoShorter: Boolean = comp.hypoTerm.token.length < comp.refTerm.token.length
80+ val shorterIter = if (hypoShorter) hypoIter else refIter
81+ val termToMatch = if (hypoShorter) refIter.current else hypoIter.current
82+ while (termToMatch != null && shorterIter.current != null && ! truncatedPCMatch(termToMatch, shorterIter.current!! )) {
83+ // Advance
84+ shorterIter.next()
10085 }
101- nextHypo()
102- nextRef()
10386 }
10487
105- private fun hypoNoMatch () {
106- hypoNoMatch(currentHypoTerm!! )
107- nextHypo()
108- }
109-
110- protected open fun hypoNoMatch (t : Term ) {
111- // Note how layerFilter can be null, and both null and true != false.
112- if (layerFilter?.hypoTermFilter?.filter(t) != false ) {
113- hypothesisTermsWithoutMatches.add(t)
88+ private fun match (comp : TermComparison ) {
89+ if (filter?.filter(comp) != false ) {
90+ matches.add(comp)
11491 }
11592 }
11693
117- private fun refNoMatch () {
118- refNoMatch(currentRefTerm!! )
119- nextRef()
120- }
121-
122- protected open fun refNoMatch (t : Term ) {
123- if (layerFilter?.refTermFilter?.filter(t) != false ) {
124- referenceTermsWithoutMatches.add(t)
94+ protected open fun noMatch (t : Term ) {
95+ if (filter?.refTermFilter?.filter(t) != false ) {
96+ matches.add(TermComparison (Term .EMPTY , t))
12597 }
12698 }
12799
128- private fun nextHypo () {
129- currentHypoTerm = hypoIter.nextOrNull()
130- }
131-
132- private fun nextRef () {
133- currentRefTerm = refIter.nextOrNull()
134- }
135-
136100 companion object {
137- fun symmetricTruncatedPcMatch (comp : TermComparison ): Boolean {
138- val aStr: String = comp.hypoTerm.token
139- val bStr: String = comp.refTerm.token
140- return truncatedPcMatch(aStr, bStr) || truncatedPcMatch(bStr, aStr)
101+ // It will mostly be a single punctuation mark, but this is more generic.
102+ val PUNCT : Regex = Regex (""" \W$""" )
103+
104+ private fun truncatedPCMatch (comp : TermComparison ): Boolean {
105+ return truncatedPCMatch(comp.hypoTerm, comp.refTerm)
141106 }
142107
143- private fun truncatedPcMatch (aStr : String , bStr : String ): Boolean {
144- if (aStr.isEmpty() || bStr.isEmpty()) {
145- return false
146- }
147- return truncatePC(aStr) == bStr
108+ private fun truncatedPCMatch (t1 : Term , t2 : Term ): Boolean {
109+ val a: String = t1.token
110+ val b: String = t2.token
111+ return truncatePC(a) == b || truncatePC(b) == a
148112 }
149113
114+ /* * Truncate any punctuation at the end of the string. */
150115 fun truncatePC (str : String ): String {
151- return if (str.isNotEmpty() && str.last() in PUNCTUATION ) {
152- str.slice(0 until str.lastIndex)
153- } else {
154- str
155- }
116+ return PUNCT .replace(str, " " )
156117 }
157118 }
158119}
@@ -163,17 +124,13 @@ class DocumentLayerComparison(
163124 private val layerFilter : LayerFilter ? = null ,
164125) : LayerComparison(hypothesisLayer, referenceLayer, layerFilter) {
165126
166- override fun hypoNoMatch (t : Term ) {
167- // ignore
168- }
169-
170- override fun refNoMatch (t : Term ) {
127+ override fun noMatch (t : Term ) {
171128 if (layerFilter?.refTermFilter?.filter(t) != false ) {
172129 matches.add(TermComparison (hypoTerm = Term .EMPTY , refTerm = t))
173130 }
174131 }
175132
176133 init {
177- matches.addAll(referenceTermsWithoutMatches.map { TermComparison (hypoTerm = Term .EMPTY , refTerm = it) })
134+ // matches.addAll(referenceTermsWithoutMatches.map { TermComparison(hypoTerm = Term.EMPTY, refTerm = it) })
178135 }
179136}
0 commit comments