@@ -3,32 +3,7 @@ package org.ivdnt.galahad.evaluation.comparison
33import org.ivdnt.galahad.annotations.Layer
44import org.ivdnt.galahad.annotations.Term
55import org.ivdnt.galahad.export.DocumentExport
6-
// NOTE(review): every line of this class is a removed ("-") diff line; the import added right after it
// suggests the class now lives in org.ivdnt.galahad.util — confirm.
7- /* * An iterator wrapper that keeps the most recently fetched item in [current] and a running character count in [chars]. */
8- class TermIterator : Iterator <Term ?> {
// Most recently fetched term; null when the wrapped iterator had nothing left to return.
9- var current: Term ? = null
10- private set
// Sum of the token lengths of the terms that came before [current].
11- var chars: Int = 0
12- private set
13-
14- private val iter: Iterator <Term ?>
15-
16- constructor (iter: Iterator <Term ?>) {
17- this .iter = iter
// Prefetch the first term so that [current] is populated directly after construction.
18- next()
19- }
20-
// Delegates to the wrapped iterator, which is always one step ahead of [current]:
// this can return false while [current] is still non-null.
21- override fun hasNext (): Boolean = iter.hasNext()
22-
// Advances to the next term. When the wrapped iterator is exhausted, [current] becomes null
// and null is returned (no exception is thrown).
23- override fun next (): Term ? {
24- // Add the token length of the term that is about to become the previous one.
25- if (current != null ) {
26- chars + = (current as Term ).token.length
27- }
28- current = if (iter.hasNext()) iter.next() else null
29- return current
30- }
31- }
6+ import org.ivdnt.galahad.util.TermIterator
327
338/* *
349 * Match the [Layer.terms] of two layers based on their [WordForm] position (offset and length)
@@ -37,85 +12,44 @@ class TermIterator : Iterator<Term?> {
3712 * (Still, aggregating these matches is up to the (corpus/documents) evaluation classes)
3813 */
3914open class LayerComparison (
// The hypothesis/reference parameters change from private properties (removed lines) to plain
// constructor parameters (added lines); they are only read by the iterator initializers below.
40- private val hypothesis : Layer ,
41- private val reference : Layer ,
15+ hypothesis : Layer ,
16+ reference : Layer ,
4217 private val filter : LayerFilter ? = null ,
4318) {
// Convenience constructor: export.layer is the hypothesis, export.sourceLayer the reference, no filter.
4419 constructor (export: DocumentExport ) : this (export.layer, export.sourceLayer)
4520
// Comparisons collected by the init block, in the order they are produced.
46- val matches: MutableList <TermComparison > = ArrayList ()
47- private val hypoIter: TermIterator = TermIterator (hypothesis.terms.iterator())
48- private val refIter: TermIterator = TermIterator (reference.terms.iterator())
21+ val matches: MutableList <TermComparison > = mutableListOf ()
22+ private val hypoIter = TermIterator (hypothesis.terms.iterator())
23+ private val refIter = TermIterator (reference.terms.iterator())
4924
50- /* * Iterate through the terms of both layers simultaneously and compare them. */
5125 init {
52- // While non null, compare
26+ // Iterate through the terms of both layers simultaneously while both are non-null and compare them.
// Terms are paired when both iterators report the same `chars` value.
// NOTE(review): `chars` is presumably the number of token characters consumed before `current` —
// confirm against org.ivdnt.galahad.util.TermIterator.
5327 while (hypoIter.current != null && refIter.current != null ) {
5428 if (hypoIter.chars == refIter.chars) {
5529 match(TermComparison (hypoIter.current!! , refIter.current!! ))
5630 hypoIter.next()
5731 refIter.next()
5832 } else {
33+ // Mismatch: advance the iterator that is behind.
5934 if (refIter.chars < hypoIter.chars) {
// Added line: the unmatched reference term is paired with Term.EMPTY and goes through match(),
// so it is checked with filter.filter(comp); the removed line used noMatch(), which checked
// filter.refTermFilter instead.
60- noMatch( refIter.current!! )
35+ match( TermComparison ( Term . EMPTY , refIter.current!! ) )
6136 refIter.next()
6237 } else {
// Added line: an unmatched hypothesis term is now recorded as well; before this change the
// branch only advanced hypoIter.
38+ match(TermComparison (hypoIter.current!! , Term .EMPTY ))
6339 hypoIter.next()
6440 }
6541 }
6642 }
67- // Only one or both are null. So refIter.current can be non-null.
68- refIter.current?.let { noMatch(it) }
69- // And add any remaining terms
70- refIter.forEachRemaining { t -> noMatch(t!! ) }
43+ // At least one of the two `current` values is null here. Add the other side's current term and any remaining terms.
44+ refIter.current?.let { match(TermComparison (Term .EMPTY , it)) }
45+ hypoIter.current?.let { match(TermComparison (it, Term .EMPTY )) }
46+ refIter.forEachRemaining { match(TermComparison (Term .EMPTY , it!! )) }
47+ hypoIter.forEachRemaining { match(TermComparison (it!! , Term .EMPTY )) }
7148 }
7249
// Adds comp to matches unless a filter is set and its filter(comp) returns false.
7350 private fun match (comp : TermComparison ) {
7451 if (filter?.filter(comp) != false ) {
7552 matches.add(comp)
7653 }
7754 }
78-
// Removed: recorded a reference term without counterpart as (Term.EMPTY, t), unless
// filter.refTermFilter rejected it. It was `protected open`, so subclasses could override it.
79- protected open fun noMatch (t : Term ) {
80- if (filter?.refTermFilter?.filter(t) != false ) {
81- matches.add(TermComparison (Term .EMPTY , t))
82- }
83- }
84-
// Removed: punctuation-truncation helpers. No call sites of truncatedPCMatch are visible in this diff.
85- companion object {
86- // It will mostly be a single punctuation mark, but this is more generic.
// NOTE(review): as rendered here the pattern starts with a space and truncatePC replaces with a
// single space; this may be an artifact of how the diff was extracted — confirm against the file.
87- val PUNCT : Regex = Regex (""" \W$""" )
88-
89- private fun truncatedPCMatch (comp : TermComparison ): Boolean {
90- return truncatedPCMatch(comp.hyp, comp.ref)
91- }
92-
// True when the two tokens are equal after applying truncatePC to either one of them.
93- private fun truncatedPCMatch (t1 : Term , t2 : Term ): Boolean {
94- val a: String = t1.token
95- val b: String = t2.token
96- return truncatePC(a) == b || truncatePC(b) == a
97- }
98-
99- /* * Replace the first match of [PUNCT] (anchored at the end of the string) in str. */
100- fun truncatePC (str : String ): String {
101- return PUNCT .replace(str, " " )
102- }
103- }
10455}
105-
// Removed subclass of LayerComparison: forwards its three constructor arguments to the base class
// and overrides noMatch with the same check and the same (Term.EMPTY, t) comparison as the base version.
106- class DocumentLayerComparison (
107- private val hypothesisLayer : Layer ,
108- private val referenceLayer : Layer ,
109- private val layerFilter : LayerFilter ? = null ,
110- ) : LayerComparison(hypothesisLayer, referenceLayer, layerFilter) {
111-
// NOTE(review): the base class calls noMatch from its init block, i.e. before this class's own
// `layerFilter` property has been assigned; the check below then likely saw null and never
// filtered anything — confirm against Kotlin's derived-class initialization order.
112- override fun noMatch (t : Term ) {
113- if (layerFilter?.refTermFilter?.filter(t) != false ) {
114- matches.add(TermComparison (hyp = Term .EMPTY , ref = t))
115- }
116- }
117-
// The init block contains only commented-out code and therefore does nothing.
118- init {
119- // matches.addAll(referenceTermsWithoutMatches.map { TermComparison(hypoTerm = Term.EMPTY, refTerm = it) })
120- }
121- }
0 commit comments