@@ -32,6 +32,10 @@ class Term(
3232 @get:JsonIgnore
3333 val ner: String? = annotations[Annotation .NER ]
3434
/**
 * Builds a new [Term] positioned at the offset of [refTerm].
 *
 * This term's id, annotations and spaceAfter are passed through unchanged;
 * only the offset is taken from [refTerm].
 */
fun alignedTo(refTerm: Term): Term {
    return Term(
        id,
        refTerm.offset,
        annotations,
        spaceAfter,
    )
}
3539
/**
 * Reports whether the value stored in [annotations] under [annotation] contains the
 * multi-value marker (the string literal tested below).
 *
 * Evaluates to false when no value is stored for [annotation], because the safe call
 * then yields null and `null == true` is false.
 *
 * NOTE(review): the marker literal appears with a leading space in this view; that is
 * presumably an extraction artifact — confirm against the real file.
 */
3640 fun isMulti (annotation : Annotation ): Boolean = annotations[annotation]?.contains(" +" ) == true
3741
@@ -56,15 +60,15 @@ class Term(
5660 val annotation = annotations[annotationType] ? : return null
5761 // for NER
5862 if (annotationType == Annotation .NER ) {
59- if (annotation.contains( ' -' ) ) {
63+ if (' -' in annotation ) {
6064 return annotation.split(' -' )[1 ]
6165 }
6266 }
6367 // for POS & UPOS
64- else if (listOf ( Annotation . POS , Annotation . UPOS ).contains( annotationType) ) {
68+ else if (annotationType in posAnnotations ) {
6569 return if (isMulti(annotationType)) {
6670 // Split on + and transform each part
67- annotation.split(" +" ).map { singlePosToHead(it) }.joinToString( " + " )
71+ annotation.split(" +" ).joinToString( " + " ) { singlePosToHead(it) }
6872 } else {
6973 singlePosToHead(annotation)
7074 }
@@ -75,10 +79,11 @@ class Term(
7579
7680 companion object {
7781 val EMPTY : Term = Term (" " , 0 , mapOf (Annotation .TOKEN to " " ))
82+ private val posAnnotations: Array <Annotation > = arrayOf(Annotation .POS , Annotation .UPOS )
83+ private val posHeadSeparators: Array <Char > = arrayOf(' (' , ' |' )
7884
79- fun missingName (annotation : Annotation ): String =
80- // simply uppercase and prepend "NO_"
81- " NO_${annotation.value.uppercase()} "
85+ // simply uppercase and prepend "NO_"
86+ fun missingName (annotation : Annotation ): String = " NO_${annotation.value.uppercase()} "
8287
8388 /* * The features of [pos]. E.g. "num=sg" for "NOU(num=sg)". Does not support multi-pos. */
8489 fun features (pos : String? ): String? {
@@ -91,9 +96,8 @@ class Term(
9196 }
9297
9398 fun singlePosToHead (pos : String ): String {
94- val separators = listOf (' (' , ' |' )
95- for (separator in separators) {
96- if (pos.contains(separator)) {
99+ for (separator in posHeadSeparators) {
100+ if (separator in pos) {
97101 val head = pos.split(separator)[0 ]
98102 // presumably head won't be empty, but this way we could
99103 // parse something like (VRB) if anyone would ever use that
0 commit comments