Skip to content

Commit 0afee27

Browse files
committed
Fix some Layer, Term & Format tests
1 parent 6c74f8c commit 0afee27

11 files changed

Lines changed: 64 additions & 63 deletions

File tree

server/src/main/kotlin/org/ivdnt/galahad/documents/DocumentFormat.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import javax.xml.stream.XMLStreamReader
1111
/** Format of a document. No 1-to-1 correspondence with extensions due to xml format overlap. */
1212
enum class DocumentFormat(val identifier: String, val extension: String) {
1313
TeiP4Legacy("tei-p4-legacy", "tei.xml"),
14-
TeiP5Legacy("tei-p5-legacy", "tei.xml"),
1514
TeiP5("tei-p5", "tei.xml"),
1615
Naf("naf", "naf.xml"),
1716
Tsv("tsv", "tsv"),

server/src/main/kotlin/org/ivdnt/galahad/export/CorpusExport.kt

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,7 @@ class CorpusExport private constructor(
2424
val shouldMerge: Boolean,
2525
val posHeadOnly: Boolean,
2626
) : Logging {
27-
private fun mergeFormatMatches(it: Document, format: DocumentFormat): Boolean {
28-
var otherFormat = it.metadata.format
29-
// Overwrite the format for legacy formats that can in fact be merged.
30-
if (otherFormat == DocumentFormat.TeiP5Legacy) {
31-
otherFormat = DocumentFormat.TeiP5
32-
}
33-
return otherFormat == format
34-
}
27+
private fun mergeFormatMatches(it: Document, format: DocumentFormat): Boolean = it.metadata.format == format
3528

3629
private fun formatMapper(doc: Document, out: OutputStream) {
3730
try {

server/src/main/kotlin/org/ivdnt/galahad/export/LayerMerger.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ abstract class LayerMerger protected constructor(protected val export: DocumentE
2020
DocumentFormat.Tsv -> TsvMerger(export)
2121
DocumentFormat.Folia -> FoliaMerger(export)
2222
DocumentFormat.Conllu -> ConlluMerger(export)
23-
DocumentFormat.TeiP5Legacy, DocumentFormat.TeiP5 -> TeiMerger(export)
23+
DocumentFormat.TeiP5 -> TeiMerger(export)
2424
else -> throw InvalidDocumentFormatException("Unsupported export conversion format: ${export.format}")
2525
}
2626
}

server/src/main/kotlin/org/ivdnt/galahad/formats/InternalFile.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ abstract class InternalFile protected constructor() {
3232
DocumentFormat.Pdf -> PdfFile(file)
3333
// Multiple TEI formats
3434
DocumentFormat.TeiP4Legacy,
35-
DocumentFormat.TeiP5Legacy,
3635
DocumentFormat.TeiP5,
3736
-> TeiFile(file, format)
3837
}

server/src/test/kotlin/org/ivdnt/galahad/documents/DocumentMetadataTest.kt

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,28 +18,25 @@ class DocumentMetadataTest {
1818

1919
@Test
2020
fun `Properties for an unannotated file`() {
21-
val path = "formats/shared/converter/input.txt"
22-
val file = TestUtil.get(path)
23-
val plaintext = file.readText()
21+
val path = "formats/shared/converter/karel_en_martijn.txt"
2422
val doc = TestUtil.getDoc(path)
2523
val meta = doc.metadata
26-
assertEquals("input.txt", meta.name)
24+
assertEquals("karel_en_martijn.txt", meta.name)
2725
assertEquals(DocumentFormat.Txt, meta.format)
28-
assertEquals(plaintext, meta.text) // This works because the preview is < MAX_PREVIEW_LENGTH
26+
assert(meta.text.contains("Fraaie historie ende alwaer"))
2927
val total = meta.annotations.annotations[Annotation.TOKEN]
30-
assertEquals(0, total)
28+
assertEquals(39, total)
3129
}
3230

3331
@Test
3432
fun `Properties for an annotated file`() {
35-
val path = "all-formats/input/input.tei.xml"
33+
val path = "formats/shared/converter/karel_en_martijn.tei.xml"
3634
val doc = TestUtil.getDoc(path)
37-
val plaintext = TestUtil.getLayer(doc).toString()
3835
val meta = doc.metadata
39-
assertEquals("input.tei.xml", meta.name)
36+
assertEquals("karel_en_martijn.tei.xml", meta.name)
4037
assertEquals(DocumentFormat.TeiP5, meta.format)
41-
assertEquals(plaintext, meta.text) // This works because the preview is < MAX_PREVIEW_LENGTH
38+
assert(meta.text.contains("Fraaie historie ende alwaer"))
4239
val total = meta.annotations.annotations[Annotation.TOKEN]
43-
assertEquals(21, total)
40+
assertEquals(52, total)
4441
}
4542
}

server/src/test/kotlin/org/ivdnt/galahad/documents/DocumentsTest.kt

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,22 @@ class DocumentsTest {
1919
@Test
2020
fun `Create and delete files`() {
2121
// Add two files
22-
addFile("all-formats/input/input.folia.xml")
23-
addFile("all-formats/input/input.conllu")
22+
addFile("formats/shared/converter/karel_en_martijn.folia.xml")
23+
addFile("formats/shared/converter/karel_en_martijn.conllu")
2424
// 2 files exist
25-
assertDocsinDocuments(setOf("input.folia.xml", "input.conllu"))
25+
assertDocsinDocuments(setOf("karel_en_martijn.folia.xml", "karel_en_martijn.conllu"))
2626
// Delete the first
27-
deleteFile("input.folia.xml")
27+
deleteFile("karel_en_martijn.folia.xml")
2828
// 1 is left
29-
assertDocsinDocuments(setOf("input.conllu"))
29+
assertDocsinDocuments(setOf("karel_en_martijn.conllu"))
3030
// Try to access deleted file
31-
assertFileDeleted("input.folia.xml")
31+
assertFileDeleted("karel_en_martijn.folia.xml")
3232
// Delete the last file
33-
deleteFile("input.conllu")
33+
deleteFile("karel_en_martijn.conllu")
3434
// 0 left
3535
assertDocsinDocuments(setOf())
3636
// Try to access deleted file
37-
assertFileDeleted("input.conllu")
37+
assertFileDeleted("karel_en_martijn.conllu")
3838
}
3939

4040
private fun addFile(path: String) {

server/src/test/kotlin/org/ivdnt/galahad/documents/FormatInducerTest.kt

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,9 @@ import org.junit.jupiter.api.Test
88
class FormatInducerTest {
99
@Test
1010
fun `Parse simple formats based on extension`() {
11-
assertEquals(DocumentFormat.Tsv, DocumentFormat.fromFile(TestUtil.get("all-formats/input/input.tsv")))
12-
assertEquals(DocumentFormat.Conllu, DocumentFormat.fromFile(TestUtil.get("all-formats/input/input.conllu")))
13-
assertEquals(DocumentFormat.Txt, DocumentFormat.fromFile(TestUtil.get("all-formats/input/input.txt")))
11+
assertEquals(DocumentFormat.Tsv, DocumentFormat.fromFile(TestUtil.get("formats/shared/converter/karel_en_martijn.tsv")))
12+
assertEquals(DocumentFormat.Conllu, DocumentFormat.fromFile(TestUtil.get("formats/shared/converter/karel_en_martijn.conllu")))
13+
assertEquals(DocumentFormat.Txt, DocumentFormat.fromFile(TestUtil.get("formats/shared/converter/karel_en_martijn.txt")))
1414
}
1515

1616
@Test
@@ -20,26 +20,20 @@ class FormatInducerTest {
2020

2121
@Test
2222
fun `Parse non-legacy XML formats`() {
23-
assertEquals(DocumentFormat.Folia, DocumentFormat.fromFile(TestUtil.get("all-formats/input/input.folia.xml")))
24-
assertEquals(DocumentFormat.TeiP5, DocumentFormat.fromFile(TestUtil.get("all-formats/input/input.tei.xml")))
25-
assertEquals(DocumentFormat.Naf, DocumentFormat.fromFile(TestUtil.get("all-formats/input/input.naf.xml")))
23+
assertEquals(DocumentFormat.Folia, DocumentFormat.fromFile(TestUtil.get("formats/shared/converter/karel_en_martijn.folia.xml")))
24+
assertEquals(DocumentFormat.TeiP5, DocumentFormat.fromFile(TestUtil.get("formats/shared/converter/karel_en_martijn.tei.xml")))
25+
assertEquals(DocumentFormat.TeiP5, DocumentFormat.fromFile(TestUtil.get("formats/tei/reader/teicorpus/input.tei.xml")))
26+
assertEquals(DocumentFormat.Naf, DocumentFormat.fromFile(TestUtil.get("formats/shared/converter/karel_en_martijn.naf.xml")))
2627
}
2728

2829
@Test
2930
fun `Parse legacy XML formats`() {
30-
assertEquals(DocumentFormat.TeiP4Legacy, DocumentFormat.fromFile(TestUtil.get("all-formats/tei/teip4legacy.xml")))
31-
// if 1 or more pos are present, it's TeiP5
32-
assertEquals(DocumentFormat.TeiP5, DocumentFormat.fromFile(TestUtil.get("all-formats/tei/1-pos-0-type.tei.xml")))
33-
// even if there are types, it's still TeiP5
34-
assertEquals(DocumentFormat.TeiP5, DocumentFormat.fromFile(TestUtil.get("all-formats/tei/1-pos-1-type.tei.xml")))
35-
// if no pos or type are present, it's unannotated and we default to TeiP5
36-
assertEquals(DocumentFormat.TeiP5, DocumentFormat.fromFile(TestUtil.get("all-formats/tei/0-pos-0-type.tei.xml")))
37-
// if no pos are present, but at least one type is present, it's TeiP5Legacy
38-
assertEquals(DocumentFormat.TeiP5Legacy, DocumentFormat.fromFile(TestUtil.get("all-formats/tei/0-pos-1-type.tei.xml")))
31+
assertEquals(DocumentFormat.TeiP4Legacy, DocumentFormat.fromFile(TestUtil.get("formats/tei/reader/teip4/input.tei.xml")))
3932

4033
}
4134

4235
@Test
4336
fun `Parse unknown XML root node`() {
37+
assertThrows(Exception::class.java) { DocumentFormat.fromFile(TestUtil.get("documents/invalid-root.xml")) }
4438
}
4539
}

server/src/test/kotlin/org/ivdnt/galahad/evaluation/EvaluationEntryTest.kt

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ class EvaluationEntryTest {
1414
val b = EvaluationEntry(1, mutableListOf())
1515
val result = EvaluationEntry.add(a, b)
1616
assertEquals(2, result.count)
17-
assertEquals(0, result.jsonSamples.size)
1817
assertEquals(0, result.samples.size)
1918
}
2019

@@ -24,7 +23,6 @@ class EvaluationEntryTest {
2423
val b = EvaluationEntry(4, mutableListOf(TermComparison(Term.EMPTY, Term.EMPTY)))
2524
val result = EvaluationEntry.add(a, b)
2625
assertEquals(7, result.count)
27-
assertEquals(2, result.jsonSamples.size)
2826
assertEquals(2, result.samples.size)
2927
}
3028

@@ -34,12 +32,10 @@ class EvaluationEntryTest {
3432
val b = EvaluationEntry(9, MutableList(9) { TermComparison(Term.EMPTY, Term.EMPTY) })
3533
var result = EvaluationEntry.add(a, b)
3634
assertEquals(16, result.count)
37-
assertEquals(10, result.jsonSamples.size)
3835
assertEquals(16, result.samples.size)
3936
val c = EvaluationEntry(3, MutableList(3) { TermComparison(Term.EMPTY, Term.EMPTY) })
4037
result = EvaluationEntry.add(result, c)
4138
assertEquals(19, result.count)
42-
assertEquals(10, result.jsonSamples.size)
4339
assertEquals(16, result.samples.size) // should not grow once the truncation limit is reached
4440
}
4541

server/src/test/kotlin/org/ivdnt/galahad/evaluation/LayerComparisonTest.kt

Lines changed: 34 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package org.ivdnt.galahad.evaluation
22

33
import org.ivdnt.galahad.annotations.Annotation
4+
import org.ivdnt.galahad.annotations.Term
45
import org.ivdnt.galahad.evaluation.comparison.*
56
import org.ivdnt.galahad.util.LayerBuilder
67
import org.junit.jupiter.api.Test
@@ -11,7 +12,7 @@ import org.junit.jupiter.api.Nested
1112
class LayerComparisonTest {
1213

1314
@Nested
14-
inner class PosFilter {
15+
inner class TermFilter {
1516
@Test
1617
fun `Matching layers with pos filter`() {
1718
val builder =
@@ -26,10 +27,24 @@ class LayerComparisonTest {
2627
assertEquals(100, comparison.matches.size)
2728
}
2829

30+
@Test
31+
fun `Layers with partial match`() {
32+
val hypo = LayerBuilder().loadDummies(100, pos = "NOU").build()
33+
val ref = LayerBuilder().loadDummies(50, pos = "NOU").loadDummies(20, pos = "VRB").loadDummies(30, pos = "ADJ").build()
34+
val comparison = LayerComparison(
35+
hypothesis = hypo,
36+
reference = ref,
37+
filter = NouVrbFilter()
38+
)
39+
assertEquals(20, comparison.matches.size)
40+
}
41+
2942
private fun NouNouFilter(): LayerFilter {
3043
val termFilter = HeadGroupTermFilter(Annotation.POS, "NOU")
3144
return ConfusionLayerFilter(termFilter, termFilter)
3245
}
46+
private fun NouVrbFilter(): LayerFilter =
47+
ConfusionLayerFilter(HeadGroupTermFilter(Annotation.POS, "NOU"), HeadGroupTermFilter(Annotation.POS, "VRB"))
3348

3449
@Test
3550
fun `Matching layers with pos filter, but different pos`() {
@@ -49,10 +64,10 @@ class LayerComparisonTest {
4964
inner class Punctuation {
5065
@Test
5166
fun `Matching layers despite punctuation difference`() {
52-
assertPunctuationDifference("dummy, dummy", 2, 1, 0)
53-
assertPunctuationDifference("dummy dummy, dummy", 3, 1, 0)
54-
assertPunctuationDifference("dummy dummy, dummy dummy", 4, 1, 0)
55-
assertPunctuationDifference("dummy, dummy, dummy,", 3, 3, 0)
67+
assertPunctuationDifference("dummy, dummy", 3, 0, 1)
68+
assertPunctuationDifference("dummy dummy, dummy", 4, 0, 1)
69+
assertPunctuationDifference("dummy dummy, dummy dummy", 5, 0, 1)
70+
assertPunctuationDifference("dummy, dummy, dummy,", 6, 0, 3)
5671
}
5772

5873
// Because we only fix for one punctuation mark at the end of a word. No more.
@@ -63,7 +78,9 @@ class LayerComparisonTest {
6378
val ref = LayerBuilder() // dummy":
6479
.loadDummies(1, "dummy\":").build()
6580
val comparison = LayerComparison(hypothesis = hypo, reference = ref)
66-
assertEquals(0, comparison.matches.size)
81+
assertEquals(3, comparison.matches.size)
82+
assertEquals(2, comparison.matches.filter { it.ref == Term.EMPTY }.size)
83+
assertEquals(0, comparison.matches.filter { it.hyp == Term.EMPTY }.size)
6784
}
6885

6986
/**
@@ -96,6 +113,8 @@ class LayerComparisonTest {
96113
// Compare
97114
val comparison = LayerComparison(hypothesis = hypo, reference = ref)
98115
assertEquals(numMatches, comparison.matches.size)
116+
assertEquals(numHypoMissing, comparison.matches.filter { it.hyp == Term.EMPTY }.size)
117+
assertEquals(numRefMissing, comparison.matches.filter { it.ref == Term.EMPTY }.size)
99118
}
100119
}
101120

@@ -113,6 +132,10 @@ class LayerComparisonTest {
113132
val ref = LayerBuilder().loadDummies(numRefTerms).build()
114133
val comparison = LayerComparison(hypothesis = hypo, reference = ref)
115134
assertEquals(numMatches, comparison.matches.size)
135+
val hypoNoMatch = comparison.matches.filter { it.hyp == Term.EMPTY }
136+
val refNoMatch = comparison.matches.filter { it.ref == Term.EMPTY }
137+
assertEquals(numHypoMissing, hypoNoMatch.size)
138+
assertEquals(numRefMissing, refNoMatch.size)
116139
}
117140

118141
@Test
@@ -122,15 +145,15 @@ class LayerComparisonTest {
122145

123146
@Test
124147
fun `A layer is 1 term larger than the other`() {
125-
assertLayers(numHypoTerms = 2, numRefTerms = 3, numMatches = 2, numHypoMissing = 0, numRefMissing = 1)
126-
assertLayers(numHypoTerms = 100, numRefTerms = 101, numMatches = 100, numHypoMissing = 0, numRefMissing = 1)
127-
assertLayers(numHypoTerms = 101, numRefTerms = 100, numMatches = 100, numHypoMissing = 1, numRefMissing = 0)
148+
assertLayers(numHypoTerms = 2, numRefTerms = 3, numMatches = 3, numHypoMissing = 1, numRefMissing = 0)
149+
assertLayers(numHypoTerms = 100, numRefTerms = 101, numMatches = 101, numHypoMissing = 1, numRefMissing = 0)
150+
assertLayers(numHypoTerms = 101, numRefTerms = 100, numMatches = 101, numHypoMissing = 0, numRefMissing = 1)
128151
}
129152

130153
@Test
131154
fun `A layer is empty`() {
132-
assertLayers(numHypoTerms = 100, numRefTerms = 0, numMatches = 0, numHypoMissing = 100, numRefMissing = 0)
133-
assertLayers(numHypoTerms = 0, numRefTerms = 100, numMatches = 0, numHypoMissing = 0, numRefMissing = 100)
155+
assertLayers(numHypoTerms = 100, numRefTerms = 0, numMatches = 100, numHypoMissing = 0, numRefMissing = 100)
156+
assertLayers(numHypoTerms = 0, numRefTerms = 100, numMatches = 100, numHypoMissing = 100, numRefMissing = 0)
134157
}
135158
}
136159
}

server/src/test/kotlin/org/ivdnt/galahad/evaluation/TermComparisonTest.kt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@ class TermComparisonTest {
1616
hypoLemma: String? = "school", refLemma: String? = "school",
1717
hypoPos: String? = "NOU", refPos: String? = "NOU",
1818
) {
19-
val hypoTerm = Term("", 0, mapOf(Annotation.LEMMA to hypoLemma, Annotation.POS to hypoPos))
20-
val refTerm = Term("", 0, mapOf(Annotation.LEMMA to refLemma, Annotation.POS to refPos))
19+
val hypoTerm = Term("", 0, mapOf(Annotation.TOKEN to "dummy", Annotation.LEMMA to hypoLemma, Annotation.POS to hypoPos))
20+
val refTerm = Term("", 0, mapOf(Annotation.TOKEN to "dummy", Annotation.LEMMA to refLemma, Annotation.POS to refPos))
2121
TermComparison(hypoTerm, refTerm).apply {
2222
assertEquals(lemmaEqual, equal(Annotation.LEMMA))
2323
assertEquals(posEqual, equal(Annotation.POS))

0 commit comments

Comments
 (0)