Skip to content

Commit 784c385

Browse files
committed
NAF support for reading HEAD & DEPREL
1 parent 8605798 commit 784c385

6 files changed

Lines changed: 610 additions & 122 deletions

File tree

server/src/main/kotlin/org/ivdnt/galahad/formats/naf/NafReader.kt

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@ class NafReader(file: File) : AnnotationReader() {
3030
pos = it.getAttribute("pos").ifEmpty { null },
3131
targets = it.childElements.first().childElements.map { it.getAttribute("id") }.toList()
3232
)
33-
}
33+
}.toList()
3434
private val nafDeps = root.childOrNull("deps")?.childElements?.map {
3535
NafDep(
3636
from = it.getAttribute("from"), to = it.getAttribute("to"), rfunc = it.getAttribute("rfunc")
3737
)
38-
}
38+
}?.toList()
3939
private val nafEntities = root.childOrNull("entities")?.childElements?.map {
4040
NafEntity(
4141
type = it.getAttribute("type").ifEmpty { null },
42-
references = it.childOrNull("references")?.childElements?.map { it.childElements.map { it.getAttribute("id") }.toList() }?.toList()!!
42+
references = it.childOrNull("references")?.childElements?.map {
43+
it.childElements.map { it.getAttribute("id") }.toList()
44+
}?.toList()!!
4345
)
4446
}?.toList()
4547
private val id: String = root.childOrNull("nafHeader")?.childOrNull("public")?.getAttribute("publicId").orEmpty()
@@ -55,7 +57,11 @@ class NafReader(file: File) : AnnotationReader() {
5557
// retrieve term, entity, and dependencies
5658
val term = nafTerms.find { wordform.id in it.targets }!!
5759
val entity = nafEntities?.find { it.references.any { term.id in it } }
58-
val dep = nafDeps?.first { it.to == term.id }
60+
val dep: NafDep? = if (nafDeps.isNullOrEmpty()) {
61+
null
62+
} else {
63+
nafDeps.firstOrNull { it.to == term.id } ?: NafDep("0", "0", "root")
64+
}
5965

6066
// annotations
6167
val annotations = mutableMapOf<Annotation, String>()
@@ -64,9 +70,14 @@ class NafReader(file: File) : AnnotationReader() {
6470
term.pos?.let { annotations[Annotation.POS] = it }
6571
entity?.type?.let { annotations[Annotation.NER] = it }
6672
dep?.rfunc?.let { annotations[Annotation.DEPREL] = it }
67-
val headTerm = nafTerms.find { it.id == dep?.from }
68-
val headWordform = nafWordforms.find { it.id == headTerm?.targets?.first() }
69-
headWordform?.id?.let { annotations[Annotation.HEAD] = it }
73+
74+
if (dep?.from == "0") {
75+
annotations[Annotation.HEAD] = "0"
76+
} else {
77+
val headTerm = nafTerms.find { it.id == dep?.from }
78+
val headWordform = nafWordforms.find { it.id == headTerm?.targets?.first() }
79+
headWordform?.let { annotations[Annotation.HEAD] = (sent.indexOf(headWordform) + 1).toString() }
80+
}
7081

7182
// space after
7283
val nextWordform = sent.getOrNull(i + 1)

server/src/test/resources/formats/naf/converter/input.conllu

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,79 +2,79 @@
22
# newpar id = d1.p1
33
# sent_id = d1.p1.s1
44
# text = Fraaie historie ende alwaer.
5-
1 Fraaie fraai _ AA(degree=pos,position=prenom) _ _ _ _ _
6-
2 historie historie _ NOU-C(number=sg) _ _ _ _ _
7-
3 ende en _ CONJ(type=coor) _ _ _ _ _
8-
4 alwaer alwaar _ ADV(type=reg) _ _ _ _ SpaceAfter=No
9-
5 . _ _ PC _ _ _ _ _
5+
1 Fraaie fraai _ AA(degree=pos,position=prenom) _ 2 amod _ _
6+
2 historie historie _ NOU-C(number=sg) _ 0 root _ _
7+
3 ende en _ CONJ(type=coor) _ 4 cc _ _
8+
4 alwaer alwaar _ ADV(type=reg) _ 2 conj _ SpaceAfter=No
9+
5 . _ _ PC _ 2 punct _ _
1010

1111
# sent_id = d1.p1.s2
1212
# text = Magh 'k u vertellen, hoirt naer.
13-
1 Magh mogen _ VRB(finiteness=fin,tense=pres) _ _ _ _ _
14-
2 'k ik _ PD(type=pers,position=free) _ _ _ _ _
15-
3 u u _ PD(type=pers,position=free) _ _ _ _ _
16-
4 vertellen vertellen _ VRB(finiteness=inf) _ _ _ _ SpaceAfter=No
17-
5 , _ _ PC _ _ _ _ _
18-
6 hoirt naarhoren _ VRB(finiteness=fin,tense=pres) _ _ _ _ _
19-
7 naer naarhoren _ VRB(finiteness=fin,tense=pres) _ _ _ _ SpaceAfter=No
20-
8 . _ _ PC _ _ _ _ _
13+
1 Magh mogen _ VRB(finiteness=fin,tense=pres) _ 4 aux _ _
14+
2 'k ik _ PD(type=pers,position=free) _ 4 nsubj _ _
15+
3 u u _ PD(type=pers,position=free) _ 4 iobj _ _
16+
4 vertellen vertellen _ VRB(finiteness=inf) _ 0 root _ SpaceAfter=No
17+
5 , _ _ PC _ 4 punct _ _
18+
6 hoirt naarhoren _ VRB(finiteness=fin,tense=pres) _ 4 conj _ _
19+
7 naer naarhoren _ VRB(finiteness=fin,tense=pres) _ 6 advmod _ SpaceAfter=No
20+
8 . _ _ PC _ 4 punct _ _
2121

2222
# newpar id = d1.p2
2323
# sent_id = d1.p2.s1
2424
# text = 't Was op enen avondstonde.
25-
1 't het _ PD(type=pers,position=free) _ _ _ _ _
26-
2 Was zijn _ VRB(finiteness=fin,tense=past) _ _ _ _ _
27-
3 op op _ ADP(type=pre) _ _ _ _ _
28-
4 enen een _ PD(type=indef,subtype=art,position=prenom) _ _ _ _ _
29-
5 avondstonde avondstond _ NOU-C(number=sg) _ _ _ _ SpaceAfter=No
30-
6 . _ _ PC _ _ _ _ _
25+
1 't het _ PD(type=pers,position=free) _ 2 expl _ _
26+
2 Was zijn _ VRB(finiteness=fin,tense=past) _ 5 cop _ _
27+
3 op op _ ADP(type=pre) _ 5 case _ _
28+
4 enen een _ PD(type=indef,subtype=art,position=prenom) _ 5 det _ _
29+
5 avondstonde avondstond _ NOU-C(number=sg) _ 0 root _ SpaceAfter=No
30+
6 . _ _ PC _ 5 punct _ _
3131

3232
# sent_id = d1.p2.s2
3333
# text = Dat koning Carel slaepen beghonde.
34-
1 Dat dat _ CONJ(type=sub) _ _ _ _ _
35-
2 koning koning _ NOU-C(number=sg) _ _ _ _ NamedEntity=B-PER
36-
3 Carel Carel _ NOU-P _ _ _ _ NamedEntity=I-PER
37-
4 slaepen slaap _ NOU-C(number=pl) _ _ _ _ _
38-
5 beghonde beginnen _ VRB(finiteness=fin,tense=past) _ _ _ _ SpaceAfter=No
39-
6 . _ _ PC _ _ _ _ _
34+
1 Dat dat _ CONJ(type=sub) _ 4 expl _ _
35+
2 koning koning _ NOU-C(number=sg) _ 4 nsubj _ NamedEntity=B-PER
36+
3 Carel Carel _ NOU-P _ 2 flat _ NamedEntity=I-PER
37+
4 slaepen slaap _ NOU-C(number=pl) _ 0 root _ _
38+
5 beghonde beginnen _ VRB(finiteness=fin,tense=past) _ 4 aux _ SpaceAfter=No
39+
6 . _ _ PC _ 4 punct _ _
4040

4141
# newpar id = d1.p3
4242
# sent_id = d1.p3.s1
4343
# text = "Martijn, slaepstu? slaept dijn sin?"
44-
1 " _ _ PC _ _ _ _ SpaceAfter=No
45-
2 Martijn Martijn _ NOU-P _ _ _ _ SpaceAfter=No|NamedEntity=B-PER
46-
3 , _ _ PC _ _ _ _ _
47-
4 slaepstu slapen+du _ VRB(finiteness=fin,tense=pres)+PD(type=pers,position=free) _ _ _ _ SpaceAfter=No
48-
5 ? _ _ PC _ _ _ _ _
49-
6 slaept slapen _ VRB(finiteness=fin,tense=pres) _ _ _ _ _
50-
7 dijn dijn _ PD(type=poss,position=prenom) _ _ _ _ _
51-
8 sin zin _ NOU-C(number=sg) _ _ _ _ SpaceAfter=No
52-
9 ? _ _ PC _ _ _ _ SpaceAfter=No
53-
10 " _ _ PC _ _ _ _ _
44+
1 " _ _ PC _ 4 punct _ SpaceAfter=No
45+
2 Martijn Martijn _ NOU-P _ 4 nsubj _ SpaceAfter=No|NamedEntity=B-PER
46+
3 , _ _ PC _ 4 punct _ _
47+
4 slaepstu slapen+du _ VRB(finiteness=fin,tense=pres)+PD(type=pers,position=free) _ 0 root _ SpaceAfter=No
48+
5 ? _ _ PC _ 4 punct _ _
49+
6 slaept slapen _ VRB(finiteness=fin,tense=pres) _ 4 cc _ _
50+
7 dijn dijn _ PD(type=poss,position=prenom) _ 8 det _ _
51+
8 sin zin _ NOU-C(number=sg) _ 6 nsubj _ SpaceAfter=No
52+
9 ? _ _ PC _ 4 punct _ SpaceAfter=No
53+
10 " _ _ PC _ 4 punct _ _
5454

5555
# sent_id = d1.p3.s2
5656
# text = Sprec! hebstu gheen spreken in?
57-
1 Sprec spreken _ VRB(finiteness=fin,tense=pres) _ _ _ _ SpaceAfter=No
58-
2 ! _ _ PC _ _ _ _ _
59-
3 hebstu inhebben+du _ VRB(finiteness=fin,tense=pres)+PD(type=pers,position=free) _ _ _ _ _
60-
4 gheen geen _ PD(type=indef,subtype=oth,position=prenom) _ _ _ _ _
61-
5 spreken spreken _ NOU-C(number=sg) _ _ _ _ _
62-
6 in inhebben+du _ VRB(finiteness=fin,tense=pres)+PD(type=pers,position=free) _ _ _ _ SpaceAfter=No
63-
7 ? _ _ PC _ _ _ _ _
57+
1 Sprec spreken _ VRB(finiteness=fin,tense=pres) _ 0 root _ SpaceAfter=No
58+
2 ! _ _ PC _ 1 punct _ _
59+
3 hebstu inhebben+du _ VRB(finiteness=fin,tense=pres)+PD(type=pers,position=free) _ 5 aux _ _
60+
4 gheen geen _ PD(type=indef,subtype=oth,position=prenom) _ 4 det _ _
61+
5 spreken spreken _ NOU-C(number=sg) _ 3 obj _ _
62+
6 in inhebben+du _ VRB(finiteness=fin,tense=pres)+PD(type=pers,position=free) _ 3 compound:prt _ SpaceAfter=No
63+
7 ? _ _ PC _ 1 punct _ _
6464

6565
# newpar id = d1.p4
6666
# sent_id = d1.p4.s1
6767
# text = Du dinkes mi verdoren.
68-
1 Du du _ PD(type=pers,position=free) _ _ _ _ _
69-
2 dinkes dunken _ VRB(finiteness=fin,tense=pres) _ _ _ _ _
70-
3 mi ik _ PD(type=pers,position=free) _ _ _ _ _
71-
4 verdoren verdoren _ VRB(finiteness=inf) _ _ _ _ SpaceAfter=No
72-
5 . _ _ PC _ _ _ _ _
68+
1 Du du _ PD(type=pers,position=free) _ 4 nsubj _ _
69+
2 dinkes dunken _ VRB(finiteness=fin,tense=pres) _ 4 aux _ _
70+
3 mi ik _ PD(type=pers,position=free) _ 4 obj _ _
71+
4 verdoren verdoren _ VRB(finiteness=inf) _ 0 root _ SpaceAfter=No
72+
5 . _ _ PC _ 5 punct _ _
7373

7474
# sent_id = d1.p4.s2
7575
# text = Dune achtes meer no min
76-
1 Dune du+ne _ PD(type=pers,position=free)+ADV(type=reg) _ _ _ _ _
77-
2 achtes achten _ VRB(finiteness=fin,tense=pres) _ _ _ _ _
78-
3 meer meer _ PD(type=indef,position=free) _ _ _ _ _
79-
4 no noch _ CONJ(type=coor) _ _ _ _ _
80-
5 min min _ PD(type=indef,position=free) _ _ _ _ _
76+
1 Dune du+ne _ PD(type=pers,position=free)+ADV(type=reg) _ 2 subj _ _
77+
2 achtes achten _ VRB(finiteness=fin,tense=pres) _ 0 root _ _
78+
3 meer meer _ PD(type=indef,position=free) _ 2 advmod _ _
79+
4 no noch _ CONJ(type=coor) _ 3 conj _ _
80+
5 min min _ PD(type=indef,position=free) _ 3 cc _ _

0 commit comments

Comments
 (0)