Skip to content

Commit 2b1a035

Browse files
committed
Cacheable Evaluations starting with Distribution
1 parent 8b62ec4 commit 2b1a035

8 files changed

Lines changed: 127 additions & 35 deletions

File tree

client/src/views/annotate/subviews/evaluate/subviews/EntitiesView.vue

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,41 +13,40 @@
1313
<template #table-empty-instruction>
1414
No entities found in this document.
1515
</template>
16-
17-
<template #cell-second="data: TableData<Term>">
18-
{{ displayEntity(data.value) }}
19-
</template>
2016
</GTable>
2117
</GCard>
2218
</template>
2319

2420
<script setup lang="ts">
2521
import stores from '@/stores'
22+
import * as API from '@/api/evaluation'
2623
import type { Entity, Term } from '@/types/evaluation'
2724
import type { Field, TableData } from '@/types/ui/table'
28-
import * as API from '@/api/evaluation'
2925
26+
// --- stores ---
3027
const jobSelection = stores.useJobSelection()
31-
const corporaStore = stores.useCorpora()
28+
const corpora = stores.useCorpora()
3229
30+
// --- data ---
3331
const selectedDoc = ref<string>()
34-
32+
const loading = ref<boolean>(false)
3533
const items = ref<Entity[]>()
34+
const columns = ref<Field[]>([
35+
{ key: "first", label: "label", sortOn: (i) => i.first },
36+
{ key: "second", label: "entity", sortOn: (i) => i.second },
37+
{ key: "third", label: "count", sortOn: (i) => i.third }
38+
])
39+
40+
// --- computed ---
3641
const title = computed<string>(() => {
3742
return `Entities in ${selectedDoc.value}`
3843
})
39-
const columns = ref<Field[]>([
40-
{ key: "first", label: "NER" },
41-
{ key: "second" },
42-
{ key: "third", label: "Count" }
43-
])
4444
45-
const loading = ref<boolean>(false)
4645
4746
watch(() => selectedDoc.value, () => {
4847
loading.value = true
4948
API.getEntities(
50-
corporaStore.activeUUID,
49+
corpora.activeUUID,
5150
jobSelection.hypothesisJobId,
5251
selectedDoc.value
5352
).then(res => {
@@ -56,8 +55,4 @@ watch(() => selectedDoc.value, () => {
5655
loading.value = false
5756
})
5857
})
59-
60-
function displayEntity(terms: Term[]): string {
61-
return terms.map(term => term.annotations["token"]).join(' ')
62-
}
63-
</script>
58+
</script>

server/src/main/kotlin/org/ivdnt/galahad/corpora/Corpus.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.ivdnt.galahad.corpora
22

33
import org.ivdnt.galahad.documents.Documents
4+
import org.ivdnt.galahad.evaluation.CorpusEvaluations
45
import org.ivdnt.galahad.files.DiskValue
56
import org.ivdnt.galahad.files.GalahadFolder
67
import org.ivdnt.galahad.files.ValidatedDiskValue
@@ -27,6 +28,7 @@ class Corpus(
2728

2829
val documents: Documents = Documents(dir.resolve(DOCS_FOLDER), this)
2930
val jobs: Jobs = Jobs(dir.resolve(JOBS_FOLDER), this)
31+
val evaluation: CorpusEvaluations = CorpusEvaluations(dir.resolve(EVALUATION_FOLDER), this)
3032

3133
// Files in the corpus folder.
3234
private val mutableMetadataFile = dir.resolve(MUTABLE_METADATA_FILE)
@@ -57,6 +59,7 @@ class Corpus(
5759

5860
private const val JOBS_FOLDER = "jobs"
5961
private const val DOCS_FOLDER = "documents"
62+
private const val EVALUATION_FOLDER = "evaluation"
6063

6164
fun create(dir: File, metadata: MutableCorpusMetadata): Corpus {
6265
// dummy corpus to access the paths
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package org.ivdnt.galahad.evaluation
2+
3+
import org.ivdnt.galahad.corpora.Corpus
4+
import org.ivdnt.galahad.exceptions.JobNotFoundException
5+
import org.ivdnt.galahad.files.GalahadFolderManager
6+
import java.io.File
7+
8+
/**
 * Folder manager that hands out one [JobEvaluations] per [JobPair] for a corpus.
 *
 * Each [JobEvaluations] is rooted at the sub-path of this manager's directory
 * named by the pair's string form.
 */
class CorpusEvaluations(
    dir: File,
    private val corpus: Corpus,
) : GalahadFolderManager<JobEvaluations, JobPair>(dir) {

    /** Builds the [JobEvaluations] located at `dir/<key.toString()>` for this corpus. */
    fun ctor(key: JobPair): JobEvaluations {
        val jobDir = dir.resolve(key.toString())
        return JobEvaluations(jobDir, corpus, key)
    }

    /** String-keyed variant: parses [key] with [JobPair.fromString] and delegates to the typed overload. */
    override fun ctor(key: String): JobEvaluations {
        val pair = JobPair.fromString(key)
        return ctor(pair)
    }

    /** Reports an unknown [key] by throwing [JobNotFoundException]. */
    override fun throwNotFound(key: String): Nothing {
        throw JobNotFoundException(key)
    }
}
17+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package org.ivdnt.galahad.evaluation
2+
3+
import org.ivdnt.galahad.files.GalahadFolder
4+
import java.io.File
5+
6+
/**
 * Folder type produced by [JobEvaluations]; it currently adds nothing on top of [GalahadFolder].
 */
class DocumentEvaluations(dir: File) : GalahadFolder(dir)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package org.ivdnt.galahad.evaluation
2+
3+
import org.ivdnt.galahad.annotations.Annotation
4+
import org.ivdnt.galahad.corpora.Corpus
5+
import org.ivdnt.galahad.evaluation.confusion.CONFUSION_TYPES
6+
import org.ivdnt.galahad.evaluation.distribution.CorpusDistribution
7+
import org.ivdnt.galahad.exceptions.DocumentNotFoundException
8+
import org.ivdnt.galahad.files.GalahadFolderManager
9+
import org.ivdnt.galahad.files.ValidatedDiskValue
10+
import org.ivdnt.galahad.taggers.Tagger
11+
import org.ivdnt.galahad.web.controller.DISTRIBUTION_MAX_SIZE
12+
import java.io.File
13+
14+
/**
 * Evaluation folder for one [JobPair] within a corpus.
 *
 * Exposes the annotation distribution of the reference job through a disk-backed
 * cache stored in `distribution.json` inside this folder.
 */
class JobEvaluations(
    dir: File,
    private val corpus: Corpus,
    private val jobs: JobPair,
) : GalahadFolderManager<DocumentEvaluations, String>(dir) {

    private val distributionFile = dir.resolve(DISTRIBUTION_FILE)

    private val distributionCache =
        object : ValidatedDiskValue<Map<Annotation, CorpusDistribution>>(distributionFile) {
            // Valid while the given timestamp is not older than the corpus' last modification.
            // NOTE(review): presumably `lastModified` is the cache file's timestamp - confirm in ValidatedDiskValue.
            override fun isValid(lastModified: Long) = lastModified >= corpus.lastModified

            // Computes the distributions for the reference job of this pair.
            override fun set(): Map<Annotation, CorpusDistribution> {
                val available = Tagger.readOrThrow(jobs.reference, corpus).annotations
                // Without lemma annotations no distribution is produced at all.
                if (Annotation.LEMMA !in available) return emptyMap()
                return CONFUSION_TYPES
                    .filter { type -> type in available }
                    .associateWith { type ->
                        CorpusDistribution(corpus, jobs.reference, type)
                            .trim(DISTRIBUTION_MAX_SIZE) as CorpusDistribution
                    }
            }
        }

    /** Distribution per annotation type, obtained via the cache's `readOrCreate()`. */
    val distribution: Map<Annotation, CorpusDistribution>
        get() = distributionCache.readOrCreate()

    /** Not implemented yet: any call throws [NotImplementedError]. */
    override fun ctor(key: String): DocumentEvaluations {
        TODO("Not yet implemented")
    }

    /** Reports an unknown [key] by throwing [DocumentNotFoundException]. */
    override fun throwNotFound(key: String): Nothing = throw DocumentNotFoundException(key)

    companion object {
        private const val DISTRIBUTION_FILE = "distribution.json"
    }
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package org.ivdnt.galahad.evaluation
2+
3+
/**
 * Pair of job names an evaluation refers to.
 *
 * Declared as a `data class` so that two pairs naming the same jobs are equal and
 * share a hash code, which is what a key type needs.
 *
 * @property reference name of the reference job.
 * @property hypothesis name of the hypothesis job; defaults to [reference] when omitted.
 */
data class JobPair(
    val reference: String,
    val hypothesis: String? = reference,
) {
    /**
     * Renders the pair as `reference/hypothesis`, the form [fromString] parses.
     * A `null` hypothesis is rendered as the text `null`.
     */
    override fun toString(): String = "$reference/$hypothesis"

    companion object {
        /**
         * Parses a string of the form `reference/hypothesis`.
         *
         * Only the first two `/`-separated segments are used; any further segments are ignored.
         *
         * @throws IllegalArgumentException when [pair] contains no `/` separator.
         */
        fun fromString(pair: String): JobPair {
            val parts = pair.split('/')
            // Fail with a descriptive message instead of an IndexOutOfBoundsException on parts[1].
            require(parts.size >= 2) {
                "Expected a job pair of the form 'reference/hypothesis' but got '$pair'"
            }
            return JobPair(parts[0], parts[1])
        }
    }
}

server/src/main/kotlin/org/ivdnt/galahad/web/controller/EvaluationController.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,5 +198,5 @@ class EvaluationController(
198198
@PathVariable @Parameter(description = "Corpus UUID") corpus: UUID,
199199
@PathVariable @Parameter(description = "Document name") document: String,
200200
@PathVariable @Parameter(description = "Tagger name or sourceLayer") job: String,
201-
): List<Triple<String, List<Term>, Int>> = evaluationService.getEntities(corpus, document, job)
201+
): List<Triple<String, String, Int>> = evaluationService.getEntities(corpus, document, job)
202202
}

server/src/main/kotlin/org/ivdnt/galahad/web/service/EvaluationService.kt

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import org.ivdnt.galahad.annotations.SOURCE_LAYER_NAME
77
import org.ivdnt.galahad.annotations.Term
88
import org.ivdnt.galahad.app.User
99
import org.ivdnt.galahad.corpora.CorpusMetadata
10+
import org.ivdnt.galahad.evaluation.CorpusEvaluations
11+
import org.ivdnt.galahad.evaluation.JobPair
1012
import org.ivdnt.galahad.evaluation.comparison.*
1113
import org.ivdnt.galahad.evaluation.confusion.CONFUSION_TYPES
1214
import org.ivdnt.galahad.evaluation.confusion.CorpusConfusion
@@ -42,19 +44,22 @@ class EvaluationService(val corpora: CorporaService) {
4244
corpus: UUID,
4345
job: String,
4446
): Map<Annotation, CorpusDistribution> {
45-
val allAnnots = annotationTypesForTagger(job, corpus)
46-
if (Annotation.LEMMA !in allAnnots) {
47-
return emptyMap()
48-
}
49-
val annotationTypes = CONFUSION_TYPES.filter { it in allAnnots }
50-
val distributions = annotationTypes.associateWith {
51-
CorpusDistribution(
52-
corpora.readAsReaderOrThrow(corpus, user),
53-
job,
54-
it
55-
).trim(DISTRIBUTION_MAX_SIZE) as CorpusDistribution
56-
}
57-
return distributions
47+
val corpus = corpora.readAsReaderOrThrow(corpus, user)
48+
val jobEval = corpus.evaluation.createOrThrow(JobPair(job))
49+
return jobEval.distribution
50+
// val allAnnots = annotationTypesForTagger(job, corpus)
51+
// if (Annotation.LEMMA !in allAnnots) {
52+
// return emptyMap()
53+
// }
54+
// val annotationTypes = CONFUSION_TYPES.filter { it in allAnnots }
55+
// val distributions = annotationTypes.associateWith {
56+
// CorpusDistribution(
57+
// corpora.readAsReaderOrThrow(corpus, user),
58+
// job,
59+
// it
60+
// ).trim(DISTRIBUTION_MAX_SIZE) as CorpusDistribution
61+
// }
62+
// return distributions
5863
}
5964

6065
fun getConfusion(
@@ -278,7 +283,7 @@ class EvaluationService(val corpora: CorporaService) {
278283
return cm
279284
}
280285

281-
fun getEntities(corpus: UUID, document: String, job: String): List<Triple<String, List<Term>, Int>> {
286+
fun getEntities(corpus: UUID, document: String, job: String): List<Triple<String, String, Int>> {
282287
val layer = corpora.readAsReaderOrThrow(corpus, user).jobs.readOrThrow(job).getLayer(document)
283288
return layer.documents.flatMap {
284289
it.paragraphs.flatMap {
@@ -287,7 +292,7 @@ class EvaluationService(val corpora: CorporaService) {
287292
?.map { span -> span.value to span.indices.map { sent.terms[it] } } ?: emptyList()
288293
}
289294
}
290-
}.groupBy { it }.mapValues { it.value.size }.map { (key, value) ->
295+
}.groupBy { Pair(it.first, it.second.joinToString("") { it.token + it.space }) }.mapValues { it.value.size }.map { (key, value) ->
291296
Triple(key.first, key.second, value)
292297
}.sortedByDescending { it.third }
293298
}

0 commit comments

Comments
 (0)