Skip to content

Commit d8d527e

Browse files
committed
Cacheable Evaluation DocumentEntities
1 parent 2b1a035 commit d8d527e

11 files changed

Lines changed: 90 additions & 42 deletions

File tree

client/src/components/tables/GTable.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ const {
109109
columns,
110110
selectable,
111111
sortedByColumn,
112-
sortDesc,
112+
sortDesc = true,
113113
compact,
114114
items,
115115
} = defineProps<{

client/src/views/annotate/subviews/evaluate/subviews/EntitiesView.vue

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<p v-else>
1010
Please select a hypothesis job first.
1111
</p>
12-
<GTable compact v-if="items" :title :loading :items :columns>
12+
<GTable v-if="items" :title :loading :items :columns sortedByColumn="count" compact>
1313
<template #table-empty-instruction>
1414
No entities found in this document.
1515
</template>
@@ -32,9 +32,9 @@ const selectedDoc = ref<string>()
3232
const loading = ref<boolean>(false)
3333
const items = ref<Entity[]>()
3434
const columns = ref<Field[]>([
35-
{ key: "first", label: "label", sortOn: (i) => i.first },
36-
{ key: "second", label: "entity", sortOn: (i) => i.second },
37-
{ key: "third", label: "count", sortOn: (i) => i.third }
35+
{ key: "label", sortOn: (i) => i.label },
36+
{ key: "form", sortOn: (i) => i.form },
37+
{ key: "count", sortOn: (i) => i.count }
3838
])
3939
4040
// --- computed ---

server/src/main/kotlin/org/ivdnt/galahad/corpora/Corpus.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.ivdnt.galahad.corpora
22

33
import org.ivdnt.galahad.documents.Documents
4-
import org.ivdnt.galahad.evaluation.CorpusEvaluations
4+
import org.ivdnt.galahad.evaluation.JobsEvaluations
55
import org.ivdnt.galahad.files.DiskValue
66
import org.ivdnt.galahad.files.GalahadFolder
77
import org.ivdnt.galahad.files.ValidatedDiskValue
@@ -28,7 +28,7 @@ class Corpus(
2828

2929
val documents: Documents = Documents(dir.resolve(DOCS_FOLDER), this)
3030
val jobs: Jobs = Jobs(dir.resolve(JOBS_FOLDER), this)
31-
val evaluation: CorpusEvaluations = CorpusEvaluations(dir.resolve(EVALUATION_FOLDER), this)
31+
val evaluation: JobsEvaluations = JobsEvaluations(dir.resolve(EVALUATIONS_FOLDER), this)
3232

3333
// Files in the corpus folder.
3434
private val mutableMetadataFile = dir.resolve(MUTABLE_METADATA_FILE)
@@ -59,7 +59,7 @@ class Corpus(
5959

6060
private const val JOBS_FOLDER = "jobs"
6161
private const val DOCS_FOLDER = "documents"
62-
private const val EVALUATION_FOLDER = "evaluation"
62+
private const val EVALUATIONS_FOLDER = "evaluations"
6363

6464
fun create(dir: File, metadata: MutableCorpusMetadata): Corpus {
6565
// dummy corpus to access the paths
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package org.ivdnt.galahad.evaluation
2+
3+
import org.ivdnt.galahad.corpora.Corpus
4+
import org.ivdnt.galahad.evaluation.entities.DocumentEntities
5+
import org.ivdnt.galahad.files.GalahadFolder
6+
import org.ivdnt.galahad.files.ValidatedDiskValue
7+
import java.io.File
8+
9+
/**
 * Folder holding the cached evaluation results of a single document.
 *
 * Currently the only artefact is the list of named entities, stored in
 * [ENTITIES_FILE] inside this folder and recomputed when the corpus has been
 * modified after the file was written.
 *
 * @param dir folder in which the cached files live.
 * @param corpus corpus the document belongs to; supplies the jobs and the
 *   modification time used for cache validation.
 * @param jobs job pair being evaluated; only `jobs.reference` is read here.
 */
class DocumentEvaluation(
    dir: File,
    private val corpus: Corpus,
    private val jobs: JobPair,
) : GalahadFolder(dir) {
    private val entitiesFile = dir.resolve(ENTITIES_FILE)

    private val entitiesCache = object : ValidatedDiskValue<List<DocumentEntities.Entity>>(entitiesFile) {
        /** The cached file is usable as long as it is not older than the corpus. */
        override fun isValid(lastModified: Long) = lastModified >= corpus.lastModified

        /** Recomputes the entity list from the reference job's layer. */
        override fun set(): List<DocumentEntities.Entity> {
            val referenceJob = corpus.jobs.readOrThrow(jobs.reference)
            // NOTE(review): `name` is presumably this folder's name, i.e. the document name — confirm.
            val layer = referenceJob.getLayer(name)
            return DocumentEntities.fromLayer(layer)
        }
    }

    /** Entities of this document, read from the cache or computed on demand. */
    val entities: List<DocumentEntities.Entity>
        get() = entitiesCache.readOrCreate()

    companion object {
        const val ENTITIES_FILE = "entities.json"
    }
}
Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
package org.ivdnt.galahad.evaluation
22

3-
import org.ivdnt.galahad.files.GalahadFolder
3+
import org.ivdnt.galahad.corpora.Corpus
4+
import org.ivdnt.galahad.exceptions.DocumentNotFoundException
5+
import org.ivdnt.galahad.files.GalahadFolderManager
46
import java.io.File
57

68
class DocumentEvaluations(
7-
dir: File
8-
): GalahadFolder(dir) {
9+
dir: File,
10+
private val corpus: Corpus,
11+
private val jobs: JobPair,
12+
): GalahadFolderManager<DocumentEvaluation, String>(dir) {
13+
override fun ctor(key: String): DocumentEvaluation = DocumentEvaluation(dir.resolve(key), corpus, jobs)
14+
override fun throwNotFound(key: String): Nothing = throw DocumentNotFoundException(key)
915
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/JobEvaluations.kt renamed to server/src/main/kotlin/org/ivdnt/galahad/evaluation/JobEvaluation.kt

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,19 @@ import org.ivdnt.galahad.corpora.Corpus
55
import org.ivdnt.galahad.evaluation.confusion.CONFUSION_TYPES
66
import org.ivdnt.galahad.evaluation.distribution.CorpusDistribution
77
import org.ivdnt.galahad.exceptions.DocumentNotFoundException
8+
import org.ivdnt.galahad.files.GalahadFolder
89
import org.ivdnt.galahad.files.GalahadFolderManager
910
import org.ivdnt.galahad.files.ValidatedDiskValue
1011
import org.ivdnt.galahad.taggers.Tagger
1112
import org.ivdnt.galahad.web.controller.DISTRIBUTION_MAX_SIZE
1213
import java.io.File
1314

14-
class JobEvaluations(
15+
class JobEvaluation(
1516
dir: File,
1617
private val corpus: Corpus,
1718
private val jobs: JobPair,
18-
) : GalahadFolderManager<DocumentEvaluations, String>(dir) {
19-
override fun ctor(key: String): DocumentEvaluations {
20-
TODO("Not yet implemented")
21-
}
22-
23-
override fun throwNotFound(key: String): Nothing = throw DocumentNotFoundException(key)
24-
19+
) : GalahadFolder(dir) {
20+
val documents: DocumentEvaluations = DocumentEvaluations(dir.resolve(DOCUMENTS_FOLDER), corpus, jobs)
2521

2622
val distribution: Map<Annotation, CorpusDistribution> get() = distributionCache.readOrCreate()
2723
private val distributionFile = dir.resolve(DISTRIBUTION_FILE)
@@ -46,6 +42,6 @@ class JobEvaluations(
4642

4743
companion object {
4844
private const val DISTRIBUTION_FILE = "distribution.json"
45+
const val DOCUMENTS_FOLDER = "documents"
4946
}
50-
5147
}

server/src/main/kotlin/org/ivdnt/galahad/evaluation/CorpusEvaluations.kt renamed to server/src/main/kotlin/org/ivdnt/galahad/evaluation/JobsEvaluations.kt

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,12 @@ import org.ivdnt.galahad.exceptions.JobNotFoundException
55
import org.ivdnt.galahad.files.GalahadFolderManager
66
import java.io.File
77

8-
class CorpusEvaluations(
8+
class JobsEvaluations(
99
dir: File,
1010
private val corpus: Corpus,
11-
) : GalahadFolderManager<JobEvaluations, JobPair>(dir) {
12-
fun ctor(key: JobPair): JobEvaluations = JobEvaluations(dir.resolve(key.toString()), corpus, key)
13-
override fun ctor(key: String): JobEvaluations = ctor(JobPair.fromString(key))
11+
) : GalahadFolderManager<JobEvaluation, JobPair>(dir) {
12+
fun ctor(key: JobPair): JobEvaluation = JobEvaluation(dir.resolve(key.toString()), corpus, key)
13+
override fun ctor(key: String): JobEvaluation = ctor(JobPair.fromString(key))
1414
override fun throwNotFound(key: String): Nothing = throw JobNotFoundException(key)
15-
1615
}
1716

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package org.ivdnt.galahad.evaluation.entities
2+
3+
import org.ivdnt.galahad.annotations.Annotation
4+
import org.ivdnt.galahad.annotations.Layer
5+
import kotlin.collections.map
6+
7+
/**
 * Extracts named-entity occurrences from an annotation [Layer] and tallies them.
 */
object DocumentEntities {
    /**
     * Collects every [Annotation.NER] span found in [layer] and counts how often
     * each (label, surface form) combination occurs.
     *
     * The surface form of a span is the concatenation of `token + space` of every
     * term its indices point at within the enclosing sentence. Sentences whose
     * `spans` is null, or that have no NER entry, contribute nothing.
     *
     * @param layer the layer whose documents, paragraphs and sentences are scanned.
     * @return one [Entity] per distinct (label, form) pair with its occurrence count.
     */
    fun fromLayer(layer: Layer): List<Entity> =
        layer.documents.flatMap { document ->
            document.paragraphs.flatMap { paragraph ->
                paragraph.sentences.flatMap { sentence ->
                    sentence.spans?.get(Annotation.NER)?.map { span ->
                        val form = span.indices
                            .map { index -> sentence.terms[index] }
                            .joinToString("") { term -> term.token + term.space }
                        span.value to form
                    } ?: emptyList()
                }
            }
        }
            // Count occurrences directly instead of materialising a list per group.
            .groupingBy { it }
            .eachCount()
            .map { (labelAndForm, count) ->
                Entity(
                    label = labelAndForm.first,
                    form = labelAndForm.second,
                    count = count,
                )
            }

    /**
     * A distinct named entity within a document.
     *
     * @property label the NER label of the span.
     * @property form the surface text of the span.
     * @property count number of times this (label, form) pair occurs.
     */
    data class Entity(
        val label: String,
        val form: String,
        val count: Int,
    )
}

server/src/main/kotlin/org/ivdnt/galahad/files/DiskValue.kt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ open class DiskValue<T>(
1717
inline fun <reified T> readOrNull(): T? {
1818
if (file.length() == 0L) return null
1919

20-
cache.getIfPresent(file.absolutePath)?.let { return it as T }
20+
cache.getIfPresent(file.absolutePath)?.let {
21+
println("Cache hit for ${file.absolutePath}")
22+
return it as T
23+
}
24+
println("Cache miss for ${file.absolutePath}, reading from disk...")
2125

2226
return JsonUtil.mapper.readValue(file.readBytes(), object : TypeReference<T>() {})
2327
.also { ThreadPoolUtil.pool.execute { cache.put(file.absolutePath, it as Any) } }

server/src/main/kotlin/org/ivdnt/galahad/web/controller/EvaluationController.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import org.ivdnt.galahad.app.*
1414
import org.ivdnt.galahad.evaluation.comparison.TermComparison
1515
import org.ivdnt.galahad.evaluation.confusion.Confusion
1616
import org.ivdnt.galahad.evaluation.distribution.CorpusDistribution
17+
import org.ivdnt.galahad.evaluation.entities.DocumentEntities
1718
import org.ivdnt.galahad.evaluation.metrics.CorpusMetrics
1819
import org.ivdnt.galahad.exceptions.ErrorResponse
1920
import org.ivdnt.galahad.web.service.EvaluationService
@@ -198,5 +199,5 @@ class EvaluationController(
198199
@PathVariable @Parameter(description = "Corpus UUID") corpus: UUID,
199200
@PathVariable @Parameter(description = "Document name") document: String,
200201
@PathVariable @Parameter(description = "Tagger name or sourceLayer") job: String,
201-
): List<Triple<String, String, Int>> = evaluationService.getEntities(corpus, document, job)
202+
): List<DocumentEntities.Entity> = evaluationService.getEntities(corpus, document, job)
202203
}

0 commit comments

Comments
 (0)