Skip to content

Commit c358a35

Browse files
Vincent Prins
authored and committed
Start work on synchronous job tagging
1 parent 8fcf5f5 commit c358a35

18 files changed

Lines changed: 206 additions & 464 deletions

File tree

server/src/main/kotlin/org/ivdnt/galahad/corpora/CorpusMetadata.kt

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,15 +47,12 @@ class CorpusMetadata(
4747
) {
4848
companion object {
4949
fun create(corpus: Corpus): CorpusMetadata {
50-
val allJobs = corpus.jobs.readAll()
51-
val allDocs = corpus.documents.readAll()
5250
val meta = CorpusMetadata(
5351
// Immutable/calculated fields
5452
// sourceAnnotationTypes = uniqueAnnotations,
5553
uuid = UUID.fromString(corpus.name),
56-
activeJobs = allJobs.count { it.isActive },
57-
numResults = allJobs.count { it.hasResult },
58-
numDocs = allDocs.size,
54+
numResults = corpus.jobs.readAll().count { it.hasResult },
55+
numDocs = corpus.documents.readAll().size,
5956
sizeInBytes = corpus.sizeInBytes,
6057
lastModified = System.currentTimeMillis(),
6158
)

server/src/main/kotlin/org/ivdnt/galahad/documents/Document.kt

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -23,25 +23,11 @@ class Document(
2323
dir: File,
2424
) : GalahadFolder(dir), Logging {
2525
// Files in the document folder.
26-
val plaintextFile: File = dir.resolve(PLAINTEXT_FILE)
2726
private val metadataFile = dir.resolve(METADATA_FILE)
2827
val uploadedFile: File = dir.resolve("uploaded").resolve(name)
2928

3029
// Values in those files.
3130

32-
/** The plaintext content of the document. */
33-
val plaintext: String by lazy {
34-
try {
35-
plaintextFile.readText()
36-
} catch (e: Exception) {
37-
logger.warn("Error reading plaintext file, creating new plaintext", e)
38-
val internalFile = InternalFile.create(uploadedFile)
39-
val text = internalFile.plaintext
40-
plaintextFile.writeText(text)
41-
text
42-
}
43-
}
44-
4531
/**
4632
* The UUID is only used as a metadata pid when converting a layer to TEI (for now).
4733
* When merging TEI, it is only used if the document itself defines no pid.
@@ -62,7 +48,6 @@ class Document(
6248

6349
internal companion object {
6450
private const val METADATA_FILE = "metadata.json"
65-
private const val PLAINTEXT_FILE = "plaintext.txt"
6651

6752
/**
6853
* Create a new document folder from an uploaded file and fill it with the necessary data.
@@ -81,10 +66,6 @@ class Document(
8166
val sourceLayer = internalFile.layer
8267
// Set sourceLayer as job. Note that if we threw, we don't unnecessarily create a job folder, keeping the disk clean.
8368
corpus.jobs.createOrThrow(SOURCE_LAYER_NAME).setLayer(doc.name, sourceLayer)
84-
// plaintext
85-
ThreadPoolUtil.pool.execute {
86-
doc.plaintextFile.writeText(internalFile.plaintext)
87-
}
8869
// metadata; needs to be serialized as well
8970
DiskValue<DocumentMetadata>(doc.metadataFile).write(DocumentMetadata.create(internalFile))
9071

server/src/main/kotlin/org/ivdnt/galahad/documents/DocumentMetadata.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ data class DocumentMetadata(
2929
private const val PREVIEW_LENGTH: Int = 100
3030

3131
fun create(file: InternalFile): DocumentMetadata {
32-
val text = file.plaintext
32+
val text = file.layer.toString()
3333
return DocumentMetadata(
3434
name = file.file.name,
3535
format = file.format,

server/src/main/kotlin/org/ivdnt/galahad/evaluation/confusion/CorpusConfusion.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ class CorpusConfusion(
5050
)
5151
}
5252
}
53-
53+
// TODO: is Sequence.size the best way? There is also [corpus.documents.size]
5454
for (i in 0..<allDocs.size) add(completionService.take().get())
5555
}
5656

server/src/main/kotlin/org/ivdnt/galahad/files/GalahadFolderManager.kt

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,15 +30,16 @@ abstract class GalahadFolderManager<ReadType : GalahadFolder, CreateType : Any>(
3030

3131
protected abstract fun throwNotFound(key: String): Nothing
3232

33-
abstract fun createOrThrow(key: CreateType): ReadType
33+
open fun createOrThrow(key: CreateType): ReadType = ctor(key.toString())
3434

35-
open fun readAll(): Set<ReadType> = dir.list()?.map { readOrThrow(it) }?.toSet() ?: setOf()
35+
open fun readAll(): List<ReadType> = dir.list()?.map { readOrThrow(it) } ?: emptyList()
36+
open fun readAllSequence(): Sequence<ReadType> = dir.list()?.asSequence()?.map { readOrThrow(it) } ?: emptySequence()
3637

3738
open fun readOrNull(key: String): ReadType? = if (dir.resolve(key).exists()) ctor(key) else null
3839

3940
fun readOrThrow(key: String): ReadType = readOrNull(key) ?: throwNotFound(key)
4041

41-
fun deleteOrThrow(key: String) {
42+
open fun deleteOrThrow(key: String) {
4243
readOrThrow(key) // does it exist?
4344
if (!dir.resolve(key).deleteRecursively()) {
4445
logger.warn("Partial deletion of $key")

server/src/main/kotlin/org/ivdnt/galahad/formats/InternalFile.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ import java.io.File
1818
abstract class InternalFile protected constructor() {
1919
abstract val file: File
2020
abstract val format: DocumentFormat
21-
val plaintext: String by lazy { reader.layer.toString() }
2221
val layer: Layer by lazy { reader.layer }
2322
protected abstract val reader: AnnotationReader
2423

0 commit comments

Comments
 (0)