Skip to content

Commit a403cb0

Browse files
committed
documentation, improve perm only performance
1 parent 877ccce commit a403cb0

1 file changed

Lines changed: 19 additions & 5 deletions

File tree

src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package edu.harvard.iq.dataverse.search;
22

33
import edu.harvard.iq.dataverse.DataFile;
4+
import edu.harvard.iq.dataverse.DataFileServiceBean;
45
import edu.harvard.iq.dataverse.Dataset;
56
import edu.harvard.iq.dataverse.DatasetServiceBean;
67
import edu.harvard.iq.dataverse.DatasetVersion;
@@ -53,6 +54,8 @@ public class SolrIndexServiceBean {
5354
@EJB
5455
SearchPermissionsServiceBean searchPermissionsService;
5556
@EJB
57+
DataFileServiceBean dataFileService;
58+
@EJB
5659
DataverseServiceBean dataverseService;
5760
@EJB
5861
DatasetServiceBean datasetService;
@@ -365,14 +368,24 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
365368
indexPermissionsForOneDvObject(definitionPoint);
366369
numObjects++;
367370

368-
// Prepare the data needed for the new transaction.
369-
// This ensures lazy-loaded collections are fetched here.
371+
/**
372+
* Prepare the data needed for the new transaction. For performance reasons, indexDatasetFilesInNewTransaction does not merge the dataset or versions into the new transaction (we only read info, there
373+
* are no changes to write). However, there are two ways the code here is used. In one case, indexing content and permissions, the versions and fileMetadatas in them are already loaded. In the other
374+
* case, indexing permissions only, the fileMetadatas are not yet loaded, and we may need them, but only if there are no more than fileQueryMin of them. For each version that will get reindexed (at most two of
375+
* them), the code below does a lightweight query to see how many fileMetadatas exist in it and, if that count is equal to or below fileQueryMin, calls getFileMetadatas().size() to ensure they are loaded
376+
* (before we pass the version into a new transaction where it will be detached and fileMetadatas can't be loaded). Calling getFileMetadatas().size() should be lightweight when the fileMetadatas are
377+
* already loaded (first case) and is done only when needed for the second case.
378+
*
379+
**/
370380
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
371381
List<DatasetVersion> versionsToIndex = new ArrayList<>();
372382
for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) {
373383
if (desiredCards.get(version.getVersionState())) {
374-
// IMPORTANT: This triggers the loading of fileMetadatas within the current transaction
375-
version.getFileMetadatas().size();
384+
int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue();
385+
if (fileCount >= fileQueryMin) {
386+
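// IMPORTANT: This triggers the loading of fileMetadatas within the current transaction. NOTE(review): processDatasetVersionFiles iterates getFileMetadatas() only when the count is NOT greater than fileQueryMin, so the guard here looks inverted (expected fileCount <= fileQueryMin) - confirm.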
387+
version.getFileMetadatas().size();
388+
}
376389
versionsToIndex.add(version);
377390
}
378391
}
@@ -429,7 +442,7 @@ private void processDatasetVersionFiles(DatasetVersion version,
429442
// Process files in batches of 100
430443
int batchSize = 100;
431444

432-
if (version.getFileMetadatas().size() > fileQueryMin) {
445+
if (dataFileService.findCountByDatasetVersionId(version.getId()).intValue() > fileQueryMin) {
433446
// For large datasets, use a more efficient SQL query
434447
try (Stream<DataFileProxy> fileStream = getDataFileInfoForPermissionIndexing(version.getId())) {
435448

@@ -446,6 +459,7 @@ private void processDatasetVersionFiles(DatasetVersion version,
446459
}
447460
} else {
448461
// For smaller datasets, process files directly
462+
// We only call getFileMetadatas() in the case where we know they have already been loaded
449463
for (FileMetadata fmd : version.getFileMetadatas()) {
450464
DataFileProxy fileProxy = new DataFileProxy(fmd);
451465
filesToReindexAsBatch.add(fileProxy);

0 commit comments

Comments
 (0)