IQSS · landreev · Jan 26, 2026 · Jan 8, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java
@@ -1,6 +1,7 @@
 package edu.harvard.iq.dataverse.search;
 
 import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DataFileServiceBean;
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetServiceBean;
 import edu.harvard.iq.dataverse.DatasetVersion;
@@ -53,6 +54,8 @@ public class SolrIndexServiceBean {
     @EJB
     SearchPermissionsServiceBean searchPermissionsService;
     @EJB
+    DataFileServiceBean dataFileService;
+    @EJB
     DataverseServiceBean dataverseService;
     @EJB
     DatasetServiceBean datasetService;
@@ -365,14 +368,24 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
             indexPermissionsForOneDvObject(definitionPoint);
             numObjects++;
 
-            // Prepare the data needed for the new transaction.
-            // This ensures lazy-loaded collections are fetched here.
+            /**
+             * Prepare the data needed for the new transaction. For performance reasons, indexDatasetFilesInNewTransaction does not merge the dataset or versions into the new transaction (we only read info, there
+             * are no changes to write). However, the code here is used in two ways. In one case, indexing content and permissions, the versions and the fileMetadatas in them are already loaded. In the other
+             * case, indexing permissions only, the fileMetadatas are not yet loaded, and we may need them, but only if there are no more than fileQueryMin of them. For each version that will get reindexed (at most two of
+             * them), the code below does a lightweight query to see how many fileMetadatas exist in it and, if the count is equal to or below fileQueryMin, calls getFileMetadatas().size() to ensure they are loaded
+             * (before we pass the version into a new transaction where it will be detached and fileMetadatas can't be loaded). Calling getFileMetadatas().size() should be lightweight when the fileMetadatas are
+             * already loaded (first case) and is done only when needed in the second case.
+             * 
+             **/
             Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
             List<DatasetVersion> versionsToIndex = new ArrayList<>();
             for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) {
                 if (desiredCards.get(version.getVersionState())) {
-                    // IMPORTANT: This triggers the loading of fileMetadatas within the current transaction
-                    version.getFileMetadatas().size(); 
+                    int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue();
+                    if (fileCount <= fileQueryMin) {
+                        // IMPORTANT: load fileMetadatas within the current transaction; at or below fileQueryMin they are later read directly from the detached version
+                        version.getFileMetadatas().size();
+                    }
                     versionsToIndex.add(version);
                 }
             }
@@ -429,7 +442,7 @@ private void processDatasetVersionFiles(DatasetVersion version,
         // Process files in batches of 100
         int batchSize = 100;
 
-        if (version.getFileMetadatas().size() > fileQueryMin) {
+        if (dataFileService.findCountByDatasetVersionId(version.getId()).intValue() > fileQueryMin) {
             // For large datasets, use a more efficient SQL query
             try (Stream<DataFileProxy> fileStream = getDataFileInfoForPermissionIndexing(version.getId())) {
 
@@ -446,6 +459,7 @@ private void processDatasetVersionFiles(DatasetVersion version,
             }
         } else {
             // For smaller datasets, process files directly
+            // We only call getFileMetadatas() in the case where we know they have already been loaded
             for (FileMetadata fmd : version.getFileMetadatas()) {
                 DataFileProxy fileProxy = new DataFileProxy(fmd);
                 filesToReindexAsBatch.add(fileProxy);