Skip to content

Commit 4bc9c67

Browse files
authored
Merge pull request #12093 from QualitativeDataRepository/indexingperf2
Indexing Simplifications
2 parents 86796a1 + 2bdcb05 commit 4bc9c67

3 files changed

Lines changed: 180 additions & 104 deletions

File tree

src/main/java/edu/harvard/iq/dataverse/search/SearchPermissionsServiceBean.java

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -92,52 +92,6 @@ public List<String> findDvObjectPerms(DvObject dvObject) {
9292
return permStrings;
9393
}
9494

95-
public Map<DatasetVersion.VersionState, Boolean> getDesiredCards(Dataset dataset) {
96-
Map<DatasetVersion.VersionState, Boolean> desiredCards = new LinkedHashMap<>();
97-
DatasetVersion latestVersion = dataset.getLatestVersion();
98-
DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
99-
DatasetVersion releasedVersion = dataset.getReleasedVersion();
100-
boolean atLeastOnePublishedVersion = false;
101-
if (releasedVersion != null) {
102-
atLeastOnePublishedVersion = true;
103-
} else {
104-
atLeastOnePublishedVersion = false;
105-
}
106-
107-
if (atLeastOnePublishedVersion == false) {
108-
if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) {
109-
desiredCards.put(DatasetVersion.VersionState.DRAFT, true);
110-
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
111-
desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
112-
} else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) {
113-
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true);
114-
desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
115-
desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
116-
} else {
117-
String msg = "No-op. Unexpected condition reached: There is no published version and the latest published version is neither " + DatasetVersion.VersionState.DRAFT + " nor " + DatasetVersion.VersionState.DEACCESSIONED + ". Its state is " + latestVersionState + ".";
118-
logger.info(msg);
119-
}
120-
} else if (atLeastOnePublishedVersion == true) {
121-
if (latestVersionState.equals(DatasetVersion.VersionState.RELEASED)
122-
|| latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) {
123-
desiredCards.put(DatasetVersion.VersionState.RELEASED, true);
124-
desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
125-
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
126-
} else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) {
127-
desiredCards.put(DatasetVersion.VersionState.DRAFT, true);
128-
desiredCards.put(DatasetVersion.VersionState.RELEASED, true);
129-
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
130-
} else {
131-
String msg = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft";
132-
logger.info(msg);
133-
}
134-
} else {
135-
String msg = "No-op. Unexpected condition reached: Has a version been published or not?";
136-
logger.info(msg);
137-
}
138-
return desiredCards;
139-
}
140-
14195
private boolean hasBeenPublished(Dataverse dataverse) {
14296
return dataverse.isReleased();
14397
}

src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java

Lines changed: 40 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,14 @@ public List<DvObjectSolrDoc> determineSolrDocs(DvObject dvObject) {
8686
solrDocs.addAll(datasetSolrDocs);
8787
} else if (dvObject.isInstanceofDataFile()) {
8888
DataFile datafile = (DataFile) dvObject;
89-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(datafile.getOwner());
9089
Set<DatasetVersion> datasetVersions = datasetVersionsToBuildCardsFor(datafile.getOwner());
9190
for (DatasetVersion version : datasetVersions) {
92-
if(desiredCards.containsKey(version.getVersionState()) && desiredCards.get(version.getVersionState()) && datafile.isInDatasetVersion(version)) {
93-
List<String> cachedPerms = searchPermissionsService.findDatasetVersionPerms(version);
94-
String solrIdEnd = getDatasetOrDataFileSolrEnding(version.getVersionState());
95-
Long versionId = version.getId();
96-
DvObjectSolrDoc fileSolrDoc = constructDatafileSolrDoc(new DataFileProxy(datafile.getFileMetadata()), cachedPerms, versionId, solrIdEnd);
97-
solrDocs.add(fileSolrDoc);
91+
if (datafile.isInDatasetVersion(version)) {
92+
List<String> cachedPerms = searchPermissionsService.findDatasetVersionPerms(version);
93+
String solrIdEnd = getDatasetOrDataFileSolrEnding(version.getVersionState());
94+
Long versionId = version.getId();
95+
DvObjectSolrDoc fileSolrDoc = constructDatafileSolrDoc(new DataFileProxy(datafile.getFileMetadata()), cachedPerms, versionId, solrIdEnd);
96+
solrDocs.add(fileSolrDoc);
9897
}
9998
}
10099
} else {
@@ -136,13 +135,9 @@ private DvObjectSolrDoc constructDataverseSolrDoc(Dataverse dataverse) {
136135
private List<DvObjectSolrDoc> constructDatasetSolrDocs(Dataset dataset) {
137136
List<DvObjectSolrDoc> emptyList = new ArrayList<>();
138137
List<DvObjectSolrDoc> solrDocs = emptyList;
139-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
140138
for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) {
141-
boolean cardShouldExist = desiredCards.get(version.getVersionState());
142-
if (cardShouldExist) {
143-
DvObjectSolrDoc datasetSolrDoc = makeDatasetSolrDoc(version);
144-
solrDocs.add(datasetSolrDoc);
145-
}
139+
DvObjectSolrDoc datasetSolrDoc = makeDatasetSolrDoc(version);
140+
solrDocs.add(datasetSolrDoc);
146141
}
147142
return solrDocs;
148143
}
@@ -161,39 +156,45 @@ private DvObjectSolrDoc constructDatafileSolrDoc(DataFileProxy fileProxy, List<S
161156

162157
private List<DvObjectSolrDoc> constructDatafileSolrDocsFromDataset(Dataset dataset) {
163158
List<DvObjectSolrDoc> datafileSolrDocs = new ArrayList<>();
164-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
165159
for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) {
166-
boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState());
167-
if (cardShouldExist) {
168-
List<String> perms = new ArrayList<>();
169-
if (datasetVersionFileIsAttachedTo.isReleased()) {
170-
perms.add(IndexServiceBean.getPublicGroupString());
171-
} else {
172-
perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo);
173-
}
160+
List<String> perms = new ArrayList<>();
161+
if (datasetVersionFileIsAttachedTo.isReleased()) {
162+
perms.add(IndexServiceBean.getPublicGroupString());
163+
} else {
164+
perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo);
165+
}
174166

175-
for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) {
176-
Long fileId = fileMetadata.getDataFile().getId();
177-
String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId;
178-
String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState());
179-
String solrId = solrIdStart + solrIdEnd;
180-
DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms);
181-
logger.finest("adding fileid " + fileId);
182-
datafileSolrDocs.add(dataFileSolrDoc);
183-
}
167+
for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) {
168+
Long fileId = fileMetadata.getDataFile().getId();
169+
String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId;
170+
String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState());
171+
String solrId = solrIdStart + solrIdEnd;
172+
DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms);
173+
logger.finest("adding fileid " + fileId);
174+
datafileSolrDocs.add(dataFileSolrDoc);
184175
}
185176
}
186177
return datafileSolrDocs;
187178
}
188179

180+
/** Find the versions to index. The overall logic is
181+
* If there is only one version, or no released version (all non-draft versions are deaccessioned)
182+
* then index it regardless of it's versionstate
183+
* If there are released versions
184+
* then index the latest released version and a draft version if one exists
185+
* Hence - the latest deaccessioned version is only indexed if there is no released version
186+
* @param dataset
187+
* @return the set of versions to build cards for
188+
*/
189189
private Set<DatasetVersion> datasetVersionsToBuildCardsFor(Dataset dataset) {
190190
Set<DatasetVersion> datasetVersions = new HashSet<>();
191191
DatasetVersion latest = dataset.getLatestVersion();
192-
if (latest != null) {
192+
DatasetVersion released = dataset.getReleasedVersion();
193+
if (latest != null && (released == null || latest.isDraft())) {
193194
datasetVersions.add(latest);
194195
}
195-
DatasetVersion released = dataset.getReleasedVersion();
196196
if (released != null) {
197+
//May be the same as the latest version - only one copy will be in the set in that case
197198
datasetVersions.add(released);
198199
}
199200
return datasetVersions;
@@ -377,17 +378,14 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
377378
* loaded (first case) and done only when needed for the second case.
378379
*
379380
**/
380-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
381381
List<DatasetVersion> versionsToIndex = new ArrayList<>();
382-
for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) {
383-
if (desiredCards.get(version.getVersionState())) {
384-
int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue();
382+
for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) {
383+
int fileCount = dataFileService.findCountByDatasetVersionId(version.getId()).intValue();
385384
if (fileCount <= fileQueryMin) {
386-
// IMPORTANT: This triggers the loading of fileMetadatas within the current transaction
387-
version.getFileMetadatas().size();
388-
}
389-
versionsToIndex.add(version);
385+
// IMPORTANT: This triggers the loading of fileMetadatas within the current transaction
386+
version.getFileMetadatas().size();
390387
}
388+
versionsToIndex.add(version);
391389
}
392390

393391
// Process the dataset's files in a new transaction, passing the pre-loaded data
@@ -412,16 +410,12 @@ public void indexDatasetBatchInNewTransaction(List<Long> datasetIds, final int[]
412410
indexPermissionsForOneDvObject(dataset);
413411

414412
// Process files for this dataset
415-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
416-
417-
for (DatasetVersion version : versionsToReIndexPermissionsFor(dataset)) {
418-
if (desiredCards.get(version.getVersionState())) {
413+
for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) {
419414
processDatasetVersionFiles(version, fileCounter, fileQueryMin);
420415
}
421416
}
422417
}
423418
}
424-
}
425419

426420
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
427421
public void indexDatasetFilesInNewTransaction(List<DatasetVersion> versions, final int[] fileCounter, int fileQueryMin) {
@@ -500,18 +494,6 @@ private void reindexFilesInBatches(List<DataFileProxy> filesToReindexAsBatch, Li
500494
}
501495
}
502496

503-
private List<DatasetVersion> versionsToReIndexPermissionsFor(Dataset dataset) {
504-
List<DatasetVersion> versionsToReindexPermissionsFor = new ArrayList<>();
505-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
506-
for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) {
507-
boolean cardShouldExist = desiredCards.get(version.getVersionState());
508-
if (cardShouldExist) {
509-
versionsToReindexPermissionsFor.add(version);
510-
}
511-
}
512-
return versionsToReindexPermissionsFor;
513-
}
514-
515497
public IndexResponse deleteMultipleSolrIds(List<String> solrIdsToDelete) {
516498
if (solrIdsToDelete.isEmpty()) {
517499
return new IndexResponse("nothing to delete");
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
2+
package edu.harvard.iq.dataverse.search;
3+
4+
import edu.harvard.iq.dataverse.Dataset;
5+
import edu.harvard.iq.dataverse.DatasetVersion;
6+
import org.junit.jupiter.api.BeforeEach;
7+
import org.junit.jupiter.api.Test;
8+
import org.mockito.InjectMocks;
9+
import org.mockito.MockitoAnnotations;
10+
11+
import java.util.Set;
12+
13+
import static org.junit.jupiter.api.Assertions.*;
14+
import static org.mockito.Mockito.*;
15+
16+
public class SolrIndexServiceBeanTest {
17+
18+
@InjectMocks
19+
private SolrIndexServiceBean solrIndexServiceBean;
20+
21+
@BeforeEach
22+
public void setUp() {
23+
MockitoAnnotations.openMocks(this);
24+
}
25+
26+
@Test
27+
public void testDatasetVersionsToBuildCardsFor_OnlyDraft() {
28+
// Arrange
29+
Dataset dataset = mock(Dataset.class);
30+
DatasetVersion draftVersion = createMockVersion(1L, DatasetVersion.VersionState.DRAFT, true);
31+
32+
when(dataset.getLatestVersion()).thenReturn(draftVersion);
33+
when(dataset.getReleasedVersion()).thenReturn(null);
34+
35+
// Act
36+
Set<DatasetVersion> result = invokeDatasetVersionsToBuildCardsFor(dataset);
37+
38+
// Assert
39+
assertEquals(1, result.size());
40+
assertTrue(result.contains(draftVersion));
41+
}
42+
43+
@Test
44+
public void testDatasetVersionsToBuildCardsFor_OnlyDeaccessioned() {
45+
// Arrange
46+
Dataset dataset = mock(Dataset.class);
47+
DatasetVersion deaccessionedVersion = createMockVersion(1L, DatasetVersion.VersionState.DEACCESSIONED, false);
48+
49+
when(dataset.getLatestVersion()).thenReturn(deaccessionedVersion);
50+
when(dataset.getReleasedVersion()).thenReturn(null);
51+
52+
// Act
53+
Set<DatasetVersion> result = invokeDatasetVersionsToBuildCardsFor(dataset);
54+
55+
// Assert
56+
assertEquals(1, result.size());
57+
assertTrue(result.contains(deaccessionedVersion));
58+
}
59+
60+
@Test
61+
public void testDatasetVersionsToBuildCardsFor_OnlyReleased() {
62+
// Arrange
63+
Dataset dataset = mock(Dataset.class);
64+
DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false);
65+
66+
when(dataset.getLatestVersion()).thenReturn(releasedVersion);
67+
when(dataset.getReleasedVersion()).thenReturn(releasedVersion);
68+
69+
// Act
70+
Set<DatasetVersion> result = invokeDatasetVersionsToBuildCardsFor(dataset);
71+
72+
// Assert
73+
assertEquals(1, result.size());
74+
assertTrue(result.contains(releasedVersion));
75+
}
76+
77+
@Test
78+
public void testDatasetVersionsToBuildCardsFor_ReleasedAndDraft() {
79+
// Arrange
80+
Dataset dataset = mock(Dataset.class);
81+
DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false);
82+
DatasetVersion draftVersion = createMockVersion(2L, DatasetVersion.VersionState.DRAFT, true);
83+
84+
when(dataset.getLatestVersion()).thenReturn(draftVersion);
85+
when(dataset.getReleasedVersion()).thenReturn(releasedVersion);
86+
87+
// Act
88+
Set<DatasetVersion> result = invokeDatasetVersionsToBuildCardsFor(dataset);
89+
90+
// Assert
91+
assertEquals(2, result.size());
92+
assertTrue(result.contains(releasedVersion));
93+
assertTrue(result.contains(draftVersion));
94+
}
95+
96+
@Test
97+
public void testDatasetVersionsToBuildCardsFor_ReleasedAndDeaccessioned() {
98+
// Arrange
99+
Dataset dataset = mock(Dataset.class);
100+
DatasetVersion releasedVersion = createMockVersion(1L, DatasetVersion.VersionState.RELEASED, false);
101+
DatasetVersion deaccessionedVersion = createMockVersion(2L, DatasetVersion.VersionState.DEACCESSIONED, false);
102+
103+
// Latest is deaccessioned, but there's a released version
104+
when(dataset.getLatestVersion()).thenReturn(deaccessionedVersion);
105+
when(dataset.getReleasedVersion()).thenReturn(releasedVersion);
106+
107+
// Act
108+
Set<DatasetVersion> result = invokeDatasetVersionsToBuildCardsFor(dataset);
109+
110+
// Assert
111+
// Should only return the released version, not the deaccessioned one
112+
assertEquals(1, result.size());
113+
assertTrue(result.contains(releasedVersion));
114+
assertFalse(result.contains(deaccessionedVersion));
115+
}
116+
117+
// Helper method to create mock DatasetVersion
118+
private DatasetVersion createMockVersion(Long id, DatasetVersion.VersionState state, boolean isDraft) {
119+
DatasetVersion version = mock(DatasetVersion.class);
120+
when(version.getId()).thenReturn(id);
121+
when(version.getVersionState()).thenReturn(state);
122+
when(version.isDraft()).thenReturn(isDraft);
123+
when(version.isReleased()).thenReturn(state == DatasetVersion.VersionState.RELEASED);
124+
when(version.isDeaccessioned()).thenReturn(state == DatasetVersion.VersionState.DEACCESSIONED);
125+
return version;
126+
}
127+
128+
// Helper method to invoke the private method using reflection
129+
@SuppressWarnings("unchecked")
130+
private Set<DatasetVersion> invokeDatasetVersionsToBuildCardsFor(Dataset dataset) {
131+
try {
132+
java.lang.reflect.Method method = SolrIndexServiceBean.class.getDeclaredMethod(
133+
"datasetVersionsToBuildCardsFor", Dataset.class);
134+
method.setAccessible(true);
135+
return (Set<DatasetVersion>) method.invoke(solrIndexServiceBean, dataset);
136+
} catch (Exception e) {
137+
throw new RuntimeException("Failed to invoke private method", e);
138+
}
139+
}
140+
}

0 commit comments

Comments
 (0)