Skip to content

Commit aa16779

Browse files
angelozerr authored and datho7561 committed
Improve formatting memory.
Signed-off-by: azerr <azerr@redhat.com>
1 parent 7512bc2 commit aa16779

File tree

5 files changed

+97
-30
lines changed

5 files changed

+97
-30
lines changed

org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/services/XMLFoldings.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,12 @@ public List<FoldingRange> getFoldingRanges(TextDocument document, XMLFoldingSett
6565
CancelChecker cancelChecker) {
6666
Scanner scanner = XMLScanner.createScanner(document.getText());
6767
TokenType token = scanner.scan();
68-
List<FoldingRange> ranges = new ArrayList<>();
68+
// Pre-allocate capacity based on document size (estimate: 1 folding per 500 chars)
69+
int estimatedCapacity = Math.min(document.getText().length() / 500, 1000);
70+
List<FoldingRange> ranges = new ArrayList<>(estimatedCapacity);
6971

70-
List<TagInfo> stack = new ArrayList<>();
72+
// Pre-allocate stack capacity (estimate: max nesting depth of 50)
73+
List<TagInfo> stack = new ArrayList<>(50);
7174
String lastTagName = null;
7275
int prevStart = -1;
7376

org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/services/XMLFormatter.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,22 @@
1212
*/
1313
package org.eclipse.lemminx.services;
1414

15+
import static org.eclipse.lemminx.utils.TextEditUtils.applyEdits;
16+
1517
import java.util.Collection;
1618
import java.util.List;
1719
import java.util.logging.Level;
1820
import java.util.logging.Logger;
1921

2022
import org.eclipse.lemminx.commons.BadLocationException;
23+
import org.eclipse.lemminx.commons.TextDocument;
2124
import org.eclipse.lemminx.dom.DOMDocument;
2225
import org.eclipse.lemminx.services.extensions.XMLExtensionsRegistry;
2326
import org.eclipse.lemminx.services.extensions.format.IFormatterParticipant;
2427
import org.eclipse.lemminx.services.format.XMLFormatterDocumentOld;
2528
import org.eclipse.lemminx.services.format.XMLFormatterDocument;
2629
import org.eclipse.lemminx.settings.SharedSettings;
30+
import org.eclipse.lsp4j.Position;
2731
import org.eclipse.lsp4j.Range;
2832
import org.eclipse.lsp4j.TextEdit;
2933

@@ -58,12 +62,50 @@ public List<? extends TextEdit> format(DOMDocument xmlDocument, Range range, Sha
5862
}
5963
XMLFormatterDocument formatterDocument = new XMLFormatterDocument(xmlDocument, range,
6064
sharedSettings, getFormatterParticipants());
61-
return formatterDocument.format();
65+
List<? extends TextEdit> result = formatterDocument.format();
66+
67+
// For large files, merge all TextEdits into a single one to avoid OutOfMemory
68+
// This is more memory efficient as we don't keep thousands of TextEdit objects
69+
if (shouldMergeEdits(result, xmlDocument)) {
70+
String formatted = applyEdits(xmlDocument.getTextDocument(), result);
71+
Range editRange = range != null ? range : getFullDocumentRange(xmlDocument);
72+
return List.of(new TextEdit(editRange, formatted));
73+
}
74+
return result;
6275
} catch (BadLocationException e) {
6376
LOGGER.log(Level.SEVERE, "Formatting failed due to BadLocation", e);
6477
}
6578
return null;
6679
}
80+
81+
/**
82+
* Determines if TextEdits should be merged into a single edit.
83+
* Merging is beneficial for large files to reduce memory consumption.
84+
*
85+
* @param edits the list of text edits
86+
* @param xmlDocument the XML document
87+
* @return true if edits should be merged
88+
*/
89+
private boolean shouldMergeEdits(List<? extends TextEdit> edits, DOMDocument xmlDocument) {
90+
// Merge if there are many edits (> 1000) or if the document is large (> 100KB)
91+
int editCount = edits != null ? edits.size() : 0;
92+
int documentSize = xmlDocument.getTextDocument().getText().length();
93+
return editCount > 1000 || documentSize > 100_000;
94+
}
95+
96+
/**
97+
* Returns the full document range.
98+
*
99+
* @param xmlDocument the XML document
100+
* @return the full document range
101+
* @throws BadLocationException if position calculation fails
102+
*/
103+
private Range getFullDocumentRange(DOMDocument xmlDocument) throws BadLocationException {
104+
TextDocument textDocument = xmlDocument.getTextDocument();
105+
Position start = new Position(0, 0);
106+
Position end = textDocument.positionAt(textDocument.getText().length());
107+
return new Range(start, end);
108+
}
67109

68110
/**
69111
* Returns list of {@link IFormatterParticipant}.

org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/services/XMLSymbolsProvider.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
import java.util.ArrayList;
1616
import java.util.Collection;
1717
import java.util.Collections;
18+
import java.util.HashSet;
1819
import java.util.List;
20+
import java.util.Set;
1921
import java.util.concurrent.CancellationException;
2022
import java.util.concurrent.atomic.AtomicLong;
2123
import java.util.logging.Level;
@@ -136,7 +138,9 @@ private void findSymbolInformations(DOMNode node, String container, List<SymbolI
136138
boolean collectAttributes = hasFilterForAttr && node.hasAttributes();
137139
if (collectAttributes) {
138140
// Collect attributes from the DOM element
139-
List<DOMNode> attrToIgnore = getFilteredNodeAttributes(node, filter, hasFilterForAttr);
141+
List<DOMNode> attrToIgnoreList = getFilteredNodeAttributes(node, filter, hasFilterForAttr);
142+
// Convert to HashSet for O(1) lookup instead of O(n) with List.contains()
143+
Set<DOMNode> attrToIgnore = attrToIgnoreList.isEmpty() ? Collections.emptySet() : new HashSet<>(attrToIgnoreList);
140144
for (DOMAttr attr : node.getAttributeNodes()) {
141145
findSymbolInformations(attr, containerName, symbols, attrToIgnore.contains(attr), filter, hasFilterForAttr,
142146
cancelChecker);
@@ -172,7 +176,7 @@ public DocumentSymbolsResult findDocumentSymbols(DOMDocument xmlDocument, XMLSym
172176
// Process default symbol providers
173177
boolean isDTD = xmlDocument.isDTD();
174178
boolean hasFilterForAttr = filter.hasFilterFor(MatcherType.ATTRIBUTE);
175-
List<DOMNode> nodesToIgnore = new ArrayList<>();
179+
Set<DOMNode> nodesToIgnore = new HashSet<>();
176180
xmlDocument.getRoots().forEach(node -> {
177181
try {
178182
if ((node.isDoctype() && isDTD)) {
@@ -190,7 +194,7 @@ public DocumentSymbolsResult findDocumentSymbols(DOMDocument xmlDocument, XMLSym
190194
return symbols;
191195
}
192196

193-
private void findDocumentSymbols(DOMNode node, DocumentSymbolsResult symbols, List<DOMNode> nodesToIgnore,
197+
private void findDocumentSymbols(DOMNode node, DocumentSymbolsResult symbols, Set<DOMNode> nodesToIgnore,
194198
XMLSymbolFilter filter, boolean hasFilterForAttr, CancelChecker cancelChecker) throws BadLocationException {
195199
if (!isNodeSymbol(node, filter)) {
196200
return;
@@ -223,7 +227,9 @@ private void findDocumentSymbols(DOMNode node, DocumentSymbolsResult symbols, Li
223227
if (node.isElement()) {
224228
if (collectAttributes) {
225229
// Collect attributes from the DOM element
226-
List<DOMNode> attrToIgnore = getFilteredNodeAttributes(node, filter, hasFilterForAttr);
230+
List<DOMNode> attrToIgnoreList = getFilteredNodeAttributes(node, filter, hasFilterForAttr);
231+
// Convert to HashSet for O(1) lookup instead of O(n) with List.contains()
232+
Set<DOMNode> attrToIgnore = attrToIgnoreList.isEmpty() ? Collections.emptySet() : new HashSet<>(attrToIgnoreList);
227233
for (DOMAttr attr : node.getAttributeNodes()) {
228234
findDocumentSymbols(attr, childrenSymbols, attrToIgnore, filter, hasFilterForAttr, cancelChecker);
229235
}

org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/services/format/XMLFormatterDocument.java

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ public class XMLFormatterDocument {
6464
private final TextDocument textDocument;
6565
private final String lineDelimiter;
6666
private final SharedSettings sharedSettings;
67+
68+
// Reusable StringBuilder for indentation to reduce object allocation
69+
private final StringBuilder indentBuilder;
6770

6871
private final DOMProcessingInstructionFormatter processingInstructionFormatter;
6972

@@ -114,6 +117,8 @@ public XMLFormatterDocument(DOMDocument xmlDocument, Range range, SharedSettings
114117
this.commentFormatter = new DOMCommentFormatter(this);
115118
this.cDATAFormatter = new DOMCDATAFormatter(this);
116119
this.formattingContext = new HashMap<>();
120+
// Pre-allocate reusable StringBuilder for indentation (max reasonable indent: 100 levels * 4 spaces)
121+
this.indentBuilder = new StringBuilder(400);
117122
}
118123

119124
private static String computeLineDelimiter(TextDocument textDocument) {
@@ -138,7 +143,10 @@ public List<? extends TextEdit> format() throws BadLocationException {
138143
}
139144

140145
public List<? extends TextEdit> format(DOMDocument document, int start, int end) {
141-
List<TextEdit> edits = new ArrayList<>();
146+
// Pre-allocate list capacity based on document size to reduce reallocations
147+
// Estimate: 1 edit per 100 characters for typical XML formatting
148+
int estimatedCapacity = Math.min(textDocument.getText().length() / 100, 10000);
149+
List<TextEdit> edits = new ArrayList<>(estimatedCapacity);
142150

143151
// get initial document region
144152
DOMNode currentDOMNode = getDOMNodeToFormat(document, start, end);
@@ -654,21 +662,22 @@ public boolean shouldCollapseEmptyElement(DOMElement element, SharedSettings sha
654662
}
655663

656664
private String getIndentSpaces(int level, boolean addLineSeparator) {
657-
StringBuilder spaces = new StringBuilder();
665+
// Reuse StringBuilder to avoid object allocation
666+
indentBuilder.setLength(0);
658667
if (addLineSeparator) {
659-
spaces.append(lineDelimiter);
668+
indentBuilder.append(lineDelimiter);
660669
}
661670

662671
for (int i = 0; i < level; i++) {
663672
if (isInsertSpaces()) {
664673
for (int j = 0; j < getTabSize(); j++) {
665-
spaces.append(" ");
674+
indentBuilder.append(" ");
666675
}
667676
} else {
668-
spaces.append("\t");
677+
indentBuilder.append("\t");
669678
}
670679
}
671-
return spaces.toString();
680+
return indentBuilder.toString();
672681
}
673682

674683
/**
@@ -682,46 +691,48 @@ private String getIndentSpaces(int level, boolean addLineSeparator) {
682691
* new lines.
683692
*/
684693
private String getIndentSpacesWithMultiNewLines(int level, int newLineCount) {
685-
StringBuilder spaces = new StringBuilder();
694+
// Reuse StringBuilder to avoid object allocation
695+
indentBuilder.setLength(0);
686696
while (newLineCount != 0) {
687-
spaces.append(lineDelimiter);
697+
indentBuilder.append(lineDelimiter);
688698
newLineCount--;
689699
}
690700

691701
for (int i = 0; i < level; i++) {
692702
if (isInsertSpaces()) {
693703
for (int j = 0; j < getTabSize(); j++) {
694-
spaces.append(" ");
704+
indentBuilder.append(" ");
695705
}
696706
} else {
697-
spaces.append("\t");
707+
indentBuilder.append("\t");
698708
}
699709
}
700-
return spaces.toString();
710+
return indentBuilder.toString();
701711
}
702712

703713
private String getIndentSpacesWithOffsetSpaces(int spaceCount, boolean addLineSeparator) {
704-
StringBuilder spaces = new StringBuilder();
714+
// Reuse StringBuilder to avoid object allocation
715+
indentBuilder.setLength(0);
705716
if (addLineSeparator) {
706-
spaces.append(lineDelimiter);
717+
indentBuilder.append(lineDelimiter);
707718
}
708719
int spaceOffset = spaceCount % getTabSize();
709720

710721
for (int i = 0; i < spaceCount / getTabSize(); i++) {
711722
if (isInsertSpaces()) {
712723
for (int j = 0; j < getTabSize(); j++) {
713-
spaces.append(" ");
724+
indentBuilder.append(" ");
714725
}
715726
} else {
716-
spaces.append("\t");
727+
indentBuilder.append("\t");
717728
}
718729
}
719730

720731
for (int i = 0; i < spaceOffset; i++) {
721-
spaces.append(" ");
732+
indentBuilder.append(" ");
722733
}
723734

724-
return spaces.toString();
735+
return indentBuilder.toString();
725736
}
726737

727738
private void trimFinalNewlines(boolean insertFinalNewline, List<TextEdit> edits) {

org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/utils/TextEditUtils.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,23 +110,28 @@ public static String applyEdits(TextDocument document, List<? extends TextEdit>
110110
}
111111
return diff;
112112
});
113+
114+
// Use StringBuilder for better memory efficiency, especially for large files
115+
// Pre-allocate capacity based on original text size to minimize reallocations
116+
StringBuilder result = new StringBuilder(text.length());
113117
int lastModifiedOffset = 0;
114-
List<String> spans = new ArrayList<>();
118+
115119
for (TextEdit e : edits) {
116120
int startOffset = document.offsetAt(e.getRange().getStart());
117121
if (startOffset < lastModifiedOffset) {
118122
throw new Error("Overlapping edit");
119123
} else if (startOffset > lastModifiedOffset) {
120-
spans.add(text.substring(lastModifiedOffset, startOffset));
124+
// Append unchanged text between edits
125+
result.append(text, lastModifiedOffset, startOffset);
121126
}
122127
if (e.getNewText() != null) {
123-
spans.add(e.getNewText());
128+
result.append(e.getNewText());
124129
}
125130
lastModifiedOffset = document.offsetAt(e.getRange().getEnd());
126131
}
127-
spans.add(text.substring(lastModifiedOffset));
128-
return spans.stream() //
129-
.collect(Collectors.joining());
132+
// Append remaining text after last edit
133+
result.append(text, lastModifiedOffset, text.length());
134+
return result.toString();
130135
}
131136

132137
/**

0 commit comments

Comments
 (0)