@@ -214,13 +214,10 @@ public function sanitize($dirty)
214214 $ this ->elementReferenceResolver ->collect ();
215215 $ elementsToRemove = $ this ->elementReferenceResolver ->getElementsToRemove ();
216216
217- // Grab all the elements
218- $ allElements = $ this ->xmlDocument ->getElementsByTagName ("* " );
219-
220217 // remove doctype after node elements have been analyzed
221218 $ this ->removeDoctype ();
222219 // Start the cleaning process
223- $ this ->startClean ($ allElements , $ elementsToRemove );
220+ $ this ->startClean ($ this -> xmlDocument -> childNodes , $ elementsToRemove );
224221
225222 // Save cleaned XML to a variable
226223 if ($ this ->removeXMLTag ) {
@@ -316,33 +313,63 @@ protected function startClean(\DOMNodeList $elements, array $elementsToRemove)
316313 continue ;
317314 }
318315
319- // If the tag isn't in the whitelist, remove it and continue with next iteration
320- if (!in_array (strtolower ($ currentElement ->tagName ), $ this ->allowedTags )) {
321- $ currentElement ->parentNode ->removeChild ($ currentElement );
322- $ this ->xmlIssues [] = array (
323- 'message ' => 'Suspicious tag \'' . $ currentElement ->tagName . '\'' ,
324- 'line ' => $ currentElement ->getLineNo (),
325- );
326- continue ;
327- }
328-
329- $ this ->cleanHrefs ($ currentElement );
330-
331- $ this ->cleanXlinkHrefs ($ currentElement );
332-
333- $ this ->cleanAttributesOnWhitelist ($ currentElement );
334-
335- if (strtolower ($ currentElement ->tagName ) === 'use ' ) {
336- if ($ this ->isUseTagDirty ($ currentElement )
337- || $ this ->isUseTagExceedingThreshold ($ currentElement )
338- ) {
316+ if ($ currentElement instanceof \DOMElement) {
317+ // If the tag isn't in the whitelist, remove it and continue with next iteration
318+ if (!in_array (strtolower ($ currentElement ->tagName ), $ this ->allowedTags )) {
339319 $ currentElement ->parentNode ->removeChild ($ currentElement );
340320 $ this ->xmlIssues [] = array (
341- 'message ' => 'Suspicious \'' . $ currentElement ->tagName . '\'' ,
321+ 'message ' => 'Suspicious tag \'' . $ currentElement ->tagName . '\'' ,
342322 'line ' => $ currentElement ->getLineNo (),
343323 );
344324 continue ;
345325 }
326+
327+ $ this ->cleanHrefs ( $ currentElement );
328+
329+ $ this ->cleanXlinkHrefs ( $ currentElement );
330+
331+ $ this ->cleanAttributesOnWhitelist ($ currentElement );
332+
333+ if (strtolower ($ currentElement ->tagName ) === 'use ' ) {
334+ if ($ this ->isUseTagDirty ($ currentElement )
335+ || $ this ->isUseTagExceedingThreshold ($ currentElement )
336+ ) {
337+ $ currentElement ->parentNode ->removeChild ($ currentElement );
338+ $ this ->xmlIssues [] = array (
339+ 'message ' => 'Suspicious \'' . $ currentElement ->tagName . '\'' ,
340+ 'line ' => $ currentElement ->getLineNo (),
341+ );
342+ continue ;
343+ }
344+ }
345+
346+ // Strip out font elements that will break out of foreign content.
347+ if (strtolower ($ currentElement ->tagName ) === 'font ' ) {
348+ $ breaksOutOfForeignContent = false ;
349+ for ($ x = $ currentElement ->attributes ->length - 1 ; $ x >= 0 ; $ x --) {
350+ // get attribute name
351+ $ attrName = $ currentElement ->attributes ->item ( $ x )->name ;
352+
353+ if (in_array ($ attrName , ['face ' , 'color ' , 'size ' ])) {
354+ $ breaksOutOfForeignContent = true ;
355+ }
356+ }
357+
358+ if ($ breaksOutOfForeignContent ) {
359+ $ currentElement ->parentNode ->removeChild ($ currentElement );
360+ $ this ->xmlIssues [] = array (
361+ 'message ' => 'Suspicious tag \'' . $ currentElement ->tagName . '\'' ,
362+ 'line ' => $ currentElement ->getLineNo (),
363+ );
364+ continue ;
365+ }
366+ }
367+ }
368+
369+ $ this ->cleanUnsafeNodes ($ currentElement );
370+
371+ if ($ currentElement ->hasChildNodes ()) {
372+ $ this ->startClean ($ currentElement ->childNodes , $ elementsToRemove );
346373 }
347374 }
348375 }
@@ -627,4 +654,50 @@ public function setUseNestingLimit($limit)
627654 {
628655 $ this ->useNestingLimit = (int ) $ limit ;
629656 }
657+
/**
 * Determines whether a node is safe or not.
 *
 * A node is considered safe only when its node name appears on the
 * safe list below; the comparison is strict and case-sensitive.
 *
 * @param \DOMNode $node Node to classify.
 * @return bool True when the node name is on the safe list, false otherwise.
 */
protected function isNodeSafe(\DOMNode $node)
{
    // The entry must be spelled exactly as the DOM reports it: a padded
    // value such as "#text " would never equal a real text node's name
    // under the strict comparison used below.
    $safeNodes = [
        '#text',
    ];

    return in_array($node->nodeName, $safeNodes, true);
}
675+
/**
 * Remove nodes that are either invalid or malformed.
 *
 * Any node that is not an element and is rejected by isNodeSafe() is
 * reported in $this->xmlIssues and detached from its parent. Nodes that
 * are kept have their children inspected recursively in the same way.
 *
 * @param \DOMNode $currentElement The current element.
 * @return void
 */
protected function cleanUnsafeNodes(\DOMNode $currentElement)
{
    // Elements are identified the same way startClean() does it; only
    // non-element nodes are checked against the safe-node list here.
    if (!($currentElement instanceof \DOMElement) && !$this->isNodeSafe($currentElement)) {
        // Record the issue first, while the node is still attached.
        $this->xmlIssues[] = array(
            'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'',
            'line' => $currentElement->getLineNo(),
        );

        // A node without a parent cannot be detached; skip the call
        // instead of dereferencing null.
        if ($currentElement->parentNode !== null) {
            $currentElement->parentNode->removeChild($currentElement);
        }

        return;
    }

    $children = $currentElement->childNodes;
    if (!$children || $children->length === 0) {
        return;
    }

    // Iterate from the last child to the first so that removing the child
    // at index $j never disturbs the indices still waiting to be visited.
    for ($j = $children->length - 1; $j >= 0; $j--) {
        $this->cleanUnsafeNodes($children->item($j));
    }
}
630703}
0 commit comments