Skip to content

Commit e16abc2

Browse files
authored
Merge pull request #19162 from Yoast/PC-965-yoast-markers-break-html-of-content
PC-965 Fixes highlights when inline HTML tags are present in Classic editor
2 parents db44faa + af58ac5 commit e16abc2

7 files changed

Lines changed: 232 additions & 17 deletions

File tree

packages/js/src/decorator/tinyMCE.js

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import { markers } from "yoastseo";
1+
import { markers, languageProcessing } from "yoastseo";
22
import { forEach } from "lodash-es";
3-
import { languageProcessing } from "yoastseo";
43

54
var MARK_TAG = "yoastmark";
65

@@ -36,11 +35,28 @@ function markTinyMCE( editor, paper, marks ) {
3635
let html = editor.getContent();
3736
html = markers.removeMarks( html );
3837

38+
/*
39+
* Get the information whether we want to mark a specific part of the HTML. If we do, `fieldsToMark` should return an array with that information.
40+
* For example, [ "subehading" ] means that we want to apply the markings in subheadings only, and not the other parts.
41+
* `selectedHTML` is an array of the HTML parts that we want to apply the marking to.
42+
*/
3943
const { fieldsToMark, selectedHTML } = languageProcessing.getFieldsToMark( marks, html );
4044

4145
// Generate marked HTML.
4246
forEach( marks, function( mark ) {
47+
/*
48+
* Classic editor uses double quotes for HTML attribute values. However, Block editor uses single quotes for HTML tag attributes,
49+
* and that's why in `yoastseo`, we use single quotes for the attribute values when we create the marked object. As a result,
50+
* the replacement did not work, as the marks passed by `yoastseo` did not match anything in the original text.
51+
* This step is replacing the single quotes in the marked object output by `yoastseo` with double quotes.
52+
* This way, we make sure that the replacement can find a match between the original text of the marked object and the text in the page.
53+
*/
54+
mark._properties.marked = languageProcessing.replaceSingleQuotesInTags( mark._properties.marked );
55+
mark._properties.original = languageProcessing.replaceSingleQuotesInTags( mark._properties.original );
56+
57+
// Check if we want to mark only specific part of the HTML.
4358
if ( fieldsToMark.length > 0 ) {
59+
// Apply the marking to the selected HTML parts.
4460
selectedHTML.forEach( element => {
4561
const markedElement = mark.applyWithReplace( element );
4662
html = html.replace( element, markedElement );
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import replaceSingleQuotesInTags from "../../../../src/languageProcessing/helpers/html/replaceQuotes";
2+
3+
describe( "replace-quotes", function() {
4+
describe( "replaceSingleQuotesInTags", function() {
5+
it( "should return the same string when no single quotes are present", function() {
6+
expect( replaceSingleQuotesInTags( "This is a test" ) )
7+
.toEqual( "This is a test" );
8+
} );
9+
10+
it( "should return the same string when only double quotes in HTML attribute values are present", function() {
11+
expect( replaceSingleQuotesInTags( "<yoastmark class=\"yoast-text-mark\">This is a test</yoastmark>" ) )
12+
.toEqual( "<yoastmark class=\"yoast-text-mark\">This is a test</yoastmark>" );
13+
} );
14+
15+
it( "should not replace single quotes (or apostrophes) outside HTML tags", function() {
16+
expect( replaceSingleQuotesInTags( "This is a test, let's go!" ) )
17+
.toEqual( "This is a test, let's go!" );
18+
} );
19+
20+
it( "should replace the outer single quotes in HTML attribute values with double quotes", function() {
21+
expect( replaceSingleQuotesInTags( "<span style='color: red'>This</span> is a test" ) )
22+
.toEqual( "<span style=\"color: red\">This</span> is a test" );
23+
} );
24+
25+
it( "should not replace any inner single quotes in HTML attribute values", function() {
26+
expect( replaceSingleQuotesInTags( "<span data-attr=\"let's go, time's up\">This</span> is a test" ) )
27+
.toEqual( "<span data-attr=\"let's go, time's up\">This</span> is a test" );
28+
} );
29+
30+
it( "should replace the outer single quotes in multiple HTML attribute values with double quotes", function() {
31+
expect( replaceSingleQuotesInTags( "<yoastmark class='yoast-text-mark' style='color: blue'>This is a test</yoastmark>" ) )
32+
.toEqual( "<yoastmark class=\"yoast-text-mark\" style=\"color: blue\">This is a test</yoastmark>" );
33+
} );
34+
} );
35+
} );

packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,48 @@
1-
import { markWordsInSentences } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences";
1+
import { deConstructAnchor, markWordsInSentences, reConstructAnchor } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences";
22
import Mark from "../../../../src/values/Mark";
33
import matchWordCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord";
44

55
describe( "Adds Yoast marks to specific words in a sentence", function() {
6+
it( "should add Yoast marks to all instances of specified words in a sentence, except when there is an anchor," +
7+
" the marking should not be applied to the anchor tag attribute", function() {
8+
expect( markWordsInSentences(
9+
[ "picket", "tile" ],
10+
[ "Introducing Palisades Ceramic Picket Tile — the latest trend in <a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
11+
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" ],
12+
"en_EN"
13+
) ).toEqual( [
14+
new Mark( {
15+
marked: "Introducing Palisades Ceramic <yoastmark class='yoast-text-mark'>Picket Tile</yoastmark> — the latest trend in " +
16+
"<a href=\"https://www.tileclub.com/" +
17+
"collections/ceramic-tile\" target=\"_blank\" rel=\"noopener\">ceramic " +
18+
"<yoastmark class='yoast-text-mark'>tile</yoastmark></a>!",
19+
original: "Introducing Palisades Ceramic Picket Tile — the latest trend in " +
20+
"<a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
21+
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" } ),
22+
]
23+
);
24+
} );
25+
it( "should add Yoast marks to all instances of specified words in a sentence, except when there are multiple anchors," +
26+
" the marking should not be applied to the anchor tag attribute", function() {
27+
expect( markWordsInSentences(
28+
[ "picket", "tile" ],
29+
[ "Introducing Palisades Ceramic <a href=\"https://www.tileclub.com/ceramic-tile\">Picket Tile</a> — " +
30+
"the latest trend in <a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
31+
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" ],
32+
"en_EN"
33+
) ).toEqual( [
34+
new Mark( {
35+
marked: "Introducing Palisades Ceramic <a href=\"https://www.tileclub.com/ceramic-tile\"><yoastmark class='yoast-text-mark'>" +
36+
"Picket Tile</yoastmark></a> — the latest trend in " +
37+
"<a href=\"https://www.tileclub.com/" +
38+
"collections/ceramic-tile\" target=\"_blank\" rel=\"noopener\">ceramic " +
39+
"<yoastmark class='yoast-text-mark'>tile</yoastmark></a>!",
40+
original: "Introducing Palisades Ceramic <a href=\"https://www.tileclub.com/ceramic-tile\">Picket Tile</a> — " +
41+
"the latest trend in <a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
42+
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" } ),
43+
]
44+
);
45+
} );
646
it( "should add Yoast marks to all instances of specified words in a sentence", function() {
747
expect( markWordsInSentences(
848
[ "turtle", "hamster" ],
@@ -73,7 +113,7 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c
73113
new Mark( {
74114
marked: "<yoastmark class='yoast-text-mark'>小さい花の刺繍</yoastmark>しかし、それは在庫切れでしたマキシドレス。",
75115
original: "小さい花の刺繍しかし、それは在庫切れでしたマキシドレス。" } ),
76-
]
116+
]
77117
);
78118
} );
79119

@@ -102,3 +142,30 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c
102142
} );
103143
} );
104144

145+
describe( "test the deconstructAnchor and reconstructAnchor helper", () => {
146+
it( "correctly deconstructs and reconstructs an anchor", () => {
147+
const testAnchor = "<a href=\"https://yoast.com\">This is yoast.</a>";
148+
const deconstructedAnchor = deConstructAnchor( testAnchor );
149+
150+
expect( deconstructedAnchor ).toEqual( {
151+
openTag: "<a href=\"https://yoast.com\">",
152+
content: "This is yoast.",
153+
} );
154+
155+
const reconstructedAnchor = reConstructAnchor( deconstructedAnchor.openTag, deconstructedAnchor.content );
156+
expect( reconstructedAnchor ).toEqual( testAnchor );
157+
} );
158+
159+
it( "correctly deconstructs and reconstructs an anchor that contains html elements itself", () => {
160+
const testAnchor = "<a href=\"https://yoast.com\">This <i>is</i> <b>yoast</b>.</a>";
161+
const deconstructedAnchor = deConstructAnchor( testAnchor );
162+
163+
expect( deconstructedAnchor ).toEqual( {
164+
openTag: "<a href=\"https://yoast.com\">",
165+
content: "This <i>is</i> <b>yoast</b>.",
166+
} );
167+
168+
const reconstructedAnchor = reConstructAnchor( deconstructedAnchor.openTag, deconstructedAnchor.content );
169+
expect( reconstructedAnchor ).toEqual( testAnchor );
170+
} );
171+
} );

packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { getSubheadings } from "./getSubheadings";
77
* @param {array} marks The array of mark objects.
88
* @param {string} html The html of the page where we want to apply the marking to.
99
*
10-
* @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking tp.
10+
* @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking to.
1111
*/
1212
export function getFieldsToMark( marks, html ) {
1313
const fieldsToMark = uniq( flatten( marks.map( mark => {
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/**
2+
* Replaces single quotes around HTML attribute values with double quotes.
3+
* Double quotes are the standard, but we convert these to single quotes when parsing the HTML in `yoastseo` package.
4+
* Here, we change them back to double quotes so by parsing the HTML and then outputting it again.
5+
*
6+
* @param {string} str The input string.
7+
*
8+
* @returns {string} The string with single quotes around HTML attributes replaced with double quotes.
9+
*/
10+
export default function( str ) {
11+
const element = document.createElement( "body" );
12+
element.innerHTML = str;
13+
return element.innerHTML;
14+
}

packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js

Lines changed: 93 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,109 @@
1-
import matchWords from "../match/matchTextWithArray";
2-
import arrayToRegex from "../regex/createRegexFromArray";
1+
import { escapeRegExp } from "lodash-es";
32
import addMark from "../../../markers/addMarkSingleWord";
43
import Mark from "../../../values/Mark";
5-
import { escapeRegExp } from "lodash-es";
4+
import getAnchorsFromText from "../link/getAnchorsFromText";
5+
import matchWords from "../match/matchTextWithArray";
6+
import arrayToRegex from "../regex/createRegexFromArray";
7+
8+
// Regex to deconstruct an anchor into open tag, content and close tag.
9+
const anchorDeconstructionRegex = /(<a[\s]+[^>]+>)(.+?)(<\/a>)/;
10+
11+
/**
12+
* Deconstructs an anchor to the opening tag and the content. The content is the anchor text.
13+
* We don't return the closing tag since the value would always be the same, i.e. </a>.
14+
*
15+
* @param {string} anchor An anchor of the shape <a ...>...</a>.
16+
*
17+
* @returns {object} An object containing the opening tag and the content.
18+
*/
19+
export const deConstructAnchor = function( anchor ) {
20+
// The const array mirrors the anchorDeconstructionRegex, using a comma to access the first element without a name.
21+
const [ , openTag, content ] = anchor.match( anchorDeconstructionRegex );
22+
return {
23+
openTag: openTag,
24+
content: content,
25+
};
26+
};
27+
28+
/**
29+
* Reconstructs an anchor from an openTag, the content, and the closing tag.
30+
*
31+
* @param {string} openTag The opening tag of the anchor. Must be of the shape <a ...>.
32+
* @param {string} content The text of the anchor.
33+
*
34+
* @returns {string} An anchor.
35+
*/
36+
export const reConstructAnchor = function( openTag, content ) {
37+
return `${openTag}${content}</a>`;
38+
};
39+
40+
41+
/**
42+
* Gets the anchors and marks the anchors' text if the words are found in it.
43+
*
44+
* @param {string} sentence The sentence to retrieve the anchors from.
45+
* @param {RegExp} wordsRegex The regex of the words.
46+
*
47+
* @returns {Object} The anchors and the marked anchors.
48+
*/
49+
const getMarkedAnchors = function( sentence, wordsRegex ) {
50+
// Retrieve the anchors.
51+
const anchors = getAnchorsFromText( sentence );
52+
// For every anchor, apply the markings only to the anchor tag.
53+
const markedAnchors = anchors.map( anchor => {
54+
// Retrieve the open tag and the content/anchor text.
55+
const { openTag, content } = deConstructAnchor( anchor );
56+
57+
// Apply the marking to the anchor text if there is a match.
58+
const markedAnchorText = content.replace( wordsRegex, ( x ) => addMark( x ) );
59+
60+
// Create a new anchor tag with a (marked) anchor text.
61+
return reConstructAnchor( openTag, markedAnchorText );
62+
} );
63+
64+
return { anchors, markedAnchors };
65+
};
666

767
/**
868
* Adds marks to a sentence and merges marks if those are only separated by a space
969
* (e.g., if highlighting words "ballet" and "shoes" in a sentence "I have a lot of ballet shoes and other paraphernalia."
1070
* the marks will be put around "ballet shoes" together, not "`ballet` `shoes`".)
1171
*
1272
* @param {string} sentence The sentence to mark words in.
13-
* @param {[string]} topicFoundInSentence The words to mark in the sentence.
73+
* @param {[string]} wordsFoundInSentence The words to mark in the sentence.
1474
* @param {function} matchWordCustomHelper The language-specific helper function to match word in text.
1575
*
1676
* @returns {string} The sentence with marks.
1777
*/
18-
export const collectMarkingsInSentence = function( sentence, topicFoundInSentence, matchWordCustomHelper ) {
19-
topicFoundInSentence = topicFoundInSentence.map( word => escapeRegExp( word ) );
78+
export const collectMarkingsInSentence = function( sentence, wordsFoundInSentence, matchWordCustomHelper ) {
79+
wordsFoundInSentence = wordsFoundInSentence.map( word => escapeRegExp( word ) );
2080
// If a language has a custom helper to match words, we disable the word boundary when creating the regex.
21-
const topicRegex = matchWordCustomHelper ? arrayToRegex( topicFoundInSentence, true ) : arrayToRegex( topicFoundInSentence );
22-
const markup = sentence.replace( topicRegex, function( x ) {
81+
const wordsRegex = matchWordCustomHelper ? arrayToRegex( wordsFoundInSentence, true ) : arrayToRegex( wordsFoundInSentence );
82+
83+
// Retrieve the anchors and mark the anchors' text if the words are found in the anchors' text.
84+
const { anchors, markedAnchors } = getMarkedAnchors( sentence, wordsRegex );
85+
86+
let markup = sentence.replace( wordsRegex, function( x ) {
2387
return addMark( x );
2488
} );
2589

90+
/**
91+
* In 'markup', we apply the markings also inside the anchor's attribute if there is a match, on top of
92+
* marking the anchor's text.
93+
* The step below is to replace the incorrectly marked anchors with the marked anchors that we want:
94+
* where the markings are only applied in the anchor's text.
95+
*/
96+
if ( anchors.length > 0 ) {
97+
const markupAnchors = getAnchorsFromText( markup );
98+
for ( let i = 0; i < markupAnchors.length; i++ ) {
99+
markup = markup.replace( markupAnchors[ i ], markedAnchors[ i ] );
100+
}
101+
}
102+
103+
/*
104+
* If two marks are separated by only a space, remove the closing tag of the first mark and the opening tag of the
105+
* second mark so that the two marks can be combined into one.
106+
*/
26107
return ( markup.replace( new RegExp( "</yoastmark> <yoastmark class='yoast-text-mark'>", "ig" ), " " ) );
27108
};
28109

@@ -37,16 +118,16 @@ export const collectMarkingsInSentence = function( sentence, topicFoundInSentenc
37118
* @returns {[string]} The sentences with marks.
38119
*/
39120
export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) {
40-
let topicFoundInSentence = [];
121+
let wordsFoundInSentence = [];
41122
let markings = [];
42123

43124
sentences.forEach( function( sentence ) {
44-
topicFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches;
125+
wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches;
45126

46-
if ( topicFoundInSentence.length > 0 ) {
127+
if ( wordsFoundInSentence.length > 0 ) {
47128
markings = markings.concat( new Mark( {
48129
original: sentence,
49-
marked: collectMarkingsInSentence( sentence, topicFoundInSentence, matchWordCustomHelper ),
130+
marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ),
50131
} ) );
51132
}
52133
} );

packages/yoastseo/src/languageProcessing/index.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import { stripFullTags as stripHTMLTags } from "./helpers/sanitize/stripHTMLTags
2626
import sanitizeString from "./helpers/sanitize/sanitizeString";
2727
import { unifyAllSpaces } from "./helpers/sanitize/unifyWhitespace";
2828
import removePunctuation from "./helpers/sanitize/removePunctuation";
29+
import replaceSingleQuotesInTags from "./helpers/html/replaceQuotes";
2930
import countMetaDescriptionLength from "./helpers/word/countMetaDescriptionLength";
3031
import getLanguage from "./helpers/language/getLanguage";
3132
import getSentences from "./helpers/sentence/getSentences";
@@ -65,4 +66,5 @@ export {
6566
getSentences,
6667
getFieldsToMark,
6768
unifyAllSpaces,
69+
replaceSingleQuotesInTags,
6870
};

0 commit comments

Comments
 (0)