diff --git a/packages/js/src/decorator/tinyMCE.js b/packages/js/src/decorator/tinyMCE.js
index 65c38f97d55..c3f2b8b4b31 100644
--- a/packages/js/src/decorator/tinyMCE.js
+++ b/packages/js/src/decorator/tinyMCE.js
@@ -1,6 +1,5 @@
-import { markers } from "yoastseo";
+import { markers, languageProcessing } from "yoastseo";
import { forEach } from "lodash-es";
-import { languageProcessing } from "yoastseo";
var MARK_TAG = "yoastmark";
@@ -36,11 +35,28 @@ function markTinyMCE( editor, paper, marks ) {
let html = editor.getContent();
html = markers.removeMarks( html );
+ /*
+ * Get the information whether we want to mark a specific part of the HTML. If we do, `fieldsToMark` should return an array with that information.
+ * For example, [ "subheading" ] means that we want to apply the markings in subheadings only, and not the other parts.
+ * `selectedHTML` is an array of the HTML parts that we want to apply the marking to.
+ */
const { fieldsToMark, selectedHTML } = languageProcessing.getFieldsToMark( marks, html );
// Generate marked HTML.
forEach( marks, function( mark ) {
+ /*
+ * Classic editor uses double quotes for HTML attribute values. However, Block editor uses single quotes for HTML tag attributes,
+ * and that's why in `yoastseo`, we use single quotes for the attribute values when we create the marked object. As a result,
+ * the replacement did not work, as the marks passed by `yoastseo` did not match anything in the original text.
+ * This step is replacing the single quotes in the marked object output by `yoastseo` with double quotes.
+ * This way, we make sure that the replacement can find a match between the original text of the marked object and the text in the page.
+ */
+ mark._properties.marked = languageProcessing.replaceSingleQuotesInTags( mark._properties.marked );
+ mark._properties.original = languageProcessing.replaceSingleQuotesInTags( mark._properties.original );
+
+ // Check if we want to mark only specific part of the HTML.
if ( fieldsToMark.length > 0 ) {
+ // Apply the marking to the selected HTML parts.
selectedHTML.forEach( element => {
const markedElement = mark.applyWithReplace( element );
html = html.replace( element, markedElement );
diff --git a/packages/yoastseo/spec/languageProcessing/helpers/html/replaceQuotesSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/html/replaceQuotesSpec.js
new file mode 100644
index 00000000000..a6545bc8b84
--- /dev/null
+++ b/packages/yoastseo/spec/languageProcessing/helpers/html/replaceQuotesSpec.js
@@ -0,0 +1,35 @@
+import replaceSingleQuotesInTags from "../../../../src/languageProcessing/helpers/html/replaceQuotes";
+
+describe( "replace-quotes", function() {
+ describe( "replaceSingleQuotesInTags", function() {
+ it( "should return the same string when no single quotes are present", function() {
+ expect( replaceSingleQuotesInTags( "This is a test" ) )
+ .toEqual( "This is a test" );
+ } );
+
+ it( "should return the same string when only double quotes in HTML attribute values are present", function() {
+ expect( replaceSingleQuotesInTags( "This is a test" ) )
+ .toEqual( "This is a test" );
+ } );
+
+ it( "should not replace single quotes (or apostrophes) outside HTML tags", function() {
+ expect( replaceSingleQuotesInTags( "This is a test, let's go!" ) )
+ .toEqual( "This is a test, let's go!" );
+ } );
+
+ it( "should replace the outer single quotes in HTML attribute values with double quotes", function() {
+ expect( replaceSingleQuotesInTags( "This is a test" ) )
+ .toEqual( "This is a test" );
+ } );
+
+ it( "should not replace any inner single quotes in HTML attribute values", function() {
+ expect( replaceSingleQuotesInTags( "This is a test" ) )
+ .toEqual( "This is a test" );
+ } );
+
+ it( "should replace the outer single quotes in multiple HTML attribute values with double quotes", function() {
+ expect( replaceSingleQuotesInTags( "This is a test" ) )
+ .toEqual( "This is a test" );
+ } );
+ } );
+} );
diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js
index caf3ae18664..1fc5049810e 100644
--- a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js
@@ -1,8 +1,48 @@
-import { markWordsInSentences } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences";
+import { deConstructAnchor, markWordsInSentences, reConstructAnchor } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences";
import Mark from "../../../../src/values/Mark";
import matchWordCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord";
describe( "Adds Yoast marks to specific words in a sentence", function() {
+ it( "should add Yoast marks to all instances of specified words in a sentence, except when there is an anchor," +
+ " the marking should not be applied to the anchor tag attribute", function() {
+ expect( markWordsInSentences(
+ [ "picket", "tile" ],
+ [ "Introducing Palisades Ceramic Picket Tile — the latest trend in ceramic tile!" ],
+ "en_EN"
+ ) ).toEqual( [
+ new Mark( {
+ marked: "Introducing Palisades Ceramic Picket Tile — the latest trend in " +
+ "ceramic " +
+ "tile!",
+ original: "Introducing Palisades Ceramic Picket Tile — the latest trend in " +
+ "ceramic tile!" } ),
+ ]
+ );
+ } );
+ it( "should add Yoast marks to all instances of specified words in a sentence, except when there are multiple anchors," +
+ " the marking should not be applied to the anchor tag attribute", function() {
+ expect( markWordsInSentences(
+ [ "picket", "tile" ],
+ [ "Introducing Palisades Ceramic Picket Tile — " +
+ "the latest trend in ceramic tile!" ],
+ "en_EN"
+ ) ).toEqual( [
+ new Mark( {
+ marked: "Introducing Palisades Ceramic " +
+ "Picket Tile — the latest trend in " +
+ "ceramic " +
+ "tile!",
+ original: "Introducing Palisades Ceramic Picket Tile — " +
+ "the latest trend in ceramic tile!" } ),
+ ]
+ );
+ } );
it( "should add Yoast marks to all instances of specified words in a sentence", function() {
expect( markWordsInSentences(
[ "turtle", "hamster" ],
@@ -73,7 +113,7 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c
new Mark( {
marked: "小さい花の刺繍しかし、それは在庫切れでしたマキシドレス。",
original: "小さい花の刺繍しかし、それは在庫切れでしたマキシドレス。" } ),
- ]
+ ]
);
} );
@@ -102,3 +142,30 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c
} );
} );
+describe( "test the deconstructAnchor and reconstructAnchor helper", () => {
+ it( "correctly deconstructs and reconstructs an anchor", () => {
+ const testAnchor = "This is yoast.";
+ const deconstructedAnchor = deConstructAnchor( testAnchor );
+
+ expect( deconstructedAnchor ).toEqual( {
+ openTag: "",
+ content: "This is yoast.",
+ } );
+
+ const reconstructedAnchor = reConstructAnchor( deconstructedAnchor.openTag, deconstructedAnchor.content );
+ expect( reconstructedAnchor ).toEqual( testAnchor );
+ } );
+
+ it( "correctly deconstructs and reconstructs an anchor that contains html elements itself", () => {
+ const testAnchor = "This is yoast.";
+ const deconstructedAnchor = deConstructAnchor( testAnchor );
+
+ expect( deconstructedAnchor ).toEqual( {
+ openTag: "",
+ content: "This is yoast.",
+ } );
+
+ const reconstructedAnchor = reConstructAnchor( deconstructedAnchor.openTag, deconstructedAnchor.content );
+ expect( reconstructedAnchor ).toEqual( testAnchor );
+ } );
+} );
diff --git a/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js b/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js
index be770388e95..04783418366 100644
--- a/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js
+++ b/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js
@@ -7,7 +7,7 @@ import { getSubheadings } from "./getSubheadings";
* @param {array} marks The array of mark objects.
* @param {string} html The html of the page where we want to apply the marking to.
*
- * @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking tp.
+ * @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking to.
*/
export function getFieldsToMark( marks, html ) {
const fieldsToMark = uniq( flatten( marks.map( mark => {
diff --git a/packages/yoastseo/src/languageProcessing/helpers/html/replaceQuotes.js b/packages/yoastseo/src/languageProcessing/helpers/html/replaceQuotes.js
new file mode 100644
index 00000000000..dffe809f48d
--- /dev/null
+++ b/packages/yoastseo/src/languageProcessing/helpers/html/replaceQuotes.js
@@ -0,0 +1,14 @@
+/**
+ * Replaces single quotes around HTML attribute values with double quotes.
+ * Double quotes are the standard, but we convert these to single quotes when parsing the HTML in `yoastseo` package.
+ * Here, we change them back to double quotes by parsing the HTML into a detached element and then outputting it again.
+ *
+ * @param {string} str The input string.
+ *
+ * @returns {string} The string with single quotes around HTML attributes replaced with double quotes.
+ */
+export default function( str ) {
+ const element = document.createElement( "body" ); // NOTE(review): relies on a global `document`; presumably browser/jsdom only - confirm.
+ element.innerHTML = str;
+ return element.innerHTML;
+}
diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js
index 0d10a8f5af6..b6252dd1264 100644
--- a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js
+++ b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js
@@ -1,8 +1,68 @@
-import matchWords from "../match/matchTextWithArray";
-import arrayToRegex from "../regex/createRegexFromArray";
+import { escapeRegExp } from "lodash-es";
import addMark from "../../../markers/addMarkSingleWord";
import Mark from "../../../values/Mark";
-import { escapeRegExp } from "lodash-es";
+import getAnchorsFromText from "../link/getAnchorsFromText";
+import matchWords from "../match/matchTextWithArray";
+import arrayToRegex from "../regex/createRegexFromArray";
+
+// Regex to deconstruct an anchor into open tag (<a ...>), content (the anchor text) and close tag (</a>).
+const anchorDeconstructionRegex = /(<a[^>]+>)(.+?)(<\/a>)/;
+
+/**
+ * Deconstructs an anchor to the opening tag and the content. The content is the anchor text.
+ * We don't return the closing tag since the value would always be the same, i.e. </a>.
+ *
+ * @param {string} anchor An anchor of the shape <a ...>...</a>.
+ *
+ * @returns {object} An object containing the opening tag and the content.
+ */
+export const deConstructAnchor = function( anchor ) {
+ // The leading comma skips index 0 (the full match). NOTE: `match` returns null for a non-matching input, which makes this destructuring throw.
+ const [ , openTag, content ] = anchor.match( anchorDeconstructionRegex );
+ return {
+ openTag: openTag,
+ content: content,
+ };
+};
+
+/**
+ * Reconstructs an anchor from its opening tag and its content, appending the fixed closing tag </a>.
+ *
+ * @param {string} openTag The opening tag of the anchor. Must be of the shape <a ...>.
+ * @param {string} content The text of the anchor.
+ *
+ * @returns {string} An anchor of the shape <a ...>content</a>.
+ */
+export const reConstructAnchor = function( openTag, content ) {
+ return `${openTag}${content}</a>`;
+};
+
+
+/**
+ * Gets the anchors and marks the anchors' text if the words are found in it.
+ *
+ * @param {string} sentence The sentence to retrieve the anchors from.
+ * @param {RegExp} wordsRegex The regex of the words.
+ *
+ * @returns {Object} An object with `anchors` (as found in the sentence) and `markedAnchors` (same order, marks in the anchor text only).
+ */
+const getMarkedAnchors = function( sentence, wordsRegex ) {
+ // Retrieve the anchors.
+ const anchors = getAnchorsFromText( sentence );
+ // For every anchor, apply the markings only to the anchor text, leaving the opening tag and its attributes untouched.
+ const markedAnchors = anchors.map( anchor => {
+ // Retrieve the open tag and the content/anchor text.
+ const { openTag, content } = deConstructAnchor( anchor );
+
+ // Apply the marking to the anchor text if there is a match.
+ const markedAnchorText = content.replace( wordsRegex, ( x ) => addMark( x ) );
+
+ // Create a new anchor tag with a (marked) anchor text.
+ return reConstructAnchor( openTag, markedAnchorText );
+ } );
+
+ return { anchors, markedAnchors };
+};
/**
* Adds marks to a sentence and merges marks if those are only separated by a space
@@ -10,19 +70,40 @@ import { escapeRegExp } from "lodash-es";
* the marks will be put around "ballet shoes" together, not "`ballet` `shoes`".)
*
* @param {string} sentence The sentence to mark words in.
- * @param {[string]} topicFoundInSentence The words to mark in the sentence.
+ * @param {[string]} wordsFoundInSentence The words to mark in the sentence.
* @param {function} matchWordCustomHelper The language-specific helper function to match word in text.
*
* @returns {string} The sentence with marks.
*/
-export const collectMarkingsInSentence = function( sentence, topicFoundInSentence, matchWordCustomHelper ) {
- topicFoundInSentence = topicFoundInSentence.map( word => escapeRegExp( word ) );
+export const collectMarkingsInSentence = function( sentence, wordsFoundInSentence, matchWordCustomHelper ) {
+ wordsFoundInSentence = wordsFoundInSentence.map( word => escapeRegExp( word ) );
// If a language has a custom helper to match words, we disable the word boundary when creating the regex.
- const topicRegex = matchWordCustomHelper ? arrayToRegex( topicFoundInSentence, true ) : arrayToRegex( topicFoundInSentence );
- const markup = sentence.replace( topicRegex, function( x ) {
+ const wordsRegex = matchWordCustomHelper ? arrayToRegex( wordsFoundInSentence, true ) : arrayToRegex( wordsFoundInSentence );
+
+ // Retrieve the anchors and mark the anchors' text if the words are found in the anchors' text.
+ const { anchors, markedAnchors } = getMarkedAnchors( sentence, wordsRegex );
+
+ let markup = sentence.replace( wordsRegex, function( x ) {
return addMark( x );
} );
+ /*
+ * In 'markup', the markings are also applied inside the anchor's attributes if there is a match.
+ * The step below replaces those incorrectly marked anchors with the marked anchors that we want:
+ * where the markings are only applied in the anchor's text. A replacer function is used so that
+ * "$" sequences in an anchor (e.g. in a URL) are inserted literally, not as replacement patterns.
+ */
+ if ( anchors.length > 0 ) {
+ const markupAnchors = getAnchorsFromText( markup );
+ for ( let i = 0; i < markupAnchors.length; i++ ) {
+ markup = markup.replace( markupAnchors[ i ], () => markedAnchors[ i ] );
+ }
+ }
+
+ /*
+ * If two marks are separated by only a space, remove the closing tag of the first mark and the opening tag of the
+ * second mark so that the two marks can be combined into one.
+ */
return ( markup.replace( new RegExp( "</yoastmark> <yoastmark class='yoast-text-mark'>", "ig" ), " " ) );
};
@@ -37,16 +118,16 @@ export const collectMarkingsInSentence = function( sentence, topicFoundInSentenc
* @returns {[string]} The sentences with marks.
*/
export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) {
- let topicFoundInSentence = [];
+ let wordsFoundInSentence = [];
let markings = [];
sentences.forEach( function( sentence ) {
- topicFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches;
+ wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches;
- if ( topicFoundInSentence.length > 0 ) {
+ if ( wordsFoundInSentence.length > 0 ) {
markings = markings.concat( new Mark( {
original: sentence,
- marked: collectMarkingsInSentence( sentence, topicFoundInSentence, matchWordCustomHelper ),
+ marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ),
} ) );
}
} );
diff --git a/packages/yoastseo/src/languageProcessing/index.js b/packages/yoastseo/src/languageProcessing/index.js
index 19f3ead1357..c1b1fa00f4b 100644
--- a/packages/yoastseo/src/languageProcessing/index.js
+++ b/packages/yoastseo/src/languageProcessing/index.js
@@ -26,6 +26,7 @@ import { stripFullTags as stripHTMLTags } from "./helpers/sanitize/stripHTMLTags
import sanitizeString from "./helpers/sanitize/sanitizeString";
import { unifyAllSpaces } from "./helpers/sanitize/unifyWhitespace";
import removePunctuation from "./helpers/sanitize/removePunctuation";
+import replaceSingleQuotesInTags from "./helpers/html/replaceQuotes";
import countMetaDescriptionLength from "./helpers/word/countMetaDescriptionLength";
import getLanguage from "./helpers/language/getLanguage";
import getSentences from "./helpers/sentence/getSentences";
@@ -65,4 +66,5 @@ export {
getSentences,
getFieldsToMark,
unifyAllSpaces,
+ replaceSingleQuotesInTags,
};