diff --git a/packages/js/src/decorator/tinyMCE.js b/packages/js/src/decorator/tinyMCE.js index 65c38f97d55..c3f2b8b4b31 100644 --- a/packages/js/src/decorator/tinyMCE.js +++ b/packages/js/src/decorator/tinyMCE.js @@ -1,6 +1,5 @@ -import { markers } from "yoastseo"; +import { markers, languageProcessing } from "yoastseo"; import { forEach } from "lodash-es"; -import { languageProcessing } from "yoastseo"; var MARK_TAG = "yoastmark"; @@ -36,11 +35,28 @@ function markTinyMCE( editor, paper, marks ) { let html = editor.getContent(); html = markers.removeMarks( html ); + /* + * Get the information whether we want to mark a specific part of the HTML. If we do, `fieldsToMark` should return an array with that information. + * For example, [ "subheading" ] means that we want to apply the markings in subheadings only, and not the other parts. + * `selectedHTML` is an array of the HTML parts that we want to apply the marking to. + */ const { fieldsToMark, selectedHTML } = languageProcessing.getFieldsToMark( marks, html ); // Generate marked HTML. forEach( marks, function( mark ) { + /* + * Classic editor uses double quotes for HTML attribute values. However, Block editor uses single quotes for HTML tag attributes, + * and that's why in `yoastseo`, we use single quotes for the attribute values when we create the marked object. As a result, + * the replacement did not work, as the marks passed by `yoastseo` did not match anything in the original text. + * This step is replacing the single quotes in the marked object output by `yoastseo` with double quotes. + * This way, we make sure that the replacement can find a match between the original text of the marked object and the text in the page. + */ + mark._properties.marked = languageProcessing.replaceSingleQuotesInTags( mark._properties.marked ); + mark._properties.original = languageProcessing.replaceSingleQuotesInTags( mark._properties.original ); + + // Check if we want to mark only a specific part of the HTML. 
if ( fieldsToMark.length > 0 ) { + // Apply the marking to the selected HTML parts. selectedHTML.forEach( element => { const markedElement = mark.applyWithReplace( element ); html = html.replace( element, markedElement ); diff --git a/packages/yoastseo/spec/languageProcessing/helpers/html/replaceQuotesSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/html/replaceQuotesSpec.js new file mode 100644 index 00000000000..a6545bc8b84 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/html/replaceQuotesSpec.js @@ -0,0 +1,35 @@ +import replaceSingleQuotesInTags from "../../../../src/languageProcessing/helpers/html/replaceQuotes"; + +describe( "replace-quotes", function() { + describe( "replaceSingleQuotesInTags", function() { + it( "should return the same string when no single quotes are present", function() { + expect( replaceSingleQuotesInTags( "This is a test" ) ) + .toEqual( "This is a test" ); + } ); + + it( "should return the same string when only double quotes in HTML attribute values are present", function() { + expect( replaceSingleQuotesInTags( "This is a test" ) ) + .toEqual( "This is a test" ); + } ); + + it( "should not replace single quotes (or apostrophes) outside HTML tags", function() { + expect( replaceSingleQuotesInTags( "This is a test, let's go!" ) ) + .toEqual( "This is a test, let's go!" 
); + } ); + + it( "should replace the outer single quotes in HTML attribute values with double quotes", function() { + expect( replaceSingleQuotesInTags( "This is a test" ) ) + .toEqual( "This is a test" ); + } ); + + it( "should not replace any inner single quotes in HTML attribute values", function() { + expect( replaceSingleQuotesInTags( "This is a test" ) ) + .toEqual( "This is a test" ); + } ); + + it( "should replace the outer single quotes in multiple HTML attribute values with double quotes", function() { + expect( replaceSingleQuotesInTags( "This is a test" ) ) + .toEqual( "This is a test" ); + } ); + } ); +} ); diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js index caf3ae18664..1fc5049810e 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js @@ -1,8 +1,48 @@ -import { markWordsInSentences } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences"; +import { deConstructAnchor, markWordsInSentences, reConstructAnchor } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences"; import Mark from "../../../../src/values/Mark"; import matchWordCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; describe( "Adds Yoast marks to specific words in a sentence", function() { + it( "should add Yoast marks to all instances of specified words in a sentence, except when there is an anchor," + + " the marking should not be applied to the anchor tag attribute", function() { + expect( markWordsInSentences( + [ "picket", "tile" ], + [ "Introducing Palisades Ceramic Picket Tile — the latest trend in ceramic tile!" 
], + "en_EN" + ) ).toEqual( [ + new Mark( { + marked: "Introducing Palisades Ceramic Picket Tile — the latest trend in " + + "ceramic " + + "tile!", + original: "Introducing Palisades Ceramic Picket Tile — the latest trend in " + + "ceramic tile!" } ), + ] + ); + } ); + it( "should add Yoast marks to all instances of specified words in a sentence, except when there are multiple anchors," + + " the marking should not be applied to the anchor tag attribute", function() { + expect( markWordsInSentences( + [ "picket", "tile" ], + [ "Introducing Palisades Ceramic Picket Tile — " + + "the latest trend in ceramic tile!" ], + "en_EN" + ) ).toEqual( [ + new Mark( { + marked: "Introducing Palisades Ceramic " + + "Picket Tile — the latest trend in " + + "ceramic " + + "tile!", + original: "Introducing Palisades Ceramic Picket Tile — " + + "the latest trend in ceramic tile!" } ), + ] + ); + } ); it( "should add Yoast marks to all instances of specified words in a sentence", function() { expect( markWordsInSentences( [ "turtle", "hamster" ], @@ -73,7 +113,7 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c new Mark( { marked: "小さい花の刺繍しかし、それは在庫切れでしたマキシドレス。", original: "小さい花の刺繍しかし、それは在庫切れでしたマキシドレス。" } ), - ] + ] ); } ); @@ -102,3 +142,30 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c } ); } ); +describe( "test the deconstructAnchor and reconstructAnchor helper", () => { + it( "correctly deconstructs and reconstructs an anchor", () => { + const testAnchor = "This is yoast."; + const deconstructedAnchor = deConstructAnchor( testAnchor ); + + expect( deconstructedAnchor ).toEqual( { + openTag: "", + content: "This is yoast.", + } ); + + const reconstructedAnchor = reConstructAnchor( deconstructedAnchor.openTag, deconstructedAnchor.content ); + expect( reconstructedAnchor ).toEqual( testAnchor ); + } ); + + it( "correctly deconstructs and reconstructs an anchor that contains html elements itself", () => { + const 
testAnchor = "This is yoast."; + const deconstructedAnchor = deConstructAnchor( testAnchor ); + + expect( deconstructedAnchor ).toEqual( { + openTag: "", + content: "This is yoast.", + } ); + + const reconstructedAnchor = reConstructAnchor( deconstructedAnchor.openTag, deconstructedAnchor.content ); + expect( reconstructedAnchor ).toEqual( testAnchor ); + } ); +} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js b/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js index be770388e95..04783418366 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js +++ b/packages/yoastseo/src/languageProcessing/helpers/html/getFieldsToMark.js @@ -7,7 +7,7 @@ import { getSubheadings } from "./getSubheadings"; * @param {array} marks The array of mark objects. * @param {string} html The html of the page where we want to apply the marking to. * - * @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking tp. + * @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking to. */ export function getFieldsToMark( marks, html ) { const fieldsToMark = uniq( flatten( marks.map( mark => { diff --git a/packages/yoastseo/src/languageProcessing/helpers/html/replaceQuotes.js b/packages/yoastseo/src/languageProcessing/helpers/html/replaceQuotes.js new file mode 100644 index 00000000000..dffe809f48d --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/html/replaceQuotes.js @@ -0,0 +1,14 @@ +/** + * Replaces single quotes around HTML attribute values with double quotes. + * Double quotes are the standard, but we convert these to single quotes when parsing the HTML in the `yoastseo` package. + * Here, we change them back to double quotes by parsing the HTML and then outputting it again. + * + * @param {string} str The input string. 
+ * + * @returns {string} The string with single quotes around HTML attributes replaced with double quotes. + */ +export default function( str ) { + const element = document.createElement( "body" ); + element.innerHTML = str; + return element.innerHTML; +} diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js index 0d10a8f5af6..b6252dd1264 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js @@ -1,8 +1,68 @@ -import matchWords from "../match/matchTextWithArray"; -import arrayToRegex from "../regex/createRegexFromArray"; +import { escapeRegExp } from "lodash-es"; import addMark from "../../../markers/addMarkSingleWord"; import Mark from "../../../values/Mark"; -import { escapeRegExp } from "lodash-es"; +import getAnchorsFromText from "../link/getAnchorsFromText"; +import matchWords from "../match/matchTextWithArray"; +import arrayToRegex from "../regex/createRegexFromArray"; + +// Regex to deconstruct an anchor into open tag, content and close tag. +const anchorDeconstructionRegex = /(]+>)(.+?)(<\/a>)/; + +/** + * Deconstructs an anchor to the opening tag and the content. The content is the anchor text. + * We don't return the closing tag since the value would always be the same, i.e. `</a>`. + * + * @param {string} anchor An anchor of the shape `<a ...>...</a>`. + * + * @returns {object} An object containing the opening tag and the content. + */ +export const deConstructAnchor = function( anchor ) { + // The const array mirrors the anchorDeconstructionRegex, using a leading comma to skip the first element (the full match). + const [ , openTag, content ] = anchor.match( anchorDeconstructionRegex ); + return { + openTag: openTag, + content: content, + }; +}; + +/** + * Reconstructs an anchor from an openTag, the content, and the closing tag. 
+ * + * @param {string} openTag The opening tag of the anchor. Must be of the shape `<a ...>`. + * @param {string} content The text of the anchor. + * + * @returns {string} An anchor. + */ +export const reConstructAnchor = function( openTag, content ) { + return `${openTag}${content}`; +}; + + +/** + * Gets the anchors and marks the anchors' text if the words are found in it. + * + * @param {string} sentence The sentence to retrieve the anchors from. + * @param {RegExp} wordsRegex The regex of the words. + * + * @returns {Object} The anchors and the marked anchors. + */ +const getMarkedAnchors = function( sentence, wordsRegex ) { + // Retrieve the anchors. + const anchors = getAnchorsFromText( sentence ); + // For every anchor, apply the markings only to the anchor text, leaving the opening tag untouched. + const markedAnchors = anchors.map( anchor => { + // Retrieve the open tag and the content/anchor text. + const { openTag, content } = deConstructAnchor( anchor ); + + // Apply the marking to the anchor text if there is a match. + const markedAnchorText = content.replace( wordsRegex, ( x ) => addMark( x ) ); + + // Create a new anchor tag with a (marked) anchor text. + return reConstructAnchor( openTag, markedAnchorText ); + } ); + + return { anchors, markedAnchors }; +}; /** * Adds marks to a sentence and merges marks if those are only separated by a space @@ -10,19 +70,40 @@ import { escapeRegExp } from "lodash-es"; * the marks will be put around "ballet shoes" together, not "`ballet` `shoes`".) * * @param {string} sentence The sentence to mark words in. - * @param {[string]} topicFoundInSentence The words to mark in the sentence. + * @param {[string]} wordsFoundInSentence The words to mark in the sentence. * @param {function} matchWordCustomHelper The language-specific helper function to match word in text. * * @returns {string} The sentence with marks. 
*/ -export const collectMarkingsInSentence = function( sentence, topicFoundInSentence, matchWordCustomHelper ) { - topicFoundInSentence = topicFoundInSentence.map( word => escapeRegExp( word ) ); +export const collectMarkingsInSentence = function( sentence, wordsFoundInSentence, matchWordCustomHelper ) { + wordsFoundInSentence = wordsFoundInSentence.map( word => escapeRegExp( word ) ); // If a language has a custom helper to match words, we disable the word boundary when creating the regex. - const topicRegex = matchWordCustomHelper ? arrayToRegex( topicFoundInSentence, true ) : arrayToRegex( topicFoundInSentence ); - const markup = sentence.replace( topicRegex, function( x ) { + const wordsRegex = matchWordCustomHelper ? arrayToRegex( wordsFoundInSentence, true ) : arrayToRegex( wordsFoundInSentence ); + + // Retrieve the anchors and mark the anchors' text if the words are found in the anchors' text. + const { anchors, markedAnchors } = getMarkedAnchors( sentence, wordsRegex ); + + let markup = sentence.replace( wordsRegex, function( x ) { return addMark( x ); } ); + /** + * In 'markup', we apply the markings also inside the anchor's attribute if there is a match, on top of + * marking the anchor's text. + * The step below is to replace the incorrectly marked anchors with the marked anchors that we want: + * where the markings are only applied in the anchor's text. + */ + if ( anchors.length > 0 ) { + const markupAnchors = getAnchorsFromText( markup ); + for ( let i = 0; i < markupAnchors.length; i++ ) { + markup = markup.replace( markupAnchors[ i ], markedAnchors[ i ] ); + } + } + + /* + * If two marks are separated by only a space, remove the closing tag of the first mark and the opening tag of the + * second mark so that the two marks can be combined into one. 
+ */ return ( markup.replace( new RegExp( " ", "ig" ), " " ) ); }; @@ -37,16 +118,16 @@ export const collectMarkingsInSentence = function( sentence, topicFoundInSentenc * @returns {[string]} The sentences with marks. */ export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) { - let topicFoundInSentence = []; + let wordsFoundInSentence = []; let markings = []; sentences.forEach( function( sentence ) { - topicFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; + wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; - if ( topicFoundInSentence.length > 0 ) { + if ( wordsFoundInSentence.length > 0 ) { markings = markings.concat( new Mark( { original: sentence, - marked: collectMarkingsInSentence( sentence, topicFoundInSentence, matchWordCustomHelper ), + marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), } ) ); } } ); diff --git a/packages/yoastseo/src/languageProcessing/index.js b/packages/yoastseo/src/languageProcessing/index.js index 19f3ead1357..c1b1fa00f4b 100644 --- a/packages/yoastseo/src/languageProcessing/index.js +++ b/packages/yoastseo/src/languageProcessing/index.js @@ -26,6 +26,7 @@ import { stripFullTags as stripHTMLTags } from "./helpers/sanitize/stripHTMLTags import sanitizeString from "./helpers/sanitize/sanitizeString"; import { unifyAllSpaces } from "./helpers/sanitize/unifyWhitespace"; import removePunctuation from "./helpers/sanitize/removePunctuation"; +import replaceSingleQuotesInTags from "./helpers/html/replaceQuotes"; import countMetaDescriptionLength from "./helpers/word/countMetaDescriptionLength"; import getLanguage from "./helpers/language/getLanguage"; import getSentences from "./helpers/sentence/getSentences"; @@ -65,4 +66,5 @@ export { getSentences, getFieldsToMark, unifyAllSpaces, + replaceSingleQuotesInTags, };