Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2afa263
only mark the anchor text
FAMarfuaty Nov 9, 2022
f0644b2
add unit tests
FAMarfuaty Nov 9, 2022
daa14cf
fix highlight in classic editor
FAMarfuaty Nov 9, 2022
dfbdc4b
Create a helper to replace single quotes to double quotes and add uni…
FAMarfuaty Nov 10, 2022
9913b62
convert single quotes to double quotes in Classic editor
FAMarfuaty Nov 10, 2022
e39d58b
Make a separate helper to get the anchors and the marked anchors
FAMarfuaty Nov 10, 2022
a80d44f
Adjust documentation
FAMarfuaty Nov 14, 2022
4ad41d6
Add comment
agnieszkaszuba Nov 16, 2022
899572c
Rename 'topic' to 'words'
agnieszkaszuba Nov 16, 2022
09480e4
Merge branch 'trunk' of github.com:Yoast/wordpress-seo into PC-965-yo…
agnieszkaszuba Nov 17, 2022
d45ade7
Add comment
FAMarfuaty Nov 17, 2022
fe14f57
Merge branch 'trunk' into PC-965-yoast-markers-break-html-of-content
hdvos Nov 22, 2022
a280537
Merge branch 'trunk' into PC-965-yoast-markers-break-html-of-content
hdvos Nov 22, 2022
7c97ec6
Merge branch 'trunk' into PC-965-yoast-markers-break-html-of-content
hdvos Nov 23, 2022
e723bfe
fix bug that the word in the url is marked instead of the content of …
hdvos Nov 25, 2022
1fcd588
clean up code
FAMarfuaty Nov 28, 2022
d127696
add specs for reconstructAnchor and deconstructAnchor
hdvos Nov 28, 2022
850941a
simplify code
FAMarfuaty Nov 28, 2022
39cca12
Merge branch 'PC-965-yoast-markers-break-html-of-content' of github.c…
FAMarfuaty Nov 28, 2022
8940fed
Adjust unit tests
FAMarfuaty Nov 28, 2022
9de0715
adjust test description
FAMarfuaty Nov 28, 2022
71a14c3
adjust comment
FAMarfuaty Nov 28, 2022
51f0f99
edit typos in comments
iolse Nov 30, 2022
deef450
edit function documentation to add clarity
iolse Dec 1, 2022
33b6b48
Merge branch 'trunk' of https://github.com/Yoast/wordpress-seo into P…
marinakoleva Dec 5, 2022
af58ac5
Merge branch 'trunk' of https://github.com/Yoast/wordpress-seo into P…
marinakoleva Dec 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions packages/js/src/decorator/tinyMCE.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { markers } from "yoastseo";
import { markers, languageProcessing } from "yoastseo";
import { forEach } from "lodash-es";
import { languageProcessing } from "yoastseo";

var MARK_TAG = "yoastmark";

Expand Down Expand Up @@ -36,11 +35,28 @@ function markTinyMCE( editor, paper, marks ) {
let html = editor.getContent();
html = markers.removeMarks( html );

/*
* Get the information whether we want to mark a specific part of the HTML. If we do, `fieldsToMark` should return an array with that information.
* For example, [ "subheading" ] means that we want to apply the markings in subheadings only, and not the other parts.
* `selectedHTML` is an array of the HTML parts that we want to apply the marking to.
*/
const { fieldsToMark, selectedHTML } = languageProcessing.getFieldsToMark( marks, html );

// Generate marked HTML.
forEach( marks, function( mark ) {
/*
* Classic editor uses double quotes for HTML attribute values. However, Block editor uses single quotes for HTML tag attributes,
* and that's why in `yoastseo`, we use single quotes for the attribute values when we create the marked object. As a result,
* the replacement did not work, as the marks passed by `yoastseo` did not match anything in the original text.
* This step is replacing the single quotes in the marked object output by `yoastseo` with double quotes.
* This way, we make sure that the replacement can find a match between the original text of the marked object and the text in the page.
*/
mark._properties.marked = languageProcessing.replaceSingleQuotesInTags( mark._properties.marked );
mark._properties.original = languageProcessing.replaceSingleQuotesInTags( mark._properties.original );

Comment on lines +54 to +56
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Classic editor uses double quotes for HTML attribute values. However, in yoastseo, we use single quotes for the attribute values when we create the marked object. As a result, the replacement did not work, as the marks passed by yoastseo did not match anything in the original text.
This step is replacing the single quotes in the marked object output by yoastseo with double quotes. This way, we make sure that the replacement can find a match between the original text of the marked object and the text in the page.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In yoastseo, single quotes are used because Block editor uses single quotes for HTML tag attributes.

// Check if we want to mark only specific part of the HTML.
if ( fieldsToMark.length > 0 ) {
// Apply the marking to the selected HTML parts.
selectedHTML.forEach( element => {
const markedElement = mark.applyWithReplace( element );
html = html.replace( element, markedElement );
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import replaceSingleQuotesInTags from "../../../../src/languageProcessing/helpers/html/replaceQuotes";

describe( "replace-quotes", () => {
	describe( "replaceSingleQuotesInTags", () => {
		// Every case holds the test description, the string passed to the helper and the string the helper should return.
		const cases = [
			{
				description: "should return the same string when no single quotes are present",
				input: "This is a test",
				expected: "This is a test",
			},
			{
				description: "should return the same string when only double quotes in HTML attribute values are present",
				input: "<yoastmark class=\"yoast-text-mark\">This is a test</yoastmark>",
				expected: "<yoastmark class=\"yoast-text-mark\">This is a test</yoastmark>",
			},
			{
				description: "should not replace single quotes (or apostrophes) outside HTML tags",
				input: "This is a test, let's go!",
				expected: "This is a test, let's go!",
			},
			{
				description: "should replace the outer single quotes in HTML attribute values with double quotes",
				input: "<span style='color: red'>This</span> is a test",
				expected: "<span style=\"color: red\">This</span> is a test",
			},
			{
				description: "should not replace any inner single quotes in HTML attribute values",
				input: "<span data-attr=\"let's go, time's up\">This</span> is a test",
				expected: "<span data-attr=\"let's go, time's up\">This</span> is a test",
			},
			{
				description: "should replace the outer single quotes in multiple HTML attribute values with double quotes",
				input: "<yoastmark class='yoast-text-mark' style='color: blue'>This is a test</yoastmark>",
				expected: "<yoastmark class=\"yoast-text-mark\" style=\"color: blue\">This is a test</yoastmark>",
			},
		];

		// Register one test per case, in the order listed above.
		cases.forEach( ( { description, input, expected } ) => {
			it( description, () => {
				expect( replaceSingleQuotesInTags( input ) ).toEqual( expected );
			} );
		} );
	} );
} );
Original file line number Diff line number Diff line change
@@ -1,8 +1,48 @@
import { markWordsInSentences } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences";
import { deConstructAnchor, markWordsInSentences, reConstructAnchor } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences";
import Mark from "../../../../src/values/Mark";
import matchWordCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord";

describe( "Adds Yoast marks to specific words in a sentence", function() {
it( "should add Yoast marks to all instances of specified words in a sentence, except when there is an anchor," +
" the marking should not be applied to the anchor tag attribute", function() {
expect( markWordsInSentences(
[ "picket", "tile" ],
[ "Introducing Palisades Ceramic Picket Tile — the latest trend in <a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" ],
"en_EN"
) ).toEqual( [
new Mark( {
marked: "Introducing Palisades Ceramic <yoastmark class='yoast-text-mark'>Picket Tile</yoastmark> — the latest trend in " +
"<a href=\"https://www.tileclub.com/" +
"collections/ceramic-tile\" target=\"_blank\" rel=\"noopener\">ceramic " +
"<yoastmark class='yoast-text-mark'>tile</yoastmark></a>!",
original: "Introducing Palisades Ceramic Picket Tile — the latest trend in " +
"<a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" } ),
]
);
} );
it( "should add Yoast marks to all instances of specified words in a sentence, except when there are multiple anchors," +
" the marking should not be applied to the anchor tag attribute", function() {
expect( markWordsInSentences(
[ "picket", "tile" ],
[ "Introducing Palisades Ceramic <a href=\"https://www.tileclub.com/ceramic-tile\">Picket Tile</a> — " +
"the latest trend in <a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" ],
"en_EN"
) ).toEqual( [
new Mark( {
marked: "Introducing Palisades Ceramic <a href=\"https://www.tileclub.com/ceramic-tile\"><yoastmark class='yoast-text-mark'>" +
"Picket Tile</yoastmark></a> — the latest trend in " +
"<a href=\"https://www.tileclub.com/" +
"collections/ceramic-tile\" target=\"_blank\" rel=\"noopener\">ceramic " +
"<yoastmark class='yoast-text-mark'>tile</yoastmark></a>!",
original: "Introducing Palisades Ceramic <a href=\"https://www.tileclub.com/ceramic-tile\">Picket Tile</a> — " +
"the latest trend in <a href=\"https://www.tileclub.com/collections/ceramic-tile\"" +
" target=\"_blank\" rel=\"noopener\">ceramic tile</a>!" } ),
]
);
} );
it( "should add Yoast marks to all instances of specified words in a sentence", function() {
expect( markWordsInSentences(
[ "turtle", "hamster" ],
Expand Down Expand Up @@ -73,7 +113,7 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c
new Mark( {
marked: "<yoastmark class='yoast-text-mark'>小さい花の刺繍</yoastmark>しかし、それは在庫切れでしたマキシドレス。",
original: "小さい花の刺繍しかし、それは在庫切れでしたマキシドレス。" } ),
]
]
);
} );

Expand Down Expand Up @@ -102,3 +142,30 @@ describe( "Adds Yoast marks to specific words in a sentence for languages with c
} );
} );

describe( "test the deconstructAnchor and reconstructAnchor helper", () => {
	/**
	 * Checks that an anchor is split into the expected parts, and that putting those parts back together yields the anchor again.
	 *
	 * @param {string} anchor        The anchor to deconstruct.
	 * @param {Object} expectedParts The expected `openTag` and `content` of the anchor.
	 *
	 * @returns {void}
	 */
	const expectRoundTrip = ( anchor, expectedParts ) => {
		const parts = deConstructAnchor( anchor );
		expect( parts ).toEqual( expectedParts );

		const rebuilt = reConstructAnchor( parts.openTag, parts.content );
		expect( rebuilt ).toEqual( anchor );
	};

	it( "correctly deconstructs and reconstructs an anchor", () => {
		expectRoundTrip( "<a href=\"https://yoast.com\">This is yoast.</a>", {
			openTag: "<a href=\"https://yoast.com\">",
			content: "This is yoast.",
		} );
	} );

	it( "correctly deconstructs and reconstructs an anchor that contains html elements itself", () => {
		expectRoundTrip( "<a href=\"https://yoast.com\">This <i>is</i> <b>yoast</b>.</a>", {
			openTag: "<a href=\"https://yoast.com\">",
			content: "This <i>is</i> <b>yoast</b>.",
		} );
	} );
} );
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { getSubheadings } from "./getSubheadings";
* @param {array} marks The array of mark objects.
* @param {string} html The html of the page where we want to apply the marking to.
*
* @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking tp.
* @returns {{selectedHTML: *[], fieldsToMark: *}} The selected part of the html we want to apply the marking to.
*/
export function getFieldsToMark( marks, html ) {
const fieldsToMark = uniq( flatten( marks.map( mark => {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Replaces the single quotes that wrap HTML attribute values with double quotes.
 *
 * Double quotes are the standard, but the `yoastseo` package converts them to single quotes when it parses the HTML.
 * To change them back, the string is parsed as HTML by assigning it to the `innerHTML` of a `body` element that is
 * never attached to the page, and the markup is then read back from that same element.
 *
 * NOTE(review): the input is parsed as HTML through `innerHTML`; confirm that callers only pass editor markup here.
 *
 * @param {string} str The input string.
 *
 * @returns {string} The string with single quotes around HTML attributes replaced with double quotes.
 */
export default function( str ) {
	// A throwaway element that only serves to parse and re-serialize the markup.
	const container = document.createElement( "body" );
	container.innerHTML = str;

	const { innerHTML: normalizedHtml } = container;
	return normalizedHtml;
}
Original file line number Diff line number Diff line change
@@ -1,28 +1,109 @@
import matchWords from "../match/matchTextWithArray";
import arrayToRegex from "../regex/createRegexFromArray";
import { escapeRegExp } from "lodash-es";
import addMark from "../../../markers/addMarkSingleWord";
import Mark from "../../../values/Mark";
import { escapeRegExp } from "lodash-es";
import getAnchorsFromText from "../link/getAnchorsFromText";
import matchWords from "../match/matchTextWithArray";
import arrayToRegex from "../regex/createRegexFromArray";

/*
 * Regex to deconstruct an anchor into open tag, content and close tag.
 * The content group is `[\s\S]*?` rather than `.+?`, so that an empty anchor text and an anchor text
 * that contains line breaks are matched as well.
 */
const anchorDeconstructionRegex = /(<a[\s]+[^>]+>)([\s\S]*?)(<\/a>)/;

/**
 * Deconstructs an anchor to the opening tag and the content. The content is the anchor text.
 * We don't return the closing tag since the value would always be the same, i.e. </a>.
 *
 * @param {string} anchor An anchor of the shape <a ...>...</a>. The anchor text may be empty or span multiple lines.
 *
 * @returns {object} An object containing the opening tag (`openTag`) and the anchor text (`content`).
 *
 * @throws {TypeError} When the input does not contain an anchor of the shape <a ...>...</a>.
 */
export const deConstructAnchor = function( anchor ) {
	const match = anchor.match( anchorDeconstructionRegex );

	// Fail with a message that names the offending input, instead of an opaque error from destructuring `null`.
	if ( match === null ) {
		throw new TypeError( `Cannot deconstruct "${ anchor }": expected an anchor of the shape <a ...>...</a>.` );
	}

	// The array mirrors the anchorDeconstructionRegex, using a comma to skip the full match at index 0.
	const [ , openTag, content ] = match;
	return { openTag, content };
};

/**
 * Builds an anchor by placing the content between the given opening tag and a closing </a> tag.
 * The closing tag is not a parameter: it is always appended as </a>.
 *
 * @param {string} openTag The opening tag of the anchor. Must be of the shape <a ...>.
 * @param {string} content The text of the anchor.
 *
 * @returns {string} The assembled anchor.
 */
export const reConstructAnchor = function( openTag, content ) {
	const closeTag = "</a>";
	return "".concat( openTag, content, closeTag );
};


/**
 * Collects the anchors of a sentence together with a marked counterpart for each of them.
 * In a marked counterpart, only the anchor text is searched for the words: the opening tag is passed on unchanged,
 * so nothing inside the tag's attributes gets marked.
 *
 * @param {string} sentence   The sentence to retrieve the anchors from.
 * @param {RegExp} wordsRegex The regex of the words.
 *
 * @returns {Object} An object with `anchors` and `markedAnchors`, where `markedAnchors[ i ]` is built from `anchors[ i ]`.
 */
const getMarkedAnchors = function( sentence, wordsRegex ) {
	const anchors = getAnchorsFromText( sentence );

	const markedAnchors = anchors.map( function( anchor ) {
		// Split the anchor, so that the words are only looked for in the anchor text.
		const anchorParts = deConstructAnchor( anchor );

		// Wrap every match found in the anchor text in a mark. Only the matched text is handed to `addMark`.
		const markedContent = anchorParts.content.replace( wordsRegex, match => addMark( match ) );

		// Put the untouched opening tag back in front of the (possibly marked) anchor text.
		return reConstructAnchor( anchorParts.openTag, markedContent );
	} );

	return { anchors: anchors, markedAnchors: markedAnchors };
};

/**
* Adds marks to a sentence and merges marks if those are only separated by a space
* (e.g., if highlighting words "ballet" and "shoes" in a sentence "I have a lot of ballet shoes and other paraphernalia."
* the marks will be put around "ballet shoes" together, not "`ballet` `shoes`".)
*
* @param {string} sentence The sentence to mark words in.
* @param {[string]} topicFoundInSentence The words to mark in the sentence.
* @param {[string]} wordsFoundInSentence The words to mark in the sentence.
* @param {function} matchWordCustomHelper The language-specific helper function to match word in text.
*
* @returns {string} The sentence with marks.
*/
export const collectMarkingsInSentence = function( sentence, topicFoundInSentence, matchWordCustomHelper ) {
topicFoundInSentence = topicFoundInSentence.map( word => escapeRegExp( word ) );
export const collectMarkingsInSentence = function( sentence, wordsFoundInSentence, matchWordCustomHelper ) {
wordsFoundInSentence = wordsFoundInSentence.map( word => escapeRegExp( word ) );
// If a language has a custom helper to match words, we disable the word boundary when creating the regex.
const topicRegex = matchWordCustomHelper ? arrayToRegex( topicFoundInSentence, true ) : arrayToRegex( topicFoundInSentence );
const markup = sentence.replace( topicRegex, function( x ) {
const wordsRegex = matchWordCustomHelper ? arrayToRegex( wordsFoundInSentence, true ) : arrayToRegex( wordsFoundInSentence );

// Retrieve the anchors and mark the anchors' text if the words are found in the anchors' text.
const { anchors, markedAnchors } = getMarkedAnchors( sentence, wordsRegex );

let markup = sentence.replace( wordsRegex, function( x ) {
return addMark( x );
} );

/**
* In 'markup', we apply the markings also inside the anchor's attribute if there is a match, on top of
* marking the anchor's text.
* The step below is to replace the incorrectly marked anchors with the marked anchors that we want:
* where the markings are only applied in the anchor's text.
*/
if ( anchors.length > 0 ) {
const markupAnchors = getAnchorsFromText( markup );
for ( let i = 0; i < markupAnchors.length; i++ ) {
markup = markup.replace( markupAnchors[ i ], markedAnchors[ i ] );
}
}
Comment on lines +90 to +101
Copy link
Copy Markdown
Contributor Author

@FAMarfuaty FAMarfuaty Nov 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The approach is as follows:

  • Get the anchors from the sentence
  • For every anchor, apply the markings only to the anchor text. Replace the unmarked anchor in the sentence with the marked anchor:
    • Retrieve the anchor text
    • Apply the marking to the anchor text
    • Replace the original anchor text with the marked anchor text
  • If there is an anchor found in the sentence:
    • The incorrectly marked anchor will be replaced with the correctly marked anchor (only marked anchor text, excluding the attributes), retrieved from getMarkedAnchors()

This approach works, but I have to admit that it's a little bit convoluted.
Let me know if you have suggestions on how to improve the approach :)

Copy link
Copy Markdown
Contributor Author

@FAMarfuaty FAMarfuaty Nov 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@agnieszkaszuba 's suggestion for improvement for this approach:

  • Before running the functionality in addMarkSingleWord, get all the <a> tags (so only the opening tags) from the text and save them in an array (you can use this part of the regex from getAnchorsFromText: <a[\s]+(?:[^>]+)>)
  • Apply marks to the text (addMarkSingleWord)
  • Replace the <a> tags in the marked text with the original <a> tags

The advantage of this approach is that we don’t need to mark the text between the <a> tags twice, as is currently done in addMarkSingleWord and in getMarkedAnchors.

Copy link
Copy Markdown
Contributor Author

@FAMarfuaty FAMarfuaty Nov 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We tried to implement the above suggestion. However, the third step, "Replace the tags in the marked text with the original tags", proved to be tricky: we couldn't formulate a regex that can detect an anchor opening tag that includes a <yoastmark>.

Hence, we decided to use the current approach.


/*
* If two marks are separated by only a space, remove the closing tag of the first mark and the opening tag of the
* second mark so that the two marks can be combined into one.
*/
return ( markup.replace( new RegExp( "</yoastmark> <yoastmark class='yoast-text-mark'>", "ig" ), " " ) );
};

Expand All @@ -37,16 +118,16 @@ export const collectMarkingsInSentence = function( sentence, topicFoundInSentenc
* @returns {[string]} The sentences with marks.
*/
export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) {
let topicFoundInSentence = [];
let wordsFoundInSentence = [];
let markings = [];

sentences.forEach( function( sentence ) {
topicFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches;
wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches;

if ( topicFoundInSentence.length > 0 ) {
if ( wordsFoundInSentence.length > 0 ) {
markings = markings.concat( new Mark( {
original: sentence,
marked: collectMarkingsInSentence( sentence, topicFoundInSentence, matchWordCustomHelper ),
marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ),
} ) );
}
} );
Expand Down
2 changes: 2 additions & 0 deletions packages/yoastseo/src/languageProcessing/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import { stripFullTags as stripHTMLTags } from "./helpers/sanitize/stripHTMLTags
import sanitizeString from "./helpers/sanitize/sanitizeString";
import { unifyAllSpaces } from "./helpers/sanitize/unifyWhitespace";
import removePunctuation from "./helpers/sanitize/removePunctuation";
import replaceSingleQuotesInTags from "./helpers/html/replaceQuotes";
import countMetaDescriptionLength from "./helpers/word/countMetaDescriptionLength";
import getLanguage from "./helpers/language/getLanguage";
import getSentences from "./helpers/sentence/getSentences";
Expand Down Expand Up @@ -65,4 +66,5 @@ export {
getSentences,
getFieldsToMark,
unifyAllSpaces,
replaceSingleQuotesInTags,
};