-
Notifications
You must be signed in to change notification settings - Fork 953
PC-965 Fixes highlighting when inline HTML tags are present in Classic editor and in Shopify #19162
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2afa263
f0644b2
daa14cf
dfbdc4b
9913b62
e39d58b
a80d44f
4ad41d6
899572c
09480e4
d45ade7
fe14f57
a280537
7c97ec6
e723bfe
1fcd588
d127696
850941a
39cca12
8940fed
9de0715
71a14c3
51f0f99
deef450
33b6b48
af58ac5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| import replaceSingleQuotesInTags from "../../../../src/languageProcessing/helpers/html/replaceQuotes"; | ||
|
|
||
| describe( "replace-quotes", function() { | ||
| describe( "replaceSingleQuotesInTags", function() { | ||
| it( "should return the same string when no single quotes are present", function() { | ||
| expect( replaceSingleQuotesInTags( "This is a test" ) ) | ||
| .toEqual( "This is a test" ); | ||
| } ); | ||
|
|
||
| it( "should return the same string when only double quotes in HTML attribute values are present", function() { | ||
| expect( replaceSingleQuotesInTags( "<yoastmark class=\"yoast-text-mark\">This is a test</yoastmark>" ) ) | ||
| .toEqual( "<yoastmark class=\"yoast-text-mark\">This is a test</yoastmark>" ); | ||
| } ); | ||
|
|
||
| it( "should not replace single quotes (or apostrophes) outside HTML tags", function() { | ||
| expect( replaceSingleQuotesInTags( "This is a test, let's go!" ) ) | ||
| .toEqual( "This is a test, let's go!" ); | ||
| } ); | ||
|
|
||
| it( "should replace the outer single quotes in HTML attribute values with double quotes", function() { | ||
| expect( replaceSingleQuotesInTags( "<span style='color: red'>This</span> is a test" ) ) | ||
| .toEqual( "<span style=\"color: red\">This</span> is a test" ); | ||
| } ); | ||
|
|
||
| it( "should not replace any inner single quotes in HTML attribute values", function() { | ||
| expect( replaceSingleQuotesInTags( "<span data-attr=\"let's go, time's up\">This</span> is a test" ) ) | ||
| .toEqual( "<span data-attr=\"let's go, time's up\">This</span> is a test" ); | ||
| } ); | ||
|
|
||
| it( "should replace the outer single quotes in multiple HTML attribute values with double quotes", function() { | ||
| expect( replaceSingleQuotesInTags( "<yoastmark class='yoast-text-mark' style='color: blue'>This is a test</yoastmark>" ) ) | ||
| .toEqual( "<yoastmark class=\"yoast-text-mark\" style=\"color: blue\">This is a test</yoastmark>" ); | ||
| } ); | ||
| } ); | ||
| } ); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| /** | ||
| * Replaces single quotes around HTML attribute values with double quotes. | ||
| * Double quotes are the standard, but we convert these to single quotes when parsing the HTML in `yoastseo` package. | ||
| * Here, we change them back to double quotes so by parsing the HTML and then outputting it again. | ||
| * | ||
| * @param {string} str The input string. | ||
| * | ||
| * @returns {string} The string with single quotes around HTML attributes replaced with double quotes. | ||
| */ | ||
| export default function( str ) { | ||
| const element = document.createElement( "body" ); | ||
| element.innerHTML = str; | ||
| return element.innerHTML; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,28 +1,109 @@ | ||
| import matchWords from "../match/matchTextWithArray"; | ||
| import arrayToRegex from "../regex/createRegexFromArray"; | ||
| import { escapeRegExp } from "lodash-es"; | ||
| import addMark from "../../../markers/addMarkSingleWord"; | ||
| import Mark from "../../../values/Mark"; | ||
| import { escapeRegExp } from "lodash-es"; | ||
| import getAnchorsFromText from "../link/getAnchorsFromText"; | ||
| import matchWords from "../match/matchTextWithArray"; | ||
| import arrayToRegex from "../regex/createRegexFromArray"; | ||
|
|
||
| // Regex to deconstruct an anchor into open tag, content and close tag. | ||
| const anchorDeconstructionRegex = /(<a[\s]+[^>]+>)(.+?)(<\/a>)/; | ||
|
|
||
| /** | ||
| * Deconstructs an anchor to the opening tag and the content. The content is the anchor text. | ||
| * We don't return the closing tag since the value would always be the same, i.e. </a>. | ||
| * | ||
| * @param {string} anchor An anchor of the shape <a ...>...</a>. | ||
| * | ||
| * @returns {object} An object containing the opening tag and the content. | ||
| */ | ||
| export const deConstructAnchor = function( anchor ) { | ||
| // The const array mirrors the anchorDeconstructionRegex, using a comma to access the first element without a name. | ||
| const [ , openTag, content ] = anchor.match( anchorDeconstructionRegex ); | ||
| return { | ||
| openTag: openTag, | ||
| content: content, | ||
| }; | ||
| }; | ||
|
|
||
| /** | ||
| * Reconstructs an anchor from an openTag, the content, and the closing tag. | ||
| * | ||
| * @param {string} openTag The opening tag of the anchor. Must be of the shape <a ...>. | ||
| * @param {string} content The text of the anchor. | ||
| * | ||
| * @returns {string} An anchor. | ||
| */ | ||
| export const reConstructAnchor = function( openTag, content ) { | ||
| return `${openTag}${content}</a>`; | ||
| }; | ||
|
|
||
|
|
||
| /** | ||
| * Gets the anchors and marks the anchors' text if the words are found in it. | ||
| * | ||
| * @param {string} sentence The sentence to retrieve the anchors from. | ||
| * @param {RegExp} wordsRegex The regex of the words. | ||
| * | ||
| * @returns {Object} The anchors and the marked anchors. | ||
| */ | ||
| const getMarkedAnchors = function( sentence, wordsRegex ) { | ||
| // Retrieve the anchors. | ||
| const anchors = getAnchorsFromText( sentence ); | ||
| // For every anchor, apply the markings only to the anchor tag. | ||
| const markedAnchors = anchors.map( anchor => { | ||
| // Retrieve the open tag and the content/anchor text. | ||
| const { openTag, content } = deConstructAnchor( anchor ); | ||
|
|
||
| // Apply the marking to the anchor text if there is a match. | ||
| const markedAnchorText = content.replace( wordsRegex, ( x ) => addMark( x ) ); | ||
|
|
||
| // Create a new anchor tag with a (marked) anchor text. | ||
| return reConstructAnchor( openTag, markedAnchorText ); | ||
| } ); | ||
|
|
||
| return { anchors, markedAnchors }; | ||
| }; | ||
|
|
||
| /** | ||
| * Adds marks to a sentence and merges marks if those are only separated by a space | ||
| * (e.g., if highlighting words "ballet" and "shoes" in a sentence "I have a lot of ballet shoes and other paraphernalia." | ||
| * the marks will be put around "ballet shoes" together, not "`ballet` `shoes`".) | ||
| * | ||
| * @param {string} sentence The sentence to mark words in. | ||
| * @param {[string]} topicFoundInSentence The words to mark in the sentence. | ||
| * @param {[string]} wordsFoundInSentence The words to mark in the sentence. | ||
| * @param {function} matchWordCustomHelper The language-specific helper function to match word in text. | ||
| * | ||
| * @returns {string} The sentence with marks. | ||
| */ | ||
| export const collectMarkingsInSentence = function( sentence, topicFoundInSentence, matchWordCustomHelper ) { | ||
| topicFoundInSentence = topicFoundInSentence.map( word => escapeRegExp( word ) ); | ||
| export const collectMarkingsInSentence = function( sentence, wordsFoundInSentence, matchWordCustomHelper ) { | ||
| wordsFoundInSentence = wordsFoundInSentence.map( word => escapeRegExp( word ) ); | ||
| // If a language has a custom helper to match words, we disable the word boundary when creating the regex. | ||
| const topicRegex = matchWordCustomHelper ? arrayToRegex( topicFoundInSentence, true ) : arrayToRegex( topicFoundInSentence ); | ||
| const markup = sentence.replace( topicRegex, function( x ) { | ||
| const wordsRegex = matchWordCustomHelper ? arrayToRegex( wordsFoundInSentence, true ) : arrayToRegex( wordsFoundInSentence ); | ||
|
|
||
| // Retrieve the anchors and mark the anchors' text if the words are found in the anchors' text. | ||
| const { anchors, markedAnchors } = getMarkedAnchors( sentence, wordsRegex ); | ||
|
|
||
| let markup = sentence.replace( wordsRegex, function( x ) { | ||
| return addMark( x ); | ||
| } ); | ||
|
|
||
| /** | ||
| * In 'markup', we apply the markings also inside the anchor's attribute if there is a match, on top of | ||
| * marking the anchor's text. | ||
| * The step below is to replace the incorrectly marked anchors with the marked anchors that we want: | ||
| * where the markings are only applied in the anchor's text. | ||
| */ | ||
| if ( anchors.length > 0 ) { | ||
| const markupAnchors = getAnchorsFromText( markup ); | ||
| for ( let i = 0; i < markupAnchors.length; i++ ) { | ||
| markup = markup.replace( markupAnchors[ i ], markedAnchors[ i ] ); | ||
| } | ||
| } | ||
|
Comment on lines
+90
to
+101
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The approach is as follows:
This approach works, but I have to admit that it's a little bit convoluted.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @agnieszkaszuba 's suggestion for improvement for this approach:
The advantage of this approach is that we don’t need to mark the text between alt tags twice, like it’s currently done in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We tried to implement the above suggestion. However, the third step "Replace the tags in the marked text with the original tags" proved to be tricky. And we couldn't formulate a regex that can detect anchor opening tag that includes the Hence, we decided to use the current approach. |
||
|
|
||
| /* | ||
| * If two marks are separated by only a space, remove the closing tag of the first mark and the opening tag of the | ||
| * second mark so that the two marks can be combined into one. | ||
| */ | ||
| return ( markup.replace( new RegExp( "</yoastmark> <yoastmark class='yoast-text-mark'>", "ig" ), " " ) ); | ||
| }; | ||
|
|
||
|
|
@@ -37,16 +118,16 @@ export const collectMarkingsInSentence = function( sentence, topicFoundInSentenc | |
| * @returns {[string]} The sentences with marks. | ||
| */ | ||
| export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) { | ||
| let topicFoundInSentence = []; | ||
| let wordsFoundInSentence = []; | ||
| let markings = []; | ||
|
|
||
| sentences.forEach( function( sentence ) { | ||
| topicFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; | ||
| wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; | ||
|
|
||
| if ( topicFoundInSentence.length > 0 ) { | ||
| if ( wordsFoundInSentence.length > 0 ) { | ||
| markings = markings.concat( new Mark( { | ||
| original: sentence, | ||
| marked: collectMarkingsInSentence( sentence, topicFoundInSentence, matchWordCustomHelper ), | ||
| marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), | ||
| } ) ); | ||
| } | ||
| } ); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Classic editor uses double quotes for HTML attribute values. However, in
yoastseo, we use single quotes for the attribute values when we create the marked object. As the result, the replacement did not work, as the marks passed byyoastseodid not match anything in the original text.This step is replacing the single quotes in the marked object output by
yoastseowith double quotes. This way, we make sure that the replacement can find a match between the original text of the marked object and the text in the page.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In
yoastseo, single quotes are used because Block editor uses single quotes for HTML tag attributes.