Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ const expectedResults = {
textLength: {
isApplicable: true,
score: 9,
resultText: "<a href='https://yoa.st/34n' target='_blank'>Text length</a>: The text contains 3181 characters. Good job!",
resultText: "<a href='https://yoa.st/34n' target='_blank'>Text length</a>: The text contains 3165 characters. Good job!",
},
externalLinks: {
isApplicable: true,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { doesWordMatchRegex } from "../../../../../src/languageProcessing/helpers/morphology/regexHelpers";
import countCharactersFunction from "../../../../../src/languageProcessing/languages/ja/helpers/countCharacters.js";

describe( "counts characters in a string", function() {
Expand All @@ -6,6 +7,11 @@ describe( "counts characters in a string", function() {
"高速運転が可能な標準軌新線を建設することを決定。1959年(昭和34年)4月20日、新丹那トンネル熱海口で起工式を行って着工し、" +
"東京オリンピック開会直前の1964年(昭和39年)10月1日に開業した。" ) ).toBe( 136 );
} );
it( "returns the number of characters not including URL characters in the count", function() {
expect( countCharactersFunction( "www.yoast.comこれに対し日本国有鉄道(国鉄)は、十河信二国鉄総裁と技師長の島秀雄の下、" +
"高速運転が可能な標準軌新線を建設することを決定。1959年(昭和34年)4月20日、新丹那トンネル熱海口で起工式を行って着工し、" +
"東京オリンピック開会直前の1964年(昭和39年)10月1日に開業した。" ) ).toBe( 136 );
} );
it( "makes sure the countCharacters function still works when the input is a non-Japanese string", function() {
expect( countCharactersFunction( "this is a string" ) ).toBe( 16 );
expect( countCharactersFunction( "Низът в компютърните науки е крайна поредица от символи " +
Expand All @@ -30,7 +36,34 @@ describe( "counts characters in a string", function() {
"<p>戦後においては一般に広義の<a href=\"https://ja.wikipedia.org/wiki/%E7%AB%A5%E8%AC%A1\">童謡</a>にカテゴライズされる本作品は、" +
"初出本の題名にもあるとおり青木自身は「唱歌」であるとし、「学校や家庭で」歌ってもらえれば本懐であるとしている。しかし発表当時の教育現場では、" +
"本作品を歌うことは原則上はできなかった。</p>";
expect( countCharactersFunction( text ) ).toBe( 757 );
} );
it( "makes sure that no charachters are counted when a URL is embedded in video tags", function() {
const text = "<!-- wp:embed {\"url\":\"https://www.youtube.com/watch?v=cbP2N1BQdYc\",\"type\":\"video\"," +
"\"providerNameSlug\":\"youtube\",\"responsive\":true,\"className\":\"wp-embed-aspect-16-9 wp-has-aspect-ratio\"} -->\n" +
"\t\t\t<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect" +
"-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n" +
"\t\t\t\thttps://www.youtube.com/watch?v=cbP2N1BQdYc\n" +
"\t\t\t</div></figure><!-- /wp:embed -->";
expect( countCharactersFunction( text ) ).toBe( 0 );
} );
} );

expect( countCharactersFunction( text ) ).toBe( 760 );
describe( "A test to return a regex match for URLs", () => {
	// The URL pattern handed to doesWordMatchRegex in every case below; kept in one place so the cases cannot drift apart.
	// eslint-disable-next-line max-len
	const urlPattern = "(http(s)?:\\/\\/.)?(www\\.|ftp:\\/\\/)?[-a-zA-Z0-9@:%._\\/+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\/+.~#?&()=]*)";

	// Each entry becomes one test: the title, the string to check, and the expected outcome.
	const scenarios = [
		{
			title: "Returns true if there is a match against a URL starting with www",
			input: "www.yoast.com",
			expected: true,
		},
		{
			title: "Returns true if there is a match against a URL starting with https",
			input: "https://www.codecademy.com/learn/hello",
			expected: true,
		},
		{
			title: "Returns true if there is a match against a URL starting with http",
			input: "http://foo.com/blah_blah/",
			expected: true,
		},
		{
			title: "Returns false if there is no match",
			input: "My cat is sweet!",
			expected: false,
		},
	];

	// Register the tests in the same order as they are listed above.
	scenarios.forEach( ( { title, input, expected } ) => {
		it( title, () => {
			const isMatch = doesWordMatchRegex( input, urlPattern );
			expect( isMatch ).toEqual( expected );
		} );
	} );
} );
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,11 @@ export default function( text ) {
// Remove first/last character if space
text = text.replace( /^\s+|\s+$/g, "" );

// Replace spaces followed by Japanese periods with only the period.
text = text.replace( /\s。/g, "。" );

// Replace Japanese periods followed by a whitespace character with only the period.
text = text.replace( /。\s/g, "。" );

return text;
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ const { sanitizeString } = languageProcessing;
* @returns {int} The character count of the given text.
*/
export default function( text ) {
// This regex matches URLs in the text, whether or not they are embedded in tags, so that they are excluded from the character count.
// eslint-disable-next-line max-len
const urlRegex = new RegExp( "(http(s)?:\\/\\/.)?(www\\.|ftp:\\/\\/)?[-a-zA-Z0-9@:%._\\/+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\/+.~#?&()=]*)", "igm" );
text = text.replace( urlRegex, "" );
text = sanitizeString( text );

return text.length;
Expand Down