wordpress-seo/packages/yoastseo/spec/languageProcessing/researches/getProminentWordsForInternalLinkingSpec.js at 8ccdf721ff8521f5777dedd1b86f5515a3344fb2 · Yoast/wordpress-seo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
import prominentWordsResearch from "../../../src/languageProcessing/researches/getProminentWordsForInternalLinking";
import Paper from "../../../src/values/Paper";
import Researcher from "../../../src/languageProcessing/languages/en/Researcher";
import CatalanResearcher from "../../../src/languageProcessing/languages/ca/Researcher";
import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher";
import ProminentWord from "../../../src/languageProcessing/values/ProminentWord";
import getMorphologyData from "../../specHelpers/getMorphologyData";


// Morphology data for English ("en") and Japanese ("ja"), fetched through the spec helper.
// Specs that need morphology support hand these to a researcher via addResearchData( "morphology", ... ).
const morphologyData = getMorphologyData( "en" );
const morphologyDataJA = getMorphologyData( "ja" );

describe( "relevantWords research", function() {
	it( "returns no prominent words for texts under 100 words", () => {
		// Only seven words, and neither a title nor a meta description is passed to the paper.
		const shortPaper = new Paper( "texte et texte et texte et texte" );

		const englishResearcher = new Researcher( shortPaper );
		englishResearcher.addResearchData( "morphology", morphologyData );

		const expectedResult = {
			prominentWords: [],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( shortPaper, englishResearcher ) ).toEqual( expectedResult );
	} );

	it( "returns prominent words for texts with more than 300 words", () => {
		// The text holds "texte" 181 times and "et" 180 times (361 words in total); only a title is set.
		const text = "texte" + " et texte".repeat( 180 );
		const longPaper = new Paper( text, { title: "Title" } );

		const englishResearcher = new Researcher( longPaper );
		englishResearcher.addResearchData( "morphology", morphologyData );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "texte", "texte", 181 ),
				new ProminentWord( "et", "et", 180 ),
			],
			hasMetaDescription: false,
			hasTitle: true,
		};

		expect( prominentWordsResearch( longPaper, englishResearcher ) ).toEqual( expectedResult );
	} );

	it( "does not break if no morphology support is added for the language " +
		"and does not filter function words if the list is not available", () => {
		// Catalan paper: "texte" occurs 400 times and "et" 399 times. No morphology data is added to the researcher.
		const text = "texte " + " et texte".repeat( 399 );
		const catalanPaper = new Paper( text, { locale: "ca" } );

		const catalanResearcher = new CatalanResearcher( catalanPaper );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "texte", "texte", 400 ),
				new ProminentWord( "et", "et", 399 ),
			],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( catalanPaper, catalanResearcher ) ).toEqual( expectedResult );
	} );

	it( "returns relevant words from the text alone if no attributes are available", () => {
		/*
		 * One repetition contains four forms of "syllable(s)", four of "combinations" and one "win";
		 * repeated 15 times this gives the expected 60 / 60 / 15 occurrences.
		 */
		const text = ( "Here are a ton of syllables. Syllables are very important. I think the syllable " +
			"combinations are even more important. Syllable combinations for the win! Combinations are awesome. " +
			"So many combinations! " ).repeat( 15 );
		const textOnlyPaper = new Paper( text );

		const englishResearcher = new Researcher( textOnlyPaper );
		englishResearcher.addResearchData( "morphology", morphologyData );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "combinations", "combination", 60 ),
				new ProminentWord( "syllable", "syllable", 60 ),
				new ProminentWord( "win", "win", 15 ),
			],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( textOnlyPaper, englishResearcher ) ).toEqual( expectedResult );
	} );

	it( "combines data from the text and from the paper attributes", () => {
		// The body text (including one <h2> subheading) is repeated six times.
		const text = ( "As we announced at YoastCon, we’re working together with Bing and Google to allow live indexing for " +
			"everyone who uses Yoast SEO — free and premium. " +
			"<h2>Subheading!</h2>" +
			"In an update currently planned for the end of March, we’ll " +
			"allow users to connect their sites to MyYoast, our customer portal. After that we’ll roll out live indexing, " +
			"which means every time you publish, update, or delete a post, that will be reflected almost instantly into " +
			"Bing and Google’s indices. How does this work? When you connect your site to MyYoast... " ).repeat( 6 );

		// Keyphrase, synonyms, title and meta description are all provided alongside the text.
		const attributes = {
			keyword: "live indexing Yoast SEO",
			synonyms: "live index",
			title: "Amazing title",
			description: "Awesome metadescription",
			locale: "en_EN",
		};

		const paperWithAttributes = new Paper( text, attributes );

		const englishResearcher = new Researcher( paperWithAttributes );
		englishResearcher.addResearchData( "morphology", morphologyData );

		/*
		 * Words coming from the paper attributes are weighted 3 times as heavily as words coming from the text,
		 * so the final number of occurrences is:
		 * number_of_occurrences_in_text + 3 * number_of_occurrences_in_paper_attributes.
		 *
		 * NOTE(review): "subheading" occurs 6 times in the text (inside the <h2>) but is expected with 18 occurrences,
		 * so subheading words appear to receive the same 3x weight — confirm against the research implementation.
		 */
		const expectedResult = {
			prominentWords: [
				/*
				 * The stem "index" is found 18 times in the text (two times "indexing" and one time "indices"
				 * per repetition) and 2 times in the attributes ("indexing" and "index"): 18 + 2 * 3 = 24.
				 */
				new ProminentWord( "index", "index", 24 ),
				new ProminentWord( "live", "live", 18 ),
				new ProminentWord( "subheading", "subhead", 18 ),
				new ProminentWord( "allow", "allow", 12 ),
				new ProminentWord( "bing", "bing", 12 ),
				new ProminentWord( "connect", "connect", 12 ),
				new ProminentWord( "google", "google", 12 ),
				new ProminentWord( "myyoast", "myyoast", 12 ),
				new ProminentWord( "site", "site", 12 ),
				new ProminentWord( "update", "update", 12 ),
				new ProminentWord( "work", "work", 12 ),
				new ProminentWord( "SEO", "seo", 9 ),
				new ProminentWord( "yoast", "yoast", 9 ),
				new ProminentWord( "customer", "custome", 6 ),
				new ProminentWord( "delete", "delete", 6 ),
				new ProminentWord( "end", "end", 6 ),
				new ProminentWord( "free", "free", 6 ),
				new ProminentWord( "march", "march", 6 ),
				new ProminentWord( "planned", "plan", 6 ),
				new ProminentWord( "portal", "portal", 6 ),
				new ProminentWord( "post", "post", 6 ),
				new ProminentWord( "premium", "premium", 6 ),
				new ProminentWord( "publish", "publish", 6 ),
				new ProminentWord( "reflected", "reflect", 6 ),
				new ProminentWord( "roll", "roll", 6 ),
				new ProminentWord( "time", "time", 6 ),
				new ProminentWord( "together", "together", 6 ),
				new ProminentWord( "uses", "use", 6 ),
				new ProminentWord( "users", "user", 6 ),
				new ProminentWord( "yoastcon", "yoastcon", 6 ),
			],
			hasMetaDescription: true,
			hasTitle: true,
		};

		expect( prominentWordsResearch( paperWithAttributes, englishResearcher ) ).toEqual( expectedResult );
	} );

	it( "lowers the prominent words occurrence threshold if a language does not have morphology support (English in Free)", () => {
		/*
		 * NOTE(review): several fragments are concatenated without a separating space (e.g. "Capulets." + "Romeo"),
		 * which presumably explains why "romeo" is expected 3 times while "juliet" is expected 4 times.
		 * The fragments must stay exactly as they are for the expected counts below to hold.
		 */
		const text = "Romeo and Juliet borrows from a tradition of tragic love stories dating back to antiquity. " +
			"One of these is Pyramus and Thisbe, from Ovid's Metamorphoses, which contains parallels " +
			"to Shakespeare's story: the lovers' parents despise each other, " +
			"and Pyramus falsely believes his lover Thisbe is dead. " +
			"The Ephesiaca of Xenophon of Ephesus, written in the 3rd century, also contains several similarities " +
			"to the play, including the separation of the lovers, and a potion that induces a deathlike sleep." +
			"One of the earliest references to the names Montague and Capulet is from Dante's Divine Comedy, " +
			"who mentions the Montecchi (Montagues) and Capulets." +
			"Romeo and Juliet. " +
			"Romeo and Juliet. " +
			"Romeo and Juliet. ";
		const romeoPaper = new Paper( text );

		// No morphology data is added here, so words with as few as 2 occurrences are expected in the result.
		const researcherWithoutMorphology = new Researcher( romeoPaper );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "juliet", "juliet", 4 ),
				new ProminentWord( "romeo", "romeo", 3 ),
				new ProminentWord( "lovers", "lovers", 2 ),
				new ProminentWord( "pyramus", "pyramus", 2 ),
				new ProminentWord( "thisbe", "thisbe", 2 ),
			],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( romeoPaper, researcherWithoutMorphology ) ).toEqual( expectedResult );
	} );

	it( "sets the prominent words occurrence threshold to 4 if a language does have morphology support ", () => {
		// The fragments are concatenated exactly as written (some without a separating space); do not reformat them.
		const text = "Romeo and Juliet borrows from a tradition of tragic love stories dating back to antiquity. " +
			"One of these is Pyramus and Thisbe, from Ovid's Metamorphoses, which contains parallels " +
			"to Shakespeare's story: the lovers' parents despise each other, " +
			"and Pyramus falsely believes his lover Thisbe is dead. " +
			"The Ephesiaca of Xenophon of Ephesus, written in the 3rd century, also contains several similarities " +
			"to the play, including the separation of the lovers, and a potion that induces a deathlike sleep." +
			"One of the earliest references to the names Montague and Capulet is from Dante's Divine Comedy, " +
			"who mentions the Montecchi (Montagues) and Capulets." +
			"Romeo and Juliet. " +
			"Romeo and Juliet. " +
			"Romeo and Juliet. ";
		const romeoPaper = new Paper( text );

		// With morphology data added, only the word that reaches 4 occurrences is expected in the result.
		const researcherWithMorphology = new Researcher( romeoPaper );
		researcherWithMorphology.addResearchData( "morphology", morphologyData );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "juliet", "juliet", 4 ),
			],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( romeoPaper, researcherWithMorphology ) ).toEqual( expectedResult );
	} );
} );

describe( "test for prominent words research for languages that have custom helpers", function() {
	// Japanese has custom helpers for getting words from the text, for counting the text length,
	// and for returning a custom function that returns the stem of a word.
	it( "returns no prominent words for texts under 200 characters", () => {
		// A single Japanese sentence, without a title or a meta description.
		const shortPaper = new Paper( "東海道新幹線の開業前、東西の大動脈である東海道本線は高度経済成長下で線路容量が逼迫しており、抜本的な輸送力増強を迫られていた。" );

		const japaneseResearcher = new JapaneseResearcher( shortPaper );
		japaneseResearcher.addResearchData( "morphology", morphologyDataJA );

		const expectedResult = {
			prominentWords: [],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( shortPaper, japaneseResearcher ) ).toEqual( expectedResult );
	} );

	it( "returns prominent words for texts with more than 300 words, in which the morphology data is available", () => {
		// "美しい猫" occurs once in the opening fragment and 180 more times in the repeated part: 181 in total.
		const text = "私の美しい猫" + "の美しい猫".repeat( 180 );
		const longPaper = new Paper( text, { title: "題名" } );

		const japaneseResearcher = new JapaneseResearcher( longPaper );
		japaneseResearcher.addResearchData( "morphology", morphologyDataJA );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "猫", "猫", 181 ),
				new ProminentWord( "美しい", "美しい", 181 ),
			],
			hasMetaDescription: false,
			hasTitle: true,
		};

		expect( prominentWordsResearch( longPaper, japaneseResearcher ) ).toEqual( expectedResult );
	} );

	it( "returns relevant words from the text alone if no attributes are available", () => {
		// Two sentences repeated 100 times; the paper gets no attributes at all.
		const text = ( "私の甘い猫は愛撫されるのが大好きです。猫はおやつが大好きです。" ).repeat( 100 );
		const textOnlyPaper = new Paper( text );

		const japaneseResearcher = new JapaneseResearcher( textOnlyPaper );
		japaneseResearcher.addResearchData( "morphology", morphologyDataJA );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "大好き", "大好い", 200 ),
				new ProminentWord( "おやつが", "おやつい", 100 ),
				new ProminentWord( "愛撫", "愛撫", 100 ),
				new ProminentWord( "猫", "猫", 100 ),
				new ProminentWord( "甘い猫", "甘い猫", 100 ),
			],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( textOnlyPaper, japaneseResearcher ) ).toEqual( expectedResult );
	} );

	it( "combines data from the text and from the paper attributes", () => {
		// The body text (including one <h2> subheading) is repeated six times.
		const text = ( "ネコ（猫）は、狭義には食肉目ネコ科ネコ属に分類されるリビアヤマネコ（ヨーロッパヤマネコ）が家畜化されたイエネコ（家猫、Felis silvestris catus）" +
			"に対する通称である。イヌ（犬）と並ぶ代表的なペットとして日本を含め世界中で広く飼われている。より広義には、ヤマネコやネコ科動物全般を指すこともある（後述）。" +
			"<h2>猫の種類</h2>" +
			"と同様、トラやライオンなどといった大型種を含む全てのネコ科動物を指すことがある。以下、本項では特記なき限りネコ=イエネコとして解説する。" ).repeat( 6 );

		// Keyphrase, synonyms, title and meta description are all provided alongside the text.
		const attributes = {
			keyword: "猫の種類",
			synonyms: "猫は繁殖します",
			title: "猫とその種類",
			description: "イエネコは、形態学的分析を主とする伝統的な生物学的知見によって、以前からヨーロッパヤマネコの亜種リビアヤマネコ Felis silvestris lybicaが原種とされてきた。" +
				"20世紀後半から発展した分子系統学などによる新たな知見も、従来説を裏付ける形となった。",
			locale: "ja",
		};

		const paperWithAttributes = new Paper( text, attributes );

		const japaneseResearcher = new JapaneseResearcher( paperWithAttributes );
		japaneseResearcher.addResearchData( "morphology", morphologyDataJA );

		// Expected words, each given as ( word, stem, occurrences ).
		const expectedResult = {
			prominentWords: [
				new ProminentWord( "猫", "猫", 33 ),
				new ProminentWord( "ネコ", "ネコ", 30 ),
				new ProminentWord( "種類", "種類", 24 ),
				new ProminentWord( "イエネコ", "イエネコ", 15 ),
				new ProminentWord( "として", "とした", 12 ),
				new ProminentWord( "指す", "指さ", 12 ),
				new ProminentWord( "felis", "felis", 9 ),
				new ProminentWord( "silvestris", "silvestris", 9 ),
				new ProminentWord( "ヨーロッパヤマネコ", "ヨーロッパヤマネコ", 9 ),
				new ProminentWord( "リビアヤマネコ", "リビアヤマネコ", 9 ),
				new ProminentWord( "catus", "catus", 6 ),
				new ProminentWord( "いっ", "いい", 6 ),
				new ProminentWord( "イヌ", "イヌ", 6 ),
				new ProminentWord( "トラ", "トラ", 6 ),
				new ProminentWord( "なき", "ない", 6 ),
				new ProminentWord( "ペット", "ペット", 6 ),
				new ProminentWord( "ヤマネコ", "ヤマネコ", 6 ),
				new ProminentWord( "ライオン", "ライオン", 6 ),
				new ProminentWord( "世界", "世界", 6 ),
				new ProminentWord( "並ぶ", "並ば", 6 ),
				new ProminentWord( "代表", "代表", 6 ),
				new ProminentWord( "以下", "以下", 6 ),
				new ProminentWord( "分類", "分類", 6 ),
				new ProminentWord( "化", "化", 6 ),
				new ProminentWord( "同様", "同様", 6 ),
				new ProminentWord( "含め", "含ま", 6 ),
				new ProminentWord( "含む全て", "含む全た", 6 ),
				new ProminentWord( "大型", "大型", 6 ),
				new ProminentWord( "家猫", "家猫", 6 ),
				new ProminentWord( "家畜", "家畜", 6 ),
				new ProminentWord( "属", "属", 6 ),
				new ProminentWord( "広く", "広い", 6 ),
				new ProminentWord( "広義", "広義", 6 ),
				new ProminentWord( "後述", "後述", 6 ),
				new ProminentWord( "日本", "日本", 6 ),
				new ProminentWord( "本項", "本項", 6 ),
				new ProminentWord( "物全", "物全", 6 ),
				new ProminentWord( "特記", "特記", 6 ),
				new ProminentWord( "犬", "犬", 6 ),
				new ProminentWord( "狭義", "狭義", 6 ),
				new ProminentWord( "目", "目", 6 ),
				new ProminentWord( "知見", "知見", 6 ),
				new ProminentWord( "科ネコ", "科ネコ", 6 ),
				new ProminentWord( "科動", "科動", 6 ),
				new ProminentWord( "科動物", "科動物", 6 ),
				new ProminentWord( "種", "種", 6 ),
				new ProminentWord( "般", "般", 6 ),
				new ProminentWord( "解説", "解説", 6 ),
				new ProminentWord( "通称", "通称", 6 ),
				new ProminentWord( "限り", "限っ", 6 ),
				new ProminentWord( "食肉", "食肉", 6 ),
				new ProminentWord( "飼わ", "飼い", 6 ),
			],
			hasMetaDescription: true,
			hasTitle: true,
		};

		expect( prominentWordsResearch( paperWithAttributes, japaneseResearcher ) ).toEqual( expectedResult );
	} );

	it( "returns prominent words for texts with more than 300 words, in which the morphology data is not available", () => {
		// Same text and title as the morphology-enabled Japanese case, but no morphology data is added to the researcher.
		const text = "私の美しい猫" + "の美しい猫".repeat( 180 );
		const longPaper = new Paper( text, { title: "題名" } );

		const researcherWithoutMorphology = new JapaneseResearcher( longPaper );

		// Besides the two words from the text, the title word is expected here with 3 occurrences.
		const expectedResult = {
			prominentWords: [
				new ProminentWord( "猫", "猫", 181 ),
				new ProminentWord( "美しい", "美しい", 181 ),
				new ProminentWord( "題名", "題名", 3 ),
			],
			hasMetaDescription: false,
			hasTitle: true,
		};

		expect( prominentWordsResearch( longPaper, researcherWithoutMorphology ) ).toEqual( expectedResult );
	} );

	it( "does not count URLs and email addresses as prominent words", () => {
		// Both the text and the title consist solely of URLs and email addresses.
		const text = "http://blog.example.com/examples ".repeat( 180 ) + "example@something.com ".repeat( 180 );
		const attributes = { title: "example@something.com example@something.com example@something.com" };
		const urlPaper = new Paper( text, attributes );

		const englishResearcher = new Researcher( urlPaper );

		const expectedResult = {
			prominentWords: [],
			hasMetaDescription: false,
			hasTitle: true,
		};

		expect( prominentWordsResearch( urlPaper, englishResearcher ) ).toEqual( expectedResult );
	} );

	it( "counts domain names as prominent words", () => {
		// A bare domain name (no protocol, no path) repeated 180 times.
		const domainPaper = new Paper( "yoast.com ".repeat( 180 ) );

		const englishResearcher = new Researcher( domainPaper );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "yoast.com", "yoast.com", 180 ),
			],
			hasMetaDescription: false,
			hasTitle: false,
		};

		expect( prominentWordsResearch( domainPaper, englishResearcher ) ).toEqual( expectedResult );
	} );

	// Fix: the two title fragments were concatenated without a space, so the reported name read "but shorterthan 100 words".
	it( "does not return prominent words when the text is longer than 100 words including URLs and emails, but shorter " +
		"than 100 words when they are excluded", function() {
		// 180 URLs and 180 email addresses, followed by only 50 occurrences of a regular word ("cats").
		const paper = new Paper( "http://blog.example.com/examples ".repeat( 180 ) + "example@something.com ".repeat( 180 ) +
			" cats".repeat( 50 ), { title: "example@something.com example@something.com example@something.com" } );

		const researcher = new Researcher( paper );

		// No prominent words are expected; the title is still reported as present.
		const expected = {
			prominentWords: [],
			hasMetaDescription: false,
			hasTitle: true,
		};

		const words = prominentWordsResearch( paper, researcher );

		expect( words ).toEqual( expected );
	} );

	it( "returns prominent words when the text is longer than 100 words after excluding URLs and emails", () => {
		// 180 URLs and 180 email addresses, followed by 101 occurrences of a regular word ("cats").
		const text = "http://blog.example.com/examples ".repeat( 180 ) + "example@something.com ".repeat( 180 ) +
			" cats".repeat( 101 );
		const attributes = { title: "example@something.com example@something.com example@something.com" };
		const mixedPaper = new Paper( text, attributes );

		const englishResearcher = new Researcher( mixedPaper );

		const expectedResult = {
			prominentWords: [
				new ProminentWord( "cats", "cats", 101 ),
			],
			hasMetaDescription: false,
			hasTitle: true,
		};

		expect( prominentWordsResearch( mixedPaper, englishResearcher ) ).toEqual( expectedResult );
	} );
} );