wordpress-seo/packages/yoastseo/spec/languageProcessing/researches/getProminentWordsForInsightsSpec.js at 8ccdf721ff8521f5777dedd1b86f5515a3344fb2 · Yoast/wordpress-seo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher";
import getProminentWordsForInsights from "../../../src/languageProcessing/researches/getProminentWordsForInsights";
import Paper from "../../../src/values/Paper";
import Researcher from "../../../src/languageProcessing/languages/en/Researcher";
import CatalanResearcher from "../../../src/languageProcessing/languages/ca/Researcher";
import ProminentWord from "../../../src/languageProcessing/values/ProminentWord";
import getMorphologyData from "../../specHelpers/getMorphologyData";

// Morphology fixture loaded for "en"; the specs below register it on the English researcher
// under the "morphology" research-data key.
const morphologyData = getMorphologyData( "en" );
// Morphology fixture loaded for "ja"; registered on the Japanese researcher in the specs
// that exercise the custom helpers with morphology data.
const morphologyDataJA = getMorphologyData( "ja" );

// Specs for the prominent words research on a language without morphology data (Catalan)
// and on English with morphology data.
describe( "getProminentWordsForInsights research", () => {
	it( "does not break if no morphology support is available for the language", () => {
		const catalanPaper = new Paper( "texte  et texte et texte et texte et texte", { locale: "ca" } );
		// No morphology data is registered on this researcher.
		const catalanResearcher = new CatalanResearcher( catalanPaper );

		const prominentWords = getProminentWordsForInsights( catalanPaper, catalanResearcher );

		expect( prominentWords ).toEqual( [
			new ProminentWord( "texte", "texte", 5 ),
		] );
	} );

	it( "returns insights from the text alone (not attributes)", () => {
		// The sentences are joined with single spaces into one text.
		const sentences = [
			"Here are a ton of syllables.",
			"Syllables are very important.",
			"I think the syllable combinations are even more important.",
			"Syllable combinations for the win!",
			"Here are a ton of syllables.",
			"Syllables are very important.",
			"I think the syllable combinations are even more important.",
			"I say I think the syllable combinations are even more important.",
			"Syllable combinations for the win!",
		];
		// The keyword is deliberately absent from the text and from the expected result.
		const englishPaper = new Paper( sentences.join( " " ), { keyword: "hahahahahaha" } );

		const englishResearcher = new Researcher( englishPaper );
		englishResearcher.addResearchData( "morphology", morphologyData );

		const prominentWords = getProminentWordsForInsights( englishPaper, englishResearcher );

		expect( prominentWords ).toEqual( [
			new ProminentWord( "syllable", "syllable", 9 ),
			new ProminentWord( "combinations", "combination", 5 ),
		] );
	} );

	it( "does not return words that were used less than 5 times", () => {
		const paragraph = "As we announced at YoastCon, we’re working together with Bing and Google to allow live indexing for " +
			"everyone who uses Yoast SEO — free and premium. <h2>Subheading!</h2>In an update currently planned for the end of March, we'll " +
			"allow users to connect their sites to MyYoast, our customer portal. After that we'll roll out live indexing, " +
			"which means every time you publish, update, or delete a post, that will be reflected almost instantly into " +
			"Bing and Google’s indices. How does this work? When you connect your site to MyYoast.";
		// The text is the same paragraph three times, separated by a single space.
		const repeatedText = [ paragraph, paragraph, paragraph ].join( " " );

		const attributes = {
			keyword: "live indexing Yoast SEO",
			synonyms: "live index",
			title: "Amazing title",
			description: "Awesome metadescription",
			locale: "en_EN",
		};
		const englishPaper = new Paper( repeatedText, attributes );

		const englishResearcher = new Researcher( englishPaper );
		englishResearcher.addResearchData( "morphology", morphologyData );

		const prominentWords = getProminentWordsForInsights( englishPaper, englishResearcher );

		// Only words from the text are expected; the paper attributes do not contribute relevant words.
		expect( prominentWords ).toEqual( [
			new ProminentWord( "indexing", "index", 9 ),
			new ProminentWord( "allow", "allow", 6 ),
			new ProminentWord( "bing", "bing", 6 ),
			new ProminentWord( "connect", "connect", 6 ),
			new ProminentWord( "google", "google", 6 ),
			new ProminentWord( "live", "live", 6 ),
			new ProminentWord( "myyoast", "myyoast", 6 ),
			new ProminentWord( "site", "site", 6 ),
			new ProminentWord( "update", "update", 6 ),
			new ProminentWord( "work", "work", 6 ),
		] );
	} );
} );

// Specs for the prominent words research on a language with custom helpers.
describe( "test for prominent words research for languages that have custom helpers", function() {
	// Japanese has custom helpers for getting words from the text, for counting the text length,
	// and for returning a custom function that returns the stem of a word.
	it( "returns no prominent words for texts under 200 characters with no words that occur more than 5 times", function() {
		const paper = new Paper( "東海道新幹線の開業前、東西の大動脈である東海道本線は高度経済成長下で線路容量が逼迫しており、抜本的な輸送力増強を迫られていた。" );

		const researcher = new JapaneseResearcher( paper );
		researcher.addResearchData( "morphology", morphologyDataJA );

		const words = getProminentWordsForInsights( paper, researcher );
		expect( words ).toEqual( [] );
	} );

	// The title previously claimed that no prominent words are returned, which contradicted the assertion below.
	it( "returns prominent words for texts under 200 characters when some words occur more than 5 times", function() {
		const paper = new Paper( "美しい".repeat( 6 ) + "題名".repeat( 7 ) +
			"東海道新幹線の開業前猫、東西の大動脈である東海道本線は高度経済成長下で線路容量が逼迫しており猫。" + "大好き".repeat( 10 ) );

		const researcher = new JapaneseResearcher( paper );
		researcher.addResearchData( "morphology", morphologyDataJA );

		// Only the three repeated words are expected, ordered by descending occurrence count.
		const expected = [
			new ProminentWord( "大好き", "大好い", 10 ),
			new ProminentWord( "題名", "題名", 7 ),
			new ProminentWord( "美しい", "美しい", 6 ),
		];

		const words = getProminentWordsForInsights( paper, researcher );

		expect( words ).toEqual( expected );
	} );

	it( "returns prominent words for texts with more than 300 characters, in which the morphology data is available", function() {
		const paper = new Paper( "私の美しい猫のおやつが猫".repeat( 180 ) );

		const researcher = new JapaneseResearcher( paper );
		researcher.addResearchData( "morphology", morphologyDataJA );

		const expected = [
			new ProminentWord( "猫", "猫", 181 ),
			new ProminentWord( "美しい", "美しい", 180 ),
			new ProminentWord( "おやつ", "おやた", 179 ),
			new ProminentWord( "猫私", "猫私", 179 ),
		];

		const words = getProminentWordsForInsights( paper, researcher );

		expect( words ).toEqual( expected );
	} );

	it( "returns relevant words from the text alone even when the attributes are available", function() {
		// The meta description is passed under the `description` key, the same key the
		// English spec in this file uses, so that the attribute is actually set on the paper.
		const paper = new Paper(
			( "私の甘い猫は愛撫されるのが大好きです。猫はおやつが大好きです。" ).repeat( 100 ),
			{ title: "題名", keyword: "題名", description: "の美しい猫" }
		);

		const researcher = new JapaneseResearcher( paper );
		researcher.addResearchData( "morphology", morphologyDataJA );

		// None of the attribute-only words ("題名", "美しい") are expected in the result.
		const expected = [
			new ProminentWord( "大好き", "大好い", 200 ),
			new ProminentWord( "おやつが", "おやつい", 100 ),
			new ProminentWord( "愛撫", "愛撫", 100 ),
			new ProminentWord( "猫", "猫", 100 ),
			new ProminentWord( "甘い猫", "甘い猫", 100 ),
		];

		const words = getProminentWordsForInsights( paper, researcher );

		expect( words ).toEqual( expected );
	} );

	it( "returns prominent words for texts with more than 300 characters, in which the morphology data is not available", function() {
		const paper = new Paper( "私の美しい" + "のおやつが猫".repeat( 180 ), { title: "題名" } );

		// No morphology data is registered here; the expected stems equal the word forms.
		const researcher = new JapaneseResearcher( paper );

		const expected = [
			new ProminentWord( "猫", "猫", 180 ),
			new ProminentWord( "おやつ", "おやつ", 179 ),
		];

		const words = getProminentWordsForInsights( paper, researcher );

		expect( words ).toEqual( expected );
	} );
} );

// Specs for how URLs, email addresses and bare domain names in the text affect the prominent words.
describe( "test for filtering out URLs and email addresses", () => {
	// Regular content appended after the repeated token in every spec.
	const regularContent = "cats ".repeat( 50 );

	/**
	 * Runs the research on an English paper with morphology data registered.
	 *
	 * @param {string} text The text of the paper.
	 *
	 * @returns {ProminentWord[]} The prominent words found by the research.
	 */
	const getWordsFromEnglishText = ( text ) => {
		const englishPaper = new Paper( text );
		const englishResearcher = new Researcher( englishPaper );
		englishResearcher.addResearchData( "morphology", morphologyData );

		return getProminentWordsForInsights( englishPaper, englishResearcher );
	};

	it( "does not include URLs in prominent words", () => {
		const result = getWordsFromEnglishText( "http://blog.example.com/examples ".repeat( 180 ) + regularContent );

		expect( result ).toEqual( [
			new ProminentWord( "cats", "cat", 50 ),
		] );
	} );

	it( "does not include email addresses in prominent words", () => {
		const result = getWordsFromEnglishText( "example89@something.com ".repeat( 180 ) + regularContent );

		expect( result ).toEqual( [
			new ProminentWord( "cats", "cat", 50 ),
		] );
	} );

	it( "includes domain names in prominent words", () => {
		const result = getWordsFromEnglishText( "example.com ".repeat( 180 ) + regularContent );

		expect( result ).toEqual( [
			new ProminentWord( "example.com", "example.com", 180 ),
			new ProminentWord( "cats", "cat", 50 ),
		] );
	} );
} );