Skip to content

Commit b6d7726

Browse files
Gargron and ykzts authored
Remove language detection through cld3 (mastodon#17478)
* Remove language detection through cld3
* Update app/helpers/languages_helper.rb

Co-authored-by: Yamagishi Kazutoshi <ykzts@desire.sh>
Co-authored-by: Yamagishi Kazutoshi <ykzts@desire.sh>
1 parent 85b86fe commit b6d7726

16 files changed

Lines changed: 238 additions & 348 deletions

File tree

Gemfile

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,7 @@ gem 'addressable', '~> 2.8'
2929
gem 'bootsnap', '~> 1.10.2', require: false
3030
gem 'browser'
3131
gem 'charlock_holmes', '~> 0.7.7'
32-
gem 'iso-639'
3332
gem 'chewy', '~> 7.2'
34-
gem 'cld3', '~> 3.4.4'
3533
gem 'devise', '~> 4.8'
3634
gem 'devise-two-factor', '~> 4.0'
3735

Gemfile.lock

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -152,8 +152,6 @@ GEM
152152
elasticsearch (>= 7.12.0, < 7.14.0)
153153
elasticsearch-dsl
154154
chunky_png (1.4.0)
155-
cld3 (3.4.4)
156-
ffi (>= 1.1.0, < 1.16.0)
157155
climate_control (0.2.0)
158156
coderay (1.1.3)
159157
color_diff (0.1)
@@ -301,7 +299,6 @@ GEM
301299
terminal-table (>= 1.5.1)
302300
idn-ruby (0.1.4)
303301
ipaddress (0.8.3)
304-
iso-639 (0.3.5)
305302
jmespath (1.5.0)
306303
json (2.5.1)
307304
json-canonicalization (0.3.0)
@@ -698,7 +695,6 @@ DEPENDENCIES
698695
capybara (~> 3.36)
699696
charlock_holmes (~> 0.7.7)
700697
chewy (~> 7.2)
701-
cld3 (~> 3.4.4)
702698
climate_control (~> 0.2)
703699
color_diff (~> 0.1)
704700
concurrent-ruby
@@ -725,7 +721,6 @@ DEPENDENCIES
725721
httplog (~> 1.5.0)
726722
i18n-tasks (~> 0.9)
727723
idn-ruby
728-
iso-639
729724
json-ld
730725
json-ld-preloaded (~> 3.2)
731726
kaminari (~> 1.2)

app/helpers/languages_helper.rb

Lines changed: 217 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,94 +1,237 @@
11
# frozen_string_literal: true
22

33
module LanguagesHelper
4-
HUMAN_LOCALES = {
5-
af: 'Afrikaans',
6-
ar: 'العربية',
7-
ast: 'Asturianu',
8-
bg: 'Български',
9-
bn: 'বাংলা',
10-
br: 'Breton',
11-
ca: 'Català',
12-
co: 'Corsu',
13-
cs: 'Čeština',
14-
cy: 'Cymraeg',
15-
da: 'Dansk',
16-
de: 'Deutsch',
17-
el: 'Ελληνικά',
18-
en: 'English',
19-
eo: 'Esperanto',
4+
ISO_639_1 = {
5+
aa: ['Afar', 'Afaraf'].freeze,
6+
ab: ['Abkhaz', 'аҧсуа бызшәа'].freeze,
7+
ae: ['Avestan', 'avesta'].freeze,
8+
af: ['Afrikaans', 'Afrikaans'].freeze,
9+
ak: ['Akan', 'Akan'].freeze,
10+
am: ['Amharic', 'አማርኛ'].freeze,
11+
an: ['Aragonese', 'aragonés'].freeze,
12+
ar: ['Arabic', 'اللغة العربية'].freeze,
13+
as: ['Assamese', 'অসমীয়া'].freeze,
14+
av: ['Avaric', 'авар мацӀ'].freeze,
15+
ay: ['Aymara', 'aymar aru'].freeze,
16+
az: ['Azerbaijani', 'azərbaycan dili'].freeze,
17+
ba: ['Bashkir', 'башҡорт теле'].freeze,
18+
be: ['Belarusian', 'беларуская мова'].freeze,
19+
bg: ['Bulgarian', 'български език'].freeze,
20+
bh: ['Bihari', 'भोजपुरी'].freeze,
21+
bi: ['Bislama', 'Bislama'].freeze,
22+
bm: ['Bambara', 'bamanankan'].freeze,
23+
bn: ['Bengali', 'বাংলা'].freeze,
24+
bo: ['Tibetan', 'བོད་ཡིག'].freeze,
25+
br: ['Breton', 'brezhoneg'].freeze,
26+
bs: ['Bosnian', 'bosanski jezik'].freeze,
27+
ca: ['Catalan', 'Català'].freeze,
28+
ce: ['Chechen', 'нохчийн мотт'].freeze,
29+
ch: ['Chamorro', 'Chamoru'].freeze,
30+
co: ['Corsican', 'corsu'].freeze,
31+
cr: ['Cree', 'ᓀᐦᐃᔭᐍᐏᐣ'].freeze,
32+
cs: ['Czech', 'čeština'].freeze,
33+
cu: ['Old Church Slavonic', 'ѩзыкъ словѣньскъ'].freeze,
34+
cv: ['Chuvash', 'чӑваш чӗлхи'].freeze,
35+
cy: ['Welsh', 'Cymraeg'].freeze,
36+
da: ['Danish', 'dansk'].freeze,
37+
de: ['German', 'Deutsch'].freeze,
38+
dv: ['Divehi', 'Dhivehi'].freeze,
39+
dz: ['Dzongkha', 'རྫོང་ཁ'].freeze,
40+
ee: ['Ewe', 'Eʋegbe'].freeze,
41+
el: ['Greek', 'Ελληνικά'].freeze,
42+
en: ['English', 'English'].freeze,
43+
eo: ['Esperanto', 'Esperanto'].freeze,
44+
es: ['Spanish', 'Español'].freeze,
45+
et: ['Estonian', 'eesti'].freeze,
46+
eu: ['Basque', 'euskara'].freeze,
47+
fa: ['Persian', 'فارسی'].freeze,
48+
ff: ['Fula', 'Fulfulde'].freeze,
49+
fi: ['Finnish', 'suomi'].freeze,
50+
fj: ['Fijian', 'Vakaviti'].freeze,
51+
fo: ['Faroese', 'føroyskt'].freeze,
52+
fr: ['French', 'Français'].freeze,
53+
fy: ['Western Frisian', 'Frysk'].freeze,
54+
ga: ['Irish', 'Gaeilge'].freeze,
55+
gd: ['Scottish Gaelic', 'Gàidhlig'].freeze,
56+
gl: ['Galician', 'galego'].freeze,
57+
gu: ['Gujarati', 'ગુજરાતી'].freeze,
58+
gv: ['Manx', 'Gaelg'].freeze,
59+
ha: ['Hausa', 'هَوُسَ'].freeze,
60+
he: ['Hebrew', 'עברית'].freeze,
61+
hi: ['Hindi', 'हिन्दी'].freeze,
62+
ho: ['Hiri Motu', 'Hiri Motu'].freeze,
63+
hr: ['Croatian', 'Hrvatski'].freeze,
64+
ht: ['Haitian', 'Kreyòl ayisyen'].freeze,
65+
hu: ['Hungarian', 'magyar'].freeze,
66+
hy: ['Armenian', 'Հայերեն'].freeze,
67+
hz: ['Herero', 'Otjiherero'].freeze,
68+
ia: ['Interlingua', 'Interlingua'].freeze,
69+
id: ['Indonesian', 'Bahasa Indonesia'].freeze,
70+
ie: ['Interlingue', 'Interlingue'].freeze,
71+
ig: ['Igbo', 'Asụsụ Igbo'].freeze,
72+
ii: ['Nuosu', 'ꆈꌠ꒿ Nuosuhxop'].freeze,
73+
ik: ['Inupiaq', 'Iñupiaq'].freeze,
74+
io: ['Ido', 'Ido'].freeze,
75+
is: ['Icelandic', 'Íslenska'].freeze,
76+
it: ['Italian', 'Italiano'].freeze,
77+
iu: ['Inuktitut', 'ᐃᓄᒃᑎᑐᑦ'].freeze,
78+
ja: ['Japanese', '日本語'].freeze,
79+
jv: ['Javanese', 'basa Jawa'].freeze,
80+
ka: ['Georgian', 'ქართული'].freeze,
81+
kg: ['Kongo', 'Kikongo'].freeze,
82+
ki: ['Kikuyu', 'Gĩkũyũ'].freeze,
83+
kj: ['Kwanyama', 'Kuanyama'].freeze,
84+
kk: ['Kazakh', 'қазақ тілі'].freeze,
85+
kl: ['Kalaallisut', 'kalaallisut'].freeze,
86+
km: ['Khmer', 'ខេមរភាសា'].freeze,
87+
kn: ['Kannada', 'ಕನ್ನಡ'].freeze,
88+
ko: ['Korean', '한국어'].freeze,
89+
kr: ['Kanuri', 'Kanuri'].freeze,
90+
ks: ['Kashmiri', 'कश्मीरी'].freeze,
91+
ku: ['Kurdish', 'Kurdî'].freeze,
92+
kv: ['Komi', 'коми кыв'].freeze,
93+
kw: ['Cornish', 'Kernewek'].freeze,
94+
ky: ['Kyrgyz', 'Кыргызча'].freeze,
95+
la: ['Latin', 'latine'].freeze,
96+
lb: ['Luxembourgish', 'Lëtzebuergesch'].freeze,
97+
lg: ['Ganda', 'Luganda'].freeze,
98+
li: ['Limburgish', 'Limburgs'].freeze,
99+
ln: ['Lingala', 'Lingála'].freeze,
100+
lo: ['Lao', 'ພາສາ'].freeze,
101+
lt: ['Lithuanian', 'lietuvių kalba'].freeze,
102+
lu: ['Luba-Katanga', 'Tshiluba'].freeze,
103+
lv: ['Latvian', 'latviešu valoda'].freeze,
104+
mg: ['Malagasy', 'fiteny malagasy'].freeze,
105+
mh: ['Marshallese', 'Kajin M̧ajeļ'].freeze,
106+
mi: ['Māori', 'te reo Māori'].freeze,
107+
mk: ['Macedonian', 'македонски јазик'].freeze,
108+
ml: ['Malayalam', 'മലയാളം'].freeze,
109+
mn: ['Mongolian', 'Монгол хэл'].freeze,
110+
mr: ['Marathi', 'मराठी'].freeze,
111+
ms: ['Malay', 'Bahasa Malaysia'].freeze,
112+
mt: ['Maltese', 'Malti'].freeze,
113+
my: ['Burmese', 'ဗမာစာ'].freeze,
114+
na: ['Nauru', 'Ekakairũ Naoero'].freeze,
115+
nb: ['Norwegian Bokmål', 'Norsk bokmål'].freeze,
116+
nd: ['Northern Ndebele', 'isiNdebele'].freeze,
117+
ne: ['Nepali', 'नेपाली'].freeze,
118+
ng: ['Ndonga', 'Owambo'].freeze,
119+
nl: ['Dutch', 'Nederlands'].freeze,
120+
nn: ['Norwegian Nynorsk', 'Norsk nynorsk'].freeze,
121+
no: ['Norwegian', 'Norsk'].freeze,
122+
nr: ['Southern Ndebele', 'isiNdebele'].freeze,
123+
nv: ['Navajo', 'Diné bizaad'].freeze,
124+
ny: ['Chichewa', 'chiCheŵa'].freeze,
125+
oc: ['Occitan', 'occitan'].freeze,
126+
oj: ['Ojibwe', 'ᐊᓂᔑᓈᐯᒧᐎᓐ'].freeze,
127+
om: ['Oromo', 'Afaan Oromoo'].freeze,
128+
or: ['Oriya', 'ଓଡ଼ିଆ'].freeze,
129+
os: ['Ossetian', 'ирон æвзаг'].freeze,
130+
pa: ['Panjabi', 'ਪੰਜਾਬੀ'].freeze,
131+
pi: ['Pāli', 'पाऴि'].freeze,
132+
pl: ['Polish', 'Polski'].freeze,
133+
ps: ['Pashto', 'پښتو'].freeze,
134+
pt: ['Portuguese', 'Português'].freeze,
135+
qu: ['Quechua', 'Runa Simi'].freeze,
136+
rm: ['Romansh', 'rumantsch grischun'].freeze,
137+
rn: ['Kirundi', 'Ikirundi'].freeze,
138+
ro: ['Romanian', 'Română'].freeze,
139+
ru: ['Russian', 'Русский'].freeze,
140+
rw: ['Kinyarwanda', 'Ikinyarwanda'].freeze,
141+
sa: ['Sanskrit', 'संस्कृतम्'].freeze,
142+
sc: ['Sardinian', 'sardu'].freeze,
143+
sd: ['Sindhi', 'सिन्धी'].freeze,
144+
se: ['Northern Sami', 'Davvisámegiella'].freeze,
145+
sg: ['Sango', 'yângâ tî sängö'].freeze,
146+
si: ['Sinhala', 'සිංහල'].freeze,
147+
sk: ['Slovak', 'slovenčina'].freeze,
148+
sl: ['Slovenian', 'slovenščina'].freeze,
149+
sn: ['Shona', 'chiShona'].freeze,
150+
so: ['Somali', 'Soomaaliga'].freeze,
151+
sq: ['Albanian', 'Shqip'].freeze,
152+
sr: ['Serbian', 'српски језик'].freeze,
153+
ss: ['Swati', 'SiSwati'].freeze,
154+
st: ['Southern Sotho', 'Sesotho'].freeze,
155+
su: ['Sundanese', 'Basa Sunda'].freeze,
156+
sv: ['Swedish', 'Svenska'].freeze,
157+
sw: ['Swahili', 'Kiswahili'].freeze,
158+
ta: ['Tamil', 'தமிழ்'].freeze,
159+
te: ['Telugu', 'తెలుగు'].freeze,
160+
tg: ['Tajik', 'тоҷикӣ'].freeze,
161+
th: ['Thai', 'ไทย'].freeze,
162+
ti: ['Tigrinya', 'ትግርኛ'].freeze,
163+
tk: ['Turkmen', 'Türkmen'].freeze,
164+
tl: ['Tagalog', 'Wikang Tagalog'].freeze,
165+
tn: ['Tswana', 'Setswana'].freeze,
166+
to: ['Tonga', 'faka Tonga'].freeze,
167+
tr: ['Turkish', 'Türkçe'].freeze,
168+
ts: ['Tsonga', 'Xitsonga'].freeze,
169+
tt: ['Tatar', 'татар теле'].freeze,
170+
tw: ['Twi', 'Twi'].freeze,
171+
ty: ['Tahitian', 'Reo Tahiti'].freeze,
172+
ug: ['Uyghur', 'ئۇيغۇرچە‎'].freeze,
173+
uk: ['Ukrainian', 'Українська'].freeze,
174+
ur: ['Urdu', 'اردو'].freeze,
175+
uz: ['Uzbek', 'Ўзбек'].freeze,
176+
ve: ['Venda', 'Tshivenḓa'].freeze,
177+
vi: ['Vietnamese', 'Tiếng Việt'].freeze,
178+
vo: ['Volapük', 'Volapük'].freeze,
179+
wa: ['Walloon', 'walon'].freeze,
180+
wo: ['Wolof', 'Wollof'].freeze,
181+
xh: ['Xhosa', 'isiXhosa'].freeze,
182+
yi: ['Yiddish', 'ייִדיש'].freeze,
183+
yo: ['Yoruba', 'Yorùbá'].freeze,
184+
za: ['Zhuang', 'Saɯ cueŋƅ'].freeze,
185+
zh: ['Chinese', '中文'].freeze,
186+
zu: ['Zulu', 'isiZulu'].freeze,
187+
}.freeze
188+
189+
ISO_639_3 = {
190+
ast: ['Asturian', 'Asturianu'].freeze,
191+
kab: ['Kabyle', 'Taqbaylit'].freeze,
192+
kmr: ['Northern Kurdish', 'Kurmancî'].freeze,
193+
zgh: ['Standard Moroccan Tamazight', 'ⵜⴰⵎⴰⵣⵉⵖⵜ'].freeze,
194+
}.freeze
195+
196+
SUPPORTED_LOCALES = {}.merge(ISO_639_1).merge(ISO_639_3).freeze
197+
198+
# For ISO-639-1 and ISO-639-3 language codes, we have their official
199+
# names, but for some translations, we need the names of the
200+
# regional variants specifically
201+
REGIONAL_LOCALE_NAMES = {
20202
'es-AR': 'Español (Argentina)',
21203
'es-MX': 'Español (México)',
22-
es: 'Español',
23-
et: 'Eesti',
24-
eu: 'Euskara',
25-
fa: 'فارسی',
26-
fi: 'Suomi',
27-
fr: 'Français',
28-
ga: 'Gaeilge',
29-
gd: 'Gàidhlig',
30-
gl: 'Galego',
31-
he: 'עברית',
32-
hi: 'हिन्दी',
33-
hr: 'Hrvatski',
34-
hu: 'Magyar',
35-
hy: 'Հայերեն',
36-
id: 'Bahasa Indonesia',
37-
io: 'Ido',
38-
is: 'Íslenska',
39-
it: 'Italiano',
40-
ja: '日本語',
41-
ka: 'ქართული',
42-
kab: 'Taqbaylit',
43-
kk: 'Қазақша',
44-
kmr: 'Kurmancî',
45-
kn: 'ಕನ್ನಡ',
46-
ko: '한국어',
47-
ku: 'سۆرانی',
48-
lt: 'Lietuvių',
49-
lv: 'Latviešu',
50-
mk: 'Македонски',
51-
ml: 'മലയാളം',
52-
mr: 'मराठी',
53-
ms: 'Bahasa Melayu',
54-
nl: 'Nederlands',
55-
nn: 'Nynorsk',
56-
no: 'Norsk',
57-
oc: 'Occitan',
58-
pl: 'Polski',
59204
'pt-BR': 'Português (Brasil)',
60205
'pt-PT': 'Português (Portugal)',
61-
pt: 'Português',
62-
ro: 'Română',
63-
ru: 'Русский',
64-
sa: 'संस्कृतम्',
65-
sc: 'Sardu',
66-
si: 'සිංහල',
67-
sk: 'Slovenčina',
68-
sl: 'Slovenščina',
69-
sq: 'Shqip',
70206
'sr-Latn': 'Srpski (latinica)',
71-
sr: 'Српски',
72-
sv: 'Svenska',
73-
ta: 'தமிழ்',
74-
te: 'తెలుగు',
75-
th: 'ไทย',
76-
tr: 'Türkçe',
77-
uk: 'Українська',
78-
ur: 'اُردُو',
79-
vi: 'Tiếng Việt',
80-
zgh: 'ⵜⴰⵎⴰⵣⵉⵖⵜ',
81207
'zh-CN': '简体中文',
82208
'zh-HK': '繁體中文(香港)',
83209
'zh-TW': '繁體中文(臺灣)',
84-
zh: '中文',
85210
}.freeze
86211

87212
def human_locale(locale)
88213
if locale == 'und'
89214
I18n.t('generic.none')
215+
elsif (supported_locale = SUPPORTED_LOCALES[locale.to_sym])
216+
supported_locale[1]
217+
elsif (regional_locale = REGIONAL_LOCALE_NAMES[locale.to_sym])
218+
regional_locale
90219
else
91-
HUMAN_LOCALES[locale.to_sym] || locale
220+
locale
92221
end
93222
end
223+
224+
def valid_locale_or_nil(str)
225+
return if str.blank?
226+
227+
code, = str.to_s.split(/[_-]/) # Strip out the region from e.g. en_US or ja-JP
228+
229+
return unless valid_locale?(code)
230+
231+
code
232+
end
233+
234+
def valid_locale?(locale)
235+
SUPPORTED_LOCALES.key?(locale.to_sym)
236+
end
94237
end

app/helpers/settings_helper.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
module SettingsHelper
44
def filterable_languages
5-
LanguageDetector.instance.language_names.select(&LanguagesHelper::HUMAN_LOCALES.method(:key?))
5+
LanguagesHelper::SUPPORTED_LOCALES.keys
66
end
77

88
def hash_to_object(hash)

app/lib/activitypub/activity/create.rb

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ def process_status_params
112112
url: @status_parser.url || @status_parser.uri,
113113
account: @account,
114114
text: converted_object_type? ? converted_text : (@status_parser.text || ''),
115-
language: @status_parser.language || detected_language,
115+
language: @status_parser.language,
116116
spoiler_text: converted_object_type? ? '' : (@status_parser.spoiler_text || ''),
117117
created_at: @status_parser.created_at,
118118
edited_at: @status_parser.edited_at,
@@ -370,10 +370,6 @@ def converted_text
370370
Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n"))
371371
end
372372

373-
def detected_language
374-
LanguageDetector.instance.detect(@status_parser.text, @account) if supported_object_type?
375-
end
376-
377373
def unsupported_media_type?(mime_type)
378374
mime_type.present? && !MediaAttachment.supported_mime_types.include?(mime_type)
379375
end

0 commit comments

Comments
 (0)