Skip to content

Commit 5bfe1e1

Browse files
authored
Change language detection to include hashtags as words (#11341)
1 parent 3a6fe65 commit 5bfe1e1

2 files changed

Lines changed: 4 additions & 4 deletions

File tree

app/lib/language_detector.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def simplify_text(text)
6969
new_text = remove_html(text)
7070
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
7171
new_text.gsub!(Account::MENTION_RE, '')
72-
new_text.gsub!(Tag::HASHTAG_RE, '')
72+
new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
7373
new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
7474
new_text.gsub!(/\s+/, ' ')
7575
new_text

spec/lib/language_detector_spec.rb

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,11 @@
3232
expect(result).to eq 'Our website is and also'
3333
end
3434

35-
it 'strips #hashtags from strings before detection' do
36-
string = 'Hey look at all the #animals and #fish'
35+
it 'converts #hashtags back to normal text before detection' do
36+
string = 'Hey look at all the #animals and #FishAndChips'
3737

3838
result = described_class.instance.send(:prepare_text, string)
39-
expect(result).to eq 'Hey look at all the and'
39+
expect(result).to eq 'Hey look at all the animals and fish and chips'
4040
end
4141
end
4242

0 commit comments

Comments
 (0)