Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion app/lib/formatter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def encode(html)
end

def encode_and_link_urls(html, accounts = nil, options = {})
entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false)
entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)

if accounts.is_a?(Hash)
options = accounts
Expand Down Expand Up @@ -199,6 +199,43 @@ def rewrite(text, entities)
result.flatten.join
end

# Extracts entities (URLs, hashtags, mentions, cashtags) from +text+ in two
# passes and merges the results:
#
# 1. Every character whose code point is above 0xFF is replaced by its
#    CGI-escaped (percent-encoded) form, the extractor is run on that escaped
#    copy, and the resulting indices are translated back to positions in the
#    original +text+. Entity values are re-sliced from the original text so
#    they contain the raw characters, not the percent-escapes.
# 2. The extractor is run on the untouched +text+.
#
# Both result sets are handed to Extractor.remove_overlapping_entities.
#
# @param text [String] the text to scan
# @param options [Hash] passed through unchanged to
#   Extractor.extract_entities_with_indices
# @return [Array<Hash>] entities as returned by
#   Extractor.remove_overlapping_entities
def utf8_friendly_extractor(text, options = {})
  # escaped_offsets[i] is the offset in the escaped string at which original
  # character i starts; the final element is the escaped string's length.
  escaped_offsets = [0]

  escaped = text.chars.map do |char|
    piece = char.ord > 0xFF ? CGI.escape(char) : char
    escaped_offsets << escaped_offsets.last + piece.length
    piece
  end.join

  # Reverse lookup (escaped offset => original index). Offsets are strictly
  # increasing, so every key is unique.
  original_index_at = escaped_offsets.each_with_index.to_h

  # NOTE: for mentions in the @user/list-name form the list_slug is not
  # handled separately here, so such mentions need an additional check.
  special = Extractor.extract_entities_with_indices(escaped, options).map do |entity|
    first = original_index_at[entity[:indices].first]
    last  = original_index_at[entity[:indices].last]

    # A boundary inside a percent-escape has no counterpart in the original
    # text; skip the entity instead of failing on nil arithmetic.
    next if first.nil? || last.nil?

    # Exactly one of :url, :hashtag, :screen_name, :cashtag is expected.
    key = (entity.keys & %i[url hashtag screen_name cashtag]).first

    # Hashtags, mentions and cashtags carry a leading #, @ or $ that is part
    # of the indexed span but not of the stored value.
    value_start = %i[hashtag screen_name cashtag].include?(key) ? first + 1 : first

    entity.merge(:indices => [first, last], key => text[value_start...last])
  end.compact

  standard = Extractor.extract_entities_with_indices(text, options)

  Extractor.remove_overlapping_entities(special + standard)
end

def link_to_url(entity, options = {})
url = Addressable::URI.parse(entity[:url])
html_attrs = { target: '_blank', rel: 'nofollow noopener' }
Expand Down
32 changes: 29 additions & 3 deletions spec/lib/formatter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,36 @@
end

context 'given a URL with a query string' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
context 'with escaped unicode character' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }

it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
end
end

# A raw (not percent-escaped) non-ASCII character in the middle of the query
# string is expected to stay inside the generated href, with the rest of the
# query following it.
context 'with unicode character' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' }

it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&amp;q=autolink"'
end
end

# Same as the mid-string case, but the raw non-ASCII character is the last
# character of the URL: the href is expected to include it.
context 'with unicode character at the end' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' }

it 'matches the full URL' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"'
end
end

# The URL mixes an already percent-escaped sequence (%E2%9C%93) with a raw
# non-ASCII character: the href is expected to keep the escaped sequence as
# written and the raw character unescaped.
context 'with escaped and not escaped unicode characters' do
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' }

it 'preserves escaped unicode characters' do
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;utf81=✓&amp;q=autolink"'
end
end
end

Expand Down