Skip to content

Commit b6f9a02

Browse files
author
masanbol
committed
Revert "Allow most kinds of characters in URL query (fixes mastodon#8408) (mastodon#8447)"
This reverts commit 6a5e3da.
1 parent e168d19 commit b6f9a02

2 files changed

Lines changed: 4 additions & 67 deletions

File tree

app/lib/formatter.rb

Lines changed: 1 addition & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ def encode(html)
9999
end
100100

101101
def encode_and_link_urls(html, accounts = nil, options = {})
102-
entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
102+
entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false)
103103

104104
if accounts.is_a?(Hash)
105105
options = accounts
@@ -199,43 +199,6 @@ def rewrite(text, entities)
199199
result.flatten.join
200200
end
201201

202-
def utf8_friendly_extractor(text, options = {})
203-
old_to_new_index = [0]
204-
205-
escaped = text.chars.map do |c|
206-
output = c.ord.to_s(16).length > 2 ? CGI.escape(c) : c
207-
old_to_new_index << old_to_new_index.last + output.length
208-
output
209-
end.join
210-
211-
# Note: I couldn't obtain list_slug with @user/list-name format
212-
# for mention so this requires additional check
213-
special = Extractor.extract_entities_with_indices(escaped, options).map do |extract|
214-
# exactly one of :url, :hashtag, :screen_name, :cashtag keys is present
215-
key = (extract.keys & [:url, :hashtag, :screen_name, :cashtag]).first
216-
217-
new_indices = [
218-
old_to_new_index.find_index(extract[:indices].first),
219-
old_to_new_index.find_index(extract[:indices].last),
220-
]
221-
222-
has_prefix_char = [:hashtag, :screen_name, :cashtag].include?(key)
223-
value_indices = [
224-
new_indices.first + (has_prefix_char ? 1 : 0), # account for #, @ or $
225-
new_indices.last - 1,
226-
]
227-
228-
next extract.merge(
229-
:indices => new_indices,
230-
key => text[value_indices.first..value_indices.last]
231-
)
232-
end
233-
234-
standard = Extractor.extract_entities_with_indices(text, options)
235-
236-
Extractor.remove_overlapping_entities(special + standard)
237-
end
238-
239202
def link_to_url(entity, options = {})
240203
url = Addressable::URI.parse(entity[:url])
241204
html_attrs = { target: '_blank', rel: 'nofollow noopener' }

spec/lib/formatter_spec.rb

Lines changed: 3 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -74,36 +74,10 @@
7474
end
7575

7676
context 'given a URL with a query string' do
77-
context 'with escaped unicode character' do
78-
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
77+
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
7978

80-
it 'matches the full URL' do
81-
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
82-
end
83-
end
84-
85-
context 'with unicode character' do
86-
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' }
87-
88-
it 'matches the full URL' do
89-
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&amp;q=autolink"'
90-
end
91-
end
92-
93-
context 'with unicode character at the end' do
94-
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' }
95-
96-
it 'matches the full URL' do
97-
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"'
98-
end
99-
end
100-
101-
context 'with escaped and not escaped unicode characters' do
102-
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' }
103-
104-
it 'preserves escaped unicode characters' do
105-
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;utf81=✓&amp;q=autolink"'
106-
end
79+
it 'matches the full URL' do
80+
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"'
10781
end
10882
end
10983

0 commit comments

Comments
 (0)