Skip to content

Commit 4e5f43c

Browse files
Gargronhiyuki2578
authored andcommitted
Fix tag normalization and migration not removing duplicate tags (mastodon#11441)
Fix mastodon#11428
1 parent 8060a75 commit 4e5f43c

3 files changed

Lines changed: 51 additions & 4 deletions

File tree

app/models/tag.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def history
6565

6666
class << self
6767
def find_or_create_by_names(name_or_names)
68-
Array(name_or_names).map(&method(:normalize)).uniq.map do |normalized_name|
68+
Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name|
6969
tag = matching_name(normalized_name).first || create(name: normalized_name)
7070

7171
yield tag if block_given?
@@ -77,7 +77,7 @@ def find_or_create_by_names(name_or_names)
7777
def search_for(term, limit = 5, offset = 0)
7878
pattern = sanitize_sql_like(normalize(term.strip)) + '%'
7979

80-
Tag.where(arel_table[:name].lower.matches(pattern.downcase))
80+
Tag.where(arel_table[:name].lower.matches(pattern.mb_chars.downcase.to_s))
8181
.order(:name)
8282
.limit(limit)
8383
.offset(offset)
@@ -92,7 +92,7 @@ def find_normalized!(name)
9292
end
9393

9494
def matching_name(name_or_names)
95-
names = Array(name_or_names).map { |name| normalize(name).downcase }
95+
names = Array(name_or_names).map { |name| normalize(name).mb_chars.downcase.to_s }
9696

9797
if names.size == 1
9898
where(arel_table[:name].lower.eq(names.first))
@@ -104,7 +104,7 @@ def matching_name(name_or_names)
104104
private
105105

106106
def normalize(str)
107-
str.gsub(/\A#/, '').mb_chars.to_s
107+
str.gsub(/\A#/, '')
108108
end
109109
end
110110

db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@ class AddCaseInsensitiveIndexToTags < ActiveRecord::Migration[5.2]
22
disable_ddl_transaction!
33

44
def up
5+
Tag.connection.select_all('SELECT string_agg(id::text, \',\') AS ids FROM tags GROUP BY lower(name) HAVING count(*) > 1').to_hash.each do |row|
6+
canonical_tag_id = row['ids'].split(',').first
7+
redundant_tag_ids = row['ids'].split(',')[1..-1]
8+
9+
safety_assured do
10+
execute "UPDATE accounts_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM accounts_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)"
11+
execute "UPDATE statuses_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM statuses_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.status_id = t0.status_id)"
12+
execute "UPDATE featured_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM featured_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)"
13+
end
14+
15+
Tag.where(id: redundant_tag_ids).in_batches.delete_all
16+
end
17+
518
safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' }
619
remove_index :tags, name: 'index_tags_on_name'
720
remove_index :tags, name: 'hashtag_search_index'

spec/models/tag_spec.rb

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,40 @@
8282
end
8383
end
8484

85+
describe '.find_normalized' do
86+
it 'returns tag for a multibyte case-insensitive name' do
87+
upcase_string = 'abcABCabcABCやゆよ'
88+
downcase_string = 'abcabcabcabcやゆよ';
89+
90+
tag = Fabricate(:tag, name: downcase_string)
91+
expect(Tag.find_normalized(upcase_string)).to eq tag
92+
end
93+
end
94+
95+
describe '.matching_name' do
96+
it 'returns tags for multibyte case-insensitive names' do
97+
upcase_string = 'abcABCabcABCやゆよ'
98+
downcase_string = 'abcabcabcabcやゆよ';
99+
100+
tag = Fabricate(:tag, name: downcase_string)
101+
expect(Tag.matching_name(upcase_string)).to eq [tag]
102+
end
103+
end
104+
105+
describe '.find_or_create_by_names' do
106+
it 'runs a passed block once per tag regardless of duplicates' do
107+
upcase_string = 'abcABCabcABCやゆよ'
108+
downcase_string = 'abcabcabcabcやゆよ';
109+
count = 0
110+
111+
Tag.find_or_create_by_names([upcase_string, downcase_string]) do |tag|
112+
count += 1
113+
end
114+
115+
expect(count).to eq 1
116+
end
117+
end
118+
85119
describe '.search_for' do
86120
it 'finds tag records with matching names' do
87121
tag = Fabricate(:tag, name: "match")

0 commit comments

Comments
 (0)