@@ -13,12 +13,12 @@ class FetchLinkCardService < BaseService
1313 }iox
1414
1515 def call ( status )
16- @status = status
17- @url = parse_urls
16+ @status = status
17+ @original_url = parse_urls
1818
19- return if @url . nil? || @status . preview_cards . any?
19+ return if @original_url . nil? || @status . preview_cards . any?
2020
21- @url = @url . to_s
21+ @url = @original_url . to_s
2222
2323 RedisLock . acquire ( lock_options ) do |lock |
2424 if lock . acquired?
@@ -31,7 +31,7 @@ def call(status)
3131
3232 attach_card if @card &.persisted?
3333 rescue HTTP ::Error , OpenSSL ::SSL ::SSLError , Addressable ::URI ::InvalidURIError , Mastodon ::HostValidationError , Mastodon ::LengthValidationError => e
34- Rails . logger . debug "Error fetching link #{ @url } : #{ e } "
34+ Rails . logger . debug "Error fetching link #{ @original_url } : #{ e } "
3535 nil
3636 end
3737
@@ -47,6 +47,12 @@ def html
4747 return @html if defined? ( @html )
4848
4949 Request . new ( :get , @url ) . add_headers ( 'Accept' => 'text/html' , 'User-Agent' => Mastodon ::Version . user_agent + ' Bot' ) . perform do |res |
50+ # We follow redirects, and ideally we want to save the preview card for
51+ # the destination URL and not any link shortener in-between, so here
52+ # we set the URL to the one of the last response in the redirect chain
53+ @url = res . request . uri . to_s . to_s
54+ @card = PreviewCard . find_or_initialize_by ( url : @url ) if @card . url != @url
55+
5056 if res . code == 200 && res . mime_type == 'text/html'
5157 @html_charset = res . charset
5258 @html = res . body_with_limit
@@ -63,12 +69,15 @@ def attach_card
6369 end
6470
6571 def parse_urls
66- if @status . local?
67- urls = @status . text . scan ( URL_PATTERN ) . map { |array | Addressable ::URI . parse ( array [ 1 ] ) . normalize }
68- else
69- html = Nokogiri ::HTML ( @status . text )
70- links = html . css ( 'a' )
71- urls = links . filter_map { |a | Addressable ::URI . parse ( a [ 'href' ] ) unless skip_link? ( a ) } . filter_map ( &:normalize )
72+ urls = begin
73+ if @status . local?
74+ @status . text . scan ( URL_PATTERN ) . map { |array | Addressable ::URI . parse ( array [ 1 ] ) . normalize }
75+ else
76+ document = Nokogiri ::HTML ( @status . text )
77+ links = document . css ( 'a' )
78+
79+ links . filter_map { |a | Addressable ::URI . parse ( a [ 'href' ] ) unless skip_link? ( a ) } . filter_map ( &:normalize )
80+ end
7281 end
7382
7483 urls . reject { |uri | bad_url? ( uri ) } . first
@@ -79,18 +88,16 @@ def bad_url?(uri)
7988 uri . host . blank? || TagManager . instance . local_url? ( uri . to_s ) || !%w( http https ) . include? ( uri . scheme )
8089 end
8190
82- # rubocop:disable Naming/MethodParameterName
# Reports whether an anchor's href is the URL of an account mentioned in
# @status.
#
# @param anchor [#[]] element whose 'href' attribute is read; parse_urls
#   hands in <a> nodes taken from a Nokogiri document
# @return [Boolean] true when the href equals the URL that
#   ActivityPub::TagManager returns for one of the mentioned accounts
def mention_link?(anchor)
  # The href is the same for every mention, so look it up once instead of
  # once per iteration.
  href = anchor['href']

  @status.mentions.any? do |mention|
    href == ActivityPub::TagManager.instance.url_for(mention.account)
  end
end
8896
# Decides whether an anchor should be left out when collecting candidate
# URLs: hashtag and mention links (microformats) are skipped.
#
# @param anchor [#[]] element whose 'rel', 'class' and 'href' attributes
#   are inspected
# @return [Boolean, nil] truthy when the anchor should be ignored
def skip_link?(anchor)
  # rel containing "tag" marks a hashtag link
  return true if anchor['rel']&.include?('tag')

  # u-url / h-card classes mark a mention rendered with microformats
  return true if anchor['class']&.match?(/u-url|h-card/)

  mention_link?(anchor)
end
93- # rubocop:enable Naming/MethodParameterName
94101
95102 def attempt_oembed
96103 service = FetchOEmbedService . new
@@ -139,42 +146,14 @@ def attempt_oembed
# Fills in @card from the fetched HTML page and saves it.
#
# Does nothing (returns nil) when no HTML could be fetched. Otherwise the
# page is handed to LinkDetailsExtractor; if the extractor reports a
# canonical URL different from the current card's URL, @card is swapped for
# the PreviewCard found or initialized for that canonical URL. The
# extractor's attributes are then assigned, and the card is saved unless
# both its title and its html are blank.
#
# @return [Object, nil] result of save_with_optional_image!, or nil when
#   nothing was saved
def attempt_opengraph
  return if html.nil?

  extractor = LinkDetailsExtractor.new(@url, @html, @html_charset)

  unless extractor.canonical_url == @card.url
    @card = PreviewCard.find_or_initialize_by(url: extractor.canonical_url)
  end

  @card.assign_attributes(extractor.to_preview_card_attributes)

  # A card with neither a title nor embeddable html has nothing to show.
  return if @card.title.blank? && @card.html.blank?

  @card.save_with_optional_image!
end
176155
# Options passed to RedisLock.acquire in #call.
#
# The key is built from @original_url (the URL parsed out of the status)
# rather than @url, which #html may overwrite after following redirects.
# NOTE(review): autorelease presumably bounds how long the lock is held;
# confirm against RedisLock.
#
# @return [Hash] :redis, :key and :autorelease entries
def lock_options
  {
    redis: Redis.current,
    key: "fetch:#{@original_url}",
    autorelease: 15.minutes.seconds
  }
end
180159end
0 commit comments