Skip to content

Commit 3b22296

Browse files
Gargronhiyuki2578
authored andcommitted
Add tootctl domains crawl (mastodon#9809)
1 parent a866a8c commit 3b22296

3 files changed

Lines changed: 123 additions & 2 deletions

File tree

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,5 @@ group :production do
145145
gem 'lograge', '~> 0.10'
146146
gem 'redis-rails', '~> 5.0'
147147
end
148+
149+
gem 'concurrent-ruby', require: false

Gemfile.lock

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ GEM
231231
fuubar (2.3.2)
232232
rspec-core (~> 3.0)
233233
ruby-progressbar (~> 1.4)
234-
get_process_mem (0.2.2)
234+
get_process_mem (0.2.3)
235235
globalid (0.4.1)
236236
activesupport (>= 4.2.0)
237237
goldfinger (2.1.0)
@@ -674,6 +674,7 @@ DEPENDENCIES
674674
chewy (~> 5.0)
675675
cld3 (~> 3.2.3)
676676
climate_control (~> 0.2)
677+
concurrent-ruby
677678
derailed_benchmarks
678679
devise (~> 4.5)
679680
devise-two-factor (~> 3.0)
@@ -773,4 +774,4 @@ RUBY VERSION
773774
ruby 2.5.3p105
774775

775776
BUNDLED WITH
776-
1.16.6
777+
1.17.3

lib/mastodon/domains_cli.rb

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# frozen_string_literal: true
22

3+
require 'concurrent'
34
require_relative '../../config/boot'
45
require_relative '../../config/environment'
56
require_relative 'cli_helper'
@@ -32,5 +33,122 @@ def purge(domain)
3233
say
3334
say("Removed #{removed} accounts#{dry_run}", :green)
3435
end
36+
37+
option :concurrency, type: :numeric, default: 50, aliases: [:c]
38+
option :silent, type: :boolean, default: false, aliases: [:s]
39+
option :format, type: :string, default: 'summary', aliases: [:f]
40+
desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
41+
long_desc <<-LONG_DESC
42+
Crawl the fediverse by using the Mastodon REST API endpoints that expose
43+
all known peers, and collect statistics from those peers, as long as those
44+
peers support those API endpoints. When no START is given, the command uses
45+
this server's own database of known peers to seed the crawl.
46+
47+
The --concurrency (-c) option controls the number of threads performing HTTP
48+
requests at the same time. More threads means the crawl may complete faster.
49+
50+
The --silent (-s) option controls progress output.
51+
52+
The --format (-f) option controls how the data is displayed at the end. By
53+
default (`summary`), a summary of the statistics is returned. The other options
54+
are `domains`, which returns a newline-delimited list of all discovered peers,
55+
and `json`, which dumps all the aggregated data raw.
56+
LONG_DESC
57+
def crawl(start = nil)
58+
stats = Concurrent::Hash.new
59+
processed = Concurrent::AtomicFixnum.new(0)
60+
failed = Concurrent::AtomicFixnum.new(0)
61+
start_at = Time.now.to_f
62+
seed = start ? [start] : Account.remote.domains
63+
64+
pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)
65+
66+
work_unit = ->(domain) do
67+
next if stats.key?(domain)
68+
stats[domain] = nil
69+
processed.increment
70+
71+
begin
72+
Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
73+
next unless res.code == 200
74+
stats[domain] = Oj.load(res.to_s)
75+
end
76+
77+
Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
78+
next unless res.code == 200
79+
80+
Oj.load(res.to_s).reject { |peer| stats.key?(peer) }.each do |peer|
81+
pool.post(peer, &work_unit)
82+
end
83+
end
84+
85+
Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
86+
next unless res.code == 200
87+
stats[domain]['activity'] = Oj.load(res.to_s)
88+
end
89+
90+
say('.', :green, false) unless options[:silent]
91+
rescue StandardError
92+
failed.increment
93+
say('.', :red, false) unless options[:silent]
94+
end
95+
end
96+
97+
seed.each do |domain|
98+
pool.post(domain, &work_unit)
99+
end
100+
101+
sleep 20
102+
sleep 20 until pool.queue_length.zero?
103+
104+
pool.shutdown
105+
pool.wait_for_termination(20)
106+
ensure
107+
pool.shutdown
108+
109+
say unless options[:silent]
110+
111+
case options[:format]
112+
when 'summary'
113+
stats_to_summary(stats, processed, failed, start_at)
114+
when 'domains'
115+
stats_to_domains(stats)
116+
when 'json'
117+
stats_to_json(stats)
118+
end
119+
end
120+
121+
private
122+
123+
def stats_to_summary(stats, processed, failed, start_at)
124+
stats.compact!
125+
126+
total_domains = stats.size
127+
total_users = stats.reduce(0) { |sum, (_key, val)| val.is_a?(Hash) && val['stats'].is_a?(Hash) ? sum + val['stats']['user_count'].to_i : sum }
128+
total_active = stats.reduce(0) { |sum, (_key, val)| val.is_a?(Hash) && val['activity'].is_a?(Array) && val['activity'].size > 2 && val['activity'][1].is_a?(Hash) ? sum + val['activity'][1]['logins'].to_i : sum }
129+
total_joined = stats.reduce(0) { |sum, (_key, val)| val.is_a?(Hash) && val['activity'].is_a?(Array) && val['activity'].size > 2 && val['activity'][1].is_a?(Hash) ? sum + val['activity'][1]['registrations'].to_i : sum }
130+
131+
say("Visited #{processed.value} domains, #{failed.value} failed (#{(Time.now.to_f - start_at).round}s elapsed)", :green)
132+
say("Total servers: #{total_domains}", :green)
133+
say("Total registered: #{total_users}", :green)
134+
say("Total active last week: #{total_active}", :green)
135+
say("Total joined last week: #{total_joined}", :green)
136+
end
137+
138+
def stats_to_domains(stats)
139+
say(stats.keys.join("\n"))
140+
end
141+
142+
def stats_to_json(stats)
143+
totals.each_key do |domain|
144+
if totals[domain].is_a?(Hash)
145+
totals[domain]['activity'] = stats[domain]
146+
else
147+
totals.delete(domain)
148+
end
149+
end
150+
151+
say(Oj.dump(totals))
152+
end
35153
end
36154
end

0 commit comments

Comments
 (0)