mastodon/app/workers/scheduler/janitor_scheduler.rb

220 lines
6.8 KiB
Ruby

# frozen_string_literal: true
class Scheduler::JanitorScheduler
include Sidekiq::Worker
include BlocklistHelper
include ModerationHelper
include Redisable
MIN_POSTS = 6
sidekiq_options unique: :until_executed, retry: 0
def perform
@account = janitor_account
return if @account.nil?
@exclude_ids = excluded_account_ids.uniq
@exclude_domains = excluded_domains.uniq
@exclude_markov = excluded_accounts_from_env('MARKOV').uniq
prune_deleted_accounts!
suspend_abandoned_accounts!
suspend_spammers!
silence_markov!
import_blocklists!
export_suspensions!
export_activityrelay_config!
prune_database! unless redis.exists('janitor:pune_database')
end
private
def prune_deleted_accounts!
Account.local.where.not(suspended_at: nil).destroy_all
end
def prune_database!
suspended_accounts = Account.where.not(suspended_at: nil).select(:id)
suspended_domains = DomainBlock.suspend.select(:domain)
# remove statuses from suspended accounts missed by SuspendStatusService
# if its sidekiq job crashed
Status.where(account_id: suspended_accounts).in_batches do |status|
BatchedRemoveStatusService.new.call(status)
end
# prune leaves of threads that lost their context after a suspension
# keeping these around eats a pretty good amount of storage
deleted_mentions = Mention.where(account_id: suspended_accounts).select(:status_id)
Status.remote.where(account_id: deleted_mentions).in_batches do |status|
BatchedRemoveStatusService.new.call(status)
end
# remove mention entries that have no status or account attached to them
Mention.where(account_id: nil).in_batches.destroy_all
Mention.where(status_id: nil).in_batches.destroy_all
# remove media attachments that don't belong to any status
MediaAttachment.where(status_id: nil).in_batches.destroy_all
# remove custom emoji from suspended domains missed by SuspendAccountService
CustomEmoji.where(domain: suspended_domains).in_batches.destroy_all
# prune empty tags
Tag.all.find_each { |tag| tag.destroy unless tag.statuses.exists? }
# remove audit log entries with missing context
# we already use LOG_USER to avoid that problem
Admin::ActionLog.where.not(target_id: Account.select(:id)).in_batches.destroy_all
Admin::ActionLog.where.not(account_id: Account.local.select(:id)).in_batches.destroy_all
redis.setex('janitor:prune_database', 1.day, 1)
end
def suspend_abandoned_accounts!
reason = "Appears to be abandoned. Freeing up the username for someone else."
abandoned_accounts.find_each do |account|
account_policy(account.username, nil, :suspend, reason)
end
end
def suspend_spammers!
reason = 'Appears to be a spammer account.'
spammer_accounts.find_each do |spammer|
account_policy(spammer.username, spammer.domain, :suspend, reason)
end
end
def silence_markov!
reason = 'Appears to be a markov bot.'
markov_accounts.find_each do |markov|
account_policy(markov.username, markov.domain, :silence, reason)
end
end
def import_blocklists!
blocks = merged_blocklist.reject { |entry| entry[:domain].in?(@exclude_domains) }
blocks.each do |entry|
next unless domain_exists?(entry[:domain])
block = DomainBlock.create!(entry)
DomainBlockWorker.perform_async(block)
Admin::ActionLog.create(account: @account, action: :create, target: block)
user_friendly_action_log(@account, :create, block)
end
end
def export_suspensions!
outfile = ENV.fetch('JANITOR_BLOCKLIST_OUTPUT', '')
return if outfile.blank?
return unless File.file?(outfile)
File.open(outfile, 'w:UTF-8') do |file|
file.puts(DomainBlock.suspend.pluck(:domain))
end
end
def export_activityrelay_config!
outfile = ENV.fetch('ACTIVITYRELAY_OUTPUT', '')
return if outfile.blank?
return unless File.file?(outfile)
File.open(outfile, 'w:UTF-8') do |file|
formatted_allowlist = allowed_domains.uniq.map { |d| " - '#{d}'" }
formatted_blocklist = DomainBlock.suspend.pluck(:domain).map { |d| " - '#{d}'" }
file.puts('db: relay.jsonld')
file.puts("listen: #{ENV.fetch('ACTIVITYRELAY_LISTEN', '127.0.0.1')}")
file.puts("port: #{ENV.fetch('ACTIVITYRELAY_PORT', '9001')}")
file.puts("note: \"#{ENV.fetch('ACTIVITYRELAY_NOTE', "#{Setting.site_title} relay")}\"")
file.puts('ap:')
file.puts(" host: '#{ENV.fetch('ACTIVITYRELAY_HOST', "relay.#{Rails.configuration.x.local_domain}")}'")
file.puts(' blocked_instances:')
file.puts(formatted_blocklist)
file.puts(" whitelist_enabled: #{ENV.fetch('ACTIVITYRELAY_ALLOWLIST', 'true')}")
file.puts(' whitelist:')
file.puts(formatted_allowlist)
end
end
def spammer_accounts
spammer_ids = spammer_account_ids
Account.reorder(nil).where(id: spammer_ids, suspended_at: nil)
.where.not(id: @exclude_ids)
end
def markov_accounts
Account.reorder(nil).where(silenced_at: nil).where.not(id: @exclude_markov)
.where('username LIKE ? OR note ILIKE ?', '%ebooks%', '%markov%')
end
def abandoned_accounts
Account.reorder(nil).where(id: abandoned_account_ids)
end
def abandoned_users
User.select(:account_id).where('last_sign_in_at < ?', 3.months.ago)
end
def excluded_domains
existing_policy_domains | allowed_domains
end
def allowed_domains
domains_from_account_ids | excluded_from_env('DOMAINS')
end
def abandoned_account_ids
AccountStat.select(:account_id)
.where(account_id: abandoned_users)
.where('statuses_count < ?', MIN_POSTS)
end
def excluded_account_ids
local_account_ids | outgoing_follow_ids | excluded_accounts_from_env('USERNAMES')
end
def spammer_account_ids
post_spammer_ids | card_spammer_ids
end
def existing_policy_domains
DomainBlock.all.pluck(:domain)
end
def domains_from_account_ids
Account.reorder(nil).where(id: @account_ids).pluck(:domain).uniq
end
def local_account_ids
Account.local.reorder(nil).pluck(:id)
end
def outgoing_follow_ids
Account.local.reorder(nil).flat_map { |account| account.following_ids }
end
def post_spammer_ids
Status.with_public_visibility
.reorder(nil)
.where('tsv @@ to_tsquery(?)', 'womenarestupid.site & /blog/:*')
.pluck(:account_id)
end
def card_spammer_ids
PreviewCard.where('url LIKE ? OR title ILIKE ?', '%womenarestupid%', '%womenaredumb%')
.reorder(nil)
.flat_map { |card| card.statuses.pluck(:account_id) }
end
def excluded_accounts_from_env(suffix)
excluded_usernames = ENV.fetch("JANITOR_EXCLUDE_#{suffix.upcase}", '').split
Account.reorder(nil).where(username: excluded_usernames).pluck(:id)
end
def excluded_from_env(suffix)
ENV.fetch("JANITOR_EXCLUDE_#{suffix.upcase}", '').split
end
end