`monsterpit-janitor` is now built in as a sidekiq job (with better code)

staging
multiple creatures 2019-08-03 05:32:49 -05:00
parent 99d1b1ff6f
commit 6613005ae6
3 changed files with 202 additions and 0 deletions

View File

@ -0,0 +1,51 @@
# Collects domain blocks published or announced by other instances so they can
# be imported locally.
#
# Every public method returns an Array of Hashes with at least the keys
# :domain, :severity and :reason; vulpine.club entries also carry
# :reject_media.
module BlocklistHelper
  # Combines every source into one list holding at most one entry per domain.
  #
  # Sources are unioned in order of preference, so when several of them list
  # the same domain the earliest source wins. vulpine.club comes first because
  # its entries have easy-to-parse reason text.
  #
  # @return [Array<Hash>]
  def merged_blocklist
    blocklist = vulpine_club_blocks | dialup_express_blocks | ten_forward_blocks
    blocklist.uniq { |entry| entry[:domain] }
  end

  # Domains named in recent block announcements posted by xenon@sleeping.town.
  #
  # @return [Array<Hash>] empty when that account is not known locally
  def dialup_express_blocks
    announced_blocks(
      'xenon', 'sleeping.town',
      'new <-> dialup <-> express <2> block',
      '(imported from dialup.express)'
    )
  end

  # Domains named in recent moderation announcements posted by
  # guinan@tenforward.social.
  #
  # @return [Array<Hash>] empty when that account is not known locally
  def ten_forward_blocks
    announced_blocks(
      'guinan', 'tenforward.social',
      'ten <-> forward <-> moderation <-> announcement',
      '(imported from ten.forward)'
    )
  end

  # Downloads and parses the blocklist that vulpine.club publishes as YAML.
  #
  # @return [Array<Hash>] empty when the download does not return HTTP 200,
  #   the body is empty, or the document is not a list
  def vulpine_club_blocks
    url = "https://raw.githubusercontent.com/vulpineclub/vulpineclub.github.io/master/_data/blocks.yml"
    body = Request.new(:get, url).perform do |response|
      response.code == 200 ? response.body_with_limit(66.kilobytes) : nil
    end
    return [] unless body.present?

    # The document comes from a remote host, so parse it with safe_load:
    # plain YAML.load may instantiate arbitrary Ruby objects.
    entries = YAML.safe_load(body)
    return [] unless entries.is_a?(Array)

    # Entries that cannot be converted come back as nil and are dropped here,
    # so callers never see nil elements.
    entries.map { |entry| vulpine_block_from(entry) }.compact
  end

  private

  # Scrapes domain-like tokens from the non-reply statuses an account posted
  # within the last two days that match +tsquery+, leaving out domains that
  # already have a domain block.
  #
  # @param username [String] username of the announcing remote account
  # @param domain [String] domain of the announcing remote account
  # @param tsquery [String] PostgreSQL tsquery selecting announcement statuses
  # @param reason [String] reason text stored on every returned entry
  # @return [Array<Hash>] suspend-severity entries; empty if the account is unknown
  def announced_blocks(username, domain, tsquery, reason)
    admin_id = Account.find_remote(username, domain)&.id
    return [] if admin_id.nil?

    connection = ActiveRecord::Base.connection
    sql = <<~SQL
      SELECT unnest(regexp_matches(text, '\\m[\\w\\-]+\\.[\\w\-]+(?:\\.[\\w\\-]+)*', 'g'))
      FROM statuses
      WHERE account_id = #{admin_id.to_i}
        AND NOT reply
        AND created_at >= (NOW() - INTERVAL '2 days')
        AND tsv @@ to_tsquery(#{connection.quote(tsquery)})
      EXCEPT SELECT domain FROM domain_blocks
    SQL

    connection.select_values(sql).map do |name|
      { domain: name, severity: :suspend, reason: reason }
    end
  end

  # Converts one parsed vulpine.club entry into a block Hash.
  #
  # The severity field looks like "silence" or "silence/nomedia": the first
  # part is the severity and a "nomedia" part requests media rejection. A
  # missing or empty severity falls back to :noop.
  #
  # @param entry [Object] one element of the parsed YAML list
  # @return [Hash, nil] nil when the entry is not a mapping or has no domain
  def vulpine_block_from(entry)
    return unless entry.is_a?(Hash)

    domain = entry['domain']
    return if domain.blank?

    flags = entry['severity'].to_s.split('/')
    link = entry['link'].present? ? " (#{entry['link']})" : ''

    {
      domain: domain,
      severity: (flags.first || 'noop').to_sym,
      reject_media: flags.include?('nomedia'),
      reason: "(imported from vulpine.club) #{entry['reason']}#{link}".rstrip,
    }
  end
end

View File

@ -0,0 +1,148 @@
# frozen_string_literal: true
# Scheduled housekeeping job. Acting as the account configured through the
# JANITOR_USER environment variable, it:
#
# 1. destroys local accounts that are already suspended,
# 2. suspends local accounts that look abandoned,
# 3. suspends accounts that posted known spam,
# 4. silences accounts that look like markov bots,
# 5. imports domain blocks from the lists built by BlocklistHelper.
#
# The job does nothing when JANITOR_USER is unset or names no account.
class Scheduler::JanitorScheduler
  include Sidekiq::Worker
  include BlocklistHelper
  include BangtagHelper

  # Accounts with fewer statuses than this count as abandoned once their user
  # has not signed in for three months.
  MIN_POSTS = 6

  sidekiq_options unique: :until_executed, retry: 0

  # Runs every janitor task in order.
  def perform
    @account = admin_account
    return if @account.nil?

    @exclude_ids = excluded_account_ids
    @exclude_domains = excluded_domains
    @exclude_markov = excluded_accounts_from_env('MARKOV')

    prune_deleted_accounts!
    suspend_abandoned_accounts!
    suspend_spammers!
    silence_markov!
    import_blocklists!
  end

  private

  # Destroys every local account that has a suspension timestamp.
  def prune_deleted_accounts!
    Account.local.where.not(suspended_at: nil).destroy_all
  end

  # NOTE(review): account_policy is not defined in this class; presumably it
  # comes from BangtagHelper -- confirm.
  def suspend_abandoned_accounts!
    reason = "Appears to be abandoned. Freeing up the username for someone else."
    abandoned_accounts.find_each do |account|
      account_policy(account.username, nil, :suspend, reason)
    end
  end

  def suspend_spammers!
    reason = 'Appears to be a spammer account.'
    spammer_accounts.find_each do |spammer|
      account_policy(spammer.username, spammer.domain, :suspend, reason)
    end
  end

  def silence_markov!
    reason = 'Appears to be a markov bot.'
    markov_accounts.find_each do |markov|
      account_policy(markov.username, markov.domain, :silence, reason)
    end
  end

  # Creates a domain block for every merged blocklist entry whose domain is
  # not excluded.
  def import_blocklists!
    blocks = merged_blocklist.reject { |entry| entry[:domain].in?(@exclude_domains) }
    blocks.each { |entry| import_block!(entry) }
  end

  # Creates, enqueues and logs a single domain block.
  #
  # Entries that fail validation are logged and skipped, so that one bad entry
  # (the scraped lists can contain tokens that are not real domains) does not
  # abort the rest of the import.
  def import_block!(entry)
    block = DomainBlock.create!(entry)
    # Sidekiq arguments must be simple JSON types: pass the record id, not
    # the record itself.
    DomainBlockWorker.perform_async(block.id)
    Admin::ActionLog.create(account: @account, action: :create, target: block)
    user_friendly_action_log(@account, :create, block)
  rescue ActiveRecord::RecordInvalid => e
    Rails.logger.warn("JanitorScheduler: skipping domain block #{entry[:domain].inspect}: #{e.message}")
  end

  # The account the janitor acts as, looked up by the numeric id stored in
  # JANITOR_USER.
  #
  # @return [Account, nil] nil when the variable is unset, not a number, or
  #   matches no account
  def admin_account
    account_id = ENV.fetch('JANITOR_USER', '').to_i
    return if account_id == 0

    Account.find_by(id: account_id)
  end

  # Not-yet-suspended spammer accounts, minus the excluded ids.
  def spammer_accounts
    Account.reorder(nil)
           .where(id: spammer_account_ids, suspended_at: nil)
           .where.not(id: @exclude_ids)
  end

  # Not-yet-silenced accounts whose username contains "ebooks" or whose note
  # mentions "markov", minus those listed in JANITOR_EXCLUDE_MARKOV.
  def markov_accounts
    Account.reorder(nil)
           .where(silenced_at: nil)
           .where.not(id: @exclude_markov)
           .where('username LIKE ? OR note ILIKE ?', '%ebooks%', '%markov%')
  end

  def abandoned_accounts
    Account.reorder(nil).where(id: abandoned_account_ids)
  end

  # Users whose last sign-in is more than three months old.
  def abandoned_users
    User.select(:account_id).where('last_sign_in_at < ?', 3.months.ago)
  end

  # Domains that must never be blocked automatically: domains that already
  # have a domain block, domains of excluded accounts, and the domains listed
  # in JANITOR_EXCLUDE_DOMAINS.
  def excluded_domains
    existing_policy_domains | domains_from_account_ids | excluded_from_env('DOMAINS')
  end

  # Account ids of abandoned users that have fewer than MIN_POSTS statuses.
  def abandoned_account_ids
    AccountStat.select(:account_id)
               .where(account_id: abandoned_users)
               .where('statuses_count < ?', MIN_POSTS)
  end

  # Ids of all local accounts, of every account a local account follows, and
  # of the accounts listed in JANITOR_EXCLUDE_USERNAMES.
  def excluded_account_ids
    local_account_ids | outgoing_follow_ids | excluded_accounts_from_env('USERNAMES')
  end

  def spammer_account_ids
    post_spammer_ids | card_spammer_ids
  end

  def existing_policy_domains
    DomainBlock.all.pluck(:domain)
  end

  # Distinct non-nil domains of the excluded accounts.
  #
  # Reads @exclude_ids, which #perform assigns before computing the excluded
  # domains; the instance variable previously read here was never assigned,
  # so this list was always empty.
  def domains_from_account_ids
    Account.reorder(nil).where(id: @exclude_ids).distinct.pluck(:domain).compact
  end

  def local_account_ids
    Account.local.reorder(nil).pluck(:id)
  end

  def outgoing_follow_ids
    Account.local.reorder(nil).flat_map(&:following_ids)
  end

  # Account ids of public statuses matching the known spam-site text query.
  def post_spammer_ids
    Status.with_public_visibility
          .reorder(nil)
          .where('tsv @@ to_tsquery(?)', 'womenarestupid.site & /blog/:*')
          .pluck(:account_id)
  end

  # Account ids of statuses attached to preview cards that match the known
  # spam URL or title.
  def card_spammer_ids
    PreviewCard.where('url LIKE ? OR title ILIKE ?', '%womenarestupid%', '%womenaredumb%')
               .reorder(nil)
               .flat_map { |card| card.statuses.pluck(:account_id) }
  end

  # Ids of accounts whose username appears in JANITOR_EXCLUDE_<SUFFIX>.
  #
  # @param suffix [String] variable name suffix, e.g. 'MARKOV'
  # @return [Array<Integer>]
  def excluded_accounts_from_env(suffix)
    Account.reorder(nil).where(username: excluded_from_env(suffix)).pluck(:id)
  end

  # Whitespace-separated values of the JANITOR_EXCLUDE_<SUFFIX> variable.
  #
  # @param suffix [String] variable name suffix, e.g. 'DOMAINS'
  # @return [Array<String>] empty when the variable is unset
  def excluded_from_env(suffix)
    ENV.fetch("JANITOR_EXCLUDE_#{suffix.upcase}", '').split
  end
end

View File

@ -12,6 +12,9 @@
destructing_statuses_scheduler:
every: '1m'
class: Scheduler::DestructingStatusesScheduler
janitor_scheduler:
every: '1h'
class: Scheduler::JanitorScheduler
media_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::MediaCleanupScheduler