From c4600411f73138489e3c3870de1693a297f4fe46 Mon Sep 17 00:00:00 2001 From: multiple creatures Date: Sat, 20 Jul 2019 23:19:26 -0500 Subject: [PATCH] fix anchor tagger filename matching --- app/lib/sanitize_config.rb | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb index cd2299e8b..697d88fd3 100644 --- a/app/lib/sanitize_config.rb +++ b/app/lib/sanitize_config.rb @@ -43,30 +43,28 @@ class Sanitize text = node.text.strip return if href == text - # try to detect filenames - href_filename = href.rpartition('/')[-1] - url_filename = text.rpartition('/')[-1] - - unless href_filename.blank? - if url_filename == href_filename - node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}" - return - end - - # many fedi servers obfuscate media filenames - ext = url_filename.rpartition('.')[-1] - if ext.downcase.in?(MEDIA_EXTENSIONS) && ext == href_filename.rpartition('.')[-1] - node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}" - return - end - end - # strip ellipse & replace keyword search obscuring text = text.sub(/ *(?:\u2026|\.\.\.)\Z/, '').gsub(/ dot /i, '.').gsub(/[\u200b-\u200d\ufeff\u200e\u200f]/, '') # href now matches text without obscuring? return if href == text + # try to detect filenames + href_filename = '/'.in?(href) ? href.rpartition('/')[2] : nil + unless href_filename.blank? + if text == href_filename + node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}" + return + end + + # many fedi servers obfuscate media filenames + ext = text.rpartition('.')[-1] + if ext.downcase.in?(MEDIA_EXTENSIONS) && ext == href_filename.rpartition('.')[2] + node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}" + return + end + end + # grab first url from link text first_url = text.scan(/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*\S*/).first