fix anchor tagger filename matching

2019-07-20 23:19:26 -05:00 · 2019-07-20 23:19:26 -05:00 · c4600411f7
parent 19fc6952b2
commit c4600411f7
1 changed files with 16 additions and 18 deletions
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -43,30 +43,28 @@ class Sanitize
      text = node.text.strip
      return if href == text
      # try to detect filenames
      href_filename = href.rpartition('/')[-1]
      url_filename = text.rpartition('/')[-1]
      unless href_filename.blank?
        if url_filename == href_filename
          node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}"
          return
        end
        # many fedi servers obfuscate media filenames
        ext = url_filename.rpartition('.')[-1]
        if ext.downcase.in?(MEDIA_EXTENSIONS) && ext == href_filename.rpartition('.')[-1]
          node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}"
          return
        end
      end
      # strip trailing ellipsis & undo keyword-search obfuscation (" dot ", zero-width/directional chars)
      text = text.sub(/ *(?:\u2026|\.\.\.)\Z/, '').gsub(/ dot /i, '.').gsub(/[\u200b-\u200d\ufeff\u200e\u200f]/, '')
      # href now matches text without obscuring?
      return if href == text
      # try to detect filenames
      href_filename = '/'.in?(href) ? href.rpartition('/')[2] : nil
      unless href_filename.blank?
        if text == href_filename
          node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}"
          return
        end
        # many fedi servers obfuscate media filenames
        ext = text.rpartition('.')[-1]
        if ext.downcase.in?(MEDIA_EXTENSIONS) && ext == href_filename.rpartition('.')[2]
          node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}"
          return
        end
      end
      # grab first url from link text
      first_url = text.scan(/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*\S*/).first