From bc22ab034bb9bbb48fb2cc93c1a617cbcd14ff9d Mon Sep 17 00:00:00 2001
From: multiple creatures
Date: Fri, 19 Jul 2019 10:16:33 -0500
Subject: [PATCH] move query string sanitizer to its own module & sanitize link cards as well

---
 app/helpers/url_helper.rb               | 37 +++++++++++++++++++++++++
 app/lib/sanitize_config.rb              | 33 +++------
 app/services/fetch_link_card_service.rb |  5 +++-
 3 files changed, 45 insertions(+), 30 deletions(-)
 create mode 100644 app/helpers/url_helper.rb

diff --git a/app/helpers/url_helper.rb b/app/helpers/url_helper.rb
new file mode 100644
index 000000000..dd1cf264c
--- /dev/null
+++ b/app/helpers/url_helper.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+# URL clean-up shared by the HTML sanitizer and the link card fetcher.
+module UrlHelper
+  # A parameter is dropped when its lowercased name starts with one of these...
+  TRACKING_PARAM_PREFIXES = %w(_hs ic mc_ mkt_ ns_ sr_ utm vero_ nr_ ref).freeze
+  # ...or equals one of these (names containing "track" are dropped too).
+  TRACKING_PARAM_NAMES = %w(fbclid gclid ncid ocid r spm).freeze
+
+  # Strips tracking parameters from the query string of +url+.
+  #
+  # Surviving pairs are passed through verbatim and in order rather than being
+  # decoded and re-encoded, so valueless keys such as "?download" are kept.
+  # When nothing survives the query is removed, leaving no dangling "?".
+  #
+  # @param url [String, nil] the URL to clean
+  # @return [String, nil] cleaned URL; nil if +url+ is blank; '#' if unparsable
+  def sanitize_query_string(url)
+    return if url.blank?
+    uri = Addressable::URI.parse(url)
+    return uri.to_s if uri.query.blank?
+    # Split on both separators that CGI.parse recognises.
+    kept = uri.query.split(/[&;]/).reject { |pair| tracking_param?(pair) }
+    uri.query = kept.join('&').presence
+    uri.to_s
+  rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
+    '#'
+  end
+
+  private
+
+  # Tells whether a raw "key=value" pair names a tracking parameter.
+  def tracking_param?(pair)
+    key = CGI.unescape(pair.split('=', 2).first.to_s).downcase
+    key.start_with?(*TRACKING_PARAM_PREFIXES) || key.include?('track') || TRACKING_PARAM_NAMES.include?(key)
+  end
+end
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
index c3b0504e4..38dcd87b7 100644
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class Sanitize
+  extend UrlHelper
+
   module Config
     HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze
 
@@ -91,35 +93,8 @@ class Sanitize
       node = env[:node]
       ['href', 'src', 'cite'].each do |attr|
         next if node[attr].blank?
-        url = Addressable::URI.parse(node[attr])
-        next if url.query.blank?
-        params = CGI.parse(url.query)
-        params.delete_if do |key|
-          k = key.downcase
-          next true if k.start_with?(
-            '_hs',
-            'ic',
-            'mc_',
-            'mkt_',
-            'ns_',
-            'sr_',
-            'utm',
-            'vero_',
-            'nr_',
-            'ref',
-          )
-          next true if 'track'.in?(k)
-          next true if [
-            'fbclid',
-            'gclid',
-            'ncid',
-            'ocid',
-            'r',
-            'spm',
-          ].include?(k)
-          false
-        end
-        url.query = URI.encode_www_form(params)
+        url = Sanitize::sanitize_query_string(node[attr])
+        next if url.blank?
         node[attr] = url
       end
     end
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 494aaed75..561daf6e1 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 class FetchLinkCardService < BaseService
+  include UrlHelper
+
   URL_PATTERN = %r{
     (                                                                                                 #   $1 URL
       (https?:\/\/)                                                                                   #   $2 Protocol (required)
@@ -17,7 +19,8 @@ class FetchLinkCardService < BaseService
 
     return if @url.nil? || @status.preview_cards.any?
 
-    @url = @url.to_s
+    @url = sanitize_query_string(@url.to_s)
+    return if @url.nil?
 
     RedisLock.acquire(lock_options) do |lock|
       if lock.acquired?