move query string sanitizer to its own module & sanitize link cards as well

staging
multiple creatures 2019-07-19 10:16:33 -05:00
parent 23c36c2d7c
commit bc22ab034b
3 changed files with 45 additions and 30 deletions

37
app/helpers/url_helper.rb Normal file
View File

@ -0,0 +1,37 @@
# URL clean-up helpers shared by the HTML sanitizer and the link card fetcher.
module UrlHelper
  # Query parameter name prefixes (compared against the downcased name) that
  # mark a parameter as tracking noise.
  TRACKING_PARAM_PREFIXES = %w[_hs ic mc_ mkt_ ns_ sr_ utm vero_ nr_ ref].freeze

  # Query parameter names (compared against the downcased name) that are
  # dropped only on an exact match.
  TRACKING_PARAM_NAMES = %w[fbclid gclid ncid ocid r spm].freeze

  private_constant :TRACKING_PARAM_PREFIXES, :TRACKING_PARAM_NAMES

  # Removes tracking parameters from the query string of +url+.
  #
  # A parameter is dropped when its downcased name starts with one of
  # TRACKING_PARAM_PREFIXES, contains "track", or equals one of
  # TRACKING_PARAM_NAMES. The surviving parameters are re-encoded with
  # URI.encode_www_form, so their escaping may be normalised.
  #
  # @param url [String, nil] the URL to clean
  # @return [String, nil] nil when +url+ is blank; the URL unchanged when it
  #   has no query string; the cleaned URL otherwise; '#' when the URL cannot
  #   be parsed
  def sanitize_query_string(url)
    return if url.blank?

    uri = Addressable::URI.parse(url)
    return uri.to_s if uri.query.blank?

    kept = CGI.parse(uri.query).reject { |name, _values| tracking_param?(name) }

    # CGI.parse maps a bare key ("?flag") to an empty array, which
    # encode_www_form would render as an empty segment ("a=1&&b=2") and lose
    # the key. A nil value makes it emit the bare key instead.
    kept = kept.transform_values { |values| values.empty? ? nil : values }

    query = URI.encode_www_form(kept)

    # An empty-string query would still serialise as a trailing "?"; nil
    # removes the query component entirely.
    uri.query = query.empty? ? nil : query
    uri.to_s
  rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
    '#'
  end

  private

  # Whether the query parameter +name+ looks like a tracking parameter.
  def tracking_param?(name)
    key = name.downcase

    key.start_with?(*TRACKING_PARAM_PREFIXES) ||
      key.include?('track') ||
      TRACKING_PARAM_NAMES.include?(key)
  end
end

View File

@ -1,6 +1,8 @@
# frozen_string_literal: true
class Sanitize
extend UrlHelper
module Config
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze
@ -91,35 +93,8 @@ class Sanitize
node = env[:node]
['href', 'src', 'cite'].each do |attr|
next if node[attr].blank?
url = Addressable::URI.parse(node[attr])
next if url.query.blank?
params = CGI.parse(url.query)
params.delete_if do |key|
k = key.downcase
next true if k.start_with?(
'_hs',
'ic',
'mc_',
'mkt_',
'ns_',
'sr_',
'utm',
'vero_',
'nr_',
'ref',
)
next true if 'track'.in?(k)
next true if [
'fbclid',
'gclid',
'ncid',
'ocid',
'r',
'spm',
].include?(k)
false
end
url.query = URI.encode_www_form(params)
url = Sanitize::sanitize_query_string(node[attr])
next if url.blank?
node[attr] = url
end
end

View File

@ -1,6 +1,8 @@
# frozen_string_literal: true
class FetchLinkCardService < BaseService
include UrlHelper
URL_PATTERN = %r{
( # $1 URL
(https?:\/\/) # $2 Protocol (required)
@ -17,7 +19,8 @@ class FetchLinkCardService < BaseService
return if @url.nil? || @status.preview_cards.any?
@url = @url.to_s
@url = sanitize_query_string(@url.to_s)
return if @url.nil?
RedisLock.acquire(lock_options) do |lock|
if lock.acquired?