make anchor tagging check full path
parent
ff75f5ea4b
commit
23c36c2d7c
|
@ -40,22 +40,37 @@ class Sanitize
|
||||||
text = node.text.strip
|
text = node.text.strip
|
||||||
return if href == text
|
return if href == text
|
||||||
|
|
||||||
uri = Addressable::URI.parse(node['href'])
|
# strip ellipsis & replace keyword search obscuring
|
||||||
text.sub!(/ *(?:\u2026|\.\.\.)/, '')
|
text = text.sub(/ *(?:\u2026|\.\.\.)\Z/, '').gsub(/ dot /i, '.').gsub(/[\u200b-\u200d\ufeff\u200e\u200f]/, '')
|
||||||
|
|
||||||
|
# href now matches text without obscuring?
|
||||||
|
return if href == text
|
||||||
|
|
||||||
|
# grab first url from link text
|
||||||
|
first_url = text.scan(/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*\S*/).first
|
||||||
|
|
||||||
|
# if there's no link in the text mark as custom text
|
||||||
|
if first_url.nil?
|
||||||
|
node.inner_html = "\u270d\ufe0f #{node.inner_html}"
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
# strip trailing slashes
|
||||||
|
text.sub!(/\/+\Z/, '')
|
||||||
|
|
||||||
# href starts with link text?
|
# href starts with link text?
|
||||||
return if href.start_with?(text)
|
return if href.start_with?(text)
|
||||||
# shortened href starts with link text?
|
|
||||||
return if (uri.host + uri.path).start_with?(text)
|
|
||||||
# shortened & normalized href starts with link text?
|
|
||||||
return if (uri.normalized_host + uri.normalized_path).start_with?(text)
|
|
||||||
|
|
||||||
# grab first domain from link text
|
# split href into parts & grab shortened href
|
||||||
text = text.downcase.gsub(' dot ', '.')
|
uri = Addressable::URI.parse(node['href'])
|
||||||
first_domain = text.scan(/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*/).first
|
short_href = uri.host + uri.path
|
||||||
|
normalized_short_href = uri.normalized_host + uri.normalized_path
|
||||||
|
|
||||||
|
# shortened href starts with link text?
|
||||||
|
return if short_href.start_with?(text) || normalized_short_href.start_with?(text)
|
||||||
|
|
||||||
# first domain in link text (if there is one) matches href domain?
|
# first domain in link text (if there is one) matches href domain?
|
||||||
if first_domain.nil? || uri.domain == first_domain
|
if short_href == first_url || normalized_short_href == first_url
|
||||||
# link text customized by author
|
# link text customized by author
|
||||||
node.inner_html = "\u270d\ufe0f #{node.inner_html}"
|
node.inner_html = "\u270d\ufe0f #{node.inner_html}"
|
||||||
return
|
return
|
||||||
|
|
Loading…
Reference in New Issue