diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb
new file mode 100644
index 000000000..3d88b01cd
--- /dev/null
+++ b/app/lib/extractor.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Extractor
+ extend Twitter::Extractor
+
+ module_function
+
+ def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
+ return [] unless text =~ Twitter::Regex[:at_signs]
+
+ possible_entries = []
+
+ text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
+ match_data = $LAST_MATCH_INFO
+ after = $'
+ unless after =~ Twitter::Regex[:end_mention_match]
+ start_position = match_data.char_begin(1) - 1
+ end_position = match_data.char_end(1)
+ possible_entries << {
+ screen_name: screen_name,
+ indices: [start_position, end_position],
+ }
+ end
+ end
+
+ if block_given?
+ possible_entries.each do |mention|
+ yield mention[:screen_name], mention[:indices].first, mention[:indices].last
+ end
+ end
+ possible_entries
+ end
+end
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index 6d0828a8d..12b030e11 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -13,11 +13,10 @@ class Formatter
return reformat(status.content) unless status.local?
html = status.text
- html = encode_and_link_urls(html)
+ html = encode_and_link_urls(html, status.mentions)
+
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
- html = link_mentions(html, status.mentions)
- html = link_hashtags(html)
html.html_safe # rubocop:disable Rails/OutputSafety
end
@@ -37,8 +36,6 @@ class Formatter
html = encode_and_link_urls(account.note)
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
- html = link_accounts(html)
- html = link_hashtags(html)
html.html_safe # rubocop:disable Rails/OutputSafety
end
@@ -53,51 +50,66 @@ class Formatter
HTMLEntities.new.encode(html)
end
- def encode_and_link_urls(html)
- entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false)
- entities = entities.sort_by { |entity| entity[:indices].first }
+ # Extracts entities from +html+ with Extractor.extract_entities_with_indices
+ # and hands them to rewrite, which substitutes each one with the block's
+ # result: URLs go to link_to_url, hashtags to link_to_hashtag and mentions
+ # to link_to_mention (passing +mentions+ along, nil by default).
+ # An entity carrying none of those keys makes the block return nil.
+ def encode_and_link_urls(html, mentions = nil)
+ entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false)
- chars = html.to_s.to_char_a
+ rewrite(html.dup, entities) do |entity|
+ if entity[:url]
+ link_to_url(entity)
+ elsif entity[:hashtag]
+ link_to_hashtag(entity)
+ elsif entity[:screen_name]
+ link_to_mention(entity, mentions)
+ end
+ end
+ end
+
+ def rewrite(text, entities)
+ chars = text.to_s.to_char_a
+
+ # sort by start index
+ entities = entities.sort_by do |entity|
+ indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
+ indices.first
+ end
+
+ result = []
+ last_index = entities.reduce(0) do |index, entity|
+ indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
+ result << encode(chars[index...indices.first].join)
+ result << yield(entity)
+ indices.last
+ end
+ result << encode(chars[last_index..-1].join)
+
+ result.flatten.join
+ end
+
+ def link_to_url(entity)
+ normalized_url = Addressable::URI.parse(entity[:url]).normalize
html_attrs = {
target: '_blank',
rel: 'nofollow noopener',
}
- result = ''
-
- last_index = entities.reduce(0) do |index, entity|
- normalized_url = Addressable::URI.parse(entity[:url]).normalize
- indices = entity[:indices]
- result += encode(chars[index...indices.first].join)
- result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs)
- indices.last
- end
- result += encode(chars[last_index..-1].join)
+ Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs)
end
- def link_mentions(html, mentions)
- html.gsub(Account::MENTION_RE) do |match|
- acct = Account::MENTION_RE.match(match)[1]
- mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) }
-
- mention.nil? ? match : mention_html(match, mention.account)
- end
+ def link_to_mention(entity, mentions)
+ acct = entity[:screen_name]
+ return link_to_account(acct) unless mentions
+ mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) }
+ mention ? mention_html(mention.account) : "@#{acct}"
end
- def link_accounts(html)
- html.gsub(Account::MENTION_RE) do |match|
- acct = Account::MENTION_RE.match(match)[1]
- username, domain = acct.split('@')
- domain = nil if TagManager.instance.local_domain?(domain)
- account = Account.find_remote(username, domain)
-
- account.nil? ? match : mention_html(match, account)
- end
+ def link_to_account(acct)
+ username, domain = acct.split('@')
+ domain = nil if TagManager.instance.local_domain?(domain)
+ account = Account.find_remote(username, domain)
+ account ? mention_html(account) : "@#{acct}"
end
- def link_hashtags(html)
- html.gsub(Tag::HASHTAG_RE) do |match|
- hashtag_html(match)
- end
+ # Renders the hashtag text carried by +entity+ through hashtag_html.
+ def link_to_hashtag(entity)
+ tag = entity[:hashtag]
+ hashtag_html(tag)
end
def link_html(url)
@@ -110,12 +122,11 @@ class Formatter
"#{prefix}#{text}#{suffix}"
end
- def hashtag_html(match)
- prefix, _, affix = match.rpartition('#')
- "#{prefix}##{affix}"
+ def hashtag_html(tag)
+ "##{tag}"
end
- def mention_html(match, account)
- "#{match.split('@').first}@#{account.username}"
+ def mention_html(account)
+ "@#{account.username}"
end
end
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index b762907b2..81eaf00e8 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -6,6 +6,10 @@ RSpec.describe Formatter do
let(:local_status) { Fabricate(:status, text: local_text, account: account) }
let(:remote_status) { Fabricate(:status, text: ' Beep boop', uri: 'beepboop', account: account) }
+ # Text made of a bare @username, the same name qualified with @example.com,
+ # local_text followed by a ?x=@username query string, and a #hashtag.
+ let(:local_text_with_mention) { "@#{account.username} @#{account.username}@example.com #{local_text}?x=@#{account.username} #hashtag" }
+ # Status carrying that text together with a single mention of +account+.
+ let(:local_status_with_mention) { Fabricate(:status, text: local_text_with_mention,
+ account: account, mentions: [Fabricate(:mention, account: account)]) }
+
describe '#format' do
subject { Formatter.instance.format(local_status) }
@@ -21,6 +25,18 @@ RSpec.describe Formatter do
expect(subject).to match('http://google.com/')
end
+ # Checks three things on the formatted status: "@username" appears, an href
+ # starting with http://google.com/?x=@username is present (the "@" in the
+ # query string stays inside the URL), and nothing links to
+ # https://example.com/@username.
+ it 'contains a mention' do
+ result = Formatter.instance.format(local_status_with_mention)
+ expect(result).to match "@#{account.username}"
+ expect(result).to match %r{href=\"http://google.com/\?x=@#{account.username}}
+ expect(result).not_to match "href=\"https://example.com/@#{account.username}"
+ end
+
+ # Checks that #hashtag in the text is rendered with a target ending in
+ # /tags/hashtag and the "mention hashtag" class.
+ it 'contains a hashtag' do
+ result = Formatter.instance.format(local_status_with_mention)
+ expect(result).to match("/tags/hashtag\" class=\"mention hashtag\">#hashtag")
+ end
+
context 'matches a stand-alone medium URL' do
let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
it 'has valid url' do
diff --git a/spec/models/account_spec.rb b/spec/models/account_spec.rb
index 157db633a..efd87e871 100644
--- a/spec/models/account_spec.rb
+++ b/spec/models/account_spec.rb
@@ -379,6 +379,10 @@ RSpec.describe Account, type: :model do
it 'does not match URLs' do
expect(subject.match('Check this out https://medium.com/@alice/some-article#.abcdef123')).to be_nil
end
+
+ # Skipped via xit. Presumably the pattern under test still matches "@alice"
+ # inside a query string; confirm before enabling.
+ xit 'does not match URL querystring' do
+ expect(subject.match('https://example.com/?x=@alice')).to be_nil
+ end
end
describe 'validations' do