Skip to content

Commit

Permalink
Fix UTF-8 invalid byte sequence exceptions in web/email_addresses (closes #141).
Browse files Browse the repository at this point in the history

* This allows `web/email_addresses` to scan binary response bodies, such as
  images, for email addresses without raising invalid byte sequence errors.
  • Loading branch information
postmodern committed Jul 15, 2024
1 parent 7f0adb2 commit e9b16c5
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 6 deletions.
9 changes: 7 additions & 2 deletions lib/ronin/recon/builtin/web/email_addresses.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,14 @@ class EmailAddresses < WebWorker
#
def process(url)
if (body = url.body)
email_pattern = Ronin::Support::Text::Patterns::EMAIL_ADDRESS
if body.encoding == Encoding::ASCII_8BIT
# forcibly convert and scrub binary data into UTF-8 data
body = body.dup
body.force_encoding(Encoding::UTF_8)
body.scrub!
end

body.force_encoding(Encoding::UTF_8).scan(email_pattern) do |email|
body.scan(Support::Text::Patterns::EMAIL_ADDRESS) do |email|
yield EmailAddress.new(email)
end
end
Expand Down
40 changes: 36 additions & 4 deletions spec/builtin/web/email_addresses_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,23 @@
describe "#process" do
context "when URL #body exists" do
context "and email is present" do
let(:email_address1) { '[email protected]' }
let(:email_address2) { '[email protected]' }
let(:body) do
<<~HTML
<html>
<body>
<p>[email protected]</p>
<p>[email protected]</p>
<p>#{email_address1}</p>
<p>#{email_address2}</p>
</body>
</html>
HTML
end
let(:url) { Ronin::Recon::Values::URL.new("example.com", body: body) }
let(:expected_emails) do
[
Ronin::Recon::Values::EmailAddress.new("[email protected]"),
Ronin::Recon::Values::EmailAddress.new("[email protected]")
Ronin::Recon::Values::EmailAddress.new(email_address1),
Ronin::Recon::Values::EmailAddress.new(email_address2)
]
end

Expand All @@ -33,6 +35,36 @@

expect(yielded_values).to eq(expected_emails)
end

context "but the URL #body is binary data" do
let(:body) { super().encode(Encoding::ASCII_8BIT) }

it "must convert the #body into a UTF-8 String" do
yielded_values = []

subject.process(url) do |value|
yielded_values << value
end

expect(yielded_values).to eq(expected_emails)
end

context "and it contains invalid UTF-8 byte-sequences" do
let(:body) do
"\xfe\xff#{email_address1}\xfe\xff#{email_address2}\xfe\xff".b
end

it "must ignore any invalid UTF-8 byte sequences and only yield email address values" do
yielded_values = []

subject.process(url) do |value|
yielded_values << value
end

expect(yielded_values).to eq(expected_emails)
end
end
end
end

context "and email is not present" do
Expand Down

0 comments on commit e9b16c5

Please sign in to comment.