Skip to content

Commit

Permalink
Start redacting sender name and principal name in defamations
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-hank committed Aug 31, 2022
1 parent 2bd01fd commit 9dbe9cc
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 16 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). It uses [CalVer](https://calver.org/) as of May 2019.

## [22.08c](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.08c) - 2022-08-30
### Changed
* Started redacting sender and principal names in defamations.

## [22.08b](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.08b) - 2022-08-26
### Changed
* Started catching more 404 errors.
Expand Down
30 changes: 30 additions & 0 deletions app/models/defamation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,34 @@ def principal_name
def hide_identities?
(recipient_name =~ REDACTION_REGEX).present?
end

# Runs the automatic redactors over this notice and its related records.
#
# On top of the phone number, SSN and email redactors, an entity-name
# redactor is included. The name handed to it (as the :entity_name option)
# is the principal's name, or the sender's name when that is nil.
#
# The notice itself is redacted with the redactor's default field list;
# each work has its description redacted, and every infringing and
# copyrighted URL of each work has its url redacted.
def auto_redact
  redactor_classes = [
    InstanceRedactor::PhoneNumberRedactor,
    InstanceRedactor::SSNRedactor,
    InstanceRedactor::EmailRedactor,
    InstanceRedactor::EntityNameRedactor
  ]

  redactor = InstanceRedactor.new(
    redactor_classes.map(&:new),
    { entity_name: principal&.name || sender&.name }
  )

  redactor.redact(self)

  works.each do |work|
    redactor.redact(work, %w[description])

    # Infringing URLs are handled before copyrighted ones, each list being
    # read only when its turn comes.
    %i[infringing_urls copyrighted_urls].each do |url_list|
      work.public_send(url_list).each do |url|
        redactor.redact(url, %w[url])
      end
    end
  end
end
end
33 changes: 20 additions & 13 deletions app/models/instance_redactor.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
class InstanceRedactor
def initialize(redactors = [PhoneNumberRedactor.new,
SSNRedactor.new,
EmailRedactor.new])
def initialize(
redactors = [
PhoneNumberRedactor.new,
SSNRedactor.new,
EmailRedactor.new
],
options = {}
)
@redactors = redactors
@options = options
end

def redact(instance, field_or_fields = Notice::REDACTABLE_FIELDS)
Expand All @@ -29,7 +35,7 @@ def redact_all(instance_ids,

def redact_field(instance, field, text)
new_text = redactors.inject(text) do |result, redactor|
redactor.redact(result)
redactor.redact(result, @options)
end

return unless new_text != text
Expand Down Expand Up @@ -88,7 +94,7 @@ def initialize(string_or_regex)
@string_or_regex = string_or_regex
end

def redact(content)
def redact(content, _options = {})
content.gsub(@string_or_regex) { |s| STOP_WORDS.include?(s) ? s : mask }
end

Expand All @@ -98,7 +104,7 @@ def mask
end

class PhoneNumberRedactor
def redact(text)
def redact(text, _options = {})
redactor = ContentRedactor.new(
/(\()?\b(\(?\d{3}\)?.?)? # optional area code
(\d{3}[^\d]?\d{4})| # phone number, optional single-char separator
Expand All @@ -111,7 +117,7 @@ def redact(text)
end

class SSNRedactor
def redact(text)
def redact(text, _options = {})
redactor = ContentRedactor.new(
/\b(\d{3})\D?(\d{2})\D?(\d{4})\b/x
)
Expand All @@ -121,7 +127,7 @@ def redact(text)
end

class EmailRedactor
def redact(text)
def redact(text, _options = {})
redactor = ContentRedactor.new(
/\S+@\S+\.\S+[^.\s]/i
)
Expand All @@ -131,15 +137,16 @@ def redact(text)
end

class EntityNameRedactor
def initialize(name)
match = name.gsub(/[-*+?\d]/, ' ').strip.split(/\s+/)
separator = (name =~ /[a-z]/mi ? '[^a-z]' : '\s')
def redact(text, options = {})
return text unless options[:entity_name].present?

match = options[:entity_name].gsub(/[^'0-9A-Za-z ]/, '').gsub(/[-*+?\d]/, ' ').strip.split(/\s+/)
separator = (options[:entity_name] =~ /[a-z]/mi ? '[^a-z]' : '\s')
@regex_base = "(?:#{match.join('|')})(?:#{separator}*(?:#{match.join('|')}))*"
@regex = /#{@regex_base}/mi
end

def redact(text)
return text if @regex_base.blank?

redactor = ContentRedactor.new(@regex)

redactor.redact(text)
Expand Down
1 change: 0 additions & 1 deletion spec/integration/faceted_notice_search_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
end

open_dropdown_for_facet('date_received_facet')
page.save_screenshot(full: true)
facet = page.find('ol.date_received_facet li:nth-child(3)').text

within_faceted_search_results_for('title', :date_received_facet, facet) do
Expand Down
109 changes: 109 additions & 0 deletions spec/integration/submit_notice_via_api_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,102 @@
expect(curb.response_code).to eq 400
end

context 'redacting defamations' do
# Submits a Defamation notice through the API with a recipient and a sender
# entity but no principal, and expects the sender's name to be replaced by
# [REDACTED] in the stored values.
scenario 'sender name is redacted if there is no principal provided' do
original_text = <<-BODY
My name is Tonny Bastet, do something please!!!
BODY
redacted_text = <<-BODY
My name is [REDACTED], do something please!!!
BODY
# The URL spells the sender's name with an underscore rather than a space;
# the expected value has the whole "tonny_bastet" segment masked at once.
original_infringing_url = 'http://disney.com/tonny_bastet.mp4'
redacted_infringing_url = 'http://disney.com/[REDACTED].mp4'

# The same text is sent as both the notice body and the work description,
# so one pair of original/redacted strings covers both fields.
parameters = request_hash(
default_notice_hash(
type: 'Defamation',
body: original_text,
entity_notice_roles_attributes: [
{
name: 'recipient',
entity_attributes: {
name: 'The Googs'
}
},
{
name: 'sender',
entity_attributes: {
name: 'Tonny Bastet'
}
}
],
works_attributes: [
{
description: original_text,
infringing_urls_attributes: [
{
url: original_infringing_url
}
]
}
]
)
)

# Posts the notice and checks the redacted and *_original values of the
# body, the first work's description and its first infringing URL.
submit_and_test_defamation_redaction(parameters, original_text, redacted_text, original_infringing_url, redacted_infringing_url)
end

# Submits a Defamation notice through the API with recipient, sender and
# principal entities, and expects only the principal's name to be replaced
# by [REDACTED] in the stored values.
scenario 'principal name is redacted if there is principal provided' do
# The text mentions both the sender (John Bastet) and the principal
# (Tonny Kokosh); the expected redacted text keeps the sender's name intact.
original_text = <<-BODY
My name not John Bastet, it's Tonny Kokosh, do something please!!!
BODY
redacted_text = <<-BODY
My name not John Bastet, it's [REDACTED], do something please!!!
BODY
# The URL spells the principal's name with an underscore rather than a
# space; the expected value has the whole "tonny_kokosh" segment masked.
original_infringing_url = 'http://disney.com/tonny_kokosh.mp4'
redacted_infringing_url = 'http://disney.com/[REDACTED].mp4'

# The same text is sent as both the notice body and the work description,
# so one pair of original/redacted strings covers both fields.
parameters = request_hash(
default_notice_hash(
type: 'Defamation',
body: original_text,
entity_notice_roles_attributes: [
{
name: 'recipient',
entity_attributes: {
name: 'The Googs'
}
},
{
name: 'sender',
entity_attributes: {
name: 'John Bastet'
}
},
{
name: 'principal',
entity_attributes: {
name: 'Tonny Kokosh'
}
}
],
works_attributes: [
{
description: original_text,
infringing_urls_attributes: [
{
url: original_infringing_url
}
]
}
]
)
)

# Posts the notice and checks the redacted and *_original values of the
# body, the first work's description and its first infringing URL.
submit_and_test_defamation_redaction(parameters, original_text, redacted_text, original_infringing_url, redacted_infringing_url)
end
end

private

def original_document_file(notice)
Expand Down Expand Up @@ -353,4 +449,17 @@ def request_hash(notice_hash, user = create(:user, :submitter))
authentication_token: user.authentication_token
}
end

# Posts +parameters+ to '/notices' and then inspects the last Notice.
#
# For the notice body, the first work's description and that work's first
# infringing URL, the current value must equal the redacted argument and
# the matching *_original attribute must equal the unredacted argument.
def submit_and_test_defamation_redaction(parameters, original_text, redacted_text, original_infringing_url, redacted_infringing_url)
  post_api('/notices', parameters)

  submitted = Notice.last
  expect(submitted.body).to eq redacted_text
  expect(submitted.body_original).to eq original_text

  # Related records are fetched only once the preceding checks have passed,
  # so a failure is reported at the same point as before.
  first_work = submitted.works.first
  expect(first_work.description).to eq redacted_text
  expect(first_work.description_original).to eq original_text

  first_url = first_work.infringing_urls.first
  expect(first_url.url).to eq redacted_infringing_url
  expect(first_url.url_original).to eq original_infringing_url
end
end
4 changes: 2 additions & 2 deletions spec/support/notice_actions.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
require 'support/sign_in'

module NoticeActions
def submit_recent_notice(title = 'A title')
def submit_recent_notice(title = 'A title', type = 'DMCA')
sign_in(create(:user, :submitter))

visit '/notices/new?type=DMCA'
visit "/notices/new?type=#{type}"

fill_in 'Title', with: title
fill_in 'Date received', with: Time.now
Expand Down

0 comments on commit 9dbe9cc

Please sign in to comment.