Skip to content

Commit

Permalink
Introduce special domains
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-hank committed Sep 5, 2022
1 parent ef769ef commit d21e3e5
Show file tree
Hide file tree
Showing 14 changed files with 140 additions and 37 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). It uses [CalVer](https://calver.org/) as of May 2019.

## [22.09](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.09) - 2022-09-05
### Added
* Introduced special domains.

## [22.08d](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.08d) - 2022-08-31
### Fixed
* Fixed double-redaction of senders/principals in defamations.
Expand Down
7 changes: 7 additions & 0 deletions app/helpers/notices_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ def placeholder_kind(user, role)
{ selected: user.entity.kind }
end

# Returns the text to display for a work URL.
#
# The full URL is returned unless it matches a SpecialDomain flagged with
# 'full_urls_only_for_researchers'; in that case only the host, as computed
# by Work.fqdn_from_url, is returned. Researchers and super admins always
# get the full URL.
#
# @param url [String] the URL stored on the work
# @return [String, nil] the full URL, or whatever Work.fqdn_from_url
#   returns for a restricted URL
def work_url(url)
  # Check the viewer first: it needs no database access, so privileged users
  # never trigger the per-URL special-domain lookup below.
  viewer = Current.user
  return url if viewer && (viewer.role?(Role.researcher) || viewer.role?(Role.super_admin))

  # First condition: `~~*` is PostgreSQL's ILIKE operator; the URL is matched
  # against the pattern stored in domain_name (bound through the `?`).
  # Second condition: no bind values are passed, so the `?` reaches
  # PostgreSQL untouched as the jsonb "contains this string" operator.
  special_matches = SpecialDomain
                    .where('? ~~* domain_name', url)
                    .where("why_special ? 'full_urls_only_for_researchers'")
  return url if special_matches.none?

  Work.fqdn_from_url(url)
end

private

def confidential_order?(notice)
Expand Down
2 changes: 0 additions & 2 deletions app/models/content_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ class ContentFilter < ApplicationRecord
validates :name, presence: true
validates :query, presence: true

serialize :actions, Array

def actions_enum
[
['Full notice version only for Lumen team', :full_notice_version_only_lumen_team]
Expand Down
13 changes: 13 additions & 0 deletions app/models/special_domain.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
require 'validates_automatically'

# A domain (stored as domain_name) that receives special treatment, with the
# reasons recorded in why_special.
class SpecialDomain < ApplicationRecord
  include ValidatesAutomatically

  validates :domain_name, presence: true

  # Lists the allowed why_special values as [label, value] pairs.
  # NOTE(review): presumably consumed by the admin UI as the choices for
  # why_special - confirm against the admin configuration.
  #
  # @return [Array<Array(String, Symbol)>]
  def why_special_enum
    { 'Full urls only for researchers' => :full_urls_only_for_researchers }.to_a
  end
end
46 changes: 26 additions & 20 deletions app/models/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ def copyrighted_urls_attributes=(urls)
self.copyrighted_urls = urls.map { |url| valid_url(CopyrightedUrl, url) }.compact
end

def infringing_urls_counted_by_domain
@infringing_urls_counted_by_domain ||= count_by_domain(infringing_urls)
def infringing_urls_counted_by_fqdn
@infringing_urls_counted_by_fqdn ||= count_by_fqdn(infringing_urls)
end

def copyrighted_urls_counted_by_domain
@copyrighted_urls_counted_by_domain ||= count_by_domain(copyrighted_urls)
def copyrighted_urls_counted_by_fqdn
@copyrighted_urls_counted_by_fqdn ||= count_by_fqdn(copyrighted_urls)
end

def force_redactions
Expand All @@ -71,32 +71,38 @@ def as_json(*)
}
end

# Extracts the host (fully qualified domain name) from a URL string.
#
# URLs that Addressable can parse yield their host unchanged. When
# Addressable rejects the string, the host is recovered by hand: the segment
# that follows "scheme://" is taken, cut at the first whitespace, and a
# leading "www." is removed.
#
# @param url [String, nil] the URL to inspect
# @return [String, nil] the host, or nil when none can be determined
def self.fqdn_from_url(url)
  # parse can hand back nil (e.g. for a nil url), hence the safe navigation.
  Addressable::URI.parse(url)&.host
rescue Addressable::URI::InvalidURIError
  # Manual fallback for invalid URIs. Any step can come up empty (no
  # "scheme://" part, or an empty host segment), so each one is nil-safe
  # instead of raising NoMethodError.
  url
    .split('/')[2]
    &.split(' ')
    &.first
    &.sub(/\Awww\./, '')
end

# == Private Methods =========================================================
private

def count_by_domain(urls)
def count_by_fqdn(urls)
counted_urls = {}

urls.each do |url|
begin
# Valid URIs
uri = Addressable::URI.parse(url.url)
domain = uri.host
rescue Addressable::URI::InvalidURIError
# Invalid URIs
domain = url.url
.split('/')[2]
.split(' ')[0]
.gsub(/^www\./, '')
end
fqdn = Work.fqdn_from_url(url.url)

if counted_urls[domain].nil?
counted_urls[domain] = {
domain: domain,
if counted_urls[fqdn].nil?
counted_urls[fqdn] = {
fqdn: fqdn,
count: 1
}
else
counted_urls[domain][:count] += 1
counted_urls[fqdn][:count] += 1
end
end

Expand Down
4 changes: 2 additions & 2 deletions app/serializers/notice_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def self.works(object)
base_works = object.works.map do |work|
{
description: work.description,
infringing_urls: work.infringing_urls_counted_by_domain,
copyrighted_urls: work.copyrighted_urls_counted_by_domain
infringing_urls: work.infringing_urls_counted_by_fqdn,
copyrighted_urls: work.copyrighted_urls_counted_by_fqdn
}
end.as_json
end
Expand Down
2 changes: 1 addition & 1 deletion app/serializers/trademark_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class TrademarkSerializer < NoticeSerializer
object.works.map do |work|
{
description: work.description,
infringing_urls: work.infringing_urls_counted_by_domain
infringing_urls: work.infringing_urls_counted_by_fqdn
}
end.as_json
end
Expand Down
12 changes: 6 additions & 6 deletions app/views/notices/_works_urls.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<span class="label original-title"><%= original_title %></span>
<ol class="list original-urls">
<% if work.copyrighted_urls.each do |url| %>
<%= content_tag_for(:li, url) do %><%= url.url %><% end %>
<%= content_tag_for(:li, url) do %><%= work_url(url.url) %><% end %>
<% end.empty? %>
No copyrighted URLs were submitted.
<% end %>
Expand All @@ -17,7 +17,7 @@
<span class="label infringing-title"><%= infringing_title %></span>
<ol class="list infringing-urls">
<% if work.infringing_urls.each do |url| %>
<%= content_tag_for(:li, url) do %><%= url.url %><% end %>
<%= content_tag_for(:li, url) do %><%= work_url(url.url) %><% end %>
<% end.empty? %>
No infringing URLs were submitted.
<% end %>
Expand All @@ -29,8 +29,8 @@
<div class="row">
<span class="label original-title"><%= original_title %></span>
<ol class="list original-urls">
<% if work.copyrighted_urls_counted_by_domain.each do |domain| %>
<li class="copyrighted_url"><%= domain[:domain] %> - <%= domain[:count] %> <%= 'URL'.pluralize(domain[:count]) %></li>
<% if work.copyrighted_urls_counted_by_fqdn.each do |fqdn| %>
<li class="copyrighted_url"><%= fqdn[:fqdn] %> - <%= fqdn[:count] %> <%= 'URL'.pluralize(fqdn[:count]) %></li>
<% end.empty? %>
No copyrighted URLs were submitted.
<% end %>
Expand All @@ -42,8 +42,8 @@
<div class="row">
<span class="label infringing-title"><%= infringing_title %></span>
<ol class="list infringing-urls">
<% if work.infringing_urls_counted_by_domain.each do |domain| %>
<li class="infringing_url"><%= domain[:domain] %> - <%= domain[:count] %> <%= 'URL'.pluralize(domain[:count]) %></li>
<% if work.infringing_urls_counted_by_fqdn.each do |fqdn| %>
<li class="infringing_url"><%= fqdn[:fqdn] %> - <%= fqdn[:count] %> <%= 'URL'.pluralize(fqdn[:count]) %></li>
<% end.empty? %>
No infringing URLs were submitted.
<% end %>
Expand Down
11 changes: 11 additions & 0 deletions db/migrate/20220901192200_create_special_domains.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Creates the special_domains table backing the SpecialDomain model.
class CreateSpecialDomains < ActiveRecord::Migration[6.1]
  def change
    create_table(:special_domains) do |table|
      # Value the URL is matched against.
      table.column :domain_name, :string
      # Free-form notes about the entry.
      table.column :notes, :text
      # Reasons the domain is special, stored as jsonb.
      table.column :why_special, :jsonb

      table.timestamps
    end
  end
end
10 changes: 9 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2022_07_21_141020) do
ActiveRecord::Schema.define(version: 2022_09_01_192200) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand Down Expand Up @@ -495,6 +495,14 @@
t.index ["user_id"], name: "index_roles_users_on_user_id"
end

create_table "special_domains", force: :cascade do |t|
t.string "domain_name"
t.text "notes"
t.jsonb "why_special"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end

create_table "token_urls", id: :serial, force: :cascade do |t|
t.string "email"
t.string "token"
Expand Down
2 changes: 1 addition & 1 deletion spec/controllers/notices_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@

json = JSON.parse(response.body)['dmca']['works'][0]['infringing_urls'][0]
expect(json).to have_key('count')
expect(json).to have_key('domain')
expect(json).to have_key('fqdn')

get :show, params: {
id: 1, authentication_token: user.authentication_token, format: :json
Expand Down
4 changes: 2 additions & 2 deletions spec/integration/api_notice_search_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
marks = notice.works.map do |work|
{
'description' => work.description,
'infringing_urls' => work.infringing_urls_counted_by_domain.as_json
'infringing_urls' => work.infringing_urls_counted_by_fqdn.as_json
}
end

Expand Down Expand Up @@ -429,7 +429,7 @@
marks = notice.works.map do |work|
{
'description' => work.description,
'infringing_urls' => work.infringing_urls_counted_by_domain.as_json
'infringing_urls' => work.infringing_urls_counted_by_fqdn.as_json
}
end

Expand Down
58 changes: 57 additions & 1 deletion spec/integration/viewing_notices.spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,63 @@
expect(page).to have_content('You are not allowed to download this document.')
end
end


# Verifies that URLs on a special domain flagged with
# 'full_urls_only_for_researchers' are shown in full only to researchers and
# super admins, while other viewers see just the host.
context 'special domains' do
scenario 'full_urls_only_for_researchers filter' do
# One work with two infringing URLs: the first is on the domain that is
# registered as special below, the second is not.
notice = build(:dmca)
notice.works << Work.new(
description: 'lol',
infringing_urls: [
InfringingUrl.new(url: 'https://this-domain-is-so-special.com/hey-buddy/1122'),
InfringingUrl.new(url: 'https://not-so-special.com/hey-hey/1122')
]
)
notice.save!

# domain_name is wrapped in % wildcards.
# NOTE(review): presumably because it is matched as a LIKE-style pattern
# against the whole URL - confirm against the helper that renders work URLs.
SpecialDomain.create!(
domain_name: '%this-domain-is-so-special.com%',
why_special: ['full_urls_only_for_researchers']
)

# NOTE(review): the email literal below looks like an email-obfuscation
# artifact of the page this was captured from - confirm the real value
# against the repository.
token_url = TokenUrl.create(
email: '[email protected]',
notice: notice,
expiration_date: Time.now + LumenSetting.get_i('truncation_token_urls_active_period').seconds
)

# Visitor holding a token URL: the ordinary URL is shown in full, the
# special-domain URL only as its host.
visit notice_url(notice, access_token: token_url.token)

expect(page).to have_content('https://not-so-special.com/hey-hey/1122')
expect(page).to have_content('this-domain-is-so-special.com')
expect(page).not_to have_content('https://this-domain-is-so-special.com/hey-buddy/1122')

# Super admin: both URLs are shown in full.
user = create(:user, :super_admin)
sign_in(user)

visit notice_url(notice)

expect(page).to have_content('https://not-so-special.com/hey-hey/1122')
expect(page).to have_content('https://this-domain-is-so-special.com/hey-buddy/1122')

sign_out

# Researcher: both URLs are shown in full.
user = create(:user, :researcher)
sign_in(user)

visit notice_url(notice)

expect(page).to have_content('https://not-so-special.com/hey-hey/1122')
expect(page).to have_content('https://this-domain-is-so-special.com/hey-buddy/1122')

sign_out

# Signed out and without an access token: neither full URL is shown.
# NOTE(review): presumably this viewer only gets the per-domain URL counts -
# confirm against the works URLs partial.
visit notice_url(notice)

expect(page).not_to have_content('https://not-so-special.com/hey-hey/1122')
expect(page).not_to have_content('https://this-domain-is-so-special.com/hey-buddy/1122')
end
end

def check_full_works_urls
within('#works') do
expect(page).to have_content 'http://www.example.com/original_work.pdf'
Expand Down
2 changes: 1 addition & 1 deletion spec/serializers/trademark_serializer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
mark = serialized_trademark[:marks].first

expect(mark['infringing_urls']).to eq(
work.infringing_urls_counted_by_domain.as_json
work.infringing_urls_counted_by_fqdn.as_json
)
end

Expand Down

0 comments on commit d21e3e5

Please sign in to comment.