From d21e3e5bddb24ea83ee9587f802acb5678d03c6c Mon Sep 17 00:00:00 2001 From: Peter Hankiewicz Date: Mon, 5 Sep 2022 14:41:43 +0200 Subject: [PATCH] Introduce special domains --- CHANGELOG.md | 4 ++ app/helpers/notices_helper.rb | 7 +++ app/models/content_filter.rb | 2 - app/models/special_domain.rb | 13 +++++ app/models/work.rb | 46 ++++++++------- app/serializers/notice_serializer.rb | 4 +- app/serializers/trademark_serializer.rb | 2 +- app/views/notices/_works_urls.html.erb | 12 ++-- .../20220901192200_create_special_domains.rb | 11 ++++ db/schema.rb | 10 +++- spec/controllers/notices_controller_spec.rb | 2 +- spec/integration/api_notice_search_spec.rb | 4 +- spec/integration/viewing_notices.spec.rb | 58 ++++++++++++++++++- spec/serializers/trademark_serializer_spec.rb | 2 +- 14 files changed, 140 insertions(+), 37 deletions(-) create mode 100644 app/models/special_domain.rb create mode 100644 db/migrate/20220901192200_create_special_domains.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 272f08e50..28d51353d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). It uses [CalVer](https://calver.org/) as of May 2019. +## [22.09](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.09) - 2022-09-05 +### Added +* Introduced special domains. + ## [22.08d](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.08d) - 2022-08-31 ### Fixed * Fixed double-redaction of senders/principals in defamations. diff --git a/app/helpers/notices_helper.rb b/app/helpers/notices_helper.rb index ed2d92b5d..45fc362c7 100644 --- a/app/helpers/notices_helper.rb +++ b/app/helpers/notices_helper.rb @@ -130,6 +130,13 @@ def placeholder_kind(user, role) { selected: user.entity.kind } end + def work_url(url) + return url if SpecialDomain.where('? ~~* domain_name', url).where("why_special ? 'full_urls_only_for_researchers'").none? || + (Current.user && (Current.user.role?(Role.researcher) || Current.user.role?(Role.super_admin))) + + Work.fqdn_from_url(url) + end + private def confidential_order?(notice) diff --git a/app/models/content_filter.rb b/app/models/content_filter.rb index 6d238a0ac..97ac33e05 100644 --- a/app/models/content_filter.rb +++ b/app/models/content_filter.rb @@ -6,8 +6,6 @@ class ContentFilter < ApplicationRecord validates :name, presence: true validates :query, presence: true - serialize :actions, Array - def actions_enum [ ['Full notice version only for Lumen team', :full_notice_version_only_lumen_team] diff --git a/app/models/special_domain.rb b/app/models/special_domain.rb new file mode 100644 index 000000000..6b7c08813 --- /dev/null +++ b/app/models/special_domain.rb @@ -0,0 +1,13 @@ +require 'validates_automatically' + +class SpecialDomain < ApplicationRecord + include ValidatesAutomatically + + validates :domain_name, presence: true + + def why_special_enum + [ + ['Full urls only for researchers', :full_urls_only_for_researchers] + ] + end +end diff --git a/app/models/work.rb b/app/models/work.rb index 8223bb44f..6e12f6a8e 100644 --- a/app/models/work.rb +++ b/app/models/work.rb @@ -41,12 +41,12 @@ def copyrighted_urls_attributes=(urls) self.copyrighted_urls = urls.map { |url| valid_url(CopyrightedUrl, url) }.compact end - def infringing_urls_counted_by_domain - @infringing_urls_counted_by_domain ||= count_by_domain(infringing_urls) + def infringing_urls_counted_by_fqdn + @infringing_urls_counted_by_fqdn ||= count_by_fqdn(infringing_urls) end - def copyrighted_urls_counted_by_domain - @copyrighted_urls_counted_by_domain ||= count_by_domain(copyrighted_urls) + def copyrighted_urls_counted_by_fqdn + @copyrighted_urls_counted_by_fqdn ||= count_by_fqdn(copyrighted_urls) end def force_redactions @@ -71,32 +71,38 @@ def as_json(*) } end + def self.fqdn_from_url(url) + begin + # Valid URIs + uri = Addressable::URI.parse(url) + fqdn = uri.host + rescue Addressable::URI::InvalidURIError + # Invalid URIs + fqdn = url + .split('/')[2] + .split(' ')[0] + .gsub(/^www\./, '') + end + + fqdn + end + # == Private Methods ========================================================= private - def count_by_domain(urls) + def count_by_fqdn(urls) counted_urls = {} urls.each do |url| - begin - # Valid URIs - uri = Addressable::URI.parse(url.url) - domain = uri.host - rescue Addressable::URI::InvalidURIError - # Invalid URIs - domain = url.url - .split('/')[2] - .split(' ')[0] - .gsub(/^www\./, '') - end + fqdn = Work.fqdn_from_url(url.url) - if counted_urls[domain].nil? - counted_urls[domain] = { - domain: domain, + if counted_urls[fqdn].nil? + counted_urls[fqdn] = { + fqdn: fqdn, count: 1 } else - counted_urls[domain][:count] += 1 + counted_urls[fqdn][:count] += 1 end end diff --git a/app/serializers/notice_serializer.rb b/app/serializers/notice_serializer.rb index 76a46e48e..4ac47fd31 100644 --- a/app/serializers/notice_serializer.rb +++ b/app/serializers/notice_serializer.rb @@ -60,8 +60,8 @@ def self.works(object) base_works = object.works.map do |work| { description: work.description, - infringing_urls: work.infringing_urls_counted_by_domain, - copyrighted_urls: work.copyrighted_urls_counted_by_domain + infringing_urls: work.infringing_urls_counted_by_fqdn, + copyrighted_urls: work.copyrighted_urls_counted_by_fqdn } end.as_json end diff --git a/app/serializers/trademark_serializer.rb b/app/serializers/trademark_serializer.rb index 43f7c3102..682f8091f 100644 --- a/app/serializers/trademark_serializer.rb +++ b/app/serializers/trademark_serializer.rb @@ -15,7 +15,7 @@ class TrademarkSerializer < NoticeSerializer object.works.map do |work| { description: work.description, - infringing_urls: work.infringing_urls_counted_by_domain + infringing_urls: work.infringing_urls_counted_by_fqdn } end.as_json end diff --git a/app/views/notices/_works_urls.html.erb b/app/views/notices/_works_urls.html.erb index 4fd8808c1..ca8a7aa17 100644 --- a/app/views/notices/_works_urls.html.erb +++ b/app/views/notices/_works_urls.html.erb @@ -4,7 +4,7 @@ <%= original_title %>
    <% if work.copyrighted_urls.each do |url| %> - <%= content_tag_for(:li, url) do %><%= url.url %><% end %> + <%= content_tag_for(:li, url) do %><%= work_url(url.url) %><% end %> <% end.empty? %> No copyrighted URLs were submitted. <% end %> @@ -17,7 +17,7 @@ <%= infringing_title %>
      <% if work.infringing_urls.each do |url| %> - <%= content_tag_for(:li, url) do %><%= url.url %><% end %> + <%= content_tag_for(:li, url) do %><%= work_url(url.url) %><% end %> <% end.empty? %> No infringing URLs were submitted. <% end %> @@ -29,8 +29,8 @@
      <%= original_title %>
        - <% if work.copyrighted_urls_counted_by_domain.each do |domain| %> -
      1. <%= domain[:domain] %> - <%= domain[:count] %> <%= 'URL'.pluralize(domain[:count]) %>
      2. + <% if work.copyrighted_urls_counted_by_fqdn.each do |fqdn| %> +
      3. <%= fqdn[:fqdn] %> - <%= fqdn[:count] %> <%= 'URL'.pluralize(fqdn[:count]) %>
      4. <% end.empty? %> No copyrighted URLs were submitted. <% end %> @@ -42,8 +42,8 @@
        <%= infringing_title %>
          - <% if work.infringing_urls_counted_by_domain.each do |domain| %> -
        1. <%= domain[:domain] %> - <%= domain[:count] %> <%= 'URL'.pluralize(domain[:count]) %>
        2. + <% if work.infringing_urls_counted_by_fqdn.each do |fqdn| %> +
        3. <%= fqdn[:fqdn] %> - <%= fqdn[:count] %> <%= 'URL'.pluralize(fqdn[:count]) %>
        4. <% end.empty? %> No infringing URLs were submitted. <% end %> diff --git a/db/migrate/20220901192200_create_special_domains.rb b/db/migrate/20220901192200_create_special_domains.rb new file mode 100644 index 000000000..27fef43d1 --- /dev/null +++ b/db/migrate/20220901192200_create_special_domains.rb @@ -0,0 +1,11 @@ +class CreateSpecialDomains < ActiveRecord::Migration[6.1] + def change + create_table :special_domains do |t| + t.string :domain_name + t.text :notes + t.jsonb :why_special + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index e3714c069..13d8f9337 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2022_07_21_141020) do +ActiveRecord::Schema.define(version: 2022_09_01_192200) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -495,6 +495,14 @@ t.index ["user_id"], name: "index_roles_users_on_user_id" end + create_table "special_domains", force: :cascade do |t| + t.string "domain_name" + t.text "notes" + t.jsonb "why_special" + t.datetime "created_at", precision: 6, null: false + t.datetime "updated_at", precision: 6, null: false + end + create_table "token_urls", id: :serial, force: :cascade do |t| t.string "email" t.string "token" diff --git a/spec/controllers/notices_controller_spec.rb b/spec/controllers/notices_controller_spec.rb index 50d30bb5b..3b2a3455f 100644 --- a/spec/controllers/notices_controller_spec.rb +++ b/spec/controllers/notices_controller_spec.rb @@ -147,7 +147,7 @@ json = JSON.parse(response.body)['dmca']['works'][0]['infringing_urls'][0] expect(json).to have_key('count') - expect(json).to have_key('domain') + expect(json).to have_key('fqdn') get :show, params: { id: 1, authentication_token: user.authentication_token, format: :json diff --git a/spec/integration/api_notice_search_spec.rb b/spec/integration/api_notice_search_spec.rb index 7a19faa99..e5e281039 100644 --- a/spec/integration/api_notice_search_spec.rb +++ b/spec/integration/api_notice_search_spec.rb @@ -144,7 +144,7 @@ marks = notice.works.map do |work| { 'description' => work.description, - 'infringing_urls' => work.infringing_urls_counted_by_domain.as_json + 'infringing_urls' => work.infringing_urls_counted_by_fqdn.as_json } end @@ -429,7 +429,7 @@ marks = notice.works.map do |work| { 'description' => work.description, - 'infringing_urls' => work.infringing_urls_counted_by_domain.as_json + 'infringing_urls' => work.infringing_urls_counted_by_fqdn.as_json } end diff --git a/spec/integration/viewing_notices.spec.rb b/spec/integration/viewing_notices.spec.rb index 170e61cef..87251b11b 100644 --- a/spec/integration/viewing_notices.spec.rb +++ b/spec/integration/viewing_notices.spec.rb @@ -222,7 +222,63 @@ expect(page).to have_content('You are not allowed to download this document.') end end - + + context 'special domains' do + scenario 'full_urls_only_for_researchers filter' do + notice = build(:dmca) + notice.works << Work.new( + description: 'lol', + infringing_urls: [ + InfringingUrl.new(url: 'https://this-domain-is-so-special.com/hey-buddy/1122'), + InfringingUrl.new(url: 'https://not-so-special.com/hey-hey/1122') + ] + ) + notice.save! + + SpecialDomain.create!( + domain_name: '%this-domain-is-so-special.com%', + why_special: ['full_urls_only_for_researchers'] + ) + + token_url = TokenUrl.create( + email: 'user@example.com', + notice: notice, + expiration_date: Time.now + LumenSetting.get_i('truncation_token_urls_active_period').seconds + ) + + visit notice_url(notice, access_token: token_url.token) + + expect(page).to have_content('https://not-so-special.com/hey-hey/1122') + expect(page).to have_content('this-domain-is-so-special.com') + expect(page).not_to have_content('https://this-domain-is-so-special.com/hey-buddy/1122') + + user = create(:user, :super_admin) + sign_in(user) + + visit notice_url(notice) + + expect(page).to have_content('https://not-so-special.com/hey-hey/1122') + expect(page).to have_content('https://this-domain-is-so-special.com/hey-buddy/1122') + + sign_out + + user = create(:user, :researcher) + sign_in(user) + + visit notice_url(notice) + + expect(page).to have_content('https://not-so-special.com/hey-hey/1122') + expect(page).to have_content('https://this-domain-is-so-special.com/hey-buddy/1122') + + sign_out + + visit notice_url(notice) + + expect(page).not_to have_content('https://not-so-special.com/hey-hey/1122') + expect(page).not_to have_content('https://this-domain-is-so-special.com/hey-buddy/1122') + end + end + def check_full_works_urls within('#works') do expect(page).to have_content 'http://www.example.com/original_work.pdf' diff --git a/spec/serializers/trademark_serializer_spec.rb b/spec/serializers/trademark_serializer_spec.rb index f066ab9f5..3c12d165f 100644 --- a/spec/serializers/trademark_serializer_spec.rb +++ b/spec/serializers/trademark_serializer_spec.rb @@ -28,7 +28,7 @@ mark = serialized_trademark[:marks].first expect(mark['infringing_urls']).to eq( - work.infringing_urls_counted_by_domain.as_json + work.infringing_urls_counted_by_fqdn.as_json ) end