From f85b11e48ad7d6592e25fde810da3df1d21eaa48 Mon Sep 17 00:00:00 2001 From: Peter Hankiewicz Date: Thu, 2 Jun 2022 11:48:45 +0200 Subject: [PATCH] Start moving taggings to jsonb --- CHANGELOG.md | 4 ++ ...add_json_fields_for_taggings_to_notices.rb | 7 +++ db/schema.rb | 5 +- script/copy_taggings_to_notices_as_jsonb.sh | 57 +++++++++++++++++++ ...jsonb => copy_urls_to_notices_as_jsonb.sh} | 0 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20220601165646_add_json_fields_for_taggings_to_notices.rb create mode 100755 script/copy_taggings_to_notices_as_jsonb.sh rename script/{copy_urls_to_notices_as_jsonb => copy_urls_to_notices_as_jsonb.sh} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index b89131d0b..92f16075c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). It uses [CalVer](https://calver.org/) as of May 2019. +## [22.06](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.06) - 2022-06-02 +### Changed +* Started moving existing taggings to new `jsonb` fields in the `notices` table. + ## [22.05c](https://github.com/berkmancenter/lumendatabase/releases/tag/2022.05c) - 2022-05-31 ### Changed * Added a db constraint to avoid inserting wrong works json data. 
diff --git a/db/migrate/20220601165646_add_json_fields_for_taggings_to_notices.rb b/db/migrate/20220601165646_add_json_fields_for_taggings_to_notices.rb new file mode 100644 index 000000000..86bcb7bff --- /dev/null +++ b/db/migrate/20220601165646_add_json_fields_for_taggings_to_notices.rb @@ -0,0 +1,7 @@ +class AddJsonFieldsForTaggingsToNotices < ActiveRecord::Migration[6.1] + def change + add_column :notices, :tags_json, :jsonb + add_column :notices, :jurisdictions_json, :jsonb + add_column :notices, :regulations_json, :jsonb + end +end diff --git a/db/schema.rb b/db/schema.rb index b393cf8c8..108d6ac77 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2022_05_23_184945) do +ActiveRecord::Schema.define(version: 2022_06_01_165646) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -405,6 +405,9 @@ t.text "local_jurisdiction_laws" t.jsonb "works_json", null: false t.integer "case_id_number" + t.jsonb "tags_json" + t.jsonb "jurisdictions_json" + t.jsonb "regulations_json" t.index ["created_at"], name: "index_notices_on_created_at" t.index ["original_notice_id"], name: "index_notices_on_original_notice_id" t.index ["published"], name: "index_notices_on_published" diff --git a/script/copy_taggings_to_notices_as_jsonb.sh b/script/copy_taggings_to_notices_as_jsonb.sh new file mode 100755 index 000000000..a39117304 --- /dev/null +++ b/script/copy_taggings_to_notices_as_jsonb.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +set -e + +beg=0 +inc=1000 +max=20000000 + +while getopts b:i:m: flag +do + case "${flag}" in + b) beg=${OPTARG};; + i) inc=${OPTARG};; + m) max=${OPTARG};; + esac +done + +for i in $(seq $beg $inc $max); do + j=$(expr $i + $inc) + + cat << EOM +PROCESSING $i UNTIL $j ($(expr 100 '*' $i / $max)%) +EOM + + psql -v ON_ERROR_STOP=1 << EOM +SET enable_hashjoin = false; +SET 
enable_mergejoin = false; +WITH + notice_tags AS ( + SELECT + tg.name AS tag_name, + tgg.context AS context, + tgg.taggable_id + FROM taggings tgg + JOIN tags tg ON tg.id = tgg.tag_id + WHERE tgg.taggable_id >= $i AND tgg.taggable_id < $j + ), + f AS ( + SELECT + notice_tags.taggable_id AS notice_id, + jsonb_agg(tag_name) FILTER (WHERE context = 'tags' AND tag_name IS NOT NULL) AS tags, + jsonb_agg(tag_name) FILTER (WHERE context = 'jurisdictions' AND tag_name IS NOT NULL) AS jurisdictions, + jsonb_agg(tag_name) FILTER (WHERE context = 'regulations' AND tag_name IS NOT NULL) AS regulations + FROM notice_tags + GROUP BY notice_tags.taggable_id + ) + +UPDATE notices n +SET tags_json = COALESCE((SELECT f.tags FROM f WHERE f.notice_id = n.id), '[]'), + jurisdictions_json = COALESCE((SELECT f.jurisdictions FROM f WHERE f.notice_id = n.id), '[]'), + regulations_json = COALESCE((SELECT f.regulations FROM f WHERE f.notice_id = n.id), '[]') +WHERE n.id >= $i AND n.id < $j; + +VACUUM notices; +EOM + +done diff --git a/script/copy_urls_to_notices_as_jsonb b/script/copy_urls_to_notices_as_jsonb.sh similarity index 100% rename from script/copy_urls_to_notices_as_jsonb rename to script/copy_urls_to_notices_as_jsonb.sh