-- One row per push event, holding the raw JSON payload alongside a
-- reference back to the originating row in events.
CREATE TABLE pushes (
    id       SERIAL,
    event_id INTEGER NOT NULL,
    payload  JSONB   NOT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (event_id) REFERENCES events (event_id)
);
-- Copy every event of type 'PushEvent' (id and raw payload) into pushes.
INSERT INTO pushes (event_id, payload)
SELECT
    e.event_id,
    e.payload
FROM events AS e
WHERE e.type = 'PushEvent';
-- Inspect pushes.payload before splitting it up.
-- NOTE(review): key_count is defined outside this chunk; presumably it reports
-- how often each JSON key occurs in the column -- confirm against its definition.
SELECT * FROM key_count('pushes', 'payload');
-- NOTE(review): explode_json_column is defined elsewhere. Later statements use
-- pushes.commits, which the CREATE TABLE does not declare, so it presumably adds
-- one column per payload key -- confirm.
SELECT explode_json_column('pushes', 'payload');
-- Assign a concrete SQL type to each scalar column taken from the payload.
-- NOTE(review): set_concrete_type is defined elsewhere; presumably it converts
-- the named column to the given type -- confirm.
SELECT set_concrete_type('pushes', 'before', 'TEXT');
SELECT set_concrete_type('pushes', 'distinct_size', 'INTEGER');
SELECT set_concrete_type('pushes', 'head', 'TEXT');
SELECT set_concrete_type('pushes', 'push_id', 'BIGINT');
SELECT set_concrete_type('pushes', 'ref', 'TEXT');
SELECT set_concrete_type('pushes', 'size', 'INTEGER');
-- Exploratory look at pushes.commits before it is split into its own table.
-- NOTE(review): type_count is defined elsewhere; presumably it reports which
-- JSON value types occur in the column -- confirm.
SELECT * FROM type_count('pushes', 'commits');
-- Element at index 0 of commits for the push with id = 1
-- (assumes commits holds a JSON array).
SELECT commits->0 FROM pushes WHERE id = 1;
-- One result row per element of the commits array for the same push.
SELECT jsonb_array_elements(commits) FROM pushes WHERE id = 1;
-- One row per commit object found in a push, stored as raw JSON and tied
-- back to the owning row in pushes.
CREATE TABLE push_commits (
    id      SERIAL,
    push_id INTEGER NOT NULL,
    commit  JSONB   NOT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (push_id) REFERENCES pushes (id)
);
-- Unnest each push's commits array: one push_commits row per array element.
INSERT INTO push_commits (push_id, commit)
SELECT
    p.id,
    elem.value
FROM pushes AS p
CROSS JOIN LATERAL jsonb_array_elements(p.commits) AS elem (value);
-- One row per distinct commit object, initially kept as raw JSON.
CREATE TABLE commits (
    id      SERIAL,
    payload JSONB NOT NULL,
    PRIMARY KEY (id)
);
-- Load each distinct commit JSON value from push_commits exactly once.
INSERT INTO commits (payload)
SELECT pc.commit
FROM push_commits AS pc
GROUP BY pc.commit;
-- Add a nullable link from push_commits to commits; it is nullable at this
-- point because existing rows have no value for it yet.
ALTER TABLE push_commits
    ADD COLUMN commit_id INTEGER,
    ADD FOREIGN KEY (commit_id) REFERENCES commits (id);
-- Index both tables on the MD5 of the commit JSON text so commits can be
-- matched to push_commits by hash.
--
-- The whole JSONB value is deliberately not indexed directly: a B-tree index
-- entry is limited to roughly a third of a page, so CREATE UNIQUE INDEX on
-- commits (payload) can fail with "index row size exceeds maximum" for large
-- commit objects. The MD5 digest is fixed-length and enforces the same
-- uniqueness (barring hash collisions).
CREATE UNIQUE INDEX ON commits (MD5(payload::TEXT));
CREATE INDEX ON push_commits (MD5(commit::TEXT));
-- Note: we don't need a unique index on push_commits
-- because the same commit
-- can be in multiple pushes.
-- Fill in commit_id for every push_commits row (intentionally no WHERE) by
-- finding the commits row whose payload hashes to the same MD5 as this
-- row's commit JSON.
UPDATE push_commits AS pc
SET commit_id = (
    SELECT c.id
    FROM commits AS c
    WHERE MD5(c.payload::TEXT) = MD5(pc.commit::TEXT)
);
-- Turn push_commits into a pure link table: require commit_id, drop the raw
-- JSON and the surrogate id, and key the table on (push_id, commit_id).
ALTER TABLE push_commits
    ALTER COLUMN commit_id SET NOT NULL,
    DROP COLUMN id,
    DROP COLUMN commit,
    ADD PRIMARY KEY (push_id, commit_id);
-- Inspect commits.payload, then split it into columns and type them.
-- NOTE(review): type_count, key_count, explode_json_column and set_concrete_type
-- are defined outside this chunk; the descriptions here are inferred from how
-- they are used -- confirm against their definitions.
SELECT * FROM type_count('commits', 'payload');
SELECT * FROM key_count('commits', 'payload');
-- Presumably adds one column per payload key to commits.
SELECT explode_json_column('commits', 'payload');
-- Assign a concrete SQL type to each scalar column taken from the payload.
SELECT set_concrete_type('commits', 'distinct', 'BOOLEAN');
SELECT set_concrete_type('commits', 'message', 'TEXT');
SELECT set_concrete_type('commits', 'sha', 'TEXT');
SELECT set_concrete_type('commits', 'url', 'TEXT');
-- We could extract the authors table here.
-- Exploratory look at the author column of commits.
-- NOTE(review): type_count and key_count are defined outside this chunk;
-- presumably they summarize JSON value types and key frequencies -- confirm.
SELECT * FROM type_count('commits', 'author');
SELECT * FROM key_count('commits', 'author');
-- But we won't, as it doesn't cover anything new.
-- Cleanup: remove the raw JSON columns from commits and pushes, then clear
-- the payload of the PushEvent rows in events.
ALTER TABLE commits
    DROP COLUMN payload;

ALTER TABLE pushes
    DROP COLUMN payload,
    DROP COLUMN commits;

UPDATE events
SET payload = NULL
WHERE type = 'PushEvent';