Skip to content

Commit

Permalink
analytics/datasets: make query more efficient (#1387)
Browse files Browse the repository at this point in the history
Closes #1388
  • Loading branch information
alxndrsn authored Feb 8, 2025
1 parent a1db5fb commit 2a9e1bc
Showing 1 changed file with 112 additions and 100 deletions.
212 changes: 112 additions & 100 deletions lib/model/query/analytics.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ const { runSequentially } = require('../../util/promise');
const { metricsTemplate } = require('../../data/analytics');
const oidc = require('../../util/oidc');

const DAY_RANGE = 45;
const _cutoffDate = sql`current_date - cast(${DAY_RANGE} as int)`;
const DAY_RANGE = sql`45`;
const _cutoffDate = sql`current_date - ${DAY_RANGE}`;

// Gets a pointer to the project (repeat) in the metrics report for a specific
// project, creating it first from the project metrics template if necessary
Expand Down Expand Up @@ -420,110 +420,122 @@ group by f."projectId"`);

// Datasets
const getDatasets = () => ({ all }) => all(sql`
SELECT
ds.id, ds."projectId", COUNT(DISTINCT e.id) num_entities_total,
COUNT(DISTINCT CASE WHEN e."createdAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_recent,
COUNT(DISTINCT CASE WHEN e."updatedAt" IS NOT NULL THEN e.id END) num_entities_updated_total,
COUNT(DISTINCT CASE WHEN e."updatedAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_updated_recent,
COUNT(DISTINCT fd."formId") num_creation_forms,
COUNT(DISTINCT CASE WHEN f."currentDefId" IS NOT NULL THEN fa."formId" END) num_followup_forms,
MAX(COALESCE(errors.total, 0)) num_failed_entities_total,
MAX(COALESCE(errors.recent, 0)) num_failed_entities_recent,
MAX(COALESCE(updates.total, 0)) num_entity_updates_total,
MAX(COALESCE(updates.recent, 0)) num_entity_updates_recent,
MAX(COALESCE(updates.update_sub_total, 0)) num_entity_updates_sub_total,
MAX(COALESCE(updates.update_sub_recent, 0)) num_entity_updates_sub_recent,
MAX(COALESCE(updates.update_api_total, 0)) num_entity_updates_api_total,
MAX(COALESCE(updates.update_api_recent, 0)) num_entity_updates_api_recent,
MAX(COALESCE(conflict_stats.conflicts, 0)) num_entity_conflicts,
MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved,
MAX(COALESCE(creates.create_sub_total, 0)) num_entity_create_sub_total,
MAX(COALESCE(creates.create_sub_recent, 0)) num_entity_create_sub_recent,
MAX(COALESCE(creates.create_api_total, 0)) num_entity_create_api_total,
MAX(COALESCE(creates.create_api_recent, 0)) num_entity_create_api_recent,
MAX(COALESCE(bulk_creates.total, 0)) num_entity_create_bulk_total,
MAX(COALESCE(bulk_creates.recent, 0)) num_entity_create_bulk_recent
FROM datasets ds
LEFT JOIN entities e ON e."datasetId" = ds.id
LEFT JOIN (dataset_form_defs dfd
JOIN form_defs fd ON fd.id = dfd."formDefId"
) ON dfd."datasetId" = ds.id
LEFT JOIN (form_attachments fa
JOIN forms f ON f.id = fa."formId"
) ON fa."datasetId" = ds.id
LEFT JOIN (
SELECT
COUNT (a.details -> 'submissionId'::TEXT) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
dfd."datasetId"
FROM audits a
JOIN submissions s ON CAST((a.details ->> 'submissionId'::TEXT) AS integer) = s.id
JOIN submission_defs sd ON sd."submissionId" = s.id AND sd."current"
JOIN dataset_form_defs dfd ON sd."formDefId" = dfd."formDefId"
WHERE a."action" = 'entity.error'
GROUP BY dfd."datasetId"
) AS errors ON ds.id = errors."datasetId"
LEFT JOIN (
SELECT
COUNT (1) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL THEN 1 ELSE 0 END) update_sub_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) update_sub_recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL THEN 1 ELSE 0 END) update_api_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) update_api_recent,
e."datasetId"
FROM audits a
JOIN entities e on CAST((a.details ->> 'entityId'::TEXT) AS integer) = e.id
WHERE a."action" = 'entity.update.version'
GROUP BY e."datasetId"
) as updates ON ds.id = updates."datasetId"
LEFT JOIN (
SELECT
COUNT (1) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL THEN 1 ELSE 0 END) create_sub_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) create_sub_recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL THEN 1 ELSE 0 END) create_api_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) create_api_recent,
e."datasetId"
FROM audits a
JOIN entities e on CAST((a.details ->> 'entityId'::TEXT) AS integer) = e.id
WHERE a."action" = 'entity.create'
GROUP BY e."datasetId"
) as creates ON ds.id = creates."datasetId"
LEFT JOIN (
SELECT count(1) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
e."datasetId"
FROM audits a
JOIN entity_def_sources eds on CAST((a.details ->> 'sourceId'::TEXT) AS integer) = eds."id"
JOIN entity_defs ed on ed."sourceId" = eds.id AND root=true
JOIN entities e on ed."entityId" = e.id
WHERE a."action" = 'entity.bulk.create'
GROUP BY e."datasetId"
) as bulk_creates on ds.id = bulk_creates."datasetId"
LEFT JOIN (
SELECT COUNT (1) conflicts,
SUM (CASE WHEN e."conflict" IS NULL THEN 1 ELSE 0 END) resolved,
e."datasetId"
FROM entities e
WHERE e.id IN
(SELECT DISTINCT "entityId" FROM entity_defs WHERE "conflictingProperties" IS NOT NULL)
GROUP BY e."datasetId"
) AS conflict_stats ON ds.id = conflict_stats."datasetId"
WHERE ds."publishedAt" IS NOT NULL
GROUP BY ds.id, ds."projectId"
SELECT ds.id
, ds."projectId"
, COALESCE(entz.num_entities_total , 0) AS num_entities_total
, COALESCE(entz.num_entities_recent , 0) AS num_entities_recent
, COALESCE(entz.num_entities_updated_total , 0) AS num_entities_updated_total
, COALESCE(entz.num_entities_updated_recent, 0) AS num_entities_updated_recent
, COALESCE(num_creation_forms , 0) AS num_creation_forms
, COALESCE(num_followup_forms , 0) AS num_followup_forms
, COALESCE(errors.total , 0) AS num_failed_entities_total
, COALESCE(errors.recent , 0) AS num_failed_entities_recent
, COALESCE(updates.total , 0) AS num_entity_updates_total
, COALESCE(updates.recent , 0) AS num_entity_updates_recent
, COALESCE(updates.update_sub_total , 0) AS num_entity_updates_sub_total
, COALESCE(updates.update_sub_recent , 0) AS num_entity_updates_sub_recent
, COALESCE(updates.update_api_total , 0) AS num_entity_updates_api_total
, COALESCE(updates.update_api_recent , 0) AS num_entity_updates_api_recent
, COALESCE(conflict_stats.conflicts , 0) AS num_entity_conflicts
, COALESCE(conflict_stats.resolved , 0) AS num_entity_conflicts_resolved
, COALESCE(creates.create_sub_total , 0) AS num_entity_create_sub_total
, COALESCE(creates.create_sub_recent , 0) AS num_entity_create_sub_recent
, COALESCE(creates.create_api_total , 0) AS num_entity_create_api_total
, COALESCE(creates.create_api_recent , 0) AS num_entity_create_api_recent
, COALESCE(bulk_creates.total , 0) AS num_entity_create_bulk_total
, COALESCE(bulk_creates.recent , 0) AS num_entity_create_bulk_recent
FROM datasets AS ds
LEFT JOIN (
SELECT COUNT(*) AS num_entities_total
, COUNT("createdAt" >= ${_cutoffDate} OR NULL) AS num_entities_recent
, COUNT("updatedAt" IS NOT NULL OR NULL) AS num_entities_updated_total
, COUNT("updatedAt" >= ${_cutoffDate} OR NULL) AS num_entities_updated_recent
, "datasetId"
FROM entities
GROUP BY "datasetId"
) AS entz ON entz."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT(*) AS conflicts
, COUNT(conflict IS NULL OR NULL) AS resolved
, "datasetId"
FROM entities AS e
WHERE EXISTS (
SELECT 1
FROM entity_defs
WHERE "entityId" = e.id
AND "conflictingProperties" IS NOT NULL
)
GROUP BY "datasetId"
) AS conflict_stats ON conflict_stats."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT(fd."formId") AS num_creation_forms
, dfd."datasetId"
FROM dataset_form_defs AS dfd
JOIN form_defs AS fd ON fd.id = dfd."formDefId"
GROUP BY dfd."datasetId"
) AS fd ON fd."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT("formId") AS num_followup_forms
, "datasetId"
FROM form_attachments
JOIN forms AS f ON f.id = "formId"
WHERE f."currentDefId" IS NOT NULL
GROUP BY "datasetId"
) AS fa ON fa."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT(a.details->'submissionId') AS total
, COUNT(a."loggedAt" >= ${_cutoffDate} OR NULL) AS recent
, dfd."datasetId"
FROM audits AS a
JOIN submissions AS s ON s.id = (a.details->'submissionId')::INT
JOIN submission_defs AS sd ON sd."submissionId" = s.id AND sd.current
JOIN dataset_form_defs AS dfd ON dfd."formDefId" = sd."formDefId"
WHERE a.action = 'entity.error'
GROUP BY dfd."datasetId"
) AS errors ON errors."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT(*) AS total
, COUNT(a."loggedAt" >= ${_cutoffDate} OR NULL) AS recent
, COUNT(a.details->'submissionDefId') AS update_sub_total
, COUNT(a.details->'submissionDefId' IS NOT NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS update_sub_recent
, COUNT(a.details->'submissionDefId' IS NULL OR NULL) update_api_total
, COUNT(a.details->'submissionDefId' IS NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS update_api_recent
, e."datasetId"
FROM audits AS a
JOIN entities AS e ON e.id = (a.details->'entityId')::INT
WHERE a.action = 'entity.update.version'
GROUP BY e."datasetId"
) AS updates ON updates."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT(a.details->'submissionDefId') AS create_sub_total
, COUNT(a.details->'submissionDefId' IS NOT NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS create_sub_recent
, COUNT(a.details->'submissionDefId' IS NULL OR NULL) AS create_api_total
, COUNT(a.details->'submissionDefId' IS NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS create_api_recent
, e."datasetId"
FROM audits AS a
JOIN entities AS e ON e.id = (a.details->'entityId')::INT
WHERE a.action = 'entity.create'
GROUP BY e."datasetId"
) AS creates ON creates."datasetId" = ds.id
LEFT JOIN (
SELECT COUNT(*) AS total
, COUNT(a."loggedAt" >= ${_cutoffDate} OR NULL) AS recent
, e."datasetId"
FROM audits AS a
JOIN entity_def_sources AS eds ON eds.id = (a.details->'sourceId')::INT
JOIN entity_defs AS ed ON ed."sourceId" = eds.id AND root=true
JOIN entities AS e ON e.id = ed."entityId"
WHERE a.action = 'entity.bulk.create'
GROUP BY e."datasetId"
) AS bulk_creates ON bulk_creates."datasetId" = ds.id
WHERE ds."publishedAt" IS NOT NULL
`);

const getDatasetEvents = () => ({ all }) => all(sql`
SELECT
ds.id, ds."projectId",
COUNT (*) num_bulk_create_events_total,
SUM (CASE WHEN audits."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) num_bulk_create_events_recent,
SUM (CASE WHEN audits."loggedAt" >= ${_cutoffDate} THEN 1 ELSE 0 END) num_bulk_create_events_recent,
MAX (CAST(sources."details"->'count' AS integer)) AS biggest_bulk_upload
FROM datasets ds
JOIN audits ON ds."acteeId" = audits."acteeId"
Expand Down

0 comments on commit 2a9e1bc

Please sign in to comment.