From 69095dad5ec3809d00e4eff8af8e9a8f324e043e Mon Sep 17 00:00:00 2001
From: Edon Gashi
Date: Tue, 29 Nov 2022 11:12:47 +0100
Subject: [PATCH 1/2] Interface and types for example queries

(cherry picked from commit b7b87bc23d5a1c94b286a97eace70d553582aa63)
---
 src/main/metabase/examples.ts | 66 +++++++++++++++++++++++
 src/main/metabase/types.ts    | 98 +++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)
 create mode 100644 src/main/metabase/examples.ts
 create mode 100644 src/main/metabase/types.ts

diff --git a/src/main/metabase/examples.ts b/src/main/metabase/examples.ts
new file mode 100644
index 0000000..90658b5
--- /dev/null
+++ b/src/main/metabase/examples.ts
@@ -0,0 +1,66 @@
+import { Table } from './types';
+
+/** An example query card. */
+type ExampleQuery = {
+  name: string; // Title of card.
+  sql: string; // SQL query.
+  sizeX: number; // Grid of 18 units wide.
+  sizeY: number; // Height of card in units.
+  display: 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.
+  visualizationSettings: Record<string, unknown>; // To be typed later.
+
+  // There's also row/col properties, but we'll make some rectangle
+  // packing algorithm to arrange cards automatically in a section.
+};
+
+/** A section for a group of examples. */
+type ExamplesSection = {
+  title: string | null; // Markdown text as the section heading.
+  queries: ExampleQuery[]; // Cards in section.
+};
+
+function lines(...lines: string[]) {
+  return lines.join('\n');
+}
+
+export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
+  const { fields, display_name } = table; // TODO: iterate and inspect fields
+
+  let name = table.name;
+  // if (requiresQuoting(name)) {
+  //   name = `"${name}"`
+  // }
+
+  // const t = getT('example-queries'); // Let's worry about i18n later...
+
+  return [
+    {
+      title: 'Overview',
+      queries: [
+        {
+          name: `Count of ${display_name}`,
+          sql: lines('SELECT count(*)', `FROM ${name}`),
+          sizeX: 6, // 6 is a good default (3 cards per row).
+          sizeY: 4, // 4 is a good default.
+          display: 'scalar',
+          visualizationSettings: {}, // No visualizations for now. To be done after we finish SQL.
+        },
+      ],
+    },
+    {
+      // GROUP BY examples
+      title: `Distribution of ${display_name}`,
+      queries: [
+        {
+          name: `${display_name} by `,
+          sql: lines('SELECT , count(*)', `FROM ${name}`, 'GROUP BY '),
+          sizeX: 6,
+          sizeY: 4, // For a table we might need something taller.
+          display: 'table', // For now we show results only as 'table'.
+          visualizationSettings: {},
+        },
+      ],
+    },
+    // ...
+  ];
+}
diff --git a/src/main/metabase/types.ts b/src/main/metabase/types.ts
new file mode 100644
index 0000000..de0f258
--- /dev/null
+++ b/src/main/metabase/types.ts
@@ -0,0 +1,98 @@
+/*
+ * API response types extracted from sample responses and metabase source.
+ * Fields that we don't care about are commented out or marked as unknown.
+ */
+
+type ISO8601Time = string;
+
+export type Table = {
+  description: string;
+  entity_type: string;
+  schema: string;
+  // db: unknown;
+  show_in_getting_started: boolean;
+  name: string;
+  fields: Field[];
+  // caveats: unknown;
+  // segments: unknown[];
+  dimension_options: Record<string, unknown>;
+  updated_at: ISO8601Time;
+  active: boolean;
+  id: number;
+  db_id: number;
+  // visibility_type: unknown;
+  // field_order: unknown;
+  initial_sync_status: string;
+  display_name: string;
+  // metrics: unknown[];
+  created_at: ISO8601Time;
+  // points_of_interest: unknown;
+};
+
+export type TextFieldFingerprint = {
+  'percent-json': number;
+  'percent-url': number;
+  'percent-email': number;
+  'percent-state': number;
+  'average-length': number;
+};
+
+export type NumberFieldFingerprint = {
+  avg: number;
+  max: number;
+  min: number;
+  q1: number;
+  q3: number;
+  sd: number;
+};
+
+export type DateTimeFieldFingerprint = {
+  earliest: ISO8601Time;
+  latest: ISO8601Time;
+};
+
+export interface FieldFingerprint {
+  global: {
+    'distinct-count'?: number;
+    'nil%': number;
+  };
+  type?: {
+    'type/Text'?: TextFieldFingerprint;
+    'type/Number'?: NumberFieldFingerprint;
+    'type/DateTime'?: DateTimeFieldFingerprint;
+  };
+}
+
+export type Field = {
+  description: string | null;
+  database_type: string; // See https://github.com/metabase/metabase/blob/master/src/metabase/driver/postgres.clj#L504-L566
+  semantic_type: string | null; // See https://github.com/metabase/metabase/blob/master/shared/src/metabase/types.cljc
+  // coercion_strategy: unknown;
+  name: string;
+  fingerprint_version: number;
+  // has_field_values: string;
+  // settings: unknown;
+  // caveats: unknown;
+  // fk_target_field_id: unknown;
+  // dimensions: unknown[];
+  dimension_options: string[];
+  updated_at: ISO8601Time;
+  // custom_position: number;
+  effective_type: string;
+  active: boolean;
+  // nfc_path: unknown;
+  // parent_id: unknown;
+  id: number;
+  last_analyzed: ISO8601Time;
+  position: number;
+  visibility_type: 'details-only' | 'hidden' | 'normal' | 'retired';
+  // default_dimension_option: unknown;
+  // target: unknown;
+  preview_display: boolean;
+  display_name: string;
+  database_position: number;
+  fingerprint: FieldFingerprint;
+  created_at: ISO8601Time;
+  base_type: string;
+  // points_of_interest: unknown;
+};

From 1c5792093fc81dcfc2c5d720e5cc0900d49c1f4d Mon Sep 17 00:00:00 2001
From: pdobacz <5735525+pdobacz@users.noreply.github.com>
Date: Tue, 29 Nov 2022 18:02:05 +0100
Subject: [PATCH 2/2] Untested SQL examples draft

---
 src/main/metabase/api.ts      |   6 +-
 src/main/metabase/examples.ts | 152 ++++++++++++++++++++++++++++------
 src/shared/utils.ts           |   5 +
 3 files changed, 137 insertions(+), 26 deletions(-)

diff --git a/src/main/metabase/api.ts b/src/main/metabase/api.ts
index 7aff9c1..da1e5ff 100644
--- a/src/main/metabase/api.ts
+++ b/src/main/metabase/api.ts
@@ -1,5 +1,5 @@
 import { ClientRequestConstructorOptions, net } from 'electron';
-import { isPostgresIdentifier } from '../../shared';
+import { postgresQuote } from '../../shared';
 import { InitialQueryPayloads } from '../../types';
 import { metabaseConfig, postgresConfig } from '../config';
 import { getAppLanguage } from '../language';
@@ -29,10 +29,6 @@ function findAnonymizedAccessDbId(databases: Database[]) {
   }
 }
 
-function postgresQuote(name: string) {
-  return isPostgresIdentifier(name) ? name : `"${name}"`;
-}
-
 const sqlHint = `
 -- HINTS
 -- Change, add, or remove columns as desired.
diff --git a/src/main/metabase/examples.ts b/src/main/metabase/examples.ts
index 90658b5..eca5a91 100644
--- a/src/main/metabase/examples.ts
+++ b/src/main/metabase/examples.ts
@@ -1,4 +1,7 @@
-import { Table } from './types';
+import { postgresQuote } from '../../shared';
+import { Field, Table } from './types';
+
+type Display = 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.
 
 /** An example query card. */
 type ExampleQuery = {
@@ -6,7 +9,7 @@ type ExampleQuery = {
   sql: string; // SQL query.
   sizeX: number; // Grid of 18 units wide.
   sizeY: number; // Height of card in units.
-  display: 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.
+  display: Display;
   visualizationSettings: Record<string, unknown>; // To be typed later.
 
   // There's also row/col properties, but we'll make some rectangle
@@ -19,18 +22,127 @@ type ExamplesSection = {
   queries: ExampleQuery[]; // Cards in section.
 };
 
+/** Name and SQL of a generated example, before the card layout fields are attached. */
+type ExampleInfo = {
+  sql: string;
+  name: string;
+};
+
 function lines(...lines: string[]) {
   return lines.join('\n');
 }
 
-export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
-  const { fields, display_name } = table; // TODO: iterate and inspect fields
+/** `Field.database_type` values that are treated as numeric columns. */
+const numberFieldTypes = ['int2', 'int4', 'int8', 'float4', 'float8', 'numeric'];
+
+/** Example that counts rows per value of `column`; used for columns with few distinct values. */
+function rawGroupBySQL(column: string, table: string, displayName: string): ExampleInfo {
+  return {
+    name: `${displayName} by ${column}`,
+    sql: lines(
+      `SELECT ${postgresQuote(column)}, count(*)`,
+      `FROM ${postgresQuote(table)}`,
+      `GROUP BY ${postgresQuote(column)}`,
+    ),
+  };
+}
 
-  let name = table.name;
-  // if (requiresQuoting(name)) {
-  //   name = `"${name}"`
-  // }
+/** Example that counts the distinct values of `column`. */
+function countDistinctSQL(column: string, table: string): ExampleInfo {
+  return {
+    name: `Distinct ${column}`,
+    sql: lines(
+      `SELECT count(distinct ${postgresQuote(column)}) as ${postgresQuote('distinct_' + column)}`,
+      `FROM ${postgresQuote(table)}`,
+    ),
+  };
+}
+
+/** Example that averages the values of `column`. */
+function avgSQL(column: string, table: string): ExampleInfo {
+  return {
+    name: `Average ${column}`,
+    sql: lines(
+      `SELECT avg(${postgresQuote(column)}) as ${postgresQuote('avg_' + column)}`,
+      `FROM ${postgresQuote(table)}`,
+    ),
+  };
+}
+
+/** Example that counts rows per leading-characters bucket of `column`, masking the rest with `*`. */
+function textGeneralizedSQL(column: string, table: string, displayName: string, averageLength: number): ExampleInfo {
+  const nChars = Math.ceil(averageLength / 4);
+  const stars = '*'.repeat(Math.ceil(averageLength - nChars));
+  const bucket = `substring(${postgresQuote(column)}, 1, ${nChars})`;
+
+  return {
+    name: `${displayName} by ${column}`,
+    // The mask must be a quoted SQL string literal; a bare run of `*` would not parse.
+    sql: lines(`SELECT ${bucket} || '${stars}', count(*)`, `FROM ${postgresQuote(table)}`, `GROUP BY ${bucket}`),
+  };
+}
 
+/** Example that counts rows per calendar year extracted from `column`. */
+function yearlyGeneralizedSQL(column: string, table: string, displayName: string): ExampleInfo {
+  const bucket = `extract(year from ${postgresQuote(column)})`;
+
+  return {
+    name: `${displayName} by ${column} year`,
+    sql: lines(
+      `SELECT ${bucket} as ${postgresQuote(column + '_year')}, count(*)`,
+      `FROM ${postgresQuote(table)}`,
+      `GROUP BY ${bucket}`,
+    ),
+  };
+}
+
+/** Chooses the examples for one column from its type and fingerprint; failures are logged and give []. */
+function makeExampleInfos(field: Field, table: Table, aidColumns: string[]): ExampleInfo[] {
+  try {
+    if (field.semantic_type === 'type/PK' || field.database_type === 'serial') {
+      // No sensible example for columns being just row IDs.
+      return [];
+    } else if (aidColumns.includes(field.name)) {
+      // Never SELECT/GROUP BY AIDs directly, also no point in generalizing.
+      return [countDistinctSQL(field.name, table.name)];
+    } else if (field.database_type === 'text' && field.fingerprint) {
+      if (field.fingerprint.global['distinct-count'] && field.fingerprint.global['distinct-count'] < 10) {
+        // Few distinct values - can GROUP BY directly.
+        return [rawGroupBySQL(field.name, table.name, table.display_name)];
+      } else {
+        const averageLength = field.fingerprint.type?.['type/Text']?.['average-length'];
+
+        // The `< 20`: we want to generalize surnames and categories but not sentences, paragraphs or addresses.
+        if (averageLength && averageLength < 20) {
+          return [textGeneralizedSQL(field.name, table.name, table.display_name, averageLength)];
+        } else {
+          return [countDistinctSQL(field.name, table.name)];
+        }
+      }
+    } else if (numberFieldTypes.includes(field.database_type) && field.fingerprint) {
+      if (field.fingerprint.global['distinct-count'] && field.fingerprint.global['distinct-count'] < 10) {
+        // Few distinct values - can GROUP BY directly.
+        return [rawGroupBySQL(field.name, table.name, table.display_name)];
+      } else {
+        // TODO: Construct stable generalization. Temporarily revert to the average.
+        return [avgSQL(field.name, table.name)];
+      }
+    } else if (field.database_type === 'timestamp') {
+      // TODO: using timestamps fingerprint is possible, but we need to pull in some datetime lib.
+      return [yearlyGeneralizedSQL(field.name, table.name, table.display_name)];
+    } else {
+      // Fallback to the count distinct for anything else.
+      return [countDistinctSQL(field.name, table.name)];
+    }
+  } catch (err) {
+    console.warn(`Unable to make example query for ${table.name}, ${field.name}`, err);
+    return [];
+  }
+}
+
+/** Builds the example sections for `table`: a row-count card, then the per-column examples. */
+export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
+  const exampleInfos = table.fields.flatMap((field) => makeExampleInfos(field, table, aidColumns));
   // const t = getT('example-queries'); // Let's worry about i18n later...
 
   return [
@@ -38,8 +150,8 @@ export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSect
       title: 'Overview',
       queries: [
         {
-          name: `Count of ${display_name}`,
-          sql: lines('SELECT count(*)', `FROM ${name}`),
+          name: `Count of ${table.display_name}`,
+          sql: lines('SELECT count(*)', `FROM ${postgresQuote(table.name)}`),
           sizeX: 6, // 6 is a good default (3 cards per row).
           sizeY: 4, // 4 is a good default.
           display: 'scalar',
@@ -48,19 +160,17 @@ export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSect
       ],
     },
     {
-      // GROUP BY examples
-      title: `Distribution of ${display_name}`,
-      queries: [
-        {
-          name: `${display_name} by `,
-          sql: lines('SELECT , count(*)', `FROM ${name}`, 'GROUP BY '),
+      title: `Overview of ${table.display_name} columns`,
+      queries: exampleInfos.map(({ name, sql }) => {
+        return {
+          name: name,
+          sql: sql,
           sizeX: 6,
-          sizeY: 4, // For a table we might need something taller.
-          display: 'table', // For now we show results only as 'table'.
+          sizeY: 4, // TODO: For a table we might need something taller.
+          display: 'table' as Display, // For now we show results only as 'table'.
           visualizationSettings: {},
-        },
-      ],
+        };
+      }),
     },
-    // ...
   ];
 }
diff --git a/src/shared/utils.ts b/src/shared/utils.ts
index c64fac8..088fdb0 100644
--- a/src/shared/utils.ts
+++ b/src/shared/utils.ts
@@ -71,3 +71,8 @@ const tableNameRE = /^[a-z_][a-z0-9$_]*$/;
 export function isPostgresIdentifier(name: string): boolean {
   return !postgresReservedKeywords.includes(name) && tableNameRE.test(name);
 }
+
+/** Returns `name` usable as a Postgres identifier: double-quoted, with embedded `"` doubled, when needed. */
+export function postgresQuote(name: string): string {
+  return isPostgresIdentifier(name) ? name : `"${name.replace(/"/g, '""')}"`;
+}