Skip to content

Commit

Permalink
Untested SQL examples draft
Browse files Browse the repository at this point in the history
  • Loading branch information
pdobacz committed Nov 30, 2022
1 parent 69095da commit 1c57920
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 26 deletions.
6 changes: 1 addition & 5 deletions src/main/metabase/api.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { ClientRequestConstructorOptions, net } from 'electron';
import { isPostgresIdentifier } from '../../shared';
import { postgresQuote } from '../../shared';
import { InitialQueryPayloads } from '../../types';
import { metabaseConfig, postgresConfig } from '../config';
import { getAppLanguage } from '../language';
Expand Down Expand Up @@ -29,10 +29,6 @@ function findAnonymizedAccessDbId(databases: Database[]) {
}
}

/**
 * Quotes `name` for use as a PostgreSQL identifier when needed.
 *
 * Names accepted by `isPostgresIdentifier` are returned unchanged. Anything else is
 * wrapped in double quotes; embedded double quotes are doubled so the result stays a
 * single well-formed quoted identifier (e.g. `a"b` becomes `"a""b"`).
 */
function postgresQuote(name: string) {
  if (isPostgresIdentifier(name)) {
    return name;
  }

  return `"${name.replace(/"/g, '""')}"`;
}

const sqlHint = `
-- HINTS
-- Change, add, or remove columns as desired.
Expand Down
138 changes: 117 additions & 21 deletions src/main/metabase/examples.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import { Table } from './types';
import { postgresQuote } from '../../shared';
import { Field, Table } from './types';

type Display = 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.

/** An example query card. */
type ExampleQuery = {
name: string; // Title of card.
sql: string; // SQL query.
sizeX: number; // Grid of 18 units wide.
sizeY: number; // Height of card in units.
display: 'table' | 'bar' | 'row' | 'scalar' | 'map'; // Other types TBD.
display: Display;
visualizationSettings: Record<string, unknown>; // To be typed later.

// There's also row/col properties, but we'll make some rectangle
Expand All @@ -19,27 +22,122 @@ type ExamplesSection = {
queries: ExampleQuery[]; // Cards in section.
};

/** Title and SQL text of a single generated example, before layout details are attached. */
type ExampleInfo = {
  /** Title shown for the example. */
  name: string;
  /** SQL query text of the example. */
  sql: string;
};

/** Joins the given fragments into one string, separating them with newline characters. */
function lines(...fragments: string[]): string {
  const separator = '\n';
  return fragments.join(separator);
}

export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
const { fields, display_name } = table; // TODO: iterate and inspect fields
const numberFieldTypes = ['int2', 'int4', 'int8', 'float4', 'float8', 'numeric'];

/**
 * Builds an example that counts rows per value of `column`, grouping by the raw
 * (ungeneralized) column.
 *
 * @param column - Column name; quoted with `postgresQuote` in the SQL, shown as-is in the title.
 * @param table - Table name; quoted with `postgresQuote` in the SQL.
 * @param displayName - Table display name used in the example title.
 * @returns The example title and its SQL text.
 */
function rawGroupBySQL(column: string, table: string, displayName: string): ExampleInfo {
  const quotedColumn = postgresQuote(column);

  return {
    name: `${displayName} by ${column}`,
    // Select the per-group row count alongside the column, matching the other GROUP BY examples.
    sql: lines(`SELECT ${quotedColumn}, count(*)`, `FROM ${postgresQuote(table)}`, `GROUP BY ${quotedColumn}`),
  };
}

let name = table.name;
// if (requiresQuoting(name)) {
// name = `"${name}"`
// }
/**
 * Builds an example that counts the distinct values of `column` in `table`.
 * The result column is aliased as `distinct_<column>`.
 */
function countDistinctSQL(column: string, table: string): ExampleInfo {
  const quotedColumn = postgresQuote(column);
  const alias = postgresQuote('distinct_' + column);
  const selectClause = `SELECT count(distinct ${quotedColumn}) as ${alias}`;
  const fromClause = `FROM ${postgresQuote(table)}`;

  return { name: `Distinct ${column}`, sql: lines(selectClause, fromClause) };
}

/**
 * Builds an example that computes the average of `column` over `table`.
 * The result column is aliased as `avg_<column>`.
 */
function avgSQL(column: string, table: string): ExampleInfo {
  const quotedColumn = postgresQuote(column);
  const alias = postgresQuote('avg_' + column);
  const selectClause = `SELECT avg(${quotedColumn}) as ${alias}`;
  const fromClause = `FROM ${postgresQuote(table)}`;

  return { name: `Average ${column}`, sql: lines(selectClause, fromClause) };
}

/**
 * Builds an example that groups a text column by a leading prefix and counts rows per prefix.
 *
 * The prefix length is a quarter of `averageLength` (rounded up). The selected value is the
 * prefix followed by a run of `*` characters standing in for the rest of an average-length value.
 *
 * @param column - Text column name; quoted with `postgresQuote` in the SQL.
 * @param table - Table name; quoted with `postgresQuote` in the SQL.
 * @param displayName - Table display name used in the example title.
 * @param averageLength - Average length of the column's values.
 * @returns The example title and its SQL text.
 */
function textGeneralizedSQL(column: string, table: string, displayName: string, averageLength: number): ExampleInfo {
  const nChars = Math.ceil(averageLength / 4);
  const stars = '*'.repeat(Math.ceil(averageLength - nChars));
  const bucket = `substring(${postgresQuote(column)}, 1, ${nChars})`;

  return {
    name: `${displayName} by ${column}`,
    // The mask must be a SQL string literal; a bare run of `*` is not valid SQL.
    sql: lines(`SELECT ${bucket} || '${stars}', count(*)`, `FROM ${postgresQuote(table)}`, `GROUP BY ${bucket}`),
  };
}

/**
 * Builds an example that groups rows by the year extracted from `column` and counts rows
 * per year. The year column is aliased as `<column>_year`.
 */
function yearlyGeneralizedSQL(column: string, table: string, displayName: string): ExampleInfo {
  const yearExpr = `extract(year from ${postgresQuote(column)})`;
  const alias = postgresQuote(column + '_year');

  const selectClause = `SELECT ${yearExpr} as ${alias}, count(*)`;
  const fromClause = `FROM ${postgresQuote(table)}`;
  const groupByClause = `GROUP BY ${yearExpr}`;

  return {
    name: `${displayName} by ${column} year`,
    sql: lines(selectClause, fromClause, groupByClause),
  };
}

/**
 * Picks the example queries to generate for a single field of `table`.
 *
 * The first matching rule wins:
 *  1. primary key / `serial` columns -> no example;
 *  2. AID columns -> count distinct;
 *  3. `text` with a fingerprint -> raw GROUP BY (fewer than 10 distinct values), prefix
 *     generalization (average length below 20), otherwise count distinct;
 *  4. numeric types with a fingerprint -> raw GROUP BY (fewer than 10 distinct values),
 *     otherwise average;
 *  5. `timestamp` -> yearly generalization;
 *  6. anything else -> count distinct.
 *
 * Any error raised while building an example is logged with `console.warn` and yields
 * an empty list.
 */
function makeExampleInfos(field: Field, table: Table, aidColumns: string[]): ExampleInfo[] {
  try {
    const { name: column, database_type: dbType, fingerprint } = field;

    // No sensible example for columns being just row IDs.
    if (field.semantic_type === 'type/PK' || dbType === 'serial') {
      return [];
    }

    // Never SELECT/GROUP BY AIDs directly, also no point in generalizing.
    if (aidColumns.includes(column)) {
      return [countDistinctSQL(column, table.name)];
    }

    if (dbType === 'text' && fingerprint) {
      const distinctCount = fingerprint.global['distinct-count'];
      if (distinctCount && distinctCount < 10) {
        // Few distinct values - can GROUP BY directly.
        return [rawGroupBySQL(column, table.name, table.display_name)];
      }

      const averageLength = fingerprint.type?.['type/Text']?.['average-length'];

      // The `< 20`: we want to generalize surnames and categories but not sentences, paragraphs or addresses.
      if (averageLength && averageLength < 20) {
        return [textGeneralizedSQL(column, table.name, table.display_name, averageLength)];
      }
      return [countDistinctSQL(column, table.name)];
    }

    if (numberFieldTypes.includes(dbType) && fingerprint) {
      const distinctCount = fingerprint.global['distinct-count'];
      if (distinctCount && distinctCount < 10) {
        // Few distinct values - can GROUP BY directly.
        return [rawGroupBySQL(column, table.name, table.display_name)];
      }
      // TODO: Construct stable generalization. Temporarily revert to the average.
      return [avgSQL(column, table.name)];
    }

    if (dbType === 'timestamp') {
      // TODO: using timestamps fingerprint is possible, but we need to pull in some datetime lib.
      return [yearlyGeneralizedSQL(column, table.name, table.display_name)];
    }

    // Fallback to the count distinct for anything else.
    return [countDistinctSQL(column, table.name)];
  } catch (err) {
    console.warn(`Unable to make example query for ${table.name}, ${field.name}`, err);
    return [];
  }
}

export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSection[] {
const exampleInfos = table.fields.flatMap((field) => makeExampleInfos(field, table, aidColumns));
// const t = getT('example-queries'); // Let's worry about i18n later...

return [
{
title: 'Overview',
queries: [
{
name: `Count of ${display_name}`,
sql: lines('SELECT count(*)', `FROM ${name}`),
name: `Count of ${table.display_name}`,
sql: lines('SELECT count(*)', `FROM ${table.name}`),
sizeX: 6, // 6 is a good default (3 cards per row).
sizeY: 4, // 4 is a good default.
display: 'scalar',
Expand All @@ -48,19 +146,17 @@ export function exampleQueries(table: Table, aidColumns: string[]): ExamplesSect
],
},
{
// GROUP BY examples
title: `Distribution of ${display_name}`,
queries: [
{
name: `${display_name} by <column>`,
sql: lines('SELECT <column>, count(*)', `FROM ${name}`, 'GROUP BY <column>'),
title: `Overview of ${table.display_name} columns`,
queries: exampleInfos.map(({ name, sql }) => {
return {
name: name,
sql: sql,
sizeX: 6,
sizeY: 4, // For a table we might need something taller.
display: 'table', // For now we show results only as 'table'.
sizeY: 4, // TODO: For a table we might need something taller.
display: 'table' as Display, // For now we show results only as 'table'.
visualizationSettings: {},
},
],
};
}),
},
// ...
];
}
4 changes: 4 additions & 0 deletions src/shared/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ const tableNameRE = /^[a-z_][a-z0-9$_]*$/;
/**
 * Tells whether `name` can be used as a PostgreSQL identifier without quoting:
 * it must not appear in `postgresReservedKeywords` and must match `tableNameRE`.
 */
export function isPostgresIdentifier(name: string): boolean {
  if (postgresReservedKeywords.includes(name)) {
    return false;
  }

  return tableNameRE.test(name);
}

/**
 * Quotes `name` for use as a PostgreSQL identifier when needed.
 *
 * Names accepted by `isPostgresIdentifier` are returned unchanged. Anything else is
 * wrapped in double quotes; embedded double quotes are doubled so the result stays a
 * single well-formed quoted identifier (e.g. `a"b` becomes `"a""b"`).
 *
 * @param name - Raw table or column name.
 * @returns The name, quoted and escaped if it cannot be used bare.
 */
export function postgresQuote(name: string): string {
  if (isPostgresIdentifier(name)) {
    return name;
  }

  return `"${name.replace(/"/g, '""')}"`;
}

0 comments on commit 1c57920

Please sign in to comment.