Skip to content

Commit

Permalink
Only get types and procedures from unqualified schemas
Browse files Browse the repository at this point in the history
As an auxiliary improvement, use the `regclass` etc. types to refer to
catalog table entries instead of hardcoding oids. Incidentally this
fixes fetching of table and column comments on Cockroach.
  • Loading branch information
plcplc committed Jan 30, 2024
1 parent 21f0802 commit fe97bc9
Show file tree
Hide file tree
Showing 12 changed files with 160 additions and 77 deletions.
17 changes: 14 additions & 3 deletions crates/connectors/ndc-postgres/src/configuration/version2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use crate::configuration::version1;
use crate::configuration::IsolationLevel;

pub use version1::{
default_comparison_operator_mapping, default_excluded_schemas, default_unqualified_schemas,
ConnectionUri, PoolSettings, ResolvedSecret,
default_comparison_operator_mapping, default_excluded_schemas, ConnectionUri, PoolSettings,
ResolvedSecret,
};

const CONFIGURATION_QUERY: &str = include_str!("version2.sql");
Expand Down Expand Up @@ -50,6 +50,17 @@ impl RawConfiguration {
}
}

pub fn default_unqualified_schemas() -> Vec<String> {
vec![
// For the sake of having the user's tables to appear unqualified by default.
"public".to_string(),
// For the sake of having types and procedures appear unqualified by default.
"pg_catalog".to_string(),
"tiger".to_string(),
// "topology".to_string(),
]
}

/// Options which only influence how the configuration server updates the configuration
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
Expand Down Expand Up @@ -82,7 +93,7 @@ impl Default for ConfigureOptions {
fn default() -> ConfigureOptions {
ConfigureOptions {
excluded_schemas: version1::default_excluded_schemas(),
unqualified_schemas: version1::default_unqualified_schemas(),
unqualified_schemas: default_unqualified_schemas(),
comparison_operator_mapping: version1::default_comparison_operator_mapping(),
// we'll change this to `Some(MutationsVersions::V1)` when we
// want to "release" this behaviour
Expand Down
139 changes: 89 additions & 50 deletions crates/connectors/ndc-postgres/src/configuration/version2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@
-- The data model of these tables is quite involved and carries with it decades
-- of legacy. Supporting notes on this are kept in 'introspection-notes.md'.
--
-- TODO: This uses unqualified table (and view) and constraint names.
-- We will need to qualify them at some point. This makes the aliases seem
-- redundant, but they will change in the future.
-- If similar named tables exist in different schemas it is arbitrary
-- which one we pick currently! (c.f. Citus schemas 'columnar' and
-- 'columnar_internal' which both have a 'chunk' table)

-- When debugging in 'psql', uncomment the lines below to be able to run the
-- query with arguments set.

Expand Down Expand Up @@ -50,6 +43,26 @@ WITH
NOT (ns.nspname = ANY ($1))
),

-- These are the schemas of which types and procedures will be
-- exported unqualified.
unqualified_schemas AS
(
SELECT DISTINCT
schema_name,
ns.oid as schema_id
FROM
-- Unless 'pg_catalog' (added as a default during version-2) is
-- considered a schema to use unqualified we won't get access to any
-- built-in types or procedures.
-- Therefore it has been put here to preserve behavior. It should be
-- removed once we cut 'version3.sql'.
UNNEST($2 || '{"pg_catalog"}'::text[]) AS t(schema_name)
INNER JOIN
pg_namespace
AS ns
ON (ns.nspname = schema_name)
),

-- Tables and views etc. are recorded in `pg_class`, see
-- https://www.postgresql.org/docs/current/catalog-pg-class.html for its
-- schema.
Expand Down Expand Up @@ -129,8 +142,9 @@ WITH
-- (remember that, since 'pg_class' records all tables (and other relations)
-- that exist in the database, it also has a record of itself).
--
-- We assume 'classoid' to be stable and will just use literal values rather
-- than actually looking them up in pg_class.
-- Rather than using literal numerical oids in this query we use the special
-- built-in datatype 'regclass' which resolves names to oids automatically.
-- See https://www.postgresql.org/docs/current/datatype-oid.html
column_comments AS
(
SELECT
Expand All @@ -146,7 +160,7 @@ WITH
FROM
pg_description
WHERE
classoid = 1259
classoid = 'pg_catalog.pg_class'::regclass
) AS comm
INNER JOIN
columns
Expand All @@ -161,7 +175,7 @@ WITH
FROM
pg_description
WHERE
classoid = 1259
classoid = 'pg_catalog.pg_class'::regclass
AND objsubid = 0
),

Expand All @@ -178,6 +192,11 @@ WITH
-- typedelim
FROM
pg_catalog.pg_type AS t
INNER JOIN
-- Until the schema is made part of our model of types we only consider
-- those defined in the public schema.
unqualified_schemas
ON (t.typnamespace = unqualified_schemas.schema_id)
WHERE
-- We currently filter out pseudo (polymorphic) types, because our schema
-- can only deal with monomorphic types.
Expand All @@ -198,41 +217,42 @@ WITH
-- 'r' for range
-- 'm' for multi-range
)
AND NOT (
-- Exclude arrays (see 'array_types' below).
t.typelem != 0 -- whether you can subscript into the type
AND typcategory = 'A' -- The parsers considers this type an array for
-- the purpose of selecting preferred implicit casts.
AND NOT
(
-- Exclude arrays (see 'array_types' below).
t.typelem != 0 -- whether you can subscript into the type
AND typcategory = 'A' -- The parsers considers this type an array for
-- the purpose of selecting preferred implicit casts.
)
-- Ignore types that are (primarily) for internal postgres use.
-- This is a good candidate for a configuration option.
AND NOT typname IN
(
'aclitem',
'cid',
'gidx',
'name',
'oid',
'pg_dependencies',
'pg_lsn',
'pg_mcv_list',
'pg_ndistinct',
'pg_node_tree',
'regclass',
'regcollation',
'regconfig',
'regdictionary',
'regnamespace',
'regoper',
'regoperator',
'regproc',
'regprocedure',
'regrole',
'regtype',
'tid',
'xid',
'xid8'
)
-- Ignore types that are (primarily) for internal postgres use.
-- This is a good candidate for a configuration option.
AND NOT typname IN
(
'aclitem',
'cid',
'gidx',
'name',
'oid',
'pg_dependencies',
'pg_lsn',
'pg_mcv_list',
'pg_ndistinct',
'pg_node_tree',
'regclass',
'regcollation',
'regconfig',
'regdictionary',
'regnamespace',
'regoper',
'regoperator',
'regproc',
'regprocedure',
'regrole',
'regtype',
'tid',
'xid',
'xid8'
)
),
array_types AS
(
Expand All @@ -248,6 +268,11 @@ WITH
scalar_types
AS et
ON (et.type_id = t.typelem)
INNER JOIN
-- Until the schema is made part of our model of types we only consider
-- types defined in the public schema.
unqualified_schemas
USING (schema_id)
WHERE
-- See 'scalar_types' above
t.typtype NOT IN
Expand Down Expand Up @@ -346,8 +371,13 @@ WITH
INNER JOIN scalar_types
AS ret_type
ON (ret_type.type_id = proc.prorettype)
INNER JOIN
-- Until the schema is made part of our model of types we only consider
-- types defined in the public schema.
unqualified_schemas
on (unqualified_schemas.schema_id = proc.pronamespace)
WHERE
ret_type.type_name = 'bool'
ret_type.type_id = 'pg_catalog.bool'::regtype
-- We check that we only consider procedures which take two regular
-- arguments.
AND cardinality(proc.proargtypes) = 2
Expand Down Expand Up @@ -412,8 +442,13 @@ WITH
scalar_types
AS t_res
ON (op.oprresult = t_res.type_id)
INNER JOIN
-- Until the schema is made part of our model of operators we only consider
-- those defined in the public schema.
unqualified_schemas
ON (unqualified_schemas.schema_id = op.oprnamespace)
WHERE
t_res.type_name = 'bool'
t_res.type_id = 'pg_catalog.bool'::regtype
ORDER BY op.oprname
),

Expand Down Expand Up @@ -779,7 +814,7 @@ FROM
SELECT
jsonb_object_agg(
CASE
WHEN s.schema_name = ANY ($2)
WHEN unqualified_schemas.schema_id IS NOT NULL
THEN rel.relation_name
ELSE s.schema_name || '_' || rel.relation_name
END,
Expand All @@ -803,14 +838,18 @@ FROM
relations
AS rel

LEFT OUTER JOIN
unqualified_schemas
USING (schema_id)

LEFT OUTER JOIN
table_comments
AS comm
USING (relation_id)

INNER JOIN schemas
AS s
USING (schema_id)
ON (rel.schema_id = s.schema_id)

-- Columns
INNER JOIN
Expand Down Expand Up @@ -1095,7 +1134,7 @@ FROM
--
-- EXECUTE configuration(
-- '{"information_schema", "tiger", "pg_catalog", "topology"}'::varchar[],
-- '{}'::varchar[],
-- '{"public", "pg_catalog", "tiger"}'::varchar[],
-- '[
-- {"operatorName": "=", "exposedName": "_eq"},
-- {"operatorName": "!=", "exposedName": "_neq"},
Expand Down
Loading

0 comments on commit fe97bc9

Please sign in to comment.