Skip to content

Commit

Permalink
Support column_list in CONTAINS clause
Browse files Browse the repository at this point in the history
Previously in Babelfish, the feature of fulltext search for simple terms
in CONTAINS clause was not supported for multiple columns. The TSQL
syntax of Fulltext search for simple terms in CONTAINS clause over
multiple columns was unidentifiable over TDS endpoint of Babelfish and
would throw syntax error. To fix this, we have created a new grammar to
support the TSQL multiple column Fulltext search syntax for simple terms in
CONTAINS clause over TDS endpoint of Bablefish and we have also worked
over the rewriting logic of the Fulltext search CONTAINS clause query to
accept columns as a list and then by creating their Column Reference nodes
we will concatenate them in a multiple column syntax supported by PostgreSQL.

TASK: BABEL-4239
Signed-off-by: Jaspal Singh [email protected]
  • Loading branch information
Jaspal Singh committed Jan 30, 2025
1 parent 3d2b8f4 commit 51d511d
Show file tree
Hide file tree
Showing 15 changed files with 2,101 additions and 83 deletions.
82 changes: 67 additions & 15 deletions contrib/babelfishpg_tsql/runtime/functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2777,21 +2777,73 @@ type_name(PG_FUNCTION_ARGS)
Datum
replace_special_chars_fts(PG_FUNCTION_ARGS)
{
text *input_text = PG_GETARG_TEXT_P(0);
char *input_str = text_to_cstring(input_text);
char *output_str;
text *result_text;

/* Modify the input_str in place */
output_str = replace_special_chars_fts_impl(input_str);

/* Convert the modified input_str back to text */
result_text = cstring_to_text(output_str);

/* Free the memory allocated for input_str */
pfree(input_str);
pfree(output_str);
PG_RETURN_TEXT_P(result_text);
/* Get the list argument as an array */
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
Datum *elements;
bool *nulls;
int nelems;
text *result_text;
char *output_str;
char *s;
StringInfoData buf;

/* initialize a StringInfoData with an empty string */
initStringInfo(&buf);

/* Deconstruct the array to extract elements and nulls from the array*/
deconstruct_array(array, TEXTOID, -1, false, 'i', &elements, &nulls, &nelems);

for(int i = 0; i < nelems; i++)
{

text *input_text;
char *input_str;
char *col_str;

/* if at the current index has a null then move to next index */
if(nulls[i])
{
continue;
}

input_text = DatumGetTextP(elements[i]);
input_str = text_to_cstring(input_text);


/* Modify the input_str in place */
if(input_str != NULL)
{
col_str = replace_special_chars_fts_impl(input_str);
}

if (col_str != NULL)
{
/* Append new columns to the output_str */
appendStringInfoString(&buf, col_str);

/* append the concates if this is not the last column to be added */
if(i<nelems-1){
appendStringInfo(&buf, "||' '||");
}
pfree(col_str);
}
}
output_str = (&buf)->data;

/* Convert the modified input_str back to text with null safety */
if(output_str != NULL)
{
/* create a copy of the output string */
s = pstrdup(output_str);
result_text = cstring_to_text(s);
PG_RETURN_TEXT_P(result_text);
}

/* Null safety check */
else
{
PG_RETURN_NULL();
}
}

Datum
Expand Down
3 changes: 2 additions & 1 deletion contrib/babelfishpg_tsql/sql/sys_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,8 @@ LANGUAGE C STABLE STRICT;

-- This function performs replacing special characters to their corresponding unique hashes
-- in the search condition or the full text search CONTAINS predicate
CREATE OR REPLACE FUNCTION sys.replace_special_chars_fts(IN phrase text) RETURNS TEXT AS
-- to handle dynamic number of columns as argument we use VARIADIC that expects list of type text
CREATE OR REPLACE FUNCTION sys.replace_special_chars_fts(VARIADIC texts text[]) RETURNS TEXT AS
'babelfishpg_tsql', 'replace_special_chars_fts'
LANGUAGE C IMMUTABLE STRICT;
GRANT EXECUTE ON FUNCTION sys.replace_special_chars_fts TO PUBLIC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,13 @@ CREATE OR REPLACE FUNCTION sys.hashbytes(IN alg sys.VARCHAR, IN data sys.bbf_var
AS 'babelfishpg_tsql', 'hashbytes' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
GRANT EXECUTE ON FUNCTION sys.hashbytes(IN alg sys.VARCHAR, IN sys.bbf_varbinary) TO PUBLIC;

-- This function performs replacing special characters to their corresponding unique hashes
-- in the search condition or the full text search CONTAINS predicate
CREATE OR REPLACE FUNCTION sys.replace_special_chars_fts(VARIADIC texts text[]) RETURNS TEXT AS
'babelfishpg_tsql', 'replace_special_chars_fts'
LANGUAGE C IMMUTABLE STRICT;
GRANT EXECUTE ON FUNCTION sys.replace_special_chars_fts TO PUBLIC;

CREATE OR REPLACE FUNCTION sys.babelfish_conv_to_varchar(IN typename TEXT,
IN arg anyelement,
IN p_style NUMERIC DEFAULT -1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

%type <node> openjson_expr
%type <node> openjson_col_def
%type <list> var_name_list opt_var_name_list
%type <list> openjson_col_defs
%type <str> optional_path
%type <boolean> optional_asJson
Expand Down
145 changes: 81 additions & 64 deletions contrib/babelfishpg_tsql/src/backend_parser/gram-tsql-epilogue.y.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#define list_make_ptr_cell(v) ((ListCell) {.ptr_value = (v)})

void
pgtsql_parser_init(base_yy_extra_type *yyext)
{
Expand Down Expand Up @@ -782,7 +784,7 @@ is_json_query(List *name)
* where pgconfig = babelfish_fts_contains_pgconfig('<contains_search_condition>')
*/
static Node *
TsqlExpressionContains(char *colId, Node *search_expr, core_yyscan_t yyscanner)
TsqlExpressionContains(List *colId, Node *search_expr, core_yyscan_t yyscanner)
{
A_Expr *fts;
Node *to_tsvector_call, *to_tsquery_call;
Expand All @@ -802,73 +804,88 @@ TsqlExpressionContains(char *colId, Node *search_expr, core_yyscan_t yyscanner)

/* Transform column_name into to_tsvector(pgconfig, replace_special_chars_fts(column_name)) */
static Node *
makeToTSVectorFuncCall(char *colId, core_yyscan_t yyscanner, Node *pgconfig)
makeToTSVectorFuncCall(List *colId, core_yyscan_t yyscanner, Node *pgconfig)
{
Node *col;
List *args;
Node *replaceSpecialCharsFunc;
List *replaceSpecialCharsArgs;
char *schemaName = NULL;
char *tableName = NULL;
char *columnName = NULL;
char *dot1, *dot2;

/* Find the first and second dots in the column identifier */
dot1 = strchr(colId, '.');
dot2 = (dot1) ? strchr(dot1 + 1, '.') : NULL;

if (dot1)
{
if (dot2)
{
/* If two dots are found, then the input is in the format "schema_name.table_name.column_name"
* Parse the schema name, table name, and column name accordingly
*/
*dot1 = '\0';
*dot2 = '\0';
schemaName = colId;
tableName = dot1 + 1;
columnName = dot2 + 1;
}
else
{
/* If only one dot is found, then the input is in the format "table_name.column_name" or "alias_table.column_name"
* Parse the table name and column name accordingly
*/
*dot1 = '\0';
tableName = colId;
columnName = dot1 + 1;
}
}
else
{
/* If no dots are found, then the input is just the column name
* Set the column name directly
*/
columnName = colId;
}

if (schemaName)
{
/* If a schema name is present, create column reference to the schema first then append the table name and column name */
col = (Node *) makeColumnRef(schemaName, NIL, -1, yyscanner);
((ColumnRef *) col)->fields = lappend(((ColumnRef *) col)->fields, makeString(tableName));
((ColumnRef *) col)->fields = lappend(((ColumnRef *) col)->fields, makeString(columnName));
}
else if (tableName)
{
/* If a table name is present, create column reference to the table first then append the column name */
col = (Node *) makeColumnRef(tableName, NIL, -1, yyscanner);
((ColumnRef *) col)->fields = lappend(((ColumnRef *) col)->fields, makeString(columnName));
}
else
{
/* Create a ColumnRef node for the column */
col = (Node *) makeColumnRef(columnName, NIL, -1, yyscanner);
}

/* Create a function call for replace_special_chars_fts(column_name) */
replaceSpecialCharsArgs = list_make1(col);

/* length of list of columns passed as colId */
int len = colId->length;

/* inititalize the list of columns as null as we are using lappend function to append column(s)
* so its necessary to have a null list to start with
*/
replaceSpecialCharsArgs = NIL;

for(int i = 0; i < len; i++)
{
Node *col;
char *colName = (colId->elements[i]).ptr_value;
char *schemaName = NULL;
char *tableName = NULL;
char *columnName = NULL;
char *dot1, *dot2;

/* Find the first and second dots in the column identifier */
dot1 = strchr(colName, '.');
dot2 = (dot1) ? strchr(dot1 + 1, '.') : NULL;

if (dot1)
{
if (dot2)
{
/* If two dots are found, then the input is in the format "schema_name.table_name.column_name"
* Parse the schema name, table name, and column name accordingly
*/
*dot1 = '\0';
*dot2 = '\0';
schemaName = colName;
tableName = dot1 + 1;
columnName = dot2 + 1;
}
else
{
/* If only one dot is found, then the input is in the format "table_name.column_name" or "alias_table.column_name"
* Parse the table name and column name accordingly
*/
*dot1 = '\0';
tableName = colName;
columnName = dot1 + 1;
}
}
else
{
/* If no dots are found, then the input is just the column name
* Set the column name directly
*/
columnName = colName;
}

if (schemaName)
{
/* If a schema name is present, create column reference to the schema first then append the table name and column name */
col = (Node *) makeColumnRef(schemaName, NIL, -1, yyscanner);
((ColumnRef *) col)->fields = lappend(((ColumnRef *) col)->fields, makeString(tableName));
((ColumnRef *) col)->fields = lappend(((ColumnRef *) col)->fields, makeString(columnName));
}
else if (tableName)
{
/* If a table name is present, create column reference to the table first then append the column name */
col = (Node *) makeColumnRef(tableName, NIL, -1, yyscanner);
((ColumnRef *) col)->fields = lappend(((ColumnRef *) col)->fields, makeString(columnName));
}
else
{
/* Create a ColumnRef node for the column */
col = (Node *) makeColumnRef(columnName, NIL, -1, yyscanner);
}

/* append the created ColumnRef node in the replaceSpecialCharsArgs list */
replaceSpecialCharsArgs = lappend(replaceSpecialCharsArgs, col);
}

/* Create a function call for replace_special_chars_fts(column_list) */
replaceSpecialCharsFunc = (Node *) makeFuncCall(TsqlSystemFuncName("replace_special_chars_fts"), replaceSpecialCharsArgs, COERCE_EXPLICIT_CALL, -1);

/* Create the final function call to_tsvector(pgconfig, replace_special_chars_fts(column_name)) */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ static bool isNVarCharType(char *typenameStr);
static Node *TsqlJsonModifyMakeFuncCall(Node *expr, Node *path, Node *newValue);
static bool is_json_query(List *name);

static Node *TsqlExpressionContains(char *colId, Node *search_expr, core_yyscan_t yyscanner);
static Node *makeToTSVectorFuncCall(char *colId, core_yyscan_t yyscanner, Node *pgconfig);
static Node *TsqlExpressionContains(List *colId, Node *search_expr, core_yyscan_t yyscanner);
static Node *makeToTSVectorFuncCall(List *colId, core_yyscan_t yyscanner, Node *pgconfig);
static Node *makeToTSQueryFuncCall(Node *search_expr, Node *pgconfig);

char *construct_unique_index_name(char *index_name, char *relation_name);
Expand Down
24 changes: 23 additions & 1 deletion contrib/babelfishpg_tsql/src/backend_parser/gram-tsql-rule.y
Original file line number Diff line number Diff line change
Expand Up @@ -2082,7 +2082,7 @@ func_expr_common_subexpr:
parser_errposition(@1)));
}
}
| TSQL_CONTAINS '(' var_name ',' tsql_contains_search_condition ')'
| TSQL_CONTAINS '(' opt_var_name_list ',' tsql_contains_search_condition ')'
{
$$ = TsqlExpressionContains($3, $5, yyscanner);
}
Expand All @@ -2109,6 +2109,28 @@ func_expr_common_subexpr:
}
;

opt_var_name_list:
var_name
{
$$ = list_make1($1);
}
| '(' var_name_list ')'
{
$$ = $2;
}
;

var_name_list:
var_name
{
$$ = list_make1($1);
}
| var_name_list ',' var_name
{
$$ = lappend($1, $3);
}
;

tsql_contains_search_condition:
a_expr
{
Expand Down
36 changes: 36 additions & 0 deletions test/JDBC/expected/fts-contains-multicol-vu-cleanup.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- tsql user=jdbc_user password=12345678
-- enable FULLTEXT
SELECT set_config('babelfishpg_tsql.escape_hatch_fulltext', 'ignore', 'false')
GO
~~START~~
text
ignore
~~END~~


-- Remove fulltext index
DROP FULLTEXT INDEX ON test_tb
GO

DROP FULLTEXT INDEX ON fts_schema.test_tb1
GO

-- Remove the table
DROP TABLE IF EXISTS test_tb
GO

DROP TABLE IF EXISTS fts_schema.test_tb1
GO

-- Remove schema
DROP SCHEMA IF EXISTS fts_schema
GO

-- disable FULLTEXT
SELECT set_config('babelfishpg_tsql.escape_hatch_fulltext', 'strict', 'false')
GO
~~START~~
text
strict
~~END~~

Loading

0 comments on commit 51d511d

Please sign in to comment.