From 7a8e2d305568cbb6964f40c6638e942f96fe684f Mon Sep 17 00:00:00 2001 From: Roshan Kanwar Date: Thu, 23 Nov 2023 18:27:55 +0530 Subject: [PATCH] Fix bugs for translation logic for simple_term and execution logic for FULLTEXT INDEX stmts (#2045) Bug fixes for simple_term support in CONTAINS clause and FULLTEXT INDEX creation/deletion statements: Fix the buffer overflow issue on translation of simple_term. Fix multiple spaces issue between words in simple_term which caused exception. Remove the util function exec_utility_cmd_helper and use a normal wrapper. Issues Resolved JIRA: BABEL-4379, BABEL-4383 Signed-off-by: Roshan Kanwar rskanwar@amazon.com --- contrib/babelfishpg_tsql/src/fts.c | 14 +- contrib/babelfishpg_tsql/src/fts_parser.y | 134 ++++++------------ contrib/babelfishpg_tsql/src/fts_scan.l | 11 +- contrib/babelfishpg_tsql/src/pl_exec-2.c | 28 +++- test/JDBC/expected/fts-contains-vu-verify.out | 70 +++++++++ .../fts-contains-vu-verify.mix | 27 ++++ 6 files changed, 190 insertions(+), 94 deletions(-) diff --git a/contrib/babelfishpg_tsql/src/fts.c b/contrib/babelfishpg_tsql/src/fts.c index 389c40c9e54..9ddf8acdaf3 100644 --- a/contrib/babelfishpg_tsql/src/fts.c +++ b/contrib/babelfishpg_tsql/src/fts.c @@ -10,11 +10,21 @@ PG_FUNCTION_INFO_V1(babelfish_fts_rewrite); Datum babelfish_fts_rewrite(PG_FUNCTION_ARGS) { - text* input_text = PG_GETARG_TEXT_P(0); - char* input_str = text_to_cstring(input_text); + text* input_text; + char* input_str; char* translated_query; text* result_text = NULL; // Initialize result_text to NULL + if (PG_ARGISNULL(0)) + { + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("Incorrect syntax near the keyword 'null'"))); + } + + input_text = PG_GETARG_TEXT_P(0); + input_str = text_to_cstring(input_text); + if (!pltsql_allow_fulltext_parser) { ereport(ERROR, diff --git a/contrib/babelfishpg_tsql/src/fts_parser.y b/contrib/babelfishpg_tsql/src/fts_parser.y index a94d3867182..b0350546991 100644 --- a/contrib/babelfishpg_tsql/src/fts_parser.y +++ b/contrib/babelfishpg_tsql/src/fts_parser.y @@ -1,5 +1,6 @@ %{ #include "postgres.h" +#include "lib/stringinfo.h" #include #include "fts_data.h" @@ -21,8 +22,7 @@ static char *scanbuf; static int scanbuflen; static char* translate_simple_term(const char* s); -static char *trim(char *s); -static char *trimInsideQuotes(char *s); +static char *trim(char *s, bool insideQuotes); %} @@ -114,136 +114,92 @@ simple_term_list: */ static char* translate_simple_term(const char* inputStr) { + int inputLength; + StringInfoData output; + const char* inputPtr; + char* trimmedInputStr; - int inputLength; - int outputSize; - char* output; - const char* inputPtr; - char* outputPtr; - char* trimmedInputStr; - - // Check for empty input - if (inputStr == NULL) { - return NULL; + // Check for empty input - this should not be possible based on lexer rules, but check just in case + if (!inputStr || !(inputLength = strlen(inputStr))) { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Null or empty full-text predicate."))); } - trimmedInputStr = (char*)palloc(strlen(inputStr) + 1); - strcpy(trimmedInputStr, inputStr); + trimmedInputStr = pstrdup(inputStr); // removing trailing and leading spaces - trim(trimmedInputStr); + trim(trimmedInputStr, false); inputLength = strlen(trimmedInputStr); - + // Check if the input is a phrase enclosed in double quotes if (trimmedInputStr[0] == '"' && trimmedInputStr[inputLength - 1] == '"') { - trimInsideQuotes(trimmedInputStr); + trim(trimmedInputStr, true); inputLength = strlen(trimmedInputStr); - outputSize = inputLength - 2; // Exclude both quotes - output = (char*)palloc(outputSize + 1); // +1 for the null terminator - if (output == NULL) { - return NULL; - } + + initStringInfo(&output); // Initialize pointers for input and output inputPtr = trimmedInputStr; - outputPtr = output; while (*inputPtr != '\0') { if (*inputPtr == ' ') { // Replace space with "<->" - *outputPtr++ = '<'; - *outputPtr++ = '-'; - *outputPtr++ = '>'; + while (*(inputPtr + 1) == ' ') { + // Handle multiples spaces between words and skip over additional spaces + inputPtr++; + } + appendStringInfoString(&output, "<->"); } else { // Copy the character - *outputPtr++ = *inputPtr; + appendStringInfoChar(&output, *inputPtr); } inputPtr++; } - *outputPtr = '\0'; - - return output; + pfree(trimmedInputStr); + return output.data; } else { // It's a single word, so no transformation needed - return strdup(trimmedInputStr); + return trimmedInputStr; } } -// Function to remove leading and trailing spaces of a string -static char *trim(char *s) { +/* + * Function to remove leading and trailing spaces of a string + * If flag is true then it removes spaces inside double quotes + */ +static char *trim(char *s, bool insideQuotes) { size_t length; size_t start; size_t end; size_t newLength; + bool inQuotes; - if (s == NULL) { - return NULL; - } - - length = strlen(s); - if (length == 0) { - return s; // Empty string, nothing to trim + /* + * Empty string, nothing to trim + * for the empty input, we're automatically throwing error, + * so if string is NULL or empty, this clause won't pose any issue, it's just a safety check + */ + if (!s || !(length = strlen(s))) { + return s; } + inQuotes = false; start = 0; end = length - 1; - // Trim leading spaces - while (start < length && isspace(s[start])) { - start++; - } - - // Trim trailing spaces - while (end > start && isspace(s[end])) { - end--; - } - - // Calculate the new length - newLength = end - start + 1; - - // Shift the non-space part to the beginning of the string - memmove(s, s + start, newLength); - - // Null-terminate the result - s[newLength] = '\0'; - - return s; -} - -// Function to remove leading and trailing spaces inside double quotes -static char *trimInsideQuotes(char *s) { - size_t length; - size_t start; - size_t end; - size_t i; - size_t newLength; - bool insideQuotes; - - if (s == NULL) { - return NULL; - } - - length = strlen(s); - if (length == 0) { - return s; // Empty string, nothing to trim - } - - insideQuotes = false; - start = 1; - end = length - 2; - - for (i = 0; i < length; i++) { + for (size_t i = 0; i < length; i++) { if (s[i] == '"') { - insideQuotes = !insideQuotes; + inQuotes = !inQuotes; } - if (!insideQuotes) { - // Trim leading spaces inside quotes + if (!inQuotes || insideQuotes) { + // Trim leading spaces while (start < length && isspace(s[start])) { start++; } - // Trim trailing spaces inside quotes + // Trim trailing spaces while (end > start && isspace(s[end])) { end--; } diff --git a/contrib/babelfishpg_tsql/src/fts_scan.l b/contrib/babelfishpg_tsql/src/fts_scan.l index 0ee1a82376b..9322e8c4f55 100644 --- a/contrib/babelfishpg_tsql/src/fts_scan.l +++ b/contrib/babelfishpg_tsql/src/fts_scan.l @@ -34,7 +34,7 @@ static YY_BUFFER_STATE scanbufhandle; \"[^"]+\*\" { if (yytext[strlen(yytext)-2] == ' ') { yyerror(NULL, "Syntax error, space is not allowed before *"); } yylval = yytext; return PREFIX_TERM_TOKEN; } // Handle prefix terms [a-zA-Z0-9]+ { yylval = yytext; return WORD_TOKEN; } // Handle individual words -\"[^"*]+\" { yylval = yytext; return TEXT_TOKEN; } // Handle double-quoted phrases +\"[^"*]*\" { yylval = yytext; return TEXT_TOKEN; } // Handle double-quoted phrases FORMSOF\s*\(\s*(INFLECTIONAL|THESAURUS)\s*,\s*((\w+)|(\"[\w\s]+\"))(\s*,\s*((\w+)|(\"[\w\s]+\")))*\s*\) { yylval = yytext; return GENERATION_TERM_TOKEN; } // Handle FORMSOF generation term @@ -64,6 +64,14 @@ fts_scanner_init(const char *str) if (YY_CURRENT_BUFFER) yy_delete_buffer(YY_CURRENT_BUFFER); + + /* + * Check for empty input + */ + if (!str || !*str || (strspn(str, " \t\n\r") == slen)) { + yyerror(NULL, "Null or empty full-text predicate."); + } + /* * Make a scan buffer with special termination needed by flex. */ @@ -72,7 +80,6 @@ fts_scanner_init(const char *str) memcpy(scanbuf, str, slen); scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); - BEGIN(INITIAL); } diff --git a/contrib/babelfishpg_tsql/src/pl_exec-2.c b/contrib/babelfishpg_tsql/src/pl_exec-2.c index 60f155fa7c9..67e230d86b5 100644 --- a/contrib/babelfishpg_tsql/src/pl_exec-2.c +++ b/contrib/babelfishpg_tsql/src/pl_exec-2.c @@ -3757,6 +3757,9 @@ exec_stmt_fulltextindex(PLtsql_execstate *estate, PLtsql_stmt_fulltextindex *stm Oid datdba; bool login_is_db_owner; bool is_create; + List *res; + Node *res_stmt; + PlannedStmt *wrapper; Assert(stmt->schema_name != NULL); @@ -3834,6 +3837,29 @@ exec_stmt_fulltextindex(PLtsql_execstate *estate, PLtsql_stmt_fulltextindex *stm /* The above query will be * executed using ProcessUtility() */ - exec_utility_cmd_helper(query_str); + res = raw_parser(query_str, RAW_PARSE_DEFAULT); + res_stmt = ((RawStmt *) linitial(res))->stmt; + + /* need to make a wrapper PlannedStmt */ + wrapper = makeNode(PlannedStmt); + wrapper->commandType = CMD_UTILITY; + wrapper->canSetTag = false; + wrapper->utilityStmt = res_stmt; + wrapper->stmt_location = 0; + wrapper->stmt_len = 1; + + /* do this step */ + ProcessUtility(wrapper, + is_create ? "(CREATE FULLTEXT INDEX STATEMENT )" : "(DELETE FULLTEXT INDEX STATEMENT )", + false, + PROCESS_UTILITY_SUBCOMMAND, + NULL, + NULL, + None_Receiver, + NULL); + + /* make sure later steps can see the object created here */ + CommandCounterIncrement(); + return PLTSQL_RC_OK; } diff --git a/test/JDBC/expected/fts-contains-vu-verify.out b/test/JDBC/expected/fts-contains-vu-verify.out index 5d88b8ad0a9..510192f1b42 100644 --- a/test/JDBC/expected/fts-contains-vu-verify.out +++ b/test/JDBC/expected/fts-contains-vu-verify.out @@ -41,6 +41,58 @@ int#!#text ~~END~~ +-- Test for empty string input, should throw error +EXEC fts_contains_vu_prepare_p1 '' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Null or empty full-text predicate.)~~ + + +-- Test for empty string input, should throw error +EXEC fts_contains_vu_prepare_p1 ' ' +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Null or empty full-text predicate.)~~ + + +-- Test for empty string input, should throw error +EXEC fts_contains_vu_prepare_p1 NULL +GO +~~ERROR (Code: 33557097)~~ + +~~ERROR (Message: Incorrect syntax near the keyword 'null')~~ + + +EXEC fts_contains_vu_prepare_p1 ' "" ' +GO +~~START~~ +int#!#text +~~END~~ + + +EXEC fts_contains_vu_prepare_p1 ' ""' +GO +~~START~~ +int#!#text +~~END~~ + + +EXEC fts_contains_vu_prepare_p1 '"" ' +GO +~~START~~ +int#!#text +~~END~~ + + +EXEC fts_contains_vu_prepare_p1 '" "' +GO +~~START~~ +int#!#text +~~END~~ + + EXEC fts_contains_vu_prepare_p1 'other' GO ~~START~~ @@ -167,6 +219,24 @@ int#!#text ~~END~~ +EXEC fts_contains_vu_prepare_p1 '"come back"' +GO +~~START~~ +int#!#text +6#!#Walter Hill , the director of The Warriors , strove to give a comic-book depiction of the gang 's flight from the Bronx back to their Coney Island turf +8#!# After making their way through rival gangs ' turf in Manhattan and then back to Coney Island , the Warriors defeat the gang responsible for Cyrus ' death +158#!# What were the largest or most active homegrown companies back in the late ' 80s -- Portland Rep , New Rose , Storefront Portland Civic Theater -- long ago folded +174#!#" Back then , there was this grittier , shoestring quality that imbued almost every company , " Mulligan says +176#!# But when I look back at that earlier renaissance of the ' 80s , the truth was the talent pool of the city needed to step up +290#!# I ca n't wait to get back in the ring with Michael +325#!#Judge Linnane , who adjourned the hearing briefly until the terms of settlement were lodged in the court file , put the summary judgment application back for mention on March 6 next +362#!#FRENKEL : Saher says that she never thought she would become a refugee , and it scares her to think about what 's happening back in her hometown of Daraa +416#!# At Mission Dolores , CyArk worked hard to get behind the very ornately carved reredos , a false wall in back of the altar that was made in Mexico and shipped to the Mission by boat in 1796 +720#!#The Australopithecus boisei skull was one of several important discoveries Leakey made in her career -- all made despite the fact that she had nearly @ @ @ @ @ @ @ @ @ @ She got thrown out of school very early on and never wanted to go back , and became hugely interested in archaeology , " Richard Leakey , her son , told Archaeology magazine +889#!# In her book Lots of Candles , Plenty of Cake , Anna Quindlen , the 60-year-old novelist and journalist who famously chronicled thirtysomething life back in the 1980s , wrote that she started using Botox for her frown lines in her mid-50s because she did n't want to look cross when she was n't +~~END~~ + + EXEC fts_contains_vu_prepare_p1 '"much of the"' GO ~~START~~ diff --git a/test/JDBC/input/full_text_search/fts-contains-vu-verify.mix b/test/JDBC/input/full_text_search/fts-contains-vu-verify.mix index 1b4c97a090e..800b1dd5a29 100644 --- a/test/JDBC/input/full_text_search/fts-contains-vu-verify.mix +++ b/test/JDBC/input/full_text_search/fts-contains-vu-verify.mix @@ -20,6 +20,30 @@ GO EXEC fts_contains_vu_prepare_p1 'love' GO +-- Test for empty string input, should throw error +EXEC fts_contains_vu_prepare_p1 '' +GO + +-- Test for empty string input, should throw error +EXEC fts_contains_vu_prepare_p1 ' ' +GO + +-- Test for empty string input, should throw error +EXEC fts_contains_vu_prepare_p1 NULL +GO + +EXEC fts_contains_vu_prepare_p1 ' "" ' +GO + +EXEC fts_contains_vu_prepare_p1 ' ""' +GO + +EXEC fts_contains_vu_prepare_p1 '"" ' +GO + +EXEC fts_contains_vu_prepare_p1 '" "' +GO + EXEC fts_contains_vu_prepare_p1 'other' GO @@ -50,6 +74,9 @@ GO EXEC fts_contains_vu_prepare_p1 '"come back"' GO +EXEC fts_contains_vu_prepare_p1 '"come back"' +GO + EXEC fts_contains_vu_prepare_p1 '"much of the"' GO