From 9c45b35546f5a0298ec1d68a4e75112f596d1cf2 Mon Sep 17 00:00:00 2001 From: Richard BAYET Date: Mon, 28 Oct 2024 12:03:14 +0100 Subject: [PATCH] [Thesaurus] Ability to cut-off the amount of alternative queries --- .../Config/ThesaurusConfig.php | 10 +++++ .../Plugin/QueryRewrite.php | 40 +++++++++++++++++-- .../Test/Unit/Plugin/QueryRewriteTest.php | 30 +++++++++++++- .../etc/adminhtml/elasticsuite_relevance.xml | 10 ++++- .../etc/elasticsuite_relevance.xml | 1 + .../i18n/de_DE.csv | 5 +++ .../i18n/en_US.csv | 5 +++ .../i18n/fr_FR.csv | 5 +++ .../i18n/nl_NL.csv | 5 +++ 9 files changed, 103 insertions(+), 8 deletions(-) diff --git a/src/module-elasticsuite-thesaurus/Config/ThesaurusConfig.php b/src/module-elasticsuite-thesaurus/Config/ThesaurusConfig.php index e09269e31..f89ab7377 100644 --- a/src/module-elasticsuite-thesaurus/Config/ThesaurusConfig.php +++ b/src/module-elasticsuite-thesaurus/Config/ThesaurusConfig.php @@ -62,6 +62,16 @@ public function getMaxRewrites() return (int) $this->general['max_rewrites']; } + /** + * Max allowed alternative queries generated by the synonym engine. + * + * @return int + */ + public function getMaxRewrittenQueries() + { + return (int) $this->general['max_rewritten_queries']; + } + /** * Is the synonyms search enabled ? 
* diff --git a/src/module-elasticsuite-thesaurus/Plugin/QueryRewrite.php b/src/module-elasticsuite-thesaurus/Plugin/QueryRewrite.php index 1ec26a3a9..7971c4d77 100644 --- a/src/module-elasticsuite-thesaurus/Plugin/QueryRewrite.php +++ b/src/module-elasticsuite-thesaurus/Plugin/QueryRewrite.php @@ -17,6 +17,8 @@ use Smile\ElasticsuiteCore\Search\Request\Query\Fulltext\QueryBuilder; use Smile\ElasticsuiteCore\Api\Search\Request\ContainerConfigurationInterface; use Smile\ElasticsuiteCore\Search\Request\Query\QueryFactory; +use Smile\ElasticsuiteThesaurus\Config\ThesaurusConfig; +use Smile\ElasticsuiteThesaurus\Config\ThesaurusConfigFactory; use Smile\ElasticsuiteThesaurus\Model\Index; use Smile\ElasticsuiteCore\Api\Search\SpellcheckerInterface; use Smile\ElasticsuiteCore\Search\Request\QueryInterface; @@ -35,6 +37,11 @@ class QueryRewrite */ private $queryFactory; + /** + * @var ThesaurusConfigFactory + */ + private $thesaurusConfigFactory; + /** * @var Index */ @@ -48,12 +55,17 @@ class QueryRewrite /** * Constructor. * - * @param QueryFactory $queryFactory Search request query factory. - * @param Index $index Synonym index. + * @param QueryFactory $queryFactory Search request query factory. + * @param ThesaurusConfigFactory $thesaurusConfigFactory Thesaurus configuration factory. + * @param Index $index Synonym index. 
*/ - public function __construct(QueryFactory $queryFactory, Index $index) - { + public function __construct( + QueryFactory $queryFactory, + ThesaurusConfigFactory $thesaurusConfigFactory, + Index $index + ) { $this->queryFactory = $queryFactory; + $this->thesaurusConfigFactory = $thesaurusConfigFactory; $this->index = $index; } @@ -141,6 +153,26 @@ private function getWeightedRewrites($queryText, $containerConfig, $originalBoos $rewrites = $rewrites + $this->index->getQueryRewrites($containerConfig, $currentQueryText, $originalBoost); } + $maxRewrittenQueries = $this->getThesaurusConfig($containerConfig)->getMaxRewrittenQueries(); + if ($maxRewrittenQueries > 0) { + $rewrites = array_slice($rewrites, 0, $maxRewrittenQueries, true); + } + return $rewrites; } + + /** + * Return thesaurus/relevance configuration. + * + * @param ContainerConfigurationInterface $containerConfig Container configuration. + * + * @return ThesaurusConfig + */ + private function getThesaurusConfig(ContainerConfigurationInterface $containerConfig) + { + $storeId = $containerConfig->getStoreId(); + $containerName = $containerConfig->getName(); + + return $this->thesaurusConfigFactory->create($storeId, $containerName); + } } diff --git a/src/module-elasticsuite-thesaurus/Test/Unit/Plugin/QueryRewriteTest.php b/src/module-elasticsuite-thesaurus/Test/Unit/Plugin/QueryRewriteTest.php index ddd1225ff..bc139cb4c 100644 --- a/src/module-elasticsuite-thesaurus/Test/Unit/Plugin/QueryRewriteTest.php +++ b/src/module-elasticsuite-thesaurus/Test/Unit/Plugin/QueryRewriteTest.php @@ -28,6 +28,8 @@ use Smile\ElasticsuiteCore\Search\Request\Query\Builder; use Smile\ElasticsuiteCore\Search\Request\Query\QueryFactory; use Smile\ElasticsuiteCore\Search\Request\QueryInterface; +use Smile\ElasticsuiteThesaurus\Config\ThesaurusConfig; +use Smile\ElasticsuiteThesaurus\Config\ThesaurusConfigFactory; use Smile\ElasticsuiteThesaurus\Model\Index as ThesaurusIndex; use Smile\ElasticsuiteThesaurus\Plugin\QueryRewrite; 
use Smile\ElasticsuiteThesaurus\Test\Unit\FulltextQueryBuilderInterceptor; @@ -89,11 +91,13 @@ public function testMultipleSearchQueryDepthBuilder() $containerConfig = $this->getContainerConfigMock($this->fields); $spellingType = SpellcheckerInterface::SPELLING_TYPE_EXACT; + $thesaurusConfigFactory = $this->getThesaurusConfigFactoryMock(); + $thesaurusIndex = $this->getMockBuilder(ThesaurusIndex::class) ->disableOriginalConstructor() ->getMock(); - $queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusIndex); + $queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusConfigFactory, $thesaurusIndex); $queryBuilderInterceptor = $this->getQueryBuilderWithPlugin($queryFactory, $queryRewritePlugin); /* @@ -130,11 +134,13 @@ public function testMultipleSearchQueryDepthBuilderWithRewrites() $containerConfig = $this->getContainerConfigMock($this->fields); $spellingType = SpellcheckerInterface::SPELLING_TYPE_EXACT; + $thesaurusConfigFactory = $this->getThesaurusConfigFactoryMock(); + $thesaurusIndex = $this->getMockBuilder(ThesaurusIndex::class) ->disableOriginalConstructor() ->getMock(); - $queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusIndex); + $queryRewritePlugin = new QueryRewrite($queryFactory, $thesaurusConfigFactory, $thesaurusIndex); $queryBuilderInterceptor = $this->getQueryBuilderWithPlugin($queryFactory, $queryRewritePlugin); $thesaurusIndex->expects($this->exactly(2))->method('getQueryRewrites')->withConsecutive( @@ -219,6 +225,26 @@ private function getQueryFactory($queryTypes) return new QueryFactory($factories); } + /** + * Mock the thesaurus config factory. 
+ * + * @return \PHPUnit\Framework\MockObject\MockObject + */ + private function getThesaurusConfigFactoryMock() + { + $thesaurusConfig = $this->getMockBuilder(ThesaurusConfig::class) + ->disableOriginalConstructor() + ->getMock(); + $thesaurusConfig->method('getMaxRewrittenQueries')->will($this->returnValue(0)); + + $thesaurusConfigFactory = $this->getMockBuilder(ThesaurusConfigFactory::class) + ->disableOriginalConstructor() + ->getMock(); + $thesaurusConfigFactory->method('create')->will($this->returnValue($thesaurusConfig)); + + return $thesaurusConfigFactory; + } + /** * Mock the configuration used by the query builder. * diff --git a/src/module-elasticsuite-thesaurus/etc/adminhtml/elasticsuite_relevance.xml b/src/module-elasticsuite-thesaurus/etc/adminhtml/elasticsuite_relevance.xml index ba0b34a8f..156a632a1 100644 --- a/src/module-elasticsuite-thesaurus/etc/adminhtml/elasticsuite_relevance.xml +++ b/src/module-elasticsuite-thesaurus/etc/adminhtml/elasticsuite_relevance.xml @@ -23,8 +23,14 @@ - + + + + + + + integer validate-number validate-zero-or-greater @@ -41,7 +47,7 @@ - + diff --git a/src/module-elasticsuite-thesaurus/etc/elasticsuite_relevance.xml b/src/module-elasticsuite-thesaurus/etc/elasticsuite_relevance.xml index 48e7257f3..c256ece23 100644 --- a/src/module-elasticsuite-thesaurus/etc/elasticsuite_relevance.xml +++ b/src/module-elasticsuite-thesaurus/etc/elasticsuite_relevance.xml @@ -18,6 +18,7 @@ 2 + 0 1 diff --git a/src/module-elasticsuite-thesaurus/i18n/de_DE.csv b/src/module-elasticsuite-thesaurus/i18n/de_DE.csv index 84df0317b..c756ee8a1 100644 --- a/src/module-elasticsuite-thesaurus/i18n/de_DE.csv +++ b/src/module-elasticsuite-thesaurus/i18n/de_DE.csv @@ -30,6 +30,11 @@ "Total of %1 record(s) were deleted.","%1 Eintrag / Einträge wurden gelöscht." "You saved the thesaurus %1.","Der Thesaurus %1 wurde gespeichert." 
"Thesaurus Configuration","Thesaurus Einstellungen" +"General Configuration","General Configuration" +"Max Allowed Rewrites","Max Allowed Rewrites" +"Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms.","Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms." +"Max Alternative Search Queries","Max Alternative Search Queries" +"Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation).","Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation)." 
"Synonyms Configuration","Synonyme Einstellungen" "Enable Synonyms Search","Aktiviere Suche nach Synonymen" "Synonyms Weight Divider","Verteilung der Gewichtung von Synonymen" diff --git a/src/module-elasticsuite-thesaurus/i18n/en_US.csv b/src/module-elasticsuite-thesaurus/i18n/en_US.csv index 57eefe7b3..af4bc84f6 100644 --- a/src/module-elasticsuite-thesaurus/i18n/en_US.csv +++ b/src/module-elasticsuite-thesaurus/i18n/en_US.csv @@ -30,6 +30,11 @@ Synonyms,Synonyms "Total of %1 record(s) were deleted.","Total of %1 record(s) were deleted." "You saved the thesaurus %1.","You saved the thesaurus %1." "Thesaurus Configuration","Thesaurus Configuration" +"General Configuration","General Configuration" +"Max Allowed Rewrites","Max Allowed Rewrites" +"Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms.","Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms." 
+"Max Alternative Search Queries","Max Alternative Search Queries" +"Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation).","Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation)." "Synonyms Configuration","Synonyms Configuration" "Enable Synonyms Search","Enable Synonyms Search" "Synonyms Weight Divider","Synonyms Weight Divider" diff --git a/src/module-elasticsuite-thesaurus/i18n/fr_FR.csv b/src/module-elasticsuite-thesaurus/i18n/fr_FR.csv index d133accbf..13ef87326 100644 --- a/src/module-elasticsuite-thesaurus/i18n/fr_FR.csv +++ b/src/module-elasticsuite-thesaurus/i18n/fr_FR.csv @@ -30,6 +30,11 @@ Synonyms,Synonymes "Total of %1 record(s) were deleted.","%1 enregistrement(s) ont été supprimé(s)." "You saved the thesaurus %1.","Thésaurus %1 sauvegardé." "Thesaurus Configuration","Configuration du thésaurus" +"General Configuration","Configuration Générale" +"Max Allowed Rewrites","Nombre maximum de remplacements" +"Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms.","Nombre maximal de règles de thésaurus appliquées consécutivement à une requête de recherche pour générer des requêtes alternatives. 
Ce nombre s'applique d'abord aux règles de synonymes puis aux règles d'expansion. Par exemple si la valeur du paramètre est 2, chaque requête alternative sera le résultat de l'application d'au plus 2 règles de synonymes et d'au plus 2 règles d'expansion. Mais si vous avez 10 règles de synonymes et 5 règles d'expansion, elles pourraient au final être toutes appliquées par paires. Soyez donc prudent sur l'augmentation de ce paramètre, particulièrement si vous avez déjà beaucoup de règles avec de grandes listes de termes alternatifs." +"Max Alternative Search Queries","Nombre maximum de recherches alternatives" +"Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation).","Nombre maximum de recherches alternatives réellement prises en compte. Utilisez ce paramètre si vous rencontrez des problèmes de performance de votre cluster liés à un énorme volume de règles de thésaurus. Valeur par défaut: 0 (pas de limitation)." "Synonyms Configuration","Configuration des synonymes" "Enable Synonyms Search","Activer la recherche par synonyme" "Synonyms Weight Divider","Pondérateur de poids pour les synonymes" diff --git a/src/module-elasticsuite-thesaurus/i18n/nl_NL.csv b/src/module-elasticsuite-thesaurus/i18n/nl_NL.csv index 5fcbfd270..8922d5cee 100644 --- a/src/module-elasticsuite-thesaurus/i18n/nl_NL.csv +++ b/src/module-elasticsuite-thesaurus/i18n/nl_NL.csv @@ -30,6 +30,11 @@ "Total of %1 record(s) were deleted.","Totaal van %1 record(s) werden verwijderd." "You saved the thesaurus %1.","Je hebt de thesaurus %1 gered." "Thesaurus Configuration","Thesaurus configuratie" +"General Configuration","General Configuration" +"Max Allowed Rewrites","Max Allowed Rewrites" +"Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. 
That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms.","Maximum number of thesaurus rules applied at a given time to a given search query to produce alternative queries. That number applies first to the synonyms rules and then the expansion rules. For instance if the setting's value is 2, it means each alternative query will be the result of the application of at most 2 synonyms rules and at most 2 expansion rules. But if you have 10 synonym rules and 5 expansion rules, they could all end up being applied by pairs. So be careful about augmenting this setting's value, especially if you already have a lot of rules with long lists of alternative terms." +"Max Alternative Search Queries","Max Alternative Search Queries" +"Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation).","Maximum number of alternative search queries taken into account. Use this setting if you have performance issues arising on your cluster related to a huge volume of thesaurus rules. Defaults to 0 (no limitation)." "Synonyms Configuration","Configuratie synoniemen" "Enable Synonyms Search","Synoniemen zoeken inschakelen" "Synonyms Weight Divider","Synoniemen Gewicht Verdeler"