Merge pull request #38 from certego/develop

0.5.0
certego · Nov 16, 2022 · 76da1cb · 76da1cb
2 parents 25ffd13 + 0414e34
commit 76da1cb
Show file tree

Hide file tree

Showing 3 changed files with 309 additions and 44 deletions.
diff --git a/README.md b/README.md
@@ -20,14 +20,7 @@ Unfortunately, our knowledge is limited, so here we go. If you find a solution t
 The main idea, is that the `filter` should work like an `aggregation`. 
 For doing so, and with keeping the compatibility on how MongoEngine works (i.e. the filter should return a queryset of `Document`) we had to do some work.  
 Calling `.aggregate` instead has to work as MongoEngine expect, meaning a list of dictionaries. 
-#### Features
 
-##### Validation
-We also decided to have, optionally, a validation of the index.
-Two things are checked:
-- The index actually exists (If you query a non-existing index, Atlas as default behaviour will not raise any error).
-- The fields that you are querying are actually indexed(If you query a field that is not indexed, Atlas as default behaviour will not raise any error, and will return an empty list).
-To make these check, you need to call the function `ensure_index` on the queryset:
 
 
 ## Usage
@@ -66,4 +59,53 @@ obj3_from_atlas = MyDocument.atlas.get(AtlasQ(wrong_field="value")) # raises Atl
 
 
 
-```
+```
+
+##  Extended Features
+
+### Validation
+We also decided to have, optionally, a validation of the index.
+Two things are checked:
+- The index actually exists (if you query a non-existing index, Atlas will not raise any error by default).
+- The fields that you are querying are actually indexed (if you query a field that is not indexed, Atlas will not raise any error by default and will return an empty list).
+To perform these checks, you need to call the function `ensure_index` on the queryset.
+
+### EmbeddedDocuments
+Embedded documents are queried in two different ways, depending on how you created your Search Index.
+Remember to ensure the index, so that AtlasQ knows how your index is defined.
+If you used the [embeddedDocuments](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings/#std-label-bson-data-types-embedded-documents) type, AtlasQ will use the [embeddedDocument](https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/) query syntax.
+Otherwise, if you used the [document](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings/#document) type, or you did not ensure the index, a normal `text` search with the `.` syntax will be used.
+
+Given a Collection as:
+```python3
+from mongoengine import Document, EmbeddedDocument, EmbeddedDocumentListField, fields
+
+class MyDocument(Document):
+    class MyEmbeddedDocument(EmbeddedDocument):
+        field1 = fields.StringField(required=True)
+        field2 = fields.StringField(required=True)
+
+    list = EmbeddedDocumentListField(MyEmbeddedDocument)    
+
+```
+and given the following documents in the collection:
+```python3
+
+MyDocument(list=[MyEmbeddedDocument(field1="aaa", field2="bbb"), MyEmbeddedDocument(field1="ccc", field2="ddd")])
+MyDocument(list=[MyEmbeddedDocument(field1="aaa", field2="ddd"), MyEmbeddedDocument(field1="ccc", field2="bbb")])
+```
+the following query will retrieve both documents, instead of only the first:
+```python3
+assert MyDocument.objects.filter(list__field1="aaa", list__field2="bbb").count() == 2
+
+```
+This happens because each clause only checks that at least one embedded document matches it; the conditions are not required to match the same embedded object.
+
+To solve this, inside AtlasQ, if you write multiple conditions that refer to the same embedded document in a *single* AtlasQ
+object, all the conditions must match a single object; if the conditions are spread across multiple AtlasQ objects, the default behaviour will be used.
+
+```python3
+assert MyDocument.atlas.filter(list__field1="aaa", list__field2="bbb").count() == 1
+assert MyDocument.atlas.filter(AtlasQ(list__field1="aaa")& AtlasQ(list__field2="bbb")).count() == 2
+```
+
diff --git a/atlasq/queryset/transform.py b/atlasq/queryset/transform.py
@@ -67,16 +67,17 @@ def __init__(self, atlas_query, atlas_index: AtlasIndex):
     def _regex(self, path: str, value: str):
         return {"regex": {"query": value, "path": path}}
 
-    def _embedded_document(self, path: str, content: Dict):
+    def _embedded_document(self, path: str, content: Dict, positive: bool):
+        operator = "must" if positive else "mustNot"
         return {
             "embeddedDocument": {
                 "path": path,
-                "operator": content,
+                "operator": {"compound": {operator: [content]}},
             }
         }
 
     def _convert_to_embedded_document(
-        self, path: List[str], operator: Dict, start: str = ""
+        self, path: List[str], operator: Dict, positive: bool, start: str = ""
     ):
         element = path.pop(0)
         partial_path = f"{start}.{element}" if start else element
@@ -90,9 +91,17 @@ def _convert_to_embedded_document(
 
         if not path:
             return operator
+
+        new_operator = self._convert_to_embedded_document(
+            path, operator, start=partial_path, positive=positive
+        )
         return self._embedded_document(
             partial_path,
-            self._convert_to_embedded_document(path, operator, start=partial_path),
+            new_operator,
+            True
+            if operator != new_operator
+            else positive,  # this cover the case of multiple embeddedDocument,
+            # where only the last one must be set to negative
         )
 
     def _exists(self, path: str) -> Dict:
@@ -229,21 +238,53 @@ def transform(self) -> Tuple[List[Dict], List[Dict], List[Dict]]:
                     obj = self._text(path, value)
 
             if obj:
-                # we are wrapping the result to an embedded document
-                obj = self._convert_to_embedded_document(path.split("."), obj)
-
                 if self.atlas_index.ensured:
-
                     self._ensure_path_is_indexed(path.split("."))
-                logger.debug(obj)
-
-                if to_go == 1:
-                    affirmative.append(obj)
+                # we are wrapping the result to an embedded document
+                converted = self._convert_to_embedded_document(
+                    path.split("."), obj, positive=to_go == 1
+                )
+                if obj != converted:
+                    # we have an embedded object
+                    # the mustNot is done inside the embedded document clause
+                    affirmative = self.merge_embedded_documents(converted, affirmative)
                 else:
-                    negative.append(obj)
+                    if to_go == 1:
+                        affirmative.append(converted)
+                    else:
+                        negative.append(converted)
         if other_aggregations:
             logger.warning(
                 "CARE! You are generating a query that uses other aggregations other than text search!"
                 f" Aggregations generated are {other_aggregations}"
             )
         return affirmative, negative, other_aggregations
+
+    @staticmethod
+    def merge_embedded_documents(obj: Dict, list_of_obj: List[Dict]) -> List[Dict]:
+        list_of_obj = list(list_of_obj)  # I hate function that change stuff in place
+        assert "embeddedDocument" in obj
+        assert "path" in obj["embeddedDocument"]
+        assert "operator" in obj["embeddedDocument"]
+        assert "compound" in obj["embeddedDocument"]["operator"]
+        # path that we want merge
+        path = obj["embeddedDocument"]["path"]
+        keys = list(obj["embeddedDocument"]["operator"]["compound"].keys())
+        assert len(keys) == 1
+        operator = keys[0]  # values could be (must, mustNot)
+        # the actual query
+        content = obj["embeddedDocument"]["operator"]["compound"][operator]
+        for already_present_obj in list_of_obj:
+            # we check for a correspondence
+            if path == already_present_obj["embeddedDocument"]["path"]:
+                # we merge the objects
+                already_present_obj["embeddedDocument"]["operator"][
+                    "compound"
+                ].setdefault(operator, []).extend(content)
+                # we can exit since we are sure that it can be only 1 hit for path
+                # if this method is called at every embedded object
+                break
+        # otherwise we just add the object if no hit has been found
+        else:
+            list_of_obj.append(obj)
+        return list_of_obj