From 18c46e50617d15a607a1d14310f8bbcfceb61d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 24 Oct 2024 19:13:57 +0200 Subject: [PATCH] datastore: add facet support in mongodb datastore, #TASK-7151, #TASK-7134 --- .../datastore/mongodb/MongoDBCollection.java | 34 ++--- .../datastore/mongodb/MongoDBQueryUtils.java | 127 +++++++++++++++++- .../mongodb/MongoDBCollectionTest.java | 76 ++++++++++- 3 files changed, 204 insertions(+), 33 deletions(-) diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java index ed76bdcb..c4a42f36 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java @@ -331,31 +331,25 @@ public DataResult aggregate(List operations, ComplexTypeC QueryOptions options) { long start = startQuery(); - DataResult queryResult; - MongoDBIterator iterator = mongoDBNativeQuery.aggregate(operations, converter, options); -// MongoCursor iterator = output.iterator(); List list = new LinkedList<>(); - if (queryResultWriter != null) { - try { - queryResultWriter.open(); + if (operations != null && operations.size() > 0) { + MongoDBIterator iterator = mongoDBNativeQuery.aggregate(operations, converter, options); + if (queryResultWriter != null) { + try { + queryResultWriter.open(); + while (iterator.hasNext()) { + queryResultWriter.write(iterator.next()); + } + queryResultWriter.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } else { while (iterator.hasNext()) { - queryResultWriter.write(iterator.next()); + list.add((T) iterator.next()); } - queryResultWriter.close(); - } catch (IOException e) { - throw new RuntimeException(e.getMessage(), e); - } - } else { -// if (converter != null) { -// while (iterator.hasNext()) { -// list.add(converter.convertToDataModelType(iterator.next())); -// } -// } else { - while (iterator.hasNext()) { - list.add((T) iterator.next()); } -// } } queryResult = endQuery(list, start); return queryResult; diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index e33ced35..48b92744 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -18,6 +18,8 @@ import com.mongodb.client.model.*; import org.apache.commons.lang3.StringUtils; +import org.bson.BsonDocument; +import org.bson.BsonInt32; import org.bson.Document; import org.bson.conversions.Bson; import org.opencb.commons.datastore.core.Query; @@ -27,13 +29,12 @@ import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; + /** * Created by imedina on 17/01/16. */ @@ -41,12 +42,16 @@ public class MongoDBQueryUtils { @Deprecated private static final String REGEX_SEPARATOR = "(\\w+|\\^)"; -// private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$"); + // private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$"); private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~/?|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_NUMERIC_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_BOOLEAN_PATTERN = Pattern.compile("^(!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_DATE_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|=?=?)([0-9]+)(-?)([0-9]*)"); + private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)"); + private static final Pattern RANGE_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+):([.0-9]+)\\]:([.0-9]+)"); + public static final String TO_REPLACE_DOTS = "."; + // TODO: Added on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'. @Deprecated private static final Pattern DEPRECATED_PATTERN = Pattern.compile("^(=?\\^|=?\\$)([^=/<>~!]+[.]*)$"); @@ -80,6 +85,15 @@ public enum ComparisonOperator { BETWEEN } + public enum Accumulator { + count, + avg, + min, + max, + stdDevPop, + stdDevSamp, + bucket + } public static Bson createFilter(String mongoDbField, String queryParam, Query query) { return createFilter(mongoDbField, queryParam, query, QueryParam.Type.TEXT, ComparisonOperator.EQUALS, LogicalOperator.OR); @@ -497,7 +511,7 @@ public static Bson createFilter(String mongoDbField, List queryValues, Co * @return the Bson query. */ protected static Bson createDateFilter(String mongoDbField, List dateValues, ComparisonOperator comparator, - QueryParam.Type type) { + QueryParam.Type type) { Bson filter = null; Object date = null; @@ -641,6 +655,107 @@ public static List createGroupBy(Bson query, List groupByField, St } } + public static List createFacet(Bson query, String facetField) { + if (facetField == null || StringUtils.isEmpty(facetField.trim())) { + return new ArrayList<>(); + } + String cleanFacetField = facetField.replace(" ", ""); + ArrayList facetFields = new ArrayList<>(Arrays.asList(cleanFacetField.split(";"))); + return createFacet(query, facetFields); + } + + private static List createFacet(Bson query, List facetFields) { + Set includeFields = new HashSet<>(); + + List boundaries = new ArrayList<>(); + List facets = new ArrayList<>(); + for (String facetField : facetFields) { + Facet facet = null; + if (facetField.contains(",")) { + Document id = new Document(); + for (String field : facetField.split(",")) { + String cleanField = field.replace(".", TO_REPLACE_DOTS); + id.append(cleanField, "$" + field); + includeFields.add(field); + } + facet = new Facet(facetField.replace(".", TO_REPLACE_DOTS).replace(",", "_"), Arrays.asList(Aggregates.group(id, + Accumulators.sum("count", 1)))); + } else { + Accumulator accumulator; + String field; + Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField); + if (matcher.matches()) { + accumulator = Accumulator.valueOf(matcher.group(1)); + field = matcher.group(2); + } else { + matcher = RANGE_PATTERN.matcher(facetField); + if (matcher.matches()) { + accumulator = bucket; + field = matcher.group(1); + double start = Double.parseDouble(matcher.group(2)); + double end = Double.parseDouble(matcher.group(3)); + double step = Double.parseDouble(matcher.group(4)); + for (double i = start; i <= end; i += step) { + boundaries.add(i); + } + } else { + accumulator = count; + field = facetField; + } + } + includeFields.add(field); + + String cleanField = field.replace(".", TO_REPLACE_DOTS); + String id = "$" + field; + switch (accumulator) { + case count: { + facet = new Facet(cleanField + "Counts", Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1)))); + break; + } + case avg: { + facet = new Facet(cleanField + "Avg", Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id)))); + break; + } + case min: { + facet = new Facet(cleanField + "Min", Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id)))); + break; + } + case max: { + facet = new Facet(cleanField + "Max", Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id)))); + break; + } + case stdDevPop: { + facet = new Facet(cleanField + "StdDevPop", Arrays.asList(Aggregates.group(field, + Accumulators.stdDevPop(stdDevPop.name(), id)))); + break; + } + case stdDevSamp: { + facet = new Facet(cleanField + "stdDevSamp", Arrays.asList(Aggregates.group(field, + Accumulators.stdDevSamp("stdDevSamp", id)))); + break; + } + case bucket: { + facet = new Facet(cleanField + "Ranges", Aggregates.bucket(id, boundaries, + new BucketOptions() + .defaultBucket("Other") + .output(new BsonField("count", new BsonDocument("$sum", new BsonInt32(1)))))); + break; + } + default: { + break; + } + } + } + if (facet != null) { + facets.add(facet); + } + } + + Bson match = Aggregates.match(query); + Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields))); + return Arrays.asList(match, project, Aggregates.facet(facets)); + } + public static void parseQueryOptions(List operations, QueryOptions options) { if (options != null) { Bson projection = getProjection(options); diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 377d610c..f2ae97c9 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -55,6 +55,7 @@ public class MongoDBCollectionTest { public ExpectedException thrown = ExpectedException.none(); public static final List NAMES = Arrays.asList("John", "Jack", "Javi"); public static final List SURNAMES = Arrays.asList("Doe", "Davis", null); + public static final List COLORS = Arrays.asList("red", "green", "yellow", "blue"); @BeforeClass public static void beforeClass() throws Exception { @@ -88,16 +89,35 @@ public static class User { public String surname; public int age; public int number; + public House house; + + public static class House { + public String color; + public int numRooms; + public int m2; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("House{"); + sb.append("color='").append(color).append('\''); + sb.append(", numRooms=").append(numRooms); + sb.append(", m2=").append(m2); + sb.append('}'); + return sb.toString(); + } + } @Override public String toString() { - return "User{" - + "id:" + id - + ", name:\"" + name + '"' - + ", surname:\"" + surname + '"' - + ", age:" + age - + ", number:" + number - + '}'; + final StringBuilder sb = new StringBuilder("User{"); + sb.append("id=").append(id); + sb.append(", name='").append(name).append('\''); + sb.append(", surname='").append(surname).append('\''); + sb.append(", age=").append(age); + sb.append(", number=").append(number); + sb.append(", house=").append(house); + sb.append('}'); + return sb.toString(); } } @@ -111,6 +131,11 @@ private static MongoDBCollection createTestCollection(String test, int size) { document.put("surname", SURNAMES.get(random.nextInt(SURNAMES.size()))); document.put("age", (int) i % 5); document.put("number", (int) i * i); + Document house = new Document(); + house.put("color", COLORS.get(random.nextInt(COLORS.size()))); + house.put("numRooms", (int) (i % 7) + 1); + house.put("m2", (int) i * 23); + document.put("house", house); mongoDBCollection.nativeQuery().insert(document, null); } return mongoDBCollection; @@ -450,6 +475,43 @@ public void testAggregate() { assertTrue(result.contains(queryResult.getResults().get(0))); } + @Test + public void testFacet() { + DataResult allResults = mongoDBCollection.find(new Document(), null); + System.out.println("allResults.getNumResults() = " + allResults.getNumResults()); + + Document match = new Document("age", new BasicDBObject("$gt", 2)); +// List facets = MongoDBQueryUtils.createFacet(match, ""); + List facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "name,surname"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2)"); +// List facets = MongoDBQueryUtils.createFacet(match, "name,house.color"); +// List facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0:1000000]:100000"); +// List facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:1000000]:100000"); + System.out.println("facets = " + facets); + DataResult aggregate = mongoDBCollection.aggregate(facets, null); + System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults()); + System.out.println(">>>>>>>>> facet results"); + for (Document result : aggregate.getResults()) { + System.out.println("result = " + result); + } + + int counter = 0; + for (Document result : allResults.getResults()) { + if (result.getInteger("age") > 2) { + counter++; + } + } + System.out.println(">>>>>>>>> all results age > 2: " + counter + " of " + allResults.getNumResults()); + for (Document result : allResults.getResults()) { + if (result.getInteger("age") > 2) { + System.out.println("result = " + result); + } + } + } + + @Test public void testInsert() throws Exception { Long countBefore = mongoDBCollectionInsertTest.count().getNumMatches();