Skip to content

Commit

Permalink
datastore: add facet support in mongodb datastore, #TASK-7151, #TASK-…
Browse files Browse the repository at this point in the history
…7134
  • Loading branch information
jtarraga committed Oct 24, 2024
1 parent 2077871 commit 18c46e5
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -331,31 +331,25 @@ public <T> DataResult<T> aggregate(List<? extends Bson> operations, ComplexTypeC
QueryOptions options) {

long start = startQuery();

DataResult<T> queryResult;
MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options);
// MongoCursor<Document> iterator = output.iterator();
List<T> list = new LinkedList<>();
if (queryResultWriter != null) {
try {
queryResultWriter.open();
if (operations != null && operations.size() > 0) {
MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options);
if (queryResultWriter != null) {
try {
queryResultWriter.open();
while (iterator.hasNext()) {
queryResultWriter.write(iterator.next());
}
queryResultWriter.close();
} catch (IOException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
while (iterator.hasNext()) {
queryResultWriter.write(iterator.next());
list.add((T) iterator.next());
}
queryResultWriter.close();
} catch (IOException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
// if (converter != null) {
// while (iterator.hasNext()) {
// list.add(converter.convertToDataModelType(iterator.next()));
// }
// } else {
while (iterator.hasNext()) {
list.add((T) iterator.next());
}
// }
}
queryResult = endQuery(list, start);
return queryResult;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

import com.mongodb.client.model.*;
import org.apache.commons.lang3.StringUtils;
import org.bson.BsonDocument;
import org.bson.BsonInt32;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.opencb.commons.datastore.core.Query;
Expand All @@ -27,26 +29,29 @@
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*;

/**
* Created by imedina on 17/01/16.
*/
public class MongoDBQueryUtils {

@Deprecated
private static final String REGEX_SEPARATOR = "(\\w+|\\^)";
// private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$");
// private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$");
private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~/?|==?)([^=<>~!]+.*)$");
private static final Pattern OPERATION_NUMERIC_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|==?)([^=<>~!]+.*)$");
private static final Pattern OPERATION_BOOLEAN_PATTERN = Pattern.compile("^(!=|!?=?~|==?)([^=<>~!]+.*)$");
private static final Pattern OPERATION_DATE_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|=?=?)([0-9]+)(-?)([0-9]*)");

private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)");
private static final Pattern RANGE_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+):([.0-9]+)\\]:([.0-9]+)");
public static final String TO_REPLACE_DOTS = "&#46;";

// TODO: Added on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'.
@Deprecated
private static final Pattern DEPRECATED_PATTERN = Pattern.compile("^(=?\\^|=?\\$)([^=/<>~!]+[.]*)$");
Expand Down Expand Up @@ -80,6 +85,15 @@ public enum ComparisonOperator {
BETWEEN
}

public enum Accumulator {
count,
avg,
min,
max,
stdDevPop,
stdDevSamp,
bucket
}

public static Bson createFilter(String mongoDbField, String queryParam, Query query) {
return createFilter(mongoDbField, queryParam, query, QueryParam.Type.TEXT, ComparisonOperator.EQUALS, LogicalOperator.OR);
Expand Down Expand Up @@ -497,7 +511,7 @@ public static <T> Bson createFilter(String mongoDbField, List<T> queryValues, Co
* @return the Bson query.
*/
protected static Bson createDateFilter(String mongoDbField, List<String> dateValues, ComparisonOperator comparator,
QueryParam.Type type) {
QueryParam.Type type) {
Bson filter = null;

Object date = null;
Expand Down Expand Up @@ -641,6 +655,107 @@ public static List<Bson> createGroupBy(Bson query, List<String> groupByField, St
}
}

public static List<Bson> createFacet(Bson query, String facetField) {
if (facetField == null || StringUtils.isEmpty(facetField.trim())) {
return new ArrayList<>();
}
String cleanFacetField = facetField.replace(" ", "");
ArrayList<String> facetFields = new ArrayList<>(Arrays.asList(cleanFacetField.split(";")));
return createFacet(query, facetFields);
}

private static List<Bson> createFacet(Bson query, List<String> facetFields) {
Set<String> includeFields = new HashSet<>();

List<Double> boundaries = new ArrayList<>();
List<Facet> facets = new ArrayList<>();
for (String facetField : facetFields) {
Facet facet = null;
if (facetField.contains(",")) {
Document id = new Document();
for (String field : facetField.split(",")) {
String cleanField = field.replace(".", TO_REPLACE_DOTS);
id.append(cleanField, "$" + field);
includeFields.add(field);
}
facet = new Facet(facetField.replace(".", TO_REPLACE_DOTS).replace(",", "_"), Arrays.asList(Aggregates.group(id,
Accumulators.sum("count", 1))));
} else {
Accumulator accumulator;
String field;
Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField);
if (matcher.matches()) {
accumulator = Accumulator.valueOf(matcher.group(1));
field = matcher.group(2);
} else {
matcher = RANGE_PATTERN.matcher(facetField);
if (matcher.matches()) {
accumulator = bucket;
field = matcher.group(1);
double start = Double.parseDouble(matcher.group(2));
double end = Double.parseDouble(matcher.group(3));
double step = Double.parseDouble(matcher.group(4));
for (double i = start; i <= end; i += step) {
boundaries.add(i);
}
} else {
accumulator = count;
field = facetField;
}
}
includeFields.add(field);

String cleanField = field.replace(".", TO_REPLACE_DOTS);
String id = "$" + field;
switch (accumulator) {
case count: {
facet = new Facet(cleanField + "Counts", Arrays.asList(Aggregates.group(id, Accumulators.sum(count.name(), 1))));
break;
}
case avg: {
facet = new Facet(cleanField + "Avg", Arrays.asList(Aggregates.group(field, Accumulators.avg(avg.name(), id))));
break;
}
case min: {
facet = new Facet(cleanField + "Min", Arrays.asList(Aggregates.group(field, Accumulators.min(min.name(), id))));
break;
}
case max: {
facet = new Facet(cleanField + "Max", Arrays.asList(Aggregates.group(field, Accumulators.max(max.name(), id))));
break;
}
case stdDevPop: {
facet = new Facet(cleanField + "StdDevPop", Arrays.asList(Aggregates.group(field,
Accumulators.stdDevPop(stdDevPop.name(), id))));
break;
}
case stdDevSamp: {
facet = new Facet(cleanField + "stdDevSamp", Arrays.asList(Aggregates.group(field,
Accumulators.stdDevSamp("stdDevSamp", id))));
break;
}
case bucket: {
facet = new Facet(cleanField + "Ranges", Aggregates.bucket(id, boundaries,
new BucketOptions()
.defaultBucket("Other")
.output(new BsonField("count", new BsonDocument("$sum", new BsonInt32(1))))));
break;
}
default: {
break;
}
}
}
if (facet != null) {
facets.add(facet);
}
}

Bson match = Aggregates.match(query);
Bson project = Aggregates.project(Projections.include(new ArrayList<>(includeFields)));
return Arrays.asList(match, project, Aggregates.facet(facets));
}

public static void parseQueryOptions(List<Bson> operations, QueryOptions options) {
if (options != null) {
Bson projection = getProjection(options);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public class MongoDBCollectionTest {
public ExpectedException thrown = ExpectedException.none();
public static final List<String> NAMES = Arrays.asList("John", "Jack", "Javi");
public static final List<String> SURNAMES = Arrays.asList("Doe", "Davis", null);
public static final List<String> COLORS = Arrays.asList("red", "green", "yellow", "blue");

@BeforeClass
public static void beforeClass() throws Exception {
Expand Down Expand Up @@ -88,16 +89,35 @@ public static class User {
public String surname;
public int age;
public int number;
public House house;

public static class House {
public String color;
public int numRooms;
public int m2;

@Override
public String toString() {
final StringBuilder sb = new StringBuilder("House{");
sb.append("color='").append(color).append('\'');
sb.append(", numRooms=").append(numRooms);
sb.append(", m2=").append(m2);
sb.append('}');
return sb.toString();
}
}

@Override
public String toString() {
return "User{"
+ "id:" + id
+ ", name:\"" + name + '"'
+ ", surname:\"" + surname + '"'
+ ", age:" + age
+ ", number:" + number
+ '}';
final StringBuilder sb = new StringBuilder("User{");
sb.append("id=").append(id);
sb.append(", name='").append(name).append('\'');
sb.append(", surname='").append(surname).append('\'');
sb.append(", age=").append(age);
sb.append(", number=").append(number);
sb.append(", house=").append(house);
sb.append('}');
return sb.toString();
}
}

Expand All @@ -111,6 +131,11 @@ private static MongoDBCollection createTestCollection(String test, int size) {
document.put("surname", SURNAMES.get(random.nextInt(SURNAMES.size())));
document.put("age", (int) i % 5);
document.put("number", (int) i * i);
Document house = new Document();
house.put("color", COLORS.get(random.nextInt(COLORS.size())));
house.put("numRooms", (int) (i % 7) + 1);
house.put("m2", (int) i * 23);
document.put("house", house);
mongoDBCollection.nativeQuery().insert(document, null);
}
return mongoDBCollection;
Expand Down Expand Up @@ -450,6 +475,43 @@ public void testAggregate() {
assertTrue(result.contains(queryResult.getResults().get(0)));
}

@Test
public void testFacet() {
DataResult<Document> allResults = mongoDBCollection.find(new Document(), null);
System.out.println("allResults.getNumResults() = " + allResults.getNumResults());

Document match = new Document("age", new BasicDBObject("$gt", 2));
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "");
List<Bson> facets = MongoDBQueryUtils.createFacet(match, "count(name);name,surname;avg(age);min(age);max(age);number[0:1000000]:100000");
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "name,surname");
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms)");
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "avg(house.m2)");
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "name,house.color");
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "avg(house.numRooms);count(house.color);name,house.color;avg(house.m2);min(house.m2);max(house.m2);house.m2[0:1000000]:100000");
// List<Bson> facets = MongoDBQueryUtils.createFacet(match, "house.m2[0:1000000]:100000");
System.out.println("facets = " + facets);
DataResult<Document> aggregate = mongoDBCollection.aggregate(facets, null);
System.out.println("aggregate.getNumResults() = " + aggregate.getNumResults());
System.out.println(">>>>>>>>> facet results");
for (Document result : aggregate.getResults()) {
System.out.println("result = " + result);
}

int counter = 0;
for (Document result : allResults.getResults()) {
if (result.getInteger("age") > 2) {
counter++;
}
}
System.out.println(">>>>>>>>> all results age > 2: " + counter + " of " + allResults.getNumResults());
for (Document result : allResults.getResults()) {
if (result.getInteger("age") > 2) {
System.out.println("result = " + result);
}
}
}


@Test
public void testInsert() throws Exception {
Long countBefore = mongoDBCollectionInsertTest.count().getNumMatches();
Expand Down

0 comments on commit 18c46e5

Please sign in to comment.