diff --git a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java index 2e4a3876..753938c0 100644 --- a/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java +++ b/commons-datastore/commons-datastore-core/src/main/java/org/opencb/commons/datastore/core/FacetField.java @@ -21,7 +21,6 @@ /** * Created by jtarraga on 09/03/17. */ - public class FacetField { private String name; private long count; @@ -38,6 +37,13 @@ public FacetField(String name, long count, List<Bucket> buckets) { this.buckets = buckets; } + public FacetField(String name, long count, String aggregationName, List<Double> aggregationValues) { + this.name = name; + this.count = count; + this.aggregationName = aggregationName; + this.aggregationValues = aggregationValues; + } + public FacetField(String name, String aggregationName, List<Double> aggregationValues) { this.name = name; this.aggregationName = aggregationName; @@ -78,7 +84,7 @@ public FacetField setCount(long count) { } public FacetField addCount(long delta) { - this.count += delta; + this.count = this.count + delta; return this; } diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java index ed76bdcb..8b26c9eb 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBCollection.java @@ -97,8 +97,7 @@ private <T> DataResult<T> endQuery(List result, long numMatches, double start) { long end = System.currentTimeMillis(); int numResults = (result != null) ? 
result.size() : 0; - DataResult<T> queryResult = new DataResult((int) (end - start), Collections.emptyList(), numResults, result, numMatches, null); - return queryResult; + return new DataResult((int) (end - start), Collections.emptyList(), numResults, result, numMatches, null); } private DataResult endWrite(long start) { @@ -331,31 +330,25 @@ public <T> DataResult<T> aggregate(List<? extends Bson> operations, ComplexTypeC QueryOptions options) { long start = startQuery(); - DataResult<T> queryResult; - MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options); -// MongoCursor<Document> iterator = output.iterator(); List<T> list = new LinkedList<>(); - if (queryResultWriter != null) { - try { - queryResultWriter.open(); + if (operations != null && !operations.isEmpty()) { + MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options); + if (queryResultWriter != null) { + try { + queryResultWriter.open(); + while (iterator.hasNext()) { + queryResultWriter.write(iterator.next()); + } + queryResultWriter.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } else { while (iterator.hasNext()) { - queryResultWriter.write(iterator.next()); + list.add(iterator.next()); } - queryResultWriter.close(); - } catch (IOException e) { - throw new RuntimeException(e.getMessage(), e); - } - } else { -// if (converter != null) { -// while (iterator.hasNext()) { -// list.add(converter.convertToDataModelType(iterator.next())); -// } -// } else { - while (iterator.hasNext()) { - list.add((T) iterator.next()); } -// } } queryResult = endQuery(list, start); return queryResult; @@ -435,7 +428,7 @@ public DataResult update(ClientSession clientSession, List<? 
extends Bson> queri return endWrite( wr.getMatchedCount(), - wr.getInsertedCount() + wr.getUpserts().size(), + (long) wr.getInsertedCount() + wr.getUpserts().size(), wr.getModifiedCount(), wr.getDeletedCount(), 0, @@ -553,8 +546,7 @@ public DataResult createIndex(Bson keys, ObjectMap options) { } mongoDBNativeQuery.createIndex(keys, i); - DataResult dataResult = endQuery(Collections.emptyList(), start); - return dataResult; + return endQuery(Collections.emptyList(), start); } public void dropIndexes() { @@ -564,15 +556,13 @@ public void dropIndexes() { public DataResult dropIndex(Bson keys) { long start = startQuery(); mongoDBNativeQuery.dropIndex(keys); - DataResult dataResult = endQuery(Collections.emptyList(), start); - return dataResult; + return endQuery(Collections.emptyList(), start); } public DataResult<Document> getIndex() { long start = startQuery(); List<Document> index = mongoDBNativeQuery.getIndex(); - DataResult<Document> queryResult = endQuery(index, start); - return queryResult; + return endQuery(index, start); }
diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java
new file mode 100644
index 00000000..68f9472a
--- /dev/null
+++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBDocumentToFacetFieldsConverter.java
@@ -0,0 +1,270 @@
+package org.opencb.commons.datastore.mongodb;
+
+import org.apache.commons.lang3.StringUtils;
+import org.bson.Document;
+import org.opencb.commons.datastore.core.ComplexTypeConverter;
+import org.opencb.commons.datastore.core.FacetField;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.*;
+
+import static org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter.TO_REPLACE_DOTS;
+import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*;
+import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*;
+
+/**
+ * Converts the document produced by a MongoDB facet aggregation into a list of {@link FacetField}.
+ * <p>
+ * Every entry of the input document is one facet: the key is the facet name, whose suffix selects how the list of
+ * result documents stored in the value is parsed (counts, nested accumulator, year/month/day, ranges or accumulator).
+ */
+public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter<List<FacetField>, Document> {
+
+    private static final Map<String, String> MONTH_MAP = new HashMap<>();
+
+    static {
+        MONTH_MAP.put("01", "Jan");
+        MONTH_MAP.put("02", "Feb");
+        MONTH_MAP.put("03", "Mar");
+        MONTH_MAP.put("04", "Apr");
+        MONTH_MAP.put("05", "May");
+        MONTH_MAP.put("06", "Jun");
+        MONTH_MAP.put("07", "Jul");
+        MONTH_MAP.put("08", "Aug");
+        MONTH_MAP.put("09", "Sep");
+        MONTH_MAP.put("10", "Oct");
+        MONTH_MAP.put("11", "Nov");
+        MONTH_MAP.put("12", "Dec");
+    }
+
+    /**
+     * Builds one {@link FacetField} per facet found in the given document.
+     *
+     * @param document Facet result document keyed by facet name; may be null or empty.
+     * @return The parsed facet fields, or an empty list when the document is null or empty.
+     * @throws IllegalArgumentException if a range facet has a non-positive step, or no accumulator can be identified.
+     */
+    @Override
+    public List<FacetField> convertToDataModelType(Document document) {
+        if (document == null || document.isEmpty()) {
+            return Collections.emptyList();
+        }
+
+        String facetFieldName;
+        List<FacetField> facets = new ArrayList<>();
+        for (Map.Entry<String, Object> entry : document.entrySet()) {
+            String key = entry.getKey();
+            List<Document> documentValues = (List<Document>) entry.getValue();
+            if (key.endsWith(COUNTS_SUFFIX) || key.endsWith(FACET_ACC_SUFFIX) || key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX)
+                    || key.endsWith(DAY_SUFFIX)) {
+                facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, ".");
+
+                List<FacetField.Bucket> buckets = new ArrayList<>(documentValues.size());
+                long total = 0;
+                for (Document documentValue : documentValues) {
+                    // Read the count through Number: it is not guaranteed to be an Integer
+                    long counter = toLong(documentValue.get(count.name()));
+                    String bucketValue = "";
+                    Object internalIdValue = documentValue.get(INTERNAL_ID);
+                    if (internalIdValue instanceof String) {
+                        bucketValue = (String) internalIdValue;
+                    } else if (internalIdValue instanceof Boolean
+                            || internalIdValue instanceof Integer
+                            || internalIdValue instanceof Long
+                            || internalIdValue instanceof Double) {
+                        bucketValue = internalIdValue.toString();
+                    } else if (internalIdValue instanceof Document) {
+                        // Multi-field facet: the _id holds one value per grouped field
+                        bucketValue = StringUtils.join(((Document) internalIdValue).values(), SEPARATOR);
+                        if (key.endsWith(COUNTS_SUFFIX)) {
+                            // Strip only the trailing suffix and restore the dots, as done for the other facet names
+                            facetFieldName = key.substring(0, key.length() - COUNTS_SUFFIX.length())
+                                    .replace(TO_REPLACE_DOTS, ".");
+                        }
+                    }
+
+                    List<FacetField> bucketFacetFields = null;
+                    if (key.endsWith(FACET_ACC_SUFFIX)) {
+                        // Key layout: groupField, accumulator, accumulatorField, suffix
+                        String[] split = key.split(SEPARATOR);
+                        String name = split[2];
+                        String aggregationName = split[1];
+                        Double value = toDouble(documentValue.get(aggregationName));
+                        List<Double> aggregationValues = Collections.singletonList(value);
+                        FacetField facetField = new FacetField(name.replace(TO_REPLACE_DOTS, "."), aggregationName, aggregationValues);
+                        // Perhaps it is redundant, as it is also set in the bucket
+                        facetField.setCount(counter);
+                        bucketFacetFields = Collections.singletonList(facetField);
+                    }
+
+                    buckets.add(new FacetField.Bucket(bucketValue, counter, bucketFacetFields));
+                    total += counter;
+                }
+                FacetField facetField = new FacetField(facetFieldName, total, buckets);
+                facetField.setAggregationName(count.name());
+                if (key.endsWith(YEAR_SUFFIX) || key.endsWith(MONTH_SUFFIX) || key.endsWith(DAY_SUFFIX)) {
+                    buckets.sort(Comparator.comparing(FacetField.Bucket::getValue));
+                    if (key.endsWith(MONTH_SUFFIX)) {
+                        for (FacetField.Bucket b : buckets) {
+                            String[] split = b.getValue().split(SEPARATOR);
+                            b.setValue(MONTH_MAP.get(split[1]) + " " + split[0]);
+                        }
+                    } else if (key.endsWith(DAY_SUFFIX)) {
+                        for (FacetField.Bucket b : buckets) {
+                            String[] split = b.getValue().split(SEPARATOR);
+                            b.setValue(split[2] + " " + MONTH_MAP.get(split[1]) + " " + split[0]);
+                        }
+                    }
+                    // Remove the data field and keep year, month and day
+                    List<String> labels = new ArrayList<>(Arrays.asList(key.split(SEPARATOR)));
+                    labels.remove(0);
+                    facetField.setAggregationName(StringUtils.join(labels, SEPARATOR).toLowerCase(Locale.ROOT));
+                }
+                facets.add(facetField);
+            } else if (key.endsWith(RANGES_SUFFIX)) {
+                List<FacetField.Bucket> buckets = new ArrayList<>(documentValues.size());
+                long total = 0;
+
+                String[] split = key.split(SEPARATOR);
+                double start = Double.parseDouble(split[1].replace(TO_REPLACE_DOTS, "."));
+                double end = Double.parseDouble(split[2].replace(TO_REPLACE_DOTS, "."));
+                double step = Double.parseDouble(split[3].replace(TO_REPLACE_DOTS, "."));
+                if (!(step > 0)) {
+                    // A zero, negative or NaN step would never advance the loop below
+                    throw new IllegalArgumentException("Invalid step '" + step + "' in range facet '" + key + "'");
+                }
+
+                long other = 0;
+                for (double i = start; i <= end; i += step) {
+                    long bucketCount = getBucketCountFromRanges(i, documentValues);
+                    FacetField.Bucket bucket = new FacetField.Bucket(String.valueOf(roundToTwoSignificantDecimals(i)), bucketCount, null);
+                    buckets.add(bucket);
+                    total += bucketCount;
+                }
+
+                for (Document value : documentValues) {
+                    if (value.get(INTERNAL_ID) instanceof String && OTHER.equals(value.getString(INTERNAL_ID))) {
+                        other = toLong(value.get(count.name()));
+                    }
+                }
+                facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, ".");
+                if (other > 0) {
+                    FacetField.Bucket bucket = new FacetField.Bucket("Other", other, null);
+                    buckets.add(bucket);
+                    total += bucket.getCount();
+                }
+                FacetField facetField = new FacetField(facetFieldName, total, buckets)
+                        .setStart(start)
+                        .setEnd(end)
+                        .setStep(step);
+                facets.add(facetField);
+            } else {
+                if (documentValues.isEmpty()) {
+                    // Nothing was grouped for this facet (e.g. no document matched), so there is nothing to convert
+                    continue;
+                }
+                Document documentValue = documentValues.get(0);
+                MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue);
+                switch (accumulator) {
+                    case sum:
+                    case avg:
+                    case max:
+                    case min:
+                    case stdDevPop:
+                    case stdDevSamp: {
+                        List<Double> fieldValues = new ArrayList<>();
+                        Object accumulatorValue = documentValue.get(accumulator.name());
+                        if (accumulatorValue instanceof List) {
+                            for (Number number : (List<Number>) accumulatorValue) {
+                                fieldValues.add(number.doubleValue());
+                            }
+                        } else {
+                            fieldValues.add(toDouble(accumulatorValue));
+                        }
+                        long numDocuments = toLong(documentValue.get("count"));
+                        facetFieldName = documentValue.getString(INTERNAL_ID).replace(TO_REPLACE_DOTS, ".");
+                        facets.add(new FacetField(facetFieldName, numDocuments, accumulator.name(), fieldValues));
+                        break;
+                    }
+                    default: {
+                        // Any other accumulator is not handled here: the facet is skipped
+                        break;
+                    }
+                }
+            }
+        }
+        return facets;
+    }
+
+    /**
+     * Finds the first key of the document that is the name of an accumulator.
+     *
+     * @param document Facet result document.
+     * @return The accumulator named by the first matching key, in document key order.
+     * @throws IllegalArgumentException if no key of the document names an accumulator.
+     */
+    private MongoDBQueryUtils.Accumulator getAccumulator(Document document) {
+        for (String fieldName : document.keySet()) {
+            for (MongoDBQueryUtils.Accumulator accumulator : MongoDBQueryUtils.Accumulator.values()) {
+                if (accumulator.name().equals(fieldName)) {
+                    return accumulator;
+                }
+            }
+        }
+        String validAccumulators = StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ",");
+        throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ", ")
+                + ". Valid accumulator functions: " + validAccumulators);
+    }
+
+    @Override
+    public Document convertToStorageType(List<FacetField> facetFields) {
+        throw new UnsupportedOperationException("Not yet implemented");
+    }
+
+    /**
+     * Rounds a value half-up to 2 + n decimals, where n is the number of integer digits of the value or, for values
+     * below 0.1, the number of zeros right after the decimal point, so that small values are not rounded to zero.
+     *
+     * @param value Value to round.
+     * @return The rounded value; 0 when the input is 0.
+     */
+    private static double roundToTwoSignificantDecimals(double value) {
+        if (value == 0) {
+            return 0;
+        }
+
+        BigDecimal bd = BigDecimal.valueOf(value);
+        // Positive: digits before the decimal point. Negative: minus the number of leading fractional zeros
+        int integerDigits = bd.precision() - bd.scale();
+        int scale = 2 + Math.abs(integerDigits);
+        return bd.setScale(scale, RoundingMode.HALF_UP).doubleValue();
+    }
+
+    /**
+     * Returns the count of the range bucket whose numeric _id equals the given value.
+     *
+     * @param inputRange Bucket boundary to look for.
+     * @param documentValues Documents returned for the range facet.
+     * @return The bucket count, or 0 if no bucket has that _id.
+     */
+    private long getBucketCountFromRanges(double inputRange, List<Document> documentValues) {
+        for (Document document : documentValues) {
+            Object id = document.get(INTERNAL_ID);
+            if (id instanceof Number && ((Number) id).doubleValue() == inputRange) {
+                return toLong(document.get(count.name()));
+            }
+        }
+        return 0;
+    }
+
+    /** Reads a numeric value as long; null is read as 0. */
+    private static long toLong(Object value) {
+        return value == null ? 0L : ((Number) value).longValue();
+    }
+
+    /** Reads a numeric value as Double, keeping null as null. */
+    private static Double toDouble(Object value) {
+        return value == null ? null : ((Number) value).doubleValue();
+    }
+}
diff --git a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java index e33ced35..aafd191f 100644 --- a/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java +++ b/commons-datastore/commons-datastore-mongodb/src/main/java/org/opencb/commons/datastore/mongodb/MongoDBQueryUtils.java @@ -18,6 +18,8 @@ import com.mongodb.client.model.*; import org.apache.commons.lang3.StringUtils; +import org.bson.BsonDocument; +import org.bson.BsonInt32; import org.bson.Document; import org.bson.conversions.Bson; import org.opencb.commons.datastore.core.Query; @@ -27,26 +29,58 @@ import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; -import
java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static com.mongodb.client.model.Aggregates.*; +import static com.mongodb.client.model.Projections.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.bucket; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.count; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; /** * Created by imedina on 17/01/16. */ public class MongoDBQueryUtils { - @Deprecated - private static final String REGEX_SEPARATOR = "(\\w+|\\^)"; -// private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~|==?|=?\\^|=?\\$)([^=<>~!]+.*)$"); private static final Pattern OPERATION_STRING_PATTERN = Pattern.compile("^(!=?|!?=?~/?|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_NUMERIC_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_BOOLEAN_PATTERN = Pattern.compile("^(!=|!?=?~|==?)([^=<>~!]+.*)$"); private static final Pattern OPERATION_DATE_PATTERN = Pattern.compile("^(<=?|>=?|!=|!?=?~|=?=?)([0-9]+)(-?)([0-9]*)"); + private static final Pattern FUNC_ACCUMULATOR_PATTERN = Pattern.compile("([a-zA-Z]+)\\(([.a-zA-Z0-9]+)\\)"); + public static final String RANGE_MARK = ".."; + public static final String RANGE_MARK1 = "["; + public static final String RANGE_MARK2 = "]"; + private static final String RANGE_SPLIT_MARK = "\\.\\."; + private static final Pattern RANGE_START_PATTERN = Pattern.compile("([.a-zA-Z0-9]+)\\[([.0-9]+)"); + private static final Pattern RANGE_END_PATTERN = Pattern.compile("([.0-9]+)\\]:([.0-9]+)"); + public static final String INVALID_FORMAT_MSG = "Invalid format "; + public static final String RANGE_FORMAT_MSG = " for range aggregation. 
Valid format is: field[start..end]:step, e.g: size[0..1000]:200"; + + public static final String YEAR_FACET_MARK = "[YEAR]"; + public static final String MONTH_FACET_MARK = "[MONTH]"; + public static final String DAY_FACET_MARK = "[DAY]"; + + public static final String INTERNAL_ID = "_id"; + public static final String OTHER = "Other"; + + public static final String FACET_ACC_SUFFIX = "Acc"; + public static final String COUNTS_SUFFIX = "Counts"; + public static final String SUM_SUFFIX = "Sum"; + public static final String MIN_SUFFIX = "Min"; + public static final String MAX_SUFFIX = "Max"; + public static final String AVG_SUFFIX = "Avg"; + public static final String STDDEVPOP_SUFFIX = "StdDevPop"; + public static final String STDDEVSAMP_SUFFIX = "StdDevSamp"; + public static final String YEAR_SUFFIX = "Year"; + public static final String MONTH_SUFFIX = "Month"; + public static final String DAY_SUFFIX = "Day"; + public static final String RANGES_SUFFIX = "Ranges"; + public static final String SEPARATOR = "__"; + // TODO: Added on 10/08/2021 to deprecate STARTS_WITH and ENDS_WITH regex. They need to be done within '/'. @Deprecated private static final Pattern DEPRECATED_PATTERN = Pattern.compile("^(=?\\^|=?\\$)([^=/<>~!]+[.]*)$"); @@ -80,6 +114,19 @@ public enum ComparisonOperator { BETWEEN } + public enum Accumulator { + count, + sum, + avg, + min, + max, + stdDevPop, + stdDevSamp, + bucket, + year, + month, + day + } public static Bson createFilter(String mongoDbField, String queryParam, Query query) { return createFilter(mongoDbField, queryParam, query, QueryParam.Type.TEXT, ComparisonOperator.EQUALS, LogicalOperator.OR); @@ -497,7 +544,7 @@ public static <T> Bson createFilter(String mongoDbField, List<T> queryValues, Co * @return the Bson query. 
*/ protected static Bson createDateFilter(String mongoDbField, List<String> dateValues, ComparisonOperator comparator, - QueryParam.Type type) { + QueryParam.Type type) { Bson filter = null; Object date = null; @@ -599,12 +646,12 @@ public static List<Bson> createGroupBy(Bson query, String groupByField, String i return createGroupBy(query, Arrays.asList(groupByField.split(",")), idField, count); } else { Bson match = Aggregates.match(query); - Bson project = Aggregates.project(Projections.include(groupByField, idField)); + Bson project = project(Projections.include(groupByField, idField)); Bson group; if (count) { - group = Aggregates.group("$" + groupByField, Accumulators.sum("count", 1)); + group = group("$" + groupByField, Accumulators.sum("count", 1)); } else { - group = Aggregates.group("$" + groupByField, Accumulators.addToSet("features", "$" + idField)); + group = group("$" + groupByField, Accumulators.addToSet("features", "$" + idField)); } return Arrays.asList(match, project, group); } @@ -624,7 +671,7 @@ public static List<Bson> createGroupBy(Bson query, List<String> groupByField, St // add all group-by fields to the projection together with the aggregation field name List<String> groupByFields = new ArrayList<>(groupByField); groupByFields.add(idField); - Bson project = Aggregates.project(Projections.include(groupByFields)); + Bson project = project(Projections.include(groupByFields)); // _id document creation to have the multiple id Document id = new Document(); @@ -633,14 +680,351 @@ public static List<Bson> createGroupBy(Bson query, List<String> groupByField, St } Bson group; if (count) { - group = Aggregates.group(id, Accumulators.sum("count", 1)); + group = group(id, Accumulators.sum("count", 1)); } else { - group = Aggregates.group(id, Accumulators.addToSet("features", "$" + idField)); + group = group(id, Accumulators.addToSet("features", "$" + idField)); } return Arrays.asList(match, project, group); } } + public static List<Bson> createFacet(Bson 
query, String facetField) {
+        return createFacet(query, facetField, QueryOptions.DESCENDING);
+    }
+
+    /**
+     * Creates the aggregation pipeline that computes one or more facets over the documents matching the query.
+     *
+     * @param query      Filter applied before computing the facets.
+     * @param facetField Facet definitions separated by ';', e.g. "format,type;avg(size);size[0..1000]:200;date[YEAR]".
+     * @param order      QueryOptions.ASCENDING sorts count facets by ascending count; any other value sorts descending.
+     * @return The pipeline stages, or an empty list if facetField is null or blank.
+     * @throws IllegalArgumentException if a facet definition is not valid.
+     */
+    public static List<Bson> createFacet(Bson query, String facetField, String order) {
+        if (facetField == null || StringUtils.isEmpty(facetField.trim())) {
+            return new ArrayList<>();
+        }
+        // Spaces are not significant; multiple facets are separated by ';'
+        List<String> facetFields = new ArrayList<>(Arrays.asList(facetField.replace(" ", "").split(";")));
+        return createFacet(query, facetFields, order);
+    }
+
+    /** Builds the pipeline (match, project, optional unwinds and a single facet stage) for the given facet definitions. */
+    private static List<Bson> createFacet(Bson query, List<String> facetFields, String order) {
+        List<Facet> facetList = new ArrayList<>();
+        Set<String> includeFields = new HashSet<>();
+        List<Bson> unwindList = new ArrayList<>();
+        List<Bson> dateProjections = new ArrayList<>();
+
+        // For each facet definition we create a MongoDB facet. There are 4 types of facets: combined fields (1),
+        // accumulator function or date part (2), range (3) and plain field count, optionally with an accumulator (4)
+        for (String facetField : facetFields) {
+            Facet facet = null;
+
+            // 1. Facet combining fields with commas, e.g. format,type. Only 'count' is supported as accumulator.
+            if (facetField.contains(",")) {
+                Document fields = new Document();
+                for (String field : facetField.split(",")) {
+                    fields.append(field, "$" + field);
+                    includeFields.add(field);
+                }
+                Bson bsonSort;
+                if (QueryOptions.ASCENDING.equals(order)) {
+                    bsonSort = sort(Sorts.ascending(count.name()));
+                } else {
+                    bsonSort = sort(Sorts.descending(count.name()));
+                }
+                facet = new Facet(
+                        facetField.replace(",", SEPARATOR) + COUNTS_SUFFIX,
+                        Arrays.asList(group(fields, Accumulators.sum(Accumulator.count.name(), 1)), bsonSort));
+            } else {
+                Accumulator accumulator;
+                String groupField;
+                String accumulatorField = null;
+                List<Double> boundaries = new ArrayList<>();
+
+                // 2. Facet with accumulators (count, avg, min, max,...) or range (bucket)
+                Matcher matcher = FUNC_ACCUMULATOR_PATTERN.matcher(facetField);
+                if (matcher.matches()) {
+                    accumulator = parseAccumulator(matcher.group(1));
+                    groupField = matcher.group(2);
+                } else if (facetField.toUpperCase(Locale.ROOT).endsWith(YEAR_FACET_MARK)) {
+                    groupField = facetField.substring(0, facetField.length() - YEAR_FACET_MARK.length());
+                    accumulator = year;
+
+                    // Add projections
+                    dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$substrCP",
+                            Arrays.asList("$" + groupField, 0, 4))));
+                } else if (facetField.toUpperCase(Locale.ROOT).endsWith(MONTH_FACET_MARK)) {
+                    groupField = facetField.substring(0, facetField.length() - MONTH_FACET_MARK.length());
+                    accumulator = month;
+
+                    // Add projections
+                    dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$substrCP",
+                            Arrays.asList("$" + groupField, 0, 4))));
+                    dateProjections.add(computed(groupField + SEPARATOR + month.name(), new Document("$substrCP",
+                            Arrays.asList("$" + groupField, 4, 2))));
+                } else if (facetField.toUpperCase(Locale.ROOT).endsWith(DAY_FACET_MARK)) {
+                    groupField = facetField.substring(0, facetField.length() - DAY_FACET_MARK.length());
+                    accumulator = day;
+
+                    // Add projections
+                    dateProjections.add(computed(groupField + SEPARATOR + year.name(), new Document("$substrCP",
+                            Arrays.asList("$" + groupField, 0, 4))));
+                    dateProjections.add(computed(groupField + SEPARATOR + month.name(), new Document("$substrCP",
+                            Arrays.asList("$" + groupField, 4, 2))));
+                    dateProjections.add(computed(groupField + SEPARATOR + day.name(), new Document("$substrCP",
+                            Arrays.asList("$" + groupField, 6, 2))));
+                } else {
+                    // 3. Facet with range aggregation
+                    if (facetField.contains(RANGE_MARK) || facetField.contains(RANGE_MARK1) || facetField.contains(RANGE_MARK2)) {
+                        String[] split = facetField.split(RANGE_SPLIT_MARK);
+                        if (split.length != 2) {
+                            throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG);
+                        }
+                        Matcher matcher1 = RANGE_START_PATTERN.matcher(split[0]);
+                        Matcher matcher2 = RANGE_END_PATTERN.matcher(split[1]);
+                        if (!matcher1.matches() || !matcher2.matches()) {
+                            throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG);
+                        }
+                        accumulator = bucket;
+                        groupField = matcher1.group(1);
+                        double start = Double.parseDouble(matcher1.group(2));
+                        double end = Double.parseDouble(matcher2.group(1));
+                        double step = Double.parseDouble(matcher2.group(2));
+                        // A non-positive step would never end the loop below and start > end would leave no boundaries
+                        if (!(step > 0) || start > end) {
+                            throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField + RANGE_FORMAT_MSG);
+                        }
+                        for (double i = start; i <= end; i += step) {
+                            boundaries.add(i);
+                        }
+                        if (boundaries.get(boundaries.size() - 1) < end) {
+                            boundaries.add(boundaries.get(boundaries.size() - 1) + step);
+                        }
+
+                        String facetName = groupField + SEPARATOR + start + SEPARATOR + end + SEPARATOR + step + SEPARATOR
+                                + RANGES_SUFFIX;
+                        facet = new Facet(facetName, Aggregates.bucket("$" + groupField, boundaries,
+                                new BucketOptions()
+                                        .defaultBucket(OTHER)
+                                        .output(new BsonField(count.name(), new BsonDocument("$sum", new BsonInt32(1))))));
+                    } else {
+                        // 4. Facet with count as default accumulator, or field:accumulator(accumulatorField)
+                        if (facetField.contains(":")) {
+                            String[] split = facetField.split("[:\\(\\)]");
+                            if (split.length < 3) {
+                                throw new IllegalArgumentException(INVALID_FORMAT_MSG + facetField
+                                        + ". Valid format is: field:accumulator(field), e.g: type:avg(size)");
+                            }
+                            groupField = split[0];
+                            accumulator = parseAccumulator(split[1]);
+                            accumulatorField = split[2];
+                        } else {
+                            groupField = facetField;
+                            accumulator = count;
+                        }
+                    }
+                }
+
+                includeFields.add(groupField);
+                if (StringUtils.isNotEmpty(accumulatorField)) {
+                    includeFields.add(accumulatorField);
+                }
+
+                // Get MongoDB facet
+                if (facet == null) {
+                    facet = getMongoDBFacet(groupField, accumulator, accumulatorField, boundaries, order);
+                }
+
+                // Unwind in any case
+                Set<String> unwindFields = new HashSet<>();
+                if (StringUtils.isNotEmpty(groupField)) {
+                    unwindFields.addAll(getUnwindFields(groupField));
+                }
+                if (StringUtils.isNotEmpty(accumulatorField)) {
+                    unwindFields.addAll(getUnwindFields(accumulatorField));
+                }
+                // We must order the "unwind" fields
+                List<String> unwindFieldList = new ArrayList<>(unwindFields);
+                unwindFieldList.sort(Comparator.comparingInt(s -> s.length() - s.replace(".", "").length()));
+                for (String unwindField : unwindFieldList) {
+                    unwindList.add(Aggregates.unwind("$" + unwindField));
+                }
+            }
+
+            // Add facet to the list of facets to be executed
+            if (facet != null) {
+                facetList.add(facet);
+            }
+        }
+
+        // Build and return the MongoDB pipeline for facets: match, project, [unwind,] aggregates
+        List<Bson> result = new ArrayList<>();
+        // 1 - Match
+        result.add(Aggregates.match(query));
+        // 2 - Project
+        List<Bson> projections = new ArrayList<>();
+
+        // 2.1 - Include fields
+        for (String field : includeFields) {
+            projections.add(include(field));
+        }
+
+        // 2.2 - Compute data fields
+        projections.addAll(dateProjections);
+
+        result.add(project(fields(projections)));
+
+        // 3 - Unwind (no stage is added when there is nothing to unwind)
+        result.addAll(unwindList);
+
+        // 4 - Aggregates (dot notation management for facets)
+        result.add(GenericDocumentComplexConverter.replaceDots(Document.parse(facet(facetList).toBsonDocument().toJson())));
+        return result;
+    }
+
+    /** Parses an accumulator function name, rejecting the values that cannot be requested as functions. */
+    private static Accumulator parseAccumulator(String name) {
+        List<Accumulator> validAccumulators = Arrays.stream(Accumulator.values())
+                .filter(acc -> acc != bucket && acc != year && acc != month && acc != day).collect(Collectors.toList());
+        for (Accumulator validAccumulator : validAccumulators) {
+            if (validAccumulator.name().equals(name)) {
+                return validAccumulator;
+            }
+        }
+        throw new IllegalArgumentException("Invalid accumulator function '" + name + "'. Valid accumulator"
+                + " functions: " + StringUtils.join(validAccumulators, ", "));
+    }
+
+    /** Returns every dotted-path prefix of the field, e.g. "a.b.c" gives "a", "a.b" and "a.b.c". */
+    private static Collection<String> getUnwindFields(String field) {
+        List<String> unwindFields = new ArrayList<>();
+        String[] split = field.split("\\.");
+        String acc = "";
+        for (String s : split) {
+            if (!StringUtils.isEmpty(acc)) {
+                acc += ".";
+            }
+            acc += s;
+            unwindFields.add(acc);
+        }
+        return unwindFields;
+    }
+
+    /** Builds the facet for one group field and accumulator; returns null for accumulators not handled here (bucket). */
+    private static Facet getMongoDBFacet(String groupField, Accumulator accumulator, String accumulatorField, List<Double> boundaries,
+                                         String order) {
+        String groupFieldId = groupField;
+        String accumulatorId = "$" + groupField;
+        String facetName = null;
+        if (StringUtils.isNotEmpty(accumulatorField)) {
+            groupFieldId = "$" + groupField;
+            accumulatorId = "$" + accumulatorField;
+            facetName = groupField + SEPARATOR + accumulator + SEPARATOR + accumulatorField + SEPARATOR + FACET_ACC_SUFFIX;
+        }
+
+        Facet facet;
+        switch (accumulator) {
+            case count: {
+                facetName = groupField + SEPARATOR + COUNTS_SUFFIX;
+
+                Bson bsonSort;
+                if (QueryOptions.ASCENDING.equals(order)) {
+                    bsonSort = sort(Sorts.ascending(count.name()));
+                } else {
+                    bsonSort = sort(Sorts.descending(count.name()));
+                }
+
+                facet = new Facet(facetName, Arrays.asList(group("$" + groupField, Accumulators.sum(count.name(), 1)), bsonSort));
+                break;
+            }
+            case year: {
+                facetName = groupField + SEPARATOR + YEAR_SUFFIX;
+                facet = new Facet(facetName, group("$" + groupField + SEPARATOR + year.name(), Accumulators.sum(count.name(), 1)));
+                break;
+            }
+            case month: {
+                facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX;
+
+                Document fields = new Document();
+                fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name());
+                fields.append(groupField + SEPARATOR + month.name(), "$" + groupField + SEPARATOR + month.name());
+
+                facet = new Facet(facetName, group(fields, Accumulators.sum(count.name(), 1)));
+                break;
+            }
+            case day: {
+                facetName = groupField + SEPARATOR + YEAR_SUFFIX + SEPARATOR + MONTH_SUFFIX + SEPARATOR + DAY_SUFFIX;
+
+                Document fields = new Document();
+                fields.append(groupField + SEPARATOR + year.name(), "$" + groupField + SEPARATOR + year.name());
+                fields.append(groupField + SEPARATOR + month.name(), "$" + groupField + SEPARATOR + month.name());
+                fields.append(groupField + SEPARATOR + day.name(), "$" + groupField + SEPARATOR + day.name());
+
+                facet = new Facet(facetName, group(fields, Accumulators.sum(count.name(), 1)));
+                break;
+            }
+            case sum: {
+                if (StringUtils.isEmpty(facetName)) {
+                    facetName = groupField + SEPARATOR + SUM_SUFFIX;
+                }
+                facet = new Facet(facetName, group(groupFieldId,
+                        Arrays.asList(Accumulators.sum(sum.name(), accumulatorId), Accumulators.sum(count.name(), 1))));
+                break;
+            }
+            case avg: {
+                if (StringUtils.isEmpty(facetName)) {
+                    facetName = groupField + SEPARATOR + AVG_SUFFIX;
+                }
+                facet = new Facet(facetName, group(groupFieldId,
+                        Arrays.asList(Accumulators.avg(avg.name(), accumulatorId), Accumulators.sum(count.name(), 1))));
+                break;
+            }
+            case min: {
+                if (StringUtils.isEmpty(facetName)) {
+                    facetName = groupField + SEPARATOR + MIN_SUFFIX;
+                }
+                facet = new Facet(facetName, group(groupFieldId,
+                        Arrays.asList(Accumulators.min(min.name(), accumulatorId), Accumulators.sum(count.name(), 1))));
+                break;
+            }
+            case max: {
+                if (StringUtils.isEmpty(facetName)) {
+                    facetName = groupField + SEPARATOR + MAX_SUFFIX;
+                }
+                facet = new Facet(facetName, group(groupFieldId,
+                        Arrays.asList(Accumulators.max(max.name(), accumulatorId), Accumulators.sum(count.name(), 1))));
+                break;
+            }
+            case stdDevPop: {
+                if (StringUtils.isEmpty(facetName)) {
+                    facetName = groupField + SEPARATOR + STDDEVPOP_SUFFIX;
+                }
+                facet = new Facet(facetName, group(groupFieldId,
+                        Arrays.asList(Accumulators.stdDevPop(stdDevPop.name(), accumulatorId), Accumulators.sum(count.name(), 1))));
+                break;
+            }
+            case stdDevSamp: {
+                if (StringUtils.isEmpty(facetName)) {
+                    facetName = groupField + SEPARATOR + STDDEVSAMP_SUFFIX;
+                }
+                facet = new Facet(facetName, group(groupFieldId,
+                        Arrays.asList(Accumulators.stdDevSamp(stdDevSamp.name(), accumulatorId), Accumulators.sum(count.name(), 1))));
+                break;
+            }
+            case bucket:
+                // Range facets are built by the caller, which knows the boundaries
+            default: {
+                facet = null;
+                break;
+            }
+        }
+
+        return facet;
+    }
+
 public static void parseQueryOptions(List<Bson> operations, QueryOptions options) { if (options != null) { Bson projection = getProjection(options); @@ -695,7 +1079,7 @@ public static Bson getSkip(QueryOptions options) { public static Bson getProjection(QueryOptions options) { Bson projection = getProjection(null, options); - return projection != null ? Aggregates.project(projection) : null; + return projection != null ?
project(projection) : null; } protected static Bson getProjection(Bson projection, QueryOptions options) { @@ -749,7 +1133,7 @@ protected static Bson getProjection(Bson projection, QueryOptions options) { projections.add(include); // MongoDB allows to exclude _id when include is present if (excludeId) { - projections.add(Projections.excludeId()); + projections.add(excludeId()); } } else { if (exclude != null) { @@ -783,7 +1167,7 @@ protected static Bson getProjection(Bson projection, QueryOptions options) { } if (projections.size() > 0) { - projectionResult = Projections.fields(projections); + projectionResult = fields(projections); } return projectionResult; diff --git a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java index 377d610c..b89a37b4 100644 --- a/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java +++ b/commons-datastore/commons-datastore-mongodb/src/test/java/org/opencb/commons/datastore/mongodb/MongoDBCollectionTest.java @@ -20,34 +20,40 @@ import com.mongodb.MongoBulkWriteException; import com.mongodb.MongoWriteException; import com.mongodb.client.model.Filters; +import org.apache.commons.lang3.StringUtils; import org.bson.Document; import org.bson.conversions.Bson; import org.hamcrest.CoreMatchers; import org.junit.*; import org.junit.rules.ExpectedException; -import org.opencb.commons.datastore.core.DataResult; -import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.commons.datastore.core.QueryResultWriter; +import org.opencb.commons.datastore.core.*; import java.io.DataOutputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.text.SimpleDateFormat; +import java.time.LocalDateTime; 
+import java.time.format.DateTimeFormatter; import java.util.*; import static org.junit.Assert.*; +import static org.opencb.commons.datastore.core.QueryOptions.SORT; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*; +import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*; /** * Created by imedina on 29/03/14. */ public class MongoDBCollectionTest { + public static final String EMPTY = "***EMPTY***"; private static MongoDataStoreManager mongoDataStoreManager; private static MongoDataStore mongoDataStore; private static MongoDBCollection mongoDBCollection; private static MongoDBCollection mongoDBCollectionInsertTest; private static MongoDBCollection mongoDBCollectionUpdateTest; private static MongoDBCollection mongoDBCollectionRemoveTest; + private static MongoDBCollection mongoDBCollectionFacetRange; private static int N = 1000; @@ -55,6 +61,7 @@ public class MongoDBCollectionTest { public ExpectedException thrown = ExpectedException.none(); public static final List<String> NAMES = Arrays.asList("John", "Jack", "Javi"); public static final List<String> SURNAMES = Arrays.asList("Doe", "Davis", null); + public static final List<String> COLORS = Arrays.asList("red", "green", "yellow", "blue"); @BeforeClass public static void beforeClass() throws Exception { @@ -68,6 +75,7 @@ public static void beforeClass() throws Exception { mongoDBCollectionInsertTest = createTestCollection("insert_test", 50); mongoDBCollectionUpdateTest = createTestCollection("update_test", 50); mongoDBCollectionRemoveTest = createTestCollection("remove_test", 50); + mongoDBCollectionFacetRange = createTestCollection2("facet-range-test"); } @Before @@ -88,16 +96,57 @@ public static class User { public String surname; public int age; public int number; + public String date; + public boolean tall; + public House house; + public List<Dog> dogs; + + public static class House { + public String color; + public int numRooms; + public int m2; + + @Override 
+ public String toString() { + final StringBuilder sb = new StringBuilder("House{"); + sb.append("color='").append(color).append('\''); + sb.append(", numRooms=").append(numRooms); + sb.append(", m2=").append(m2); + sb.append('}'); + return sb.toString(); + } + } + + public static class Dog { + public int age; + public List<Integer> years; + public String color; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Dog{"); + sb.append("age=").append(age); + sb.append("years=").append(years); + sb.append("color=").append(color); + sb.append('}'); + return sb.toString(); + } + } @Override public String toString() { - return "User{" - + "id:" + id - + ", name:\"" + name + '"' - + ", surname:\"" + surname + '"' - + ", age:" + age - + ", number:" + number - + '}'; + final StringBuilder sb = new StringBuilder("User{"); + sb.append("id=").append(id); + sb.append(", name='").append(name).append('\''); + sb.append(", surname='").append(surname).append('\''); + sb.append(", age=").append(age); + sb.append(", number=").append(number); + sb.append(", date='").append(date).append('\''); + sb.append(", tall=").append(tall); + sb.append(", house=").append(house); + sb.append(", dogs=").append(dogs); + sb.append('}'); + return sb.toString(); } } @@ -105,12 +154,56 @@ private static MongoDBCollection createTestCollection(String test, int size) { MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(test); Document document; Random random = new Random(); + + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMddHHmmss"); + LocalDateTime now = LocalDateTime.now(); + for (long i = 0; i < size; i++) { document = new Document("id", i); document.put("name", NAMES.get(random.nextInt(NAMES.size()))); document.put("surname", SURNAMES.get(random.nextInt(SURNAMES.size()))); document.put("age", (int) i % 5); document.put("number", (int) i * i); + LocalDateTime futureDate = now.plusDays(random.nextInt(1000)); + document.put("date", 
futureDate.format(formatter)); + document.put("tall", (i % 6 == 0)); + Document house = new Document(); + house.put("color", COLORS.get(random.nextInt(COLORS.size()))); + house.put("numRooms", (int) (i % 7) + 1); + house.put("m2", (int) i * 23); + document.put("house", house); + int numDogs = random.nextInt(5); + List<Document> dogs = new ArrayList<>(); + for (int j = 0 ; j < numDogs; j++) { + Document dog = new Document(); + dog.put("age", random.nextInt(20)); + int numYears = random.nextInt(3); + List<Integer> years = new ArrayList<>(); + for (int k = 0 ; k < numYears; k++) { + years.add(random.nextInt(100) + 1900); + } + if (years.size() > 1) { + dog.put("years", years); + } + dog.put("color", COLORS.get(random.nextInt(COLORS.size()))); + dogs.add(dog); + } + document.put("dogs", dogs); + mongoDBCollection.nativeQuery().insert(document, null); + System.out.println("document.toJson() = " + document.toJson()); + } + return mongoDBCollection; + } + + private static MongoDBCollection createTestCollection2(String name) { + MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(name); + Document document; + Random random = new Random(); + + List<Integer> ages = Arrays.asList(1, 3, 3, 9, 10, 11, 12); + for (Integer age : ages) { + document = new Document("age", age); mongoDBCollection.nativeQuery().insert(document, null); } return mongoDBCollection; @@ -163,7 +256,7 @@ public void testDistinct() throws Exception { @Test public void testSortOrder() throws Exception { Document query = new Document(); - QueryOptions queryOptions = new QueryOptions(QueryOptions.LIMIT, 10).append(QueryOptions.SORT, "number") + QueryOptions queryOptions = new QueryOptions(QueryOptions.LIMIT, 10).append(SORT, "number") .append(QueryOptions.ORDER, "asc"); List<Document> result = mongoDBCollection.find(query, queryOptions).getResults(); assertEquals(0L, result.get(0).get("number")); @@ -173,7 +266,7 @@ public void testSortOrder() throws Exception { public void 
testMultipleSortOrder() throws Exception { Document query = new Document(); QueryOptions queryOptions = new QueryOptions(QueryOptions.LIMIT, 500) - .append(QueryOptions.SORT, Arrays.asList("age:ASC", "number:DESC")) + .append(SORT, Arrays.asList("age:ASC", "number:DESC")) .append(QueryOptions.ORDER, "asc"); int age = 0; long number = Long.MAX_VALUE; @@ -450,6 +543,864 @@ public void testAggregate() { assertTrue(result.contains(queryResult.getResults().get(0))); } + @Test + public void testFacetBuckets() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "name"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + + String value; + long totalCount = 0; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + value = result.getString(fieldName); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + map.put(value, 0); + } else if (!map.containsKey(value)) { + map.put(value, 0); + } + map.put(value, 1 + map.get(value)); + totalCount++; + } + for (List<FacetField> result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetBucketsBoolean() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults 
= mongoDBCollection.find(match, null); + + String fieldName = "tall"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + + String value; + long totalCount = 0; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + value = "" + result.getBoolean(fieldName); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + map.put(value, 0); + } else if (!map.containsKey(value)) { + map.put(value, 0); + } + map.put(value, 1 + map.get(value)); + totalCount++; + } + for (List<FacetField> result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetBucketsDotNotation() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "house.color"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + String value; + long totalCount = 0; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Document house = (Document) result.get("house"); + value = house.getString("color"); + if (StringUtils.isEmpty(value)) { 
+ value = EMPTY; + map.put(value, 0); + } else if (!map.containsKey(value)) { + map.put(value, 0); + } + map.put(value, 1 + map.get(value)); + totalCount++; + } + for (List<FacetField> result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetCountBucketsArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "dogs.color"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); + + for (List<FacetField> facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + String value; + long totalCount = 0; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + List<Document> dogs = (List<Document>) result.get("dogs"); + for (Document dog : dogs) { + totalCount++; + String color = dog.getString("color"); + if (StringUtils.isEmpty(color)) { + color = EMPTY; + map.put(color, 0); + } else if (!map.containsKey(color)) { + map.put(color, 0); + } + map.put(color, 1 + map.get(color)); + } + } + + for (List<FacetField> result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + 
Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + + @Test + public void testFacetAvgBucketsArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "avg(dogs.age)"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List<FacetField> facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + int counter = 0; + int acc = 0; + for (Document doc : matchedResults.getResults()) { + List<Document> dogs = (List<Document>) doc.get("dogs"); + for (Document dog : dogs) { + counter++; + acc += (int) dog.get("age"); + } + } + System.out.println("counter = " + counter); + System.out.println("(acc/counter) = " + (1.0d * acc / counter)); + Assert.assertEquals(aggregate.getResults().get(0).get(0).getAggregationValues().get(0), 1.0d * acc / counter, 0.0001); + } + + // @Test +// public void testFacetAccumulatorMaxBucketsArray() { +// Document match = new Document("age", new BasicDBObject("$gt", 2)); +// DataResult<Document> matchedResults = mongoDBCollection.find(match, null); +// +// String fieldName = "dogs.color:max(dogs.years)"; +// List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); +// System.out.println("facets = " + facets); +// MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); +// DataResult<List<FacetField>> aggregate = 
mongoDBCollection.aggregate(facets, converter, null); +// for (List<FacetField> facetFieldList : aggregate.getResults()) { +// System.out.println("facetFieldList = " + facetFieldList); +// } +// +// int counter = 0; +// int acc = 0; +// for (Document doc : matchedResults.getResults()) { +// List<Document> dogs = (List<Document>) doc.get("dogs"); +// for (Document dog : dogs) { +// counter++; +// acc += (int) dog.get("age"); +// } +// } +// System.out.println("counter = " + counter); +// System.out.println("(acc/counter) = " + (1.0d * acc / counter)); +// Assert.assertEquals(aggregate.getResults().get(0).get(0).getAggregationValues().get(0), 1.0d * acc / counter, 0.0001); +// } +// + @Test + public void testFacetFilterAccumulatorBucketsArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "dogs.color:avg(dogs.age)"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List<FacetField> facetFieldList : aggregate.getResults()) { + System.out.println("facetFieldList = " + facetFieldList); + } + + String value; + long totalCount = 0; + Map<String, Integer> counterMap = new HashMap<>(); + Map<String, Integer> accMap = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + List<Document> dogs = (List<Document>) result.get("dogs"); + for (Document dog : dogs) { + totalCount++; + String color = dog.getString("color"); + int age = (int) dog.get("age"); + if (StringUtils.isEmpty(color)) { + color = EMPTY; + counterMap.put(color, 0); + accMap.put(color, 0); + } else if (!counterMap.containsKey(color)) { + counterMap.put(color, 0); + accMap.put(color, 0); + } + 
counterMap.put(color, 1 + counterMap.get(color)); + accMap.put(color, age + accMap.get(color)); + } + } + + for (List<FacetField> result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(counterMap.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + if (StringUtils.isEmpty(value)) { + value = EMPTY; + } + Assert.assertEquals(counterMap.get(value).longValue(), bucket.getCount()); + Assert.assertEquals(counterMap.get(value).longValue(), bucket.getFacetFields().get(0).getCount()); + Assert.assertEquals("avg", bucket.getFacetFields().get(0).getAggregationName()); + Assert.assertEquals(1.0 * accMap.get(value) / counterMap.get(value), bucket.getFacetFields().get(0).getAggregationValues().get(0), 0.0001); + } + } + } + } + + @Test + public void testFacetRangeArray() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + int start = 1; + int end = 20; + int step = 5; + String fieldName = "dogs.age" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); +// for (List<FacetField> facetFieldList : aggregate.getResults()) { +// System.out.println("facetFieldList = " + facetFieldList); +// } + + long outOfRange = 0; + List<Long> rangeValues = new ArrayList<>(Arrays.asList(0L, 0L, 0L, 0L)); + + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + int bucketNum; + List<Document> dogs = 
(List<Document>) result.get("dogs"); + for (Document dog : dogs) { + int value = (int) dog.get("age"); + if (value < start || value > end) { + outOfRange++; + } else { + bucketNum = (int) (value - start) / step; + rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); + } + } + } + + System.out.println("rangeValues = " + rangeValues); + System.out.println("outOfRange = " + outOfRange); + + for (List<FacetField> result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount()); + Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); + for (int i = 0; i < facetField.getBuckets().size(); i++) { + FacetField.Bucket bucket = facetField.getBuckets().get(i); + if (bucket.getValue().equals("Other")) { + Assert.assertEquals(outOfRange, bucket.getCount()); + } else { + Assert.assertEquals(rangeValues.get(i).longValue(), bucket.getCount()); + } + } + } + } + } + + @Test + public void testFacetMax() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, "max(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + long totalCount = 0; + double maxValue = 0; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Long value = result.getLong(fieldName); + totalCount++; + if (value != null) { + if (value > maxValue) { + maxValue = value; + } + } + } + for (List<FacetField> result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + 
Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(max.name(), facetField.getAggregationName()); + Assert.assertEquals(maxValue, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + + @Test + public void testFacetMin() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, "min(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + + long count = 0; + double minValue = Double.MAX_VALUE; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Long value = result.getLong(fieldName); + count++; + if (value != null) { + if (value < minValue) { + minValue = value; + } + } + } + for (List<FacetField> result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(count, facetField.getCount()); + Assert.assertEquals(min.name(), facetField.getAggregationName()); + Assert.assertEquals(minValue, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + + @Test + public void testFacetAvg() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); + + long totalCount = 0; + double totalSum = 0; + Map<String, 
Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + Long value = result.getLong(fieldName); + if (value != null) { + totalSum += value; + totalCount++; + } + } + for (List<FacetField> result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(avg.name(), facetField.getAggregationName()); + Assert.assertEquals(totalSum / totalCount, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + + @Test + public void testFacetMaxDotNotationAndList() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "dogs.age"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, "max(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); + + DataResult<Document> aggregate2 = mongoDBCollection.aggregate(facets, null); + System.out.println("aggregate2.toString() = " + aggregate2); + + int count = 0; + List<Double> maxValues = new ArrayList<>(Arrays.asList(0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D,0D)); + for (Document result : matchedResults.getResults()) { + List<Document> dogs = (List<Document>) result.get("dogs"); + if (result.getInteger("age") > 2 && dogs.size() > 0) { + System.out.println(); + for (int i = 0; i < dogs.size(); i++) { + Number value = (Number) dogs.get(i).get("age"); + count++; + System.out.print("age = " + result.getInteger("age") + "; i = " + i + "; value = " + value + "; "); + if (value.doubleValue() > maxValues.get(i)) { + maxValues.set(i, value.doubleValue()); + } + } + } + } + for (List<FacetField> result : aggregate.getResults()) { + 
Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(count, facetField.getCount()); + Assert.assertEquals(max.name(), facetField.getAggregationName()); +// for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { +// Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); +// } + } + } + } + + @Test + public void testFacetSumAccumulator() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + int total = 0; + int count = 0; + String fieldName = "number"; + for (Document result : matchedResults.getResults()) { + System.out.println("result = " + result); + count++; + total += result.getLong(fieldName); + } + double avg = total / matchedResults.getNumResults(); + + List<Bson> facets = MongoDBQueryUtils.createFacet(match, "avg(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate); + for (List<FacetField> result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField facetField : result) { + Assert.assertEquals(count, facetField.getCount()); + Assert.assertEquals(Accumulator.avg.name(), facetField.getAggregationName()); + Assert.assertEquals(avg, facetField.getAggregationValues().get(0), 0.5); +// for (int i = 0; i < facetField.getAggregationValues().size() ; i++) { +// Assert.assertEquals(maxValues.get(i), facetField.getAggregationValues().get(i), 0.0001); +// } + } + } + + + facets = MongoDBQueryUtils.createFacet(match, "sum(" + fieldName + ")"); + aggregate = mongoDBCollection.aggregate(facets, converter, null); + for (List<FacetField> result : aggregate.getResults()) { + Assert.assertEquals(1, result.size()); + for (FacetField 
facetField : result) { + Assert.assertEquals(count, facetField.getCount()); + Assert.assertEquals(Accumulator.sum.name(), facetField.getAggregationName()); + Assert.assertEquals(total, facetField.getAggregationValues().get(0), 0.0001); + } + } + } + + /** Facets by name with a sum(age) accumulator and compares the total count, the bucket counts and the per-bucket sums with tallies computed here from the matched documents. */ + @Test + public void testFacetGroupSumAccumulator() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + int totalCount = 0; + String groupFieldName = "name"; + String accumulatorFieldName = "age"; + Map<String, Integer> numberPerNames = new HashMap<>(); + Map<String, Integer> countPerNames = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + String name = result.getString(groupFieldName); + if (!numberPerNames.containsKey(name)) { + numberPerNames.put(name, 0); + countPerNames.put(name, 0); + } + numberPerNames.put(name, result.getInteger(accumulatorFieldName) + numberPerNames.get(name)); + countPerNames.put(name, 1 + countPerNames.get(name)); + } + + for (Map.Entry<String, Integer> entry : numberPerNames.entrySet()) { + System.out.println(entry.getKey() + " --> " + entry.getValue() + ", count = " + countPerNames.get(entry.getKey())); + totalCount += countPerNames.get(entry.getKey()); + } + System.out.println("totalCount = " + totalCount); + + String acc = "sum"; // "count"; // "avg"; + String facet = groupFieldName + ":" + acc + "(" + accumulatorFieldName + ")"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, facet); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + Assert.assertEquals(1, aggregate.getResults().size()); + FacetField facetField = aggregate.getResults().get(0).get(0); + Assert.assertEquals(groupFieldName, facetField.getName()); + Assert.assertEquals(totalCount, 
facetField.getCount(), 0.001); + Assert.assertEquals(numberPerNames.size(), facetField.getBuckets().size(), 0.001); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + Assert.assertTrue(countPerNames.containsKey(bucket.getValue())); + Assert.assertEquals(countPerNames.get(bucket.getValue()), bucket.getCount(), 0.001); + Assert.assertEquals(1, bucket.getFacetFields().size()); + Assert.assertEquals(accumulatorFieldName, bucket.getFacetFields().get(0).getName()); + Assert.assertEquals(acc, bucket.getFacetFields().get(0).getAggregationName()); + Assert.assertEquals(numberPerNames.get(bucket.getValue()), bucket.getFacetFields().get(0).getAggregationValues().get(0), 0.001); + } + } + + + /** Expects IllegalArgumentException for a facet that uses the unknown accumulator "toto". */ + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidAccumulator() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "number"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, "toto(" + fieldName + ")"); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + mongoDBCollection.aggregate(facets, converter, null); + } + + /** Requests the facets "name;surname" and expects two FacetField entries in the first result. */ + @Test + public void testFacetMultiple() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "name;surname"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + + Assert.assertEquals(2, aggregate.first().size()); + } + + /** Requests "min(number);max(number)" and compares both aggregation values with the minimum and maximum computed here from the matched documents. */ + @Test + public void testFacetMultipleAccumulators() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + 
DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + double min = Double.MAX_VALUE; + double max = -Double.MAX_VALUE; /* not Double.MIN_VALUE: that constant is the smallest positive double, so a maximum that is zero or negative would never replace it */ + for (Document result : matchedResults.getResults()) { + double value = 1.0d * result.getLong("number"); + if (value > max) { + max = value; + } + if (value < min) { + min = value; + } + } + + String fieldName = "min(number);max(number)"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate = " + aggregate); + + Assert.assertEquals(2, aggregate.first().size()); + for (FacetField result : aggregate.first()) { + Assert.assertEquals("number", result.getName()); + Assert.assertEquals(matchedResults.getNumResults(), result.getCount()); + double value = 0d; + if ("min".equals(result.getAggregationName())) { + value = min; + } else if ("max".equals(result.getAggregationName())) { + value = max; + } else { + fail(); + } + Assert.assertEquals(value, result.getAggregationValues().get(0), 0.001d); + } + } + + /** Requests the facet "name,surname" and compares every bucket count with a tally keyed by name, SEPARATOR and surname, where a missing or empty name or surname contributes nothing to the key. */ + @Test + public void testFacetCombine() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + DataResult<Document> matchedResults = mongoDBCollection.find(match, null); + + String fieldName = "name,surname"; + List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.toString() = " + aggregate.toString()); + + String name; + String surname; + long totalCount = 0; + Map<String, Integer> map = new HashMap<>(); + for (Document result : matchedResults.getResults()) { + name = result.getString("name"); 
+ if (StringUtils.isEmpty(name)) { + name = null; + } + surname = result.getString("surname"); + if (StringUtils.isEmpty(surname)) { + surname = null; + } + String key = ""; + if (name != null) { + key += name; + } + key += SEPARATOR; + if (surname != null) { + key += surname; + } + if (!map.containsKey(key)) { + map.put(key, 0); + } + map.put(key, 1 + map.get(key)); + totalCount++; + } + String value; + for (List<FacetField> result : aggregate.getResults()) { + for (FacetField facetField : result) { + Assert.assertEquals(totalCount, facetField.getCount()); + Assert.assertEquals(map.size(), facetField.getBuckets().size()); + for (FacetField.Bucket bucket : facetField.getBuckets()) { + value = bucket.getValue(); + Assert.assertEquals(map.get(value).longValue(), bucket.getCount()); + } + } + } + } + +// @Test +// public void testFacetRange() { +// Document match = new Document("age", new BasicDBObject("$gt", 2)); +// DataResult<Document> matchedResults = mongoDBCollection.find(match, null); +// +// int start = 1000; +// int end = 5000; +// int step = 100; +// String fieldName = "number" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; +// List<Bson> facets = MongoDBQueryUtils.createFacet(match, fieldName); +// System.out.println("facets = " + facets); +// MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); +// DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); +// System.out.println("aggregate.first() = " + aggregate.first()); +// +// long outOfRange = 0; +// List<Long> rangeValues = new ArrayList<>(Arrays.asList(0L, 0L, 0L, 0L, 0L)); +// +// Map<String, Integer> map = new HashMap<>(); +// for (Document result : matchedResults.getResults()) { +// int bucketNum; +// Long value = result.getLong("number"); +// if (value != null) { +// bucketNum = (int) (value - start) / step; +// int numSections = (int) Math.ceil((end - start + 1) / step); +// if (value < start || 
bucketNum > numSections) { +// outOfRange++; +// } else { +// rangeValues.set(bucketNum, 1 + rangeValues.get(bucketNum)); +// } +// } +// } +// System.out.println("rangeValues = " + rangeValues); +// System.out.println("outOfRange = " + outOfRange); +// +// for (List<FacetField> result : aggregate.getResults()) { +// Assert.assertEquals(1, result.size()); +// for (FacetField facetField : result) { +// Assert.assertEquals(outOfRange + rangeValues.stream().mapToLong(Long::longValue).sum(), facetField.getCount().longValue()); +// Assert.assertEquals(rangeValues.size() + 1, facetField.getBuckets().size()); +// for (int i = 0; i < facetField.getBuckets().size(); i++) { +// FacetField.Bucket bucket = facetField.getBuckets().get(i); +// if (bucket.getValue().equals("Other")) { +// Assert.assertEquals(outOfRange, bucket.getCount()); +// } else { +// Assert.assertEquals(rangeValues.get(i).longValue(), bucket.getCount()); +// } +// } +// } +// } +// } + + /** Runs a range facet on "age" from 3 to 10, first with step 2 and then with step 3, and compares the bucket counts and the reported start, end and step with fixed expected values. */ + @Test + public void testFacetRange() { + DataResult<Document> matchedResults = mongoDBCollectionFacetRange.find(new Document(), null); + for (Document result : matchedResults.getResults()) { + System.out.println("age = " + result.get("age")); + } + + double start = 3; + double end = 10; + double step = 2; + String fieldName = "age" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + System.out.println("fieldName = " + fieldName); + List<Bson> facets = MongoDBQueryUtils.createFacet(new Document(), fieldName); + System.out.println("facets = " + facets); + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollectionFacetRange.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + Map<String, Integer> map = new HashMap<>(); + map.put("3.0", 2); + map.put("5.0", 0); + map.put("7.0", 0); + map.put("9.0", 2); + map.put("Other", 3); + 
Assert.assertTrue(aggregate.first().get(0).getBuckets().size() > 0); + for (FacetField.Bucket bucket : aggregate.first().get(0).getBuckets()) { + Assert.assertTrue(map.containsKey(bucket.getValue())); + Assert.assertEquals(map.get(bucket.getValue()), bucket.getCount(), 0.0001); + } + Assert.assertEquals(start, aggregate.first().get(0).getStart()); + Assert.assertEquals(end, aggregate.first().get(0).getEnd()); + Assert.assertEquals(step, aggregate.first().get(0).getStep()); + + step = 3; + System.out.println(); + fieldName = "age" + RANGE_MARK1 + start + RANGE_MARK + end + RANGE_MARK2 + ":" + step; + System.out.println("fieldName = " + fieldName); + facets = MongoDBQueryUtils.createFacet(new Document(), fieldName); + System.out.println("facets = " + facets); + converter = new MongoDBDocumentToFacetFieldsConverter(); + aggregate = mongoDBCollectionFacetRange.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + map.clear(); + map.put("3.0", 2); + map.put("6.0", 0); + map.put("9.0", 3); + map.put("Other", 2); + Assert.assertTrue(aggregate.first().get(0).getBuckets().size() > 0); + for (FacetField.Bucket bucket : aggregate.first().get(0).getBuckets()) { + Assert.assertTrue(map.containsKey(bucket.getValue())); + Assert.assertEquals(map.get(bucket.getValue()), bucket.getCount(), 0.0001); + } + Assert.assertEquals(start, aggregate.first().get(0).getStart()); + Assert.assertEquals(end, aggregate.first().get(0).getEnd()); + Assert.assertEquals(step, aggregate.first().get(0).getStep()); + } + + /** Expects IllegalArgumentException for the facet "house.m2[toto0..20000]:1000" (presumably because the range start is not numeric; confirm against createFacet). */ + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidRangeFormat() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + MongoDBQueryUtils.createFacet(match, "house.m2[toto0..20000]:1000"); + } + + /** Expects IllegalArgumentException for the facet "house.m2[0:20000]:1000" (presumably because the bounds are not separated by the range mark; confirm against createFacet). */ + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidRangeFormat1() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + 
MongoDBQueryUtils.createFacet(match, "house.m2[0:20000]:1000"); + } + + /** Expects IllegalArgumentException for the facet "house.m2[toto0..20000]..1000" (presumably a malformed start and step separator; confirm against createFacet). */ + @Test(expected = IllegalArgumentException.class) + public void testFacetInvalidRangeFormat2() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + MongoDBQueryUtils.createFacet(match, "house.m2[toto0..20000]..1000"); + } + + /** Facets "date[YEAR]" and expects the aggregation name of the first returned field to be year.name(). */ + @Test + public void testFacetYear() { + Date date = new Date(); + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss"); + System.out.println("sdf.format(date) = " + sdf.format(date)); + + Document match = new Document("age", new BasicDBObject("$gt", 2)); + + String facetField = "date[YEAR]"; + System.out.println("\nfacetField = " + facetField); + List<Bson> facets = createFacet(match, facetField); + System.out.println("\nyear counts for 'date'; facets = " + facets); + + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + + Assert.assertEquals(year.name(), aggregate.first().get(0).getAggregationName()); + } + + /** Facets "date[MONTH]" and expects the aggregation name to be year.name() and month.name() joined by SEPARATOR. */ + @Test + public void testFacetMonth() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + + String facetField = "date[MONTH]"; + System.out.println("\nfacetField = " + facetField); + List<Bson> facets = createFacet(match, facetField); + System.out.println("\nmonth counts for 'date'; facets = " + facets); + + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + + Assert.assertEquals(year.name() + SEPARATOR + month.name(), aggregate.first().get(0).getAggregationName()); + } + + /** Facets "date[DAY]" and expects the aggregation name to be year.name(), month.name() and day.name() joined by SEPARATOR. */ + @Test + public void testFacetDay() { + Document match = new Document("age", new BasicDBObject("$gt", 2)); + + String facetField = 
"date[DAY]"; + System.out.println("\nfacetField = " + facetField); + List<Bson> facets = createFacet(match, facetField); + System.out.println("\nday counts for 'date'; facets = " + facets); + + MongoDBDocumentToFacetFieldsConverter converter = new MongoDBDocumentToFacetFieldsConverter(); + DataResult<List<FacetField>> aggregate = mongoDBCollection.aggregate(facets, converter, null); + System.out.println("aggregate.first() = " + aggregate.first()); + + Assert.assertEquals(year.name() + SEPARATOR + month.name() + SEPARATOR + day.name(), aggregate.first().get(0).getAggregationName()); + } + @Test public void testInsert() throws Exception { Long countBefore = mongoDBCollectionInsertTest.count().getNumMatches(); diff --git a/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java b/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java index 4dcd7973..ac2a0e7f 100644 --- a/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java +++ b/commons-datastore/commons-datastore-solr/src/main/java/org/opencb/commons/datastore/solr/SolrFacetToFacetFieldsConverter.java @@ -67,12 +67,12 @@ public static List<FacetField> convert(QueryResponse solrResponse, Map<String, S * @param defaultCount Default count * @return Actual count */ - private static int getBucketCount(SimpleOrderedMap<Object> solrFacets, int defaultCount) { + private static long getBucketCount(SimpleOrderedMap<Object> solrFacets, long defaultCount) { List<SimpleOrderedMap<Object>> solrBuckets = (List<SimpleOrderedMap<Object>>) solrFacets.get("buckets"); if (solrBuckets == null) { for (int i = 0; i < solrFacets.size(); i++) { if (solrFacets.getName(i).equals("count")) { - return (int) solrFacets.getVal(i); + return (long) solrFacets.getVal(i); } } }