Skip to content

Commit

Permalink
Fix entity counting
Browse files Browse the repository at this point in the history
  • Loading branch information
jnsrnhld committed Jan 9, 2025
1 parent 303734d commit d96e881
Showing 1 changed file with 25 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
package com.bakdata.conquery.mode.local;

import static org.jooq.impl.DSL.*;
import static org.jooq.impl.DSL.asterisk;
import static org.jooq.impl.DSL.count;
import static org.jooq.impl.DSL.countDistinct;
import static org.jooq.impl.DSL.field;
import static org.jooq.impl.DSL.max;
import static org.jooq.impl.DSL.min;
import static org.jooq.impl.DSL.name;
import static org.jooq.impl.DSL.noCondition;
import static org.jooq.impl.DSL.noField;
import static org.jooq.impl.DSL.table;

import java.sql.Date;
import java.util.ArrayList;
Expand All @@ -15,6 +24,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.bakdata.conquery.models.common.daterange.CDateRange;
import com.bakdata.conquery.models.config.DatabaseConfig;
Expand Down Expand Up @@ -51,7 +61,7 @@
import org.jooq.Name;
import org.jooq.Record;
import org.jooq.Select;
import org.jooq.SelectJoinStep;
import org.jooq.SelectHavingStep;
import org.jooq.Table;

@Slf4j
Expand Down Expand Up @@ -168,23 +178,24 @@ private void calculateMatchingStats(final TreeConcept treeConcept) {
.map(field -> field(field.getUnqualifiedName()))
.collect(Collectors.toList());

// Group-by columns: the same entity may satisfy the guard conditions of multiple nodes, so we also group by the primary id and deduplicate
// the entities in Java.
final List<Field<?>> groupByColumns = Stream.concat(Stream.of(field(ENTITIES)), relevantColumnsAliased.stream()).toList();

// if there is no validity date at all, no field is selected
final Field<?> validityDateExpression = toValidityDateExpression(validityDateMap);

final SelectJoinStep<Record> query = dslContext.select(relevantColumnsAliased)
.select(
count(asterisk()).as(EVENTS),
countDistinct(field(ENTITIES)).as(ENTITIES),
validityDateExpression.as(DATES)
)
.from(unioned);

// not all dialects accept an empty group by () clause
final Select<Record> finalQuery = relevantColumnsAliased.isEmpty() ? query : query.groupBy(relevantColumnsAliased);
final SelectHavingStep<Record> query = dslContext.select(relevantColumnsAliased)
.select(
count(asterisk()).as(EVENTS),
countDistinct(field(ENTITIES)).as(ENTITIES),
validityDateExpression.as(DATES)
)
.from(unioned)
.groupBy(groupByColumns);

final ConceptTreeCache treeCache = new ConceptTreeCache(treeConcept);
executionService.fetchStream(finalQuery)
.forEach(record -> mapRecordToConceptElements(treeConcept, record, treeCache));
executionService.fetchStream(query).forEach(record -> mapRecordToConceptElements(treeConcept, record, treeCache));
}

/**
Expand Down

0 comments on commit d96e881

Please sign in to comment.