Skip to content

Commit

Permalink
fix (RDF): composite key objects (#4566)
Browse files Browse the repository at this point in the history
* code syntax fixes + removed unused code

* added test for composite key reference/refback

* simplified REFERENCE code

* Added test for empty values

* minor changes

* Fixed composite key null check

* working tests for composite REFERENCE (without ARRAY or REFBACK)

* fixed composite REF_ARRAY test

* updated code, still disabled composite REFBACK due to current behaviour

* Included REFBACK test (fails until other bug is fixed)

* Ensure an unmodifiable set is returned

* Updated/added some PrimaryKey tests

* simplified assert code

* Fixes for merge conflicts

* Added constant for subselect separator

* Added refback-specific code (and generalized some code to prevent code duplication)

* Added code comment

* added refback logic for initial query

* localhost is now stored in a constant

* some formatting fixes

* wip: refbacks to composite keys with composite keys

* improved triple validation code

* updated tests

* quick fix in case of teardown failing

* simplified test code

* fixed refbacks to non-composite reference primary key

* additional fixes

* auto-formatting
  • Loading branch information
svandenhoek authored Feb 4, 2025
1 parent 6f8aa2c commit b75780d
Show file tree
Hide file tree
Showing 8 changed files with 825 additions and 503 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import static java.util.Map.entry;
import static org.eclipse.rdf4j.model.util.Values.literal;
import static org.molgenis.emx2.Constants.API_FILE;
import static org.molgenis.emx2.Constants.COMPOSITE_REF_SEPARATOR;
import static org.molgenis.emx2.Constants.SUBSELECT_SEPARATOR;
import static org.molgenis.emx2.rdf.RdfUtils.getSchemaNamespace;

import com.google.common.net.UrlEscapers;
Expand All @@ -11,7 +13,6 @@
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.base.CoreDatatype;
Expand Down Expand Up @@ -67,7 +68,7 @@ public class ColumnTypeRdfMapper {
// RELATIONSHIP
entry(ColumnType.REF, RdfColumnType.REFERENCE),
entry(ColumnType.REF_ARRAY, RdfColumnType.REFERENCE),
entry(ColumnType.REFBACK, RdfColumnType.REFERENCE),
entry(ColumnType.REFBACK, RdfColumnType.REFBACK),

// LAYOUT and other constants
entry(ColumnType.HEADING, RdfColumnType.SKIP), // Should not be in RDF output.
Expand Down Expand Up @@ -109,10 +110,8 @@ public static CoreDatatype.XSD getCoreDataType(ColumnType columnType) {
* </ul>
*/
public Set<Value> retrieveValues(final Row row, final Column column) {
// Diff view: per the hunk header (-109,10 +110,8) the next four lines are the pre-commit body.
// It treated a column as empty only when the value stored under the column's own name was null.
if (row.getString(column.getName()) == null) {
return Set.of();
}
return mapping.get(column.getColumnType()).retrieveValues(baseURL, row, column);
// Replacement body: look up the RdfColumnType for the column's type and let that type decide
// whether the column is empty, so types that keep their values under other row column names
// (REFERENCE, REFBACK) can apply their own check before any values are retrieved.
// NOTE(review): the result of mapping.get(...) is dereferenced without a null check —
// presumably every ColumnType has an entry in the mapping; confirm.
RdfColumnType mapper = mapping.get(column.getColumnType());
return (mapper.isEmpty(row, column) ? Set.of() : mapper.retrieveValues(baseURL, row, column));
}

private enum RdfColumnType {
Expand Down Expand Up @@ -213,40 +212,65 @@ Set<Value> retrieveValues(String baseURL, Row row, Column column) {
// Mapper for reference columns: each referenced row is emitted as an IRI built by
// retrieveReferenceValues(...).
REFERENCE(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
// Diff view: the lines from here up to the declaration of colNameToRefTableColName are the
// start of the removed pre-commit body (hunk header -213,40 +212,65); the rest of that removed
// body is rendered further down in the same hunk. It walked column.getReferences() in order and
// grouped the values per array index itself.
final TableMetadata target = column.getRefTable();
final String rootTableName =
UrlEscapers.urlPathSegmentEscaper().escape(target.getRootTable().getIdentifier());
final Namespace ns = getSchemaNamespace(baseURL, target.getRootTable().getSchema());

final Set<IRI> iris = new HashSet<>();
final Map<Integer, Map<String, String>> items = new HashMap<>();
for (final Reference reference : column.getReferences()) {
final String localColumn = reference.getName();
final String targetColumn = reference.getRefTo();
if (column.isArray()) {
final String[] values = row.getStringArray(localColumn);
if (values != null) {
for (int i = 0; i < values.length; i++) {
var keyValuePairs = items.getOrDefault(i, new LinkedHashMap<>());
keyValuePairs.put(targetColumn, values[i]);
items.put(i, keyValuePairs);
}
}
// Replacement body: map every local reference column name (Reference::getName) to the column
// it refers to in the referenced table (Reference::getRefTo) and delegate the IRI construction
// to the shared helper.
// NOTE(review): Collectors.toMap gives no iteration-order guarantee, whereas the removed loop
// visited the references in list order — presumably PrimaryKey does not depend on the insertion
// order of its map; confirm.
Map<String, String> colNameToRefTableColName =
column.getReferences().stream()
.collect(Collectors.toMap(Reference::getName, Reference::getRefTo));
return RdfColumnType.retrieveReferenceValues(
baseURL, row, column, colNameToRefTableColName);
}

// Emptiness is judged on the first reference column only, not on the column's own name.
@Override
boolean isEmpty(Row row, Column column) {
// Composite key requires all fields to be filled. If one is null, all should be null.
return row.getString(column.getReferences().get(0).getName()) == null;
}
},
// Mapper for refback columns: the referring rows are emitted as IRIs built by
// retrieveReferenceValues(...).
REFBACK(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
Map<String, String> colNameToRefTableColName = new HashMap<>();
// When the row holds a value directly under the refback column's name, that value is mapped to
// the first primary-key column of the referenced table. Otherwise the key parts are looked up
// under sub-column names prefixed with "<column name><SUBSELECT_SEPARATOR>".
// NOTE(review): the direct branch only uses getPrimaryKeyColumns().get(0) — presumably this is
// the non-composite primary key case; confirm.
if (row.getString(column.getName()) != null) {
colNameToRefTableColName.put(
column.getName(), column.getRefTable().getPrimaryKeyColumns().get(0).getName());
} else {
refBackSubColumns(
colNameToRefTableColName, column, column.getName() + SUBSELECT_SEPARATOR, "");
}

return RdfColumnType.retrieveReferenceValues(
baseURL, row, column, colNameToRefTableColName);
}

// Fills colNameToRefTableColName for every primary-key column of column's referenced table.
// Key columns that are themselves references are expanded recursively: colPrefix grows with
// "<name><SUBSELECT_SEPARATOR>" (the row column name side) and refPrefix grows with
// "<name><COMPOSITE_REF_SEPARATOR>" (the referenced-table column name side). Any other key
// column is recorded as colPrefix + name -> refPrefix + name.
private void refBackSubColumns(
Map<String, String> colNameToRefTableColName,
Column column,
String colPrefix,
String refPrefix) {
for (Column refPrimaryKey : column.getRefTable().getPrimaryKeyColumns()) {
if (refPrimaryKey.isRef() || refPrimaryKey.isRefArray()) {
refBackSubColumns(
colNameToRefTableColName,
refPrimaryKey,
colPrefix + refPrimaryKey.getName() + SUBSELECT_SEPARATOR,
refPrefix + refPrimaryKey.getName() + COMPOSITE_REF_SEPARATOR);
} else {
// Diff view: the next six lines are removed pre-commit code (the non-array branch of the old
// REFERENCE body); the diff renderer aligned them here because both versions share the
// surrounding "} else {" line. They are not part of refBackSubColumns.
final String value = row.getString(localColumn);
if (value != null) {
var keyValuePairs = items.getOrDefault(0, new LinkedHashMap<>());
keyValuePairs.put(targetColumn, value);
items.put(0, keyValuePairs);
}
colNameToRefTableColName.put(
colPrefix + refPrimaryKey.getName(), refPrefix + refPrimaryKey.getName());
}
}
}

// Diff view: the next five lines are the removed tail of the old REFERENCE body (IRI creation
// from the grouped key values); that logic now lives in retrieveReferenceValues(...).
for (final var item : items.values()) {
PrimaryKey key = new PrimaryKey(item);
iris.add(Values.iri(ns, rootTableName + "?" + key.getEncodedValue()));
}
return Set.copyOf(iris);
// Not empty when a value exists directly under the refback column's name. Otherwise the first
// row column whose name starts with "<column name><SUBSELECT_SEPARATOR>" is inspected: the
// refback is empty when no such column exists or when its value is null.
@Override
boolean isEmpty(Row row, Column column) {
if (row.getString(column.getName()) != null) return false;

// Composite key requires all fields to be filled. If one is null, all should be null.
Optional<String> firstMatch =
row.getColumnNames().stream()
.filter(i -> i.startsWith(column.getName() + SUBSELECT_SEPARATOR))
.findFirst();

return firstMatch.isEmpty() || row.getString(firstMatch.get()) == null;
}
},
ONTOLOGY(CoreDatatype.XSD.ANYURI) {
Expand Down Expand Up @@ -283,7 +307,7 @@ public CoreDatatype.XSD getCoreDatatype() {
// Applies function to every element of object and gathers the resulting values in a set.
private static Set<Value> basicRetrieval(Object[] object, Function<Object, Value> function) {
return Arrays.stream(object)
.map(value -> (Value) function.apply(value))
// Diff view: the next line is the removed collector; the line after it is its replacement,
// which makes the returned set unmodifiable.
.collect(Collectors.toSet());
.collect(Collectors.toUnmodifiableSet());
}

/**
Expand All @@ -299,9 +323,47 @@ private static Set<Value> basicRetrievalString(
String[] object, Function<String, Value> function) {
// Applies function to every string in object and gathers the resulting values in a set.
return Arrays.stream(object)
.map(value -> (Value) function.apply(value))
// Diff view: the next line is the removed collector; the line after it is its replacement,
// which makes the returned set unmodifiable.
.collect(Collectors.toSet());
.collect(Collectors.toUnmodifiableSet());
}

/**
 * Converts what {@code row} holds for {@code column} into a set of RDF values; implemented by
 * each enum constant.
 *
 * @param baseURL base URL; the reference helper passes it to {@code getSchemaNamespace}
 * @param row row to read the value(s) from
 * @param column column whose value(s) are converted
 * @return the resulting RDF values
 */
abstract Set<Value> retrieveValues(final String baseURL, final Row row, final Column column);

/**
 * Default emptiness check: the column is empty when the row has no string value stored under the
 * column's own name. Enum constants may override this.
 *
 * @param row row to inspect
 * @param column column whose name is looked up in the row
 * @return {@code true} when the row's value for the column name is {@code null}
 */
boolean isEmpty(final Row row, final Column column) {
  final String columnName = column.getName();
  final String storedValue = row.getString(columnName);
  return storedValue == null;
}

/**
 * Resolves the rows that {@code tableColumn} points to into IRIs.
 *
 * <p>For every entry of {@code colNameToRefTableColName} the value(s) stored in {@code row} under
 * the entry's key are read (as a string array when the column is an array, otherwise as a single
 * string) and grouped by position under the entry's value. Each group becomes a {@link
 * PrimaryKey}; its encoded value is appended after {@code "?"} to the URL-escaped identifier of
 * the referenced table's root table, within the namespace of that root table's schema.
 *
 * @param baseURL base URL handed to {@code getSchemaNamespace}
 * @param row row the key values are read from
 * @param tableColumn column whose referenced table determines the IRI prefix
 * @param colNameToRefTableColName row column name mapped to referenced-table column name
 * @return unmodifiable set holding one IRI per position group (equal IRIs collapse)
 */
private static Set<Value> retrieveReferenceValues(
    final String baseURL,
    final Row row,
    final Column tableColumn,
    final Map<String, String> colNameToRefTableColName) {
  final TableMetadata refTable = tableColumn.getRefTable();
  final String escapedRootName =
      UrlEscapers.urlPathSegmentEscaper().escape(refTable.getRootTable().getIdentifier());
  final Namespace namespace = getSchemaNamespace(baseURL, refTable.getRootTable().getSchema());

  // position within the value array -> (referenced-table column name -> key value)
  final Map<Integer, Map<String, String>> keyPartsByIndex = new HashMap<>();
  for (final Map.Entry<String, String> nameEntry : colNameToRefTableColName.entrySet()) {
    final String[] cells;
    if (tableColumn.isArray()) {
      cells = row.getStringArray(nameEntry.getKey());
    } else {
      cells = new String[] {row.getString(nameEntry.getKey())};
    }

    // Only the array lookup can yield null; such a column contributes nothing.
    if (cells == null) {
      continue;
    }

    for (int index = 0; index < cells.length; index++) {
      keyPartsByIndex
          .computeIfAbsent(index, unused -> new LinkedHashMap<>())
          .put(nameEntry.getValue(), cells[index]);
    }
  }

  return keyPartsByIndex.values().stream()
      .map(
          keyParts ->
              (Value)
                  Values.iri(
                      namespace,
                      escapedRootName + "?" + new PrimaryKey(keyParts).getEncodedValue()))
      .collect(Collectors.toUnmodifiableSet());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,10 @@ private List<Row> getRows(Table table, final String rowId) {
for (Column c : table.getMetadata().getColumns()) {
if (c.isFile()) {
selectColumns.add(s(c.getName(), s("id"), s("filename"), s("mimetype")));
} else if (c.isReference()) {
} else if (c.isRef() || c.isRefArray()) {
c.getReferences().forEach(i -> selectColumns.add(s(i.getName())));
} else if (c.isRefback()) {
selectColumns.add(refBackSelect(c));
} else {
selectColumns.add(s(c.getName()));
}
Expand All @@ -611,6 +613,18 @@ private List<Row> getRows(Table table, final String rowId) {
}
}

/**
 * Builds the select for a refback column: the column's name with one sub-select per primary-key
 * column of the referenced table. Key columns that are themselves REF or REF_ARRAY columns are
 * expanded recursively; all other key columns are selected by name.
 *
 * @param column column whose referenced table's primary key is expanded
 * @return select column named after {@code column}, carrying the primary-key sub-selects
 */
private SelectColumn refBackSelect(Column column) {
  final SelectColumn[] keySelects =
      column.getRefTable().getPrimaryKeyColumns().stream()
          .map(
              keyColumn ->
                  keyColumn.isRef() || keyColumn.isRefArray()
                      ? refBackSelect(keyColumn)
                      : s(keyColumn.getName()))
          .toArray(SelectColumn[]::new);
  return s(column.getName(), keySelects);
}

// Convenience overload: forwards to the getIriForRow overload that takes the table's metadata.
private IRI getIriForRow(final Row row, final Table table) {
return getIriForRow(row, table.getMetadata());
}
Expand Down
Loading

0 comments on commit b75780d

Please sign in to comment.