Skip to content

Commit

Permalink
INSIGHT-1247: scrollable result with pairs (#20)
Browse files Browse the repository at this point in the history
* INSIGHT-1247: scrollable result with pairs
  • Loading branch information
Andrei Nechaev authored Nov 23, 2021
1 parent 138421d commit 9fd0009
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;
import static org.nuxeo.ai.sdk.rest.Common.DEFAULT_XPATH;
import static org.nuxeo.ai.sdk.rest.Common.Headers.SCROLL_ID_HEADER;
import static org.nuxeo.ai.sdk.rest.Common.THRESHOLD_PARAM;
import static org.nuxeo.ai.sdk.rest.Common.UID;
import static org.nuxeo.ai.sdk.rest.Common.XPATH_PARAM;
import static org.nuxeo.ai.sdk.rest.Common.Headers.SCROLL_ID_HEADER;

import java.io.IOException;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

import org.apache.commons.lang3.tuple.Pair;
import org.junit.Test;
import org.nuxeo.ai.sdk.objects.TensorInstances;
import org.nuxeo.ai.sdk.objects.deduplication.ScrollableResult;
Expand Down Expand Up @@ -101,18 +101,21 @@ public void shouldFindTuples() throws IOException {
assertThat(result.getResult()).isNotEmpty();
assertThat(result.getResult().get(0).getDocumentId()).isNotEmpty();
assertThat(result.getResult().get(0).getXpath()).isNotEmpty();
assertThat(result.getResult().get(0).getSimilarDocumentIds()).isNotEmpty();
assertThat(result.getResult().get(0).getSimilarDocumentIds()).containsExactlyInAnyOrder("doc12", "doc13",
"doc14", "doc15", "doc16", "doc17", "doc18");
assertThat(result.getResult().get(0).getSimilarDocuments()).isNotEmpty();
assertThat(result.getResult().get(0).getSimilarDocuments()).containsExactlyInAnyOrder(
Pair.of("doc12", "file:content"), Pair.of("doc13", "file:content"), Pair.of("doc14", "file:content"),
Pair.of("doc15", "file:content"), Pair.of("doc16", "file:content"), Pair.of("doc17", "file:content"),
Pair.of("doc18", "file:content"));

result = client.api(Dedup.ALL).call(singletonMap(SCROLL_ID_HEADER, scrollId));
assertThat(result).isNotNull();
assertThat(scrollId).isNotEmpty();
assertThat(result.getResult()).isNotEmpty();
assertThat(result.getResult().get(0).getDocumentId()).isNotEmpty();
assertThat(result.getResult().get(0).getXpath()).isNotEmpty();
assertThat(result.getResult().get(0).getSimilarDocumentIds()).isNotEmpty();
assertThat(result.getResult().get(0).getSimilarDocumentIds()).containsExactlyInAnyOrder("doc121", "doc123");
assertThat(result.getResult().get(0).getSimilarDocuments()).isNotEmpty();
assertThat(result.getResult().get(0).getSimilarDocuments()).containsExactlyInAnyOrder(
Pair.of("doc121", "file:content"), Pair.of("doc123", "file:content"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,22 @@
{
"documentId": "doc10",
"xpath": "file:content",
"similarDocumentIds": [
"doc12", "doc13", "doc14", "doc15", "doc16", "doc17", "doc18"
"similarDocuments": [
{
"doc12": "file:content"
}, {
"doc13": "file:content"
}, {
"doc14": "file:content"
}, {
"doc15": "file:content"
}, {
"doc16": "file:content"
}, {
"doc17": "file:content"
}, {
"doc18": "file:content"
}
]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"request": {
"method": "GET",
"url": "/api/v1/ai/dedup/test/similars",
"headers" : {
"headers": {
"Scroll-Id": {
"equalTo": "DXF1ZXJ5QW5kRmV0Y2gB...=="
}
Expand All @@ -16,8 +16,12 @@
{
"documentId": "doc10",
"xpath": "file:content",
"similarDocumentIds": [
"doc121", "doc123"
"similarDocuments": [
{
"doc121": "file:content"
}, {
"doc123": "file:content"
}
]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,44 +19,80 @@
*/
package org.nuxeo.ai.sdk.objects.deduplication;

import java.io.IOException;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;

/**
* POJO representing a similar document tuple with the source document id, the given xpath and the similar documents.
*/
@JsonDeserialize(using = SimilarTuple.SimilarTupleDeserializer.class)
public class SimilarTuple implements Serializable {

private static final long serialVersionUID = -8481819084080610860L;
private static final long serialVersionUID = -1000077427339197687L;

protected String documentId;

protected String xpath;

protected List<String> similarDocumentIds;
protected Set<Pair<String, String>> similarDocuments;

public SimilarTuple() {
}

public SimilarTuple(String documentId, String xpath, List<String> similarDocumentIds) {
public SimilarTuple(String documentId, String xpath, Set<Pair<String, String>> similarDocuments) {
this.documentId = documentId;
this.xpath = xpath;
this.similarDocumentIds = similarDocumentIds;
this.similarDocuments = similarDocuments;
}

/**
 * @return the id of the source document of this tuple
 */
public String getDocumentId() {
    return this.documentId;
}

public List<String> getSimilarDocumentIds() {
return similarDocumentIds;
public Set<Pair<String, String>> getSimilarDocuments() {
return similarDocuments;
}

/**
 * @return the xpath given for the source document of this tuple
 */
public String getXpath() {
    return this.xpath;
}

public void setXpath(String xpath) {
this.xpath = xpath;
/**
 * Jackson deserializer that builds a {@link SimilarTuple} from its JSON object form.
 * <p>
 * The {@code similarDocuments} property is read as an array of objects, each contributing one
 * (key, value) pair, e.g. <code>[{"doc12": "file:content"}, {"doc13": "file:content"}]</code>.
 */
public static class SimilarTupleDeserializer extends JsonDeserializer<SimilarTuple> {

    /**
     * Reads the current JSON object and converts it into a {@link SimilarTuple}.
     * <p>
     * A missing or {@code null} {@code similarDocuments} property yields an empty set instead of
     * failing; {@code null} and empty array elements are skipped. When an element holds more than
     * one entry only its first entry is kept.
     *
     * @param jsonParser parser positioned on the tuple's JSON object
     * @param deserializationContext the Jackson context (unused)
     * @return the tuple holding the {@code documentId}, the {@code xpath} and the similar documents
     * @throws IOException if the underlying JSON content cannot be read
     */
    @Override
    public SimilarTuple deserialize(JsonParser jsonParser, DeserializationContext deserializationContext)
            throws IOException {
        TypeReference<Map<String, Object>> ref = new TypeReference<Map<String, Object>>() {
        };
        Map<String, Object> object = jsonParser.readValueAs(ref);
        String docId = (String) object.get("documentId");
        String xpath = (String) object.get("xpath");
        // Unchecked: the element and value types are not verified here; the payload is trusted to
        // follow the documented array-of-objects shape.
        @SuppressWarnings("unchecked")
        List<Map<String, String>> similarDocumentsList = (List<Map<String, String>>) object.get("similarDocuments");
        // An absent property must not blow up with a NullPointerException; treat it as "no similars".
        List<Map<String, String>> entries = similarDocumentsList == null
                ? Collections.<Map<String, String>>emptyList()
                : similarDocumentsList;
        Set<Pair<String, String>> similarDocuments = entries.stream()
                .filter(Objects::nonNull)
                .filter(elem -> !elem.isEmpty())
                // Non-empty is guaranteed by the filter above, so next() is always safe.
                .map(elem -> elem.entrySet().iterator().next())
                .map(entry -> Pair.of(entry.getKey(), entry.getValue()))
                .collect(Collectors.toSet());
        return new SimilarTuple(docId, xpath, similarDocuments);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import static java.util.Collections.singletonList;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.Arrays;
import org.apache.commons.lang3.tuple.Pair;
import org.assertj.core.util.Sets;
import org.junit.Test;
import org.nuxeo.ai.sdk.objects.deduplication.ScrollableResult;
import org.nuxeo.ai.sdk.objects.deduplication.SimilarTuple;
Expand All @@ -44,14 +45,19 @@ public void shouldSerialize() throws JsonProcessingException {
assertThat(deserialized.getResult()).isEmpty();
assertThat(deserialized.getScrollId()).isNull();

SimilarTuple similarTuple = new SimilarTuple("doc1", "file:content", Arrays.asList("doc2", "doc3"));
SimilarTuple similarTuple = new SimilarTuple("doc1", "file:content",
Sets.newLinkedHashSet(Pair.of("doc2", "file:content"), Pair.of("doc3", "file:content")));
ScrollableResult result = new ScrollableResult("test", singletonList(similarTuple));
json = MAPPER.writeValueAsString(result);
assertThat(json).isNotEmpty();

deserialized = MAPPER.readValue(json, ScrollableResult.class);
assertThat(deserialized).isNotNull();
assertThat(deserialized.getResult()).isNotEmpty();
assertThat(deserialized.getScrollId()).isEqualTo("test");
assertThat(deserialized.getResult()).isNotEmpty();
assertThat(deserialized.getResult().get(0)).isNotNull();
assertThat(deserialized.getResult().get(0).getSimilarDocuments()).isNotEmpty();
assertThat(deserialized.getResult().get(0).getSimilarDocuments()).containsExactlyInAnyOrder(
Pair.of("doc2", "file:content"), Pair.of("doc3", "file:content"));
}
}

0 comments on commit 9fd0009

Please sign in to comment.