Skip to content

Commit

Permalink
Fix bug where ingestion failed for input document containing list of …
Browse files Browse the repository at this point in the history
…nested objects (#1040)

* Fix bug where ingestion failed for input document containing list of nested objects

Signed-off-by: Yizhe Liu <[email protected]>

* Address comments to use better method name/implementation

Signed-off-by: Yizhe Liu <[email protected]>

* Address comments: modify the test case to have doc with various fields

Signed-off-by: Yizhe Liu <[email protected]>

---------

Signed-off-by: Yizhe Liu <[email protected]>
  • Loading branch information
yizheliu-amazon authored Jan 3, 2025
1 parent ee24b1c commit 90df6c9
Show file tree
Hide file tree
Showing 3 changed files with 286 additions and 46 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
### Infrastructure
### Documentation
### Maintenance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -434,23 +434,26 @@ private void putNLPResultToSourceMapForMapType(
if (sourceValue instanceof Map) {
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
if (sourceAndMetadataMap.get(processorKey) instanceof List) {
// build nlp output for list of nested objects
Iterator<Object> inputNestedMapValueIt = ((List<Object>) inputNestedMapEntry.getValue()).iterator();
for (Map<String, Object> nestedElement : (List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey)) {
// Only fill in when value is not null
if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) {
nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++));
}
if (inputNestedMapEntry.getValue() instanceof List) {
processMapEntryValue(
results,
indexWrapper,
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
inputNestedMapEntry.getKey(),
(List<Object>) inputNestedMapEntry.getValue()
);
} else if (inputNestedMapEntry.getValue() instanceof Map) {
processMapEntryValue(
results,
indexWrapper,
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
inputNestedMapEntry.getKey(),
inputNestedMapEntry.getValue()
);
}
} else {
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
Map<String, Object> sourceMap;
if (sourceAndMetadataMap.get(processorKey) == null) {
sourceMap = new HashMap<>();
sourceAndMetadataMap.put(processorKey, sourceMap);
} else {
sourceMap = (Map<String, Object>) sourceAndMetadataMap.get(processorKey);
}
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
putNLPResultToSourceMapForMapType(
processedNestedKey.getKey(),
processedNestedKey.getValue(),
Expand All @@ -471,6 +474,97 @@ private void putNLPResultToSourceMapForMapType(
}
}

private void processMapEntryValue(
List<?> results,
IndexWrapper indexWrapper,
List<Map<String, Object>> sourceAndMetadataMapValueInList,
String inputNestedMapEntryKey,
List<Object> inputNestedMapEntryValue
) {
// build nlp output for object in sourceValue which is list type
Iterator<Object> inputNestedMapValueIt = inputNestedMapEntryValue.iterator();
for (Map<String, Object> nestedElement : sourceAndMetadataMapValueInList) {
// Only fill in when value is not null
if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) {
nestedElement.put(inputNestedMapEntryKey, results.get(indexWrapper.index++));
}
}
}

private void processMapEntryValue(
List<?> results,
IndexWrapper indexWrapper,
List<Map<String, Object>> sourceAndMetadataMapValueInList,
String inputNestedMapEntryKey,
Object inputNestedMapEntryValue
) {
// build nlp output for object in sourceValue which is map type
Iterator<Map<String, Object>> iterator = sourceAndMetadataMapValueInList.iterator();
IntStream.range(0, sourceAndMetadataMapValueInList.size()).forEach(index -> {
Map<String, Object> nestedElement = iterator.next();
putNLPResultToSingleSourceMapInList(
inputNestedMapEntryKey,
inputNestedMapEntryValue,
results,
indexWrapper,
nestedElement,
index
);
});
}

/**
* Put nlp result to single source element, which is in a list field of source document
* Such source element is in map type
*
* @param processorKey
* @param sourceValue
* @param results
* @param indexWrapper
* @param sourceAndMetadataMap
* @param nestedElementIndex index of the element in the list field of source document
*/
@SuppressWarnings("unchecked")
private void putNLPResultToSingleSourceMapInList(
String processorKey,
Object sourceValue,
List<?> results,
IndexWrapper indexWrapper,
Map<String, Object> sourceAndMetadataMap,
int nestedElementIndex
) {
if (processorKey == null || sourceAndMetadataMap == null || sourceValue == null) return;
if (sourceValue instanceof Map) {
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
putNLPResultToSingleSourceMapInList(
processedNestedKey.getKey(),
processedNestedKey.getValue(),
results,
indexWrapper,
sourceMap,
nestedElementIndex
);
}
} else {
if (sourceValue instanceof List && ((List<Object>) sourceValue).get(nestedElementIndex) != null) {
sourceAndMetadataMap.merge(processorKey, results.get(indexWrapper.index++), REMAPPING_FUNCTION);
}
}
}

@SuppressWarnings("unchecked")
private Map<String, Object> getSourceMapBySourceAndMetadataMap(String processorKey, Map<String, Object> sourceAndMetadataMap) {
Map<String, Object> sourceMap = new HashMap<>();
if (sourceAndMetadataMap.get(processorKey) == null) {
sourceAndMetadataMap.put(processorKey, sourceMap);
} else {
sourceMap = (Map<String, Object>) sourceAndMetadataMap.get(processorKey);
}
return sourceMap;
}

private List<Map<String, Object>> buildNLPResultForListType(List<String> sourceValue, List<?> results, IndexWrapper indexWrapper) {
List<Map<String, Object>> keyToResult = new ArrayList<>();
IntStream.range(0, sourceValue.size())
Expand Down
Loading

0 comments on commit 90df6c9

Please sign in to comment.