From 4ab16102172ab93246593de338b1f9ded91d0b22 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 2 Dec 2024 20:01:56 +0530 Subject: [PATCH 01/72] Adds Support of maxNumRowsPerTask in RealtimeToOfflineSegmentsTasksGenerator --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 95 ++++++++++++------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 73ff19ebef9f..d735db922fb6 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -86,6 +86,7 @@ public class RealtimeToOfflineSegmentsTaskGenerator extends BaseTaskGenerator { private static final String DEFAULT_BUCKET_PERIOD = "1d"; private static final String DEFAULT_BUFFER_PERIOD = "2d"; + private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = 50_000_000; @Override public String getTaskType() { @@ -158,6 +159,11 @@ public List generateTasks(List tableConfigs) { List downloadURLs = new ArrayList<>(); Set lastLLCSegmentPerPartition = new HashSet<>(partitionToLatestLLCSegmentName.values()); boolean skipGenerate = false; + + long numRecordsPerTask = 0; + List> segmentNamesGroupList = new ArrayList<>(); + List> downloadURLsGroupList = new ArrayList<>(); + while (true) { // Check that execution window is older than bufferTime if (windowEndMs > System.currentTimeMillis() - bufferMs) { @@ -168,7 +174,16 @@ public List 
generateTasks(List tableConfigs) { break; } - for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { + int maxNumRecordsPerTask = + taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null + ? Integer.parseInt( + taskConfigs.get(MinionConstants.MergeRollupTask.MAX_NUM_RECORDS_PER_TASK_KEY)) + : DEFAULT_MAX_NUM_RECORDS_PER_TASK; + + for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); + segmentZkMetadataIndex++) { + SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); + String segmentName = segmentZKMetadata.getSegmentName(); long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); @@ -186,6 +201,17 @@ public List generateTasks(List tableConfigs) { } segmentNames.add(segmentName); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); + + numRecordsPerTask += segmentZKMetadata.getTotalDocs(); + + if ((numRecordsPerTask >= maxNumRecordsPerTask) + || (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { + segmentNamesGroupList.add(segmentNames); + downloadURLsGroupList.add(downloadURLs); + numRecordsPerTask = 0; + segmentNames = new ArrayList<>(); + downloadURLs = new ArrayList<>(); + } } } if (skipGenerate || !segmentNames.isEmpty()) { @@ -202,39 +228,44 @@ public List generateTasks(List tableConfigs) { continue; } - Map configs = MinionTaskUtils.getPushTaskConfig(realtimeTableName, taskConfigs, - _clusterInfoAccessor); - configs.putAll(getBaseTaskConfigs(tableConfig, segmentNames)); - configs.put(MinionConstants.DOWNLOAD_URL_KEY, StringUtils.join(downloadURLs, MinionConstants.URL_SEPARATOR)); - configs.put(MinionConstants.UPLOAD_URL_KEY, _clusterInfoAccessor.getVipUrl() + "/segments"); - - // Segment processor configs - configs.put(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY, String.valueOf(windowStartMs)); - 
configs.put(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY, String.valueOf(windowEndMs)); - String roundBucketTimePeriod = taskConfigs.get(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY); - if (roundBucketTimePeriod != null) { - configs.put(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY, roundBucketTimePeriod); - } - // NOTE: Check and put both keys for backward-compatibility - String mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY); - if (mergeType == null) { - mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY); - } - if (mergeType != null) { - configs.put(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY, mergeType); - configs.put(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY, mergeType); - } - for (Map.Entry entry : taskConfigs.entrySet()) { - if (entry.getKey().endsWith(RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX)) { - configs.put(entry.getKey(), entry.getValue()); + for (int segmentNameListIndex = 0; segmentNameListIndex < segmentNamesGroupList.size(); segmentNameListIndex++) { + List segmentNameList = segmentNamesGroupList.get(segmentNameListIndex); + List downloadURLList = downloadURLsGroupList.get(segmentNameListIndex); + + Map configs = MinionTaskUtils.getPushTaskConfig(realtimeTableName, taskConfigs, + _clusterInfoAccessor); + configs.putAll(getBaseTaskConfigs(tableConfig, segmentNameList)); + configs.put(MinionConstants.DOWNLOAD_URL_KEY, StringUtils.join(downloadURLList, MinionConstants.URL_SEPARATOR)); + configs.put(MinionConstants.UPLOAD_URL_KEY, _clusterInfoAccessor.getVipUrl() + "/segments"); + + // Segment processor configs + configs.put(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY, String.valueOf(windowStartMs)); + configs.put(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY, String.valueOf(windowEndMs)); + String roundBucketTimePeriod = taskConfigs.get(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY); + if (roundBucketTimePeriod != null) { + 
configs.put(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY, roundBucketTimePeriod); + } + // NOTE: Check and put both keys for backward-compatibility + String mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY); + if (mergeType == null) { + mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY); + } + if (mergeType != null) { + configs.put(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY, mergeType); + configs.put(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY, mergeType); + } + for (Map.Entry entry : taskConfigs.entrySet()) { + if (entry.getKey().endsWith(RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX)) { + configs.put(entry.getKey(), entry.getValue()); + } + } + String maxNumRecordsPerSegment = taskConfigs.get(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY); + if (maxNumRecordsPerSegment != null) { + configs.put(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY, maxNumRecordsPerSegment); } - } - String maxNumRecordsPerSegment = taskConfigs.get(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY); - if (maxNumRecordsPerSegment != null) { - configs.put(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY, maxNumRecordsPerSegment); - } - pinotTaskConfigs.add(new PinotTaskConfig(taskType, configs)); + pinotTaskConfigs.add(new PinotTaskConfig(taskType, configs)); + } LOGGER.info("Finished generating task configs for table: {} for task: {}", realtimeTableName, taskType); } return pinotTaskConfigs; From c71bcac9ace6d3fd91d865d9f1ebefbfcafe91d0 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 2 Dec 2024 21:28:25 +0530 Subject: [PATCH 02/72] refactoring --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 77 +++++++++++-------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index d735db922fb6..428a0c3f152e 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -231,40 +231,9 @@ public List generateTasks(List tableConfigs) { for (int segmentNameListIndex = 0; segmentNameListIndex < segmentNamesGroupList.size(); segmentNameListIndex++) { List segmentNameList = segmentNamesGroupList.get(segmentNameListIndex); List downloadURLList = downloadURLsGroupList.get(segmentNameListIndex); - - Map configs = MinionTaskUtils.getPushTaskConfig(realtimeTableName, taskConfigs, - _clusterInfoAccessor); - configs.putAll(getBaseTaskConfigs(tableConfig, segmentNameList)); - configs.put(MinionConstants.DOWNLOAD_URL_KEY, StringUtils.join(downloadURLList, MinionConstants.URL_SEPARATOR)); - configs.put(MinionConstants.UPLOAD_URL_KEY, _clusterInfoAccessor.getVipUrl() + "/segments"); - - // Segment processor configs - configs.put(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY, String.valueOf(windowStartMs)); - configs.put(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY, String.valueOf(windowEndMs)); - String roundBucketTimePeriod = taskConfigs.get(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY); - if (roundBucketTimePeriod != null) { - configs.put(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY, roundBucketTimePeriod); - } - // NOTE: Check and put both 
keys for backward-compatibility - String mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY); - if (mergeType == null) { - mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY); - } - if (mergeType != null) { - configs.put(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY, mergeType); - configs.put(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY, mergeType); - } - for (Map.Entry entry : taskConfigs.entrySet()) { - if (entry.getKey().endsWith(RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX)) { - configs.put(entry.getKey(), entry.getValue()); - } - } - String maxNumRecordsPerSegment = taskConfigs.get(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY); - if (maxNumRecordsPerSegment != null) { - configs.put(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY, maxNumRecordsPerSegment); - } - - pinotTaskConfigs.add(new PinotTaskConfig(taskType, configs)); + pinotTaskConfigs.add( + createPinotTaskConfig(segmentNameList, downloadURLList, realtimeTableName, taskConfigs, tableConfig, + windowStartMs, windowEndMs, taskType)); } LOGGER.info("Finished generating task configs for table: {} for task: {}", realtimeTableName, taskType); } @@ -391,4 +360,44 @@ public void validateTaskConfigs(TableConfig tableConfig, Map tas } } } + + private PinotTaskConfig createPinotTaskConfig(List segmentNameList, List downloadURLList, + String realtimeTableName, Map taskConfigs, TableConfig tableConfig, long windowStartMs, + long windowEndMs, String taskType) { + + Map configs = MinionTaskUtils.getPushTaskConfig(realtimeTableName, taskConfigs, + _clusterInfoAccessor); + configs.putAll(getBaseTaskConfigs(tableConfig, segmentNameList)); + configs.put(MinionConstants.DOWNLOAD_URL_KEY, StringUtils.join(downloadURLList, MinionConstants.URL_SEPARATOR)); + configs.put(MinionConstants.UPLOAD_URL_KEY, _clusterInfoAccessor.getVipUrl() + "/segments"); + + // Segment processor configs + 
configs.put(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY, String.valueOf(windowStartMs)); + configs.put(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY, String.valueOf(windowEndMs)); + String roundBucketTimePeriod = taskConfigs.get(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY); + if (roundBucketTimePeriod != null) { + configs.put(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY, roundBucketTimePeriod); + } + // NOTE: Check and put both keys for backward-compatibility + String mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY); + if (mergeType == null) { + mergeType = taskConfigs.get(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY); + } + if (mergeType != null) { + configs.put(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY, mergeType); + configs.put(RealtimeToOfflineSegmentsTask.COLLECTOR_TYPE_KEY, mergeType); + } + for (Map.Entry entry : taskConfigs.entrySet()) { + if (entry.getKey().endsWith(RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX)) { + configs.put(entry.getKey(), entry.getValue()); + } + } + String maxNumRecordsPerSegment = taskConfigs.get(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY); + if (maxNumRecordsPerSegment != null) { + configs.put(RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_SEGMENT_KEY, maxNumRecordsPerSegment); + } + + return new PinotTaskConfig(taskType, configs); + } + } From d0ca568517ee9129200c6a96548df7158149cd6f Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 2 Dec 2024 21:31:46 +0530 Subject: [PATCH 03/72] nit --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 428a0c3f152e..5e1d7c8c69bb 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -399,5 +399,4 @@ private PinotTaskConfig createPinotTaskConfig(List segmentNameList, List return new PinotTaskConfig(taskType, configs); } - } From 8db838b930c80969d7c792adf267a2b43e5fa13f Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 2 Dec 2024 21:35:58 +0530 Subject: [PATCH 04/72] nit --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 5e1d7c8c69bb..9ff9c4ddedee 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -177,7 +177,7 @@ public List generateTasks(List tableConfigs) { int maxNumRecordsPerTask = taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null ? 
Integer.parseInt( - taskConfigs.get(MinionConstants.MergeRollupTask.MAX_NUM_RECORDS_PER_TASK_KEY)) + taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) : DEFAULT_MAX_NUM_RECORDS_PER_TASK; for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); From 3233c33e68f48eeb85636db4c9f37b00a20b5faa Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 3 Dec 2024 01:17:48 +0530 Subject: [PATCH 05/72] fixes bug --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 9ff9c4ddedee..10856a5d7b91 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -204,8 +204,7 @@ public List generateTasks(List tableConfigs) { numRecordsPerTask += segmentZKMetadata.getTotalDocs(); - if ((numRecordsPerTask >= maxNumRecordsPerTask) - || (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { + if (numRecordsPerTask >= maxNumRecordsPerTask) { segmentNamesGroupList.add(segmentNames); downloadURLsGroupList.add(downloadURLs); numRecordsPerTask = 0; @@ -213,8 +212,14 @@ public List generateTasks(List tableConfigs) { downloadURLs = new ArrayList<>(); } } + + if ((!segmentNames.isEmpty()) + && (segmentZkMetadataIndex == 
(completedSegmentsZKMetadata.size() - 1))) { + segmentNamesGroupList.add(segmentNames); + downloadURLsGroupList.add(downloadURLs); + } } - if (skipGenerate || !segmentNames.isEmpty()) { + if (skipGenerate || !segmentNamesGroupList.isEmpty()) { break; } From 65e6aef15a8618c3cd1074675a8b2e2260f886bb Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 6 Dec 2024 14:29:13 +0530 Subject: [PATCH 06/72] adds initial logic --- .../pinot/core/common/MinionConstants.java | 1 + ...RealtimeToOfflineSegmentsTaskExecutor.java | 70 ++++++------ ...ealtimeToOfflineSegmentsTaskGenerator.java | 106 +++++++++++++----- 3 files changed, 114 insertions(+), 63 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java index 26e0bd79edc9..e70fec055277 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java @@ -150,6 +150,7 @@ public static class RealtimeToOfflineSegmentsTask extends MergeTask { public static final String ROUND_BUCKET_TIME_PERIOD_KEY = "roundBucketTimePeriod"; public static final String MERGE_TYPE_KEY = "mergeType"; public static final String AGGREGATION_TYPE_KEY_SUFFIX = ".aggregationType"; + public static final String SEGMENT_ZK_METADATA_TIME_KEY = TASK_TYPE + TASK_TIME_SUFFIX; public final static EnumSet AVAILABLE_CORE_VALUE_AGGREGATORS = EnumSet.of(MIN, MAX, SUM, DISTINCTCOUNTHLL, DISTINCTCOUNTRAWHLL, DISTINCTCOUNTTHETASKETCH, diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 502fa1cc7629..9bb586178159 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; @@ -84,28 +85,28 @@ public RealtimeToOfflineSegmentsTaskExecutor(MinionTaskZkMetadataManager minionT * Checks that the watermarkMs from the ZNode matches the windowStartMs in the task configs. * If yes, caches the ZNode version to check during update. */ - @Override - public void preProcess(PinotTaskConfig pinotTaskConfig) { - Map configs = pinotTaskConfig.getConfigs(); - String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); - - ZNRecord realtimeToOfflineSegmentsTaskZNRecord = - _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, - RealtimeToOfflineSegmentsTask.TASK_TYPE); - Preconditions.checkState(realtimeToOfflineSegmentsTaskZNRecord != null, - "RealtimeToOfflineSegmentsTaskMetadata ZNRecord for table: %s should not be null. 
Exiting task.", - realtimeTableName); - - RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = - RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - long windowStartMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY)); - Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs() <= windowStartMs, - "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" - + " configs for table: %s. ZNode may have been modified by another task", - realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), windowStartMs, realtimeTableName); - - _expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); - } +// @Override +// public void preProcess(PinotTaskConfig pinotTaskConfig) { +// Map configs = pinotTaskConfig.getConfigs(); +// String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); +// +// ZNRecord realtimeToOfflineSegmentsTaskZNRecord = +// _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, +// RealtimeToOfflineSegmentsTask.TASK_TYPE); +// Preconditions.checkState(realtimeToOfflineSegmentsTaskZNRecord != null, +// "RealtimeToOfflineSegmentsTaskMetadata ZNRecord for table: %s should not be null. Exiting task.", +// realtimeTableName); +// +// RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = +// RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); +// long windowStartMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY)); +// Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs() <= windowStartMs, +// "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" +// + " configs for table: %s. 
ZNode may have been modified by another task", +// realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), windowStartMs, realtimeTableName); +// +// _expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); +// } @Override protected List convert(PinotTaskConfig pinotTaskConfig, List segmentDirs, @@ -194,21 +195,24 @@ protected List convert(PinotTaskConfig pinotTaskConfig, * watermark in the ZNode * TODO: Making the minion task update the ZK metadata is an anti-pattern, however cannot see another way to do it */ - @Override - public void postProcess(PinotTaskConfig pinotTaskConfig) { - Map configs = pinotTaskConfig.getConfigs(); - String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); - long waterMarkMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY)); - RealtimeToOfflineSegmentsTaskMetadata newMinionMetadata = - new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, waterMarkMs); - _minionTaskZkMetadataManager.setTaskMetadataZNRecord(newMinionMetadata, RealtimeToOfflineSegmentsTask.TASK_TYPE, - _expectedVersion); - } +// @Override +// public void postProcess(PinotTaskConfig pinotTaskConfig) { +// Map configs = pinotTaskConfig.getConfigs(); +// String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); +// long waterMarkMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY)); +// RealtimeToOfflineSegmentsTaskMetadata newMinionMetadata = +// new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, waterMarkMs); +// _minionTaskZkMetadataManager.setTaskMetadataZNRecord(newMinionMetadata, RealtimeToOfflineSegmentsTask.TASK_TYPE, +// _expectedVersion); +// } @Override protected SegmentZKMetadataCustomMapModifier getSegmentZKMetadataCustomMapModifier(PinotTaskConfig pinotTaskConfig, SegmentConversionResult segmentConversionResult) { + Map updateMap = new TreeMap<>(); + updateMap.put(MinionConstants.RealtimeToOfflineSegmentsTask.SEGMENT_ZK_METADATA_TIME_KEY, + 
String.valueOf(System.currentTimeMillis())); return new SegmentZKMetadataCustomMapModifier(SegmentZKMetadataCustomMapModifier.ModifyMode.UPDATE, - Collections.emptyMap()); + updateMap); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 10856a5d7b91..d70f1df66386 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.I0Itec.zkclient.exception.ZkException; import org.apache.commons.lang3.StringUtils; import org.apache.helix.task.TaskState; import org.apache.helix.zookeeper.datamodel.ZNRecord; @@ -148,9 +149,19 @@ public List generateTasks(List tableConfigs) { long bucketMs = TimeUtils.convertPeriodToMillis(bucketTimePeriod); long bufferMs = TimeUtils.convertPeriodToMillis(bufferTimePeriod); + ZNRecord realtimeToOfflineZNRecord = + _clusterInfoAccessor.getMinionTaskMetadataZNRecord(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, + realtimeTableName); + int expectedVersion = realtimeToOfflineZNRecord != null ? realtimeToOfflineZNRecord.getVersion() : -1; +// RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = +// realtimeToOfflineZNRecord != null ? 
RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord( +// realtimeToOfflineZNRecord) : null; + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); + // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. WindowEnd = // windowStart + bucket. - long windowStartMs = getWatermarkMs(realtimeTableName, completedSegmentsZKMetadata, bucketMs); + long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(); long windowEndMs = windowStartMs + bucketMs; // Find all COMPLETED segments with data overlapping execution window: windowStart (inclusive) to windowEnd @@ -163,6 +174,7 @@ public List generateTasks(List tableConfigs) { long numRecordsPerTask = 0; List> segmentNamesGroupList = new ArrayList<>(); List> downloadURLsGroupList = new ArrayList<>(); + long minSegmentStartTime = Long.MAX_VALUE; while (true) { // Check that execution window is older than bufferTime @@ -199,6 +211,12 @@ public List generateTasks(List tableConfigs) { skipGenerate = true; break; } + + if (isAlreadyProcessedSegment(segmentZKMetadata)) { + continue; + } + + minSegmentStartTime = Math.min(minSegmentStartTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentName); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); @@ -233,18 +251,47 @@ public List generateTasks(List tableConfigs) { continue; } + List pinotTaskConfigsForTable = new ArrayList<>(); + for (int segmentNameListIndex = 0; segmentNameListIndex < segmentNamesGroupList.size(); segmentNameListIndex++) { List segmentNameList = segmentNamesGroupList.get(segmentNameListIndex); List downloadURLList = downloadURLsGroupList.get(segmentNameListIndex); - pinotTaskConfigs.add( + pinotTaskConfigsForTable.add( createPinotTaskConfig(segmentNameList, downloadURLList, realtimeTableName, taskConfigs, tableConfig, windowStartMs, windowEndMs, taskType)); } + + 
Preconditions.checkState(minSegmentStartTime != Long.MAX_VALUE); + long newWatermarkMs = (minSegmentStartTime / bucketMs) * bucketMs; + RealtimeToOfflineSegmentsTaskMetadata newMinionMetadata = + new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, newWatermarkMs); + + try { + _clusterInfoAccessor + .setMinionTaskMetadata(newMinionMetadata, MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, + expectedVersion); + } catch (ZkException e) { + LOGGER.error( + "Version changed while updating RTO task metadata for table: {}, skip scheduling. There are " + + "multiple task schedulers for the same table, need to investigate!", realtimeTableName); + continue; + } + + pinotTaskConfigs.addAll(pinotTaskConfigsForTable); + LOGGER.info("Finished generating task configs for table: {} for task: {}", realtimeTableName, taskType); } return pinotTaskConfigs; } + /** + * Checks whether the segment was already picked previously as part RTO task + */ + private boolean isAlreadyProcessedSegment(SegmentZKMetadata segmentZKMetadata) { + Map customMap = segmentZKMetadata.getCustomMap(); + return (customMap != null) && (customMap.get(RealtimeToOfflineSegmentsTask.SEGMENT_ZK_METADATA_TIME_KEY) != null); + } + /** * Fetch completed (DONE/UPLOADED) segment and partition information * @@ -292,37 +339,36 @@ private void getCompletedSegmentsInfo(String realtimeTableName, List completedSegmentsZKMetadata, - long bucketMs) { - ZNRecord realtimeToOfflineZNRecord = - _clusterInfoAccessor.getMinionTaskMetadataZNRecord(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, - realtimeTableName); - RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = - realtimeToOfflineZNRecord != null ? RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord( - realtimeToOfflineZNRecord) : null; - - if (realtimeToOfflineSegmentsTaskMetadata == null) { - // No ZNode exists. Cold-start. 
- long watermarkMs; - - // Find the smallest time from all segments - long minStartTimeMs = Long.MAX_VALUE; - for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { - minStartTimeMs = Math.min(minStartTimeMs, segmentZKMetadata.getStartTimeMs()); - } - Preconditions.checkState(minStartTimeMs != Long.MAX_VALUE); + private RealtimeToOfflineSegmentsTaskMetadata getRTOTaskMetadata(String realtimeTableName, + List completedSegmentsZKMetadata, + long bucketMs, ZNRecord realtimeToOfflineZNRecord) { - // Round off according to the bucket. This ensures we align the offline segments to proper time boundaries - // For example, if start time millis is 20200813T12:34:59, we want to create the first segment for window - // [20200813, 20200814) - watermarkMs = (minStartTimeMs / bucketMs) * bucketMs; + if (realtimeToOfflineZNRecord != null) { + return RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord( + realtimeToOfflineZNRecord); + } + + // No ZNode exists. Cold-start. + long watermarkMs; - // Create RealtimeToOfflineSegmentsTaskMetadata ZNode using watermark calculated above - realtimeToOfflineSegmentsTaskMetadata = new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, watermarkMs); - _clusterInfoAccessor.setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, - MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, -1); + // Find the smallest time from all segments + long minStartTimeMs = Long.MAX_VALUE; + for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { + minStartTimeMs = Math.min(minStartTimeMs, segmentZKMetadata.getStartTimeMs()); } - return realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(); + Preconditions.checkState(minStartTimeMs != Long.MAX_VALUE); + + // Round off according to the bucket. 
This ensures we align the offline segments to proper time boundaries + // For example, if start time millis is 20200813T12:34:59, we want to create the first segment for window + // [20200813, 20200814) + watermarkMs = (minStartTimeMs / bucketMs) * bucketMs; + + // Create RealtimeToOfflineSegmentsTaskMetadata ZNode using watermark calculated above +// realtimeToOfflineSegmentsTaskMetadata = new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, watermarkMs); +// _clusterInfoAccessor.setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, +// MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, -1); + + return new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, watermarkMs); } @Override From fd496bf64212b800af787aad277176f5038b1d3d Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sat, 7 Dec 2024 00:47:04 +0530 Subject: [PATCH 07/72] changes logic --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 15 ++- ...RealtimeToOfflineSegmentsTaskExecutor.java | 93 ++++++++++++------- ...ealtimeToOfflineSegmentsTaskGenerator.java | 25 ++--- 3 files changed, 77 insertions(+), 56 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 56f089021970..80fadeae05c4 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -43,7 +43,8 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WATERMARK_KEY = "watermarkMs"; private final String _tableNameWithType; - private final long _watermarkMs; + private long _watermarkMs; + private int _numSubtasks; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { _tableNameWithType = 
tableNameWithType; @@ -54,6 +55,18 @@ public String getTableNameWithType() { return _tableNameWithType; } + public int getNumSubtasks() { + return _numSubtasks; + } + + public void setNumSubtasks(int numSubtasks) { + _numSubtasks = numSubtasks; + } + + public void setWatermarkMs(long watermarkMs) { + _watermarkMs = watermarkMs; + } + /** * Get the watermark in millis */ diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 9bb586178159..2701af70f936 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.TreeMap; import org.apache.helix.zookeeper.datamodel.ZNRecord; +import org.apache.helix.zookeeper.zkclient.exception.ZkException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.core.common.MinionConstants; @@ -85,28 +86,28 @@ public RealtimeToOfflineSegmentsTaskExecutor(MinionTaskZkMetadataManager minionT * Checks that the watermarkMs from the ZNode matches the windowStartMs in the task configs. * If yes, caches the ZNode version to check during update. 
*/ -// @Override -// public void preProcess(PinotTaskConfig pinotTaskConfig) { -// Map configs = pinotTaskConfig.getConfigs(); -// String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); -// -// ZNRecord realtimeToOfflineSegmentsTaskZNRecord = -// _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, -// RealtimeToOfflineSegmentsTask.TASK_TYPE); -// Preconditions.checkState(realtimeToOfflineSegmentsTaskZNRecord != null, -// "RealtimeToOfflineSegmentsTaskMetadata ZNRecord for table: %s should not be null. Exiting task.", -// realtimeTableName); -// -// RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = -// RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); -// long windowStartMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY)); -// Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs() <= windowStartMs, -// "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" -// + " configs for table: %s. ZNode may have been modified by another task", -// realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), windowStartMs, realtimeTableName); -// -// _expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); -// } + @Override + public void preProcess(PinotTaskConfig pinotTaskConfig) { + Map configs = pinotTaskConfig.getConfigs(); + String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); + + ZNRecord realtimeToOfflineSegmentsTaskZNRecord = + _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, + RealtimeToOfflineSegmentsTask.TASK_TYPE); + Preconditions.checkState(realtimeToOfflineSegmentsTaskZNRecord != null, + "RealtimeToOfflineSegmentsTaskMetadata ZNRecord for table: %s should not be null. 
Exiting task.", + realtimeTableName); + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); + long windowStartMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY)); + Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs() <= windowStartMs, + "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" + + " configs for table: %s. ZNode may have been modified by another task", + realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), windowStartMs, realtimeTableName); + + _expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); + } @Override protected List convert(PinotTaskConfig pinotTaskConfig, List segmentDirs, @@ -195,24 +196,44 @@ protected List convert(PinotTaskConfig pinotTaskConfig, * watermark in the ZNode * TODO: Making the minion task update the ZK metadata is an anti-pattern, however cannot see another way to do it */ -// @Override -// public void postProcess(PinotTaskConfig pinotTaskConfig) { -// Map configs = pinotTaskConfig.getConfigs(); -// String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); -// long waterMarkMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY)); -// RealtimeToOfflineSegmentsTaskMetadata newMinionMetadata = -// new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, waterMarkMs); -// _minionTaskZkMetadataManager.setTaskMetadataZNRecord(newMinionMetadata, RealtimeToOfflineSegmentsTask.TASK_TYPE, -// _expectedVersion); -// } + @Override + public void postProcess(PinotTaskConfig pinotTaskConfig) { + Map configs = pinotTaskConfig.getConfigs(); + String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); + + while (true) { + ZNRecord realtimeToOfflineSegmentsTaskZNRecord = + 
_minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, + RealtimeToOfflineSegmentsTask.TASK_TYPE); + int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); + + int numSubtasksLeft = realtimeToOfflineSegmentsTaskMetadata.getNumSubtasks() - 1; + realtimeToOfflineSegmentsTaskMetadata.setNumSubtasks(numSubtasksLeft); + + try { + if (numSubtasksLeft == 0) { + long newWaterMarkMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY)); + realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWaterMarkMs); + } + _minionTaskZkMetadataManager.setTaskMetadataZNRecord(realtimeToOfflineSegmentsTaskMetadata, + RealtimeToOfflineSegmentsTask.TASK_TYPE, + expectedVersion); + break; + } catch (ZkException e) { + LOGGER.info( + "Version changed while updating num of subtasks left in RTO task metadata for table: {}, Retrying.", + realtimeTableName); + } + } + } @Override protected SegmentZKMetadataCustomMapModifier getSegmentZKMetadataCustomMapModifier(PinotTaskConfig pinotTaskConfig, SegmentConversionResult segmentConversionResult) { - Map updateMap = new TreeMap<>(); - updateMap.put(MinionConstants.RealtimeToOfflineSegmentsTask.SEGMENT_ZK_METADATA_TIME_KEY, - String.valueOf(System.currentTimeMillis())); return new SegmentZKMetadataCustomMapModifier(SegmentZKMetadataCustomMapModifier.ModifyMode.UPDATE, - updateMap); + Collections.emptyMap()); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index d70f1df66386..b8d04a4e4e00 
100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -26,10 +26,10 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.I0Itec.zkclient.exception.ZkException; import org.apache.commons.lang3.StringUtils; import org.apache.helix.task.TaskState; import org.apache.helix.zookeeper.datamodel.ZNRecord; +import org.apache.helix.zookeeper.zkclient.exception.ZkException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.common.utils.LLCSegmentName; @@ -53,6 +53,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.pinot.common.metadata.ZKMetadataProvider.constructPropertyStorePathForMinionTaskMetadata; + /** * A {@link PinotTaskGenerator} implementation for generating tasks of type {@link RealtimeToOfflineSegmentsTask} @@ -212,10 +214,6 @@ public List generateTasks(List tableConfigs) { break; } - if (isAlreadyProcessedSegment(segmentZKMetadata)) { - continue; - } - minSegmentStartTime = Math.min(minSegmentStartTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentName); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); @@ -261,14 +259,11 @@ public List generateTasks(List tableConfigs) { windowStartMs, windowEndMs, taskType)); } - Preconditions.checkState(minSegmentStartTime != Long.MAX_VALUE); - long newWatermarkMs = (minSegmentStartTime / bucketMs) * bucketMs; - RealtimeToOfflineSegmentsTaskMetadata newMinionMetadata = - new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, newWatermarkMs); - + 
realtimeToOfflineSegmentsTaskMetadata.setNumSubtasks(pinotTaskConfigsForTable.size()); try { _clusterInfoAccessor - .setMinionTaskMetadata(newMinionMetadata, MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, + .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, + MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, expectedVersion); } catch (ZkException e) { LOGGER.error( @@ -284,14 +279,6 @@ public List generateTasks(List tableConfigs) { return pinotTaskConfigs; } - /** - * Checks whether the segment was already picked previously as part RTO task - */ - private boolean isAlreadyProcessedSegment(SegmentZKMetadata segmentZKMetadata) { - Map customMap = segmentZKMetadata.getCustomMap(); - return (customMap != null) && (customMap.get(RealtimeToOfflineSegmentsTask.SEGMENT_ZK_METADATA_TIME_KEY) != null); - } - /** * Fetch completed (DONE/UPLOADED) segment and partition information * From fec0b658aa847838825d895ac0ad2777e85e40c4 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sat, 7 Dec 2024 02:16:26 +0530 Subject: [PATCH 08/72] fixes bug --- .../minion/RealtimeToOfflineSegmentsTaskMetadata.java | 11 ++++++++++- .../RealtimeToOfflineSegmentsTaskExecutor.java | 4 +++- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 -- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 80fadeae05c4..5a3102fb689f 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -41,6 +41,7 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WATERMARK_KEY = "watermarkMs"; + private static final String SUBTASKS_KEY = "numSubtasks"; private 
final String _tableNameWithType; private long _watermarkMs; @@ -51,6 +52,12 @@ public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long wate _watermarkMs = watermarkMs; } + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, int numSubtasks) { + _tableNameWithType = tableNameWithType; + _watermarkMs = watermarkMs; + _numSubtasks = numSubtasks; + } + public String getTableNameWithType() { return _tableNameWithType; } @@ -76,12 +83,14 @@ public long getWatermarkMs() { public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long watermark = znRecord.getLongField(WATERMARK_KEY, 0); - return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark); + int subtasksLeft = znRecord.getIntField(SUBTASKS_KEY, 0); + return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, subtasksLeft); } public ZNRecord toZNRecord() { ZNRecord znRecord = new ZNRecord(_tableNameWithType); znRecord.setLongField(WATERMARK_KEY, _watermarkMs); + znRecord.setIntField(SUBTASKS_KEY, _numSubtasks); return znRecord; } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 2701af70f936..691e0f76ab79 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; 
-import java.util.TreeMap; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; @@ -211,6 +210,9 @@ public void postProcess(PinotTaskConfig pinotTaskConfig) { RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); int numSubtasksLeft = realtimeToOfflineSegmentsTaskMetadata.getNumSubtasks() - 1; + Preconditions.checkState(numSubtasksLeft >= 0, + "num of minion subtasks pending for table: %s should be greater than equal to zero.", + realtimeTableName); realtimeToOfflineSegmentsTaskMetadata.setNumSubtasks(numSubtasksLeft); try { diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index b8d04a4e4e00..82cff5dcde1a 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -53,8 +53,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.pinot.common.metadata.ZKMetadataProvider.constructPropertyStorePathForMinionTaskMetadata; - /** * A {@link PinotTaskGenerator} implementation for generating tasks of type {@link RealtimeToOfflineSegmentsTask} From 11c84bea84ffd045d3a02579d7b77d1c78623e53 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 9 Dec 2024 12:06:10 +0530 Subject: [PATCH 09/72] clean up --- 
.../RealtimeToOfflineSegmentsTaskMetadata.java | 16 ++++++++-------- .../RealtimeToOfflineSegmentsTaskExecutor.java | 14 +++++--------- .../RealtimeToOfflineSegmentsTaskGenerator.java | 11 +++++------ 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 5a3102fb689f..709a6fbba9fb 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -45,29 +45,29 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private final String _tableNameWithType; private long _watermarkMs; - private int _numSubtasks; + private int _numSubtasksPending; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; } - public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, int numSubtasks) { + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, int numSubtasksPending) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; - _numSubtasks = numSubtasks; + _numSubtasksPending = numSubtasksPending; } public String getTableNameWithType() { return _tableNameWithType; } - public int getNumSubtasks() { - return _numSubtasks; + public int getNumSubtasksPending() { + return _numSubtasksPending; } - public void setNumSubtasks(int numSubtasks) { - _numSubtasks = numSubtasks; + public void setNumSubtasksPending(int numSubtasksPending) { + _numSubtasksPending = numSubtasksPending; } public void setWatermarkMs(long watermarkMs) { @@ -90,7 +90,7 @@ public static RealtimeToOfflineSegmentsTaskMetadata 
fromZNRecord(ZNRecord znReco public ZNRecord toZNRecord() { ZNRecord znRecord = new ZNRecord(_tableNameWithType); znRecord.setLongField(WATERMARK_KEY, _watermarkMs); - znRecord.setIntField(SUBTASKS_KEY, _numSubtasks); + znRecord.setIntField(SUBTASKS_KEY, _numSubtasksPending); return znRecord; } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 691e0f76ab79..bc402f20125a 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -72,7 +72,6 @@ public class RealtimeToOfflineSegmentsTaskExecutor extends BaseMultipleSegmentsC private static final Logger LOGGER = LoggerFactory.getLogger(RealtimeToOfflineSegmentsTaskExecutor.class); private final MinionTaskZkMetadataManager _minionTaskZkMetadataManager; - private int _expectedVersion = Integer.MIN_VALUE; public RealtimeToOfflineSegmentsTaskExecutor(MinionTaskZkMetadataManager minionTaskZkMetadataManager, MinionConf minionConf) { @@ -83,7 +82,6 @@ public RealtimeToOfflineSegmentsTaskExecutor(MinionTaskZkMetadataManager minionT /** * Fetches the RealtimeToOfflineSegmentsTask metadata ZNode for the realtime table. * Checks that the watermarkMs from the ZNode matches the windowStartMs in the task configs. - * If yes, caches the ZNode version to check during update. 
*/ @Override public void preProcess(PinotTaskConfig pinotTaskConfig) { @@ -104,8 +102,6 @@ public void preProcess(PinotTaskConfig pinotTaskConfig) { "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" + " configs for table: %s. ZNode may have been modified by another task", realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), windowStartMs, realtimeTableName); - - _expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); } @Override @@ -191,8 +187,8 @@ protected List convert(PinotTaskConfig pinotTaskConfig, /** * Fetches the RealtimeToOfflineSegmentsTask metadata ZNode for the realtime table. - * Checks that the version of the ZNode matches with the version cached earlier. If yes, proceeds to update - * watermark in the ZNode + * Update the number of subtasks pending atomically. If number of subtasks left are zero, proceeds to update + * watermark in the ZNode. * TODO: Making the minion task update the ZK metadata is an anti-pattern, however cannot see another way to do it */ @Override @@ -209,11 +205,11 @@ public void postProcess(PinotTaskConfig pinotTaskConfig) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - int numSubtasksLeft = realtimeToOfflineSegmentsTaskMetadata.getNumSubtasks() - 1; + int numSubtasksLeft = realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() - 1; Preconditions.checkState(numSubtasksLeft >= 0, - "num of minion subtasks pending for table: %s should be greater than equal to zero.", + "Num of minion subtasks pending for table: %s should be greater than equal to zero.", realtimeTableName); - realtimeToOfflineSegmentsTaskMetadata.setNumSubtasks(numSubtasksLeft); + realtimeToOfflineSegmentsTaskMetadata.setNumSubtasksPending(numSubtasksLeft); try { if (numSubtasksLeft == 0) { diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 82cff5dcde1a..a988c1112c41 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -153,12 +153,13 @@ public List generateTasks(List tableConfigs) { _clusterInfoAccessor.getMinionTaskMetadataZNRecord(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, realtimeTableName); int expectedVersion = realtimeToOfflineZNRecord != null ? realtimeToOfflineZNRecord.getVersion() : -1; -// RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = -// realtimeToOfflineZNRecord != null ? RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord( -// realtimeToOfflineZNRecord) : null; RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); + Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() == 0, + "number of subtasks pending for moving realtime segments to offline should be zero while generating new RTO" + + " subtasks for table: {}", realtimeTableName); + // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. WindowEnd = // windowStart + bucket. 
long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(); @@ -174,7 +175,6 @@ public List generateTasks(List tableConfigs) { long numRecordsPerTask = 0; List> segmentNamesGroupList = new ArrayList<>(); List> downloadURLsGroupList = new ArrayList<>(); - long minSegmentStartTime = Long.MAX_VALUE; while (true) { // Check that execution window is older than bufferTime @@ -212,7 +212,6 @@ public List generateTasks(List tableConfigs) { break; } - minSegmentStartTime = Math.min(minSegmentStartTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentName); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); @@ -257,7 +256,7 @@ public List generateTasks(List tableConfigs) { windowStartMs, windowEndMs, taskType)); } - realtimeToOfflineSegmentsTaskMetadata.setNumSubtasks(pinotTaskConfigsForTable.size()); + realtimeToOfflineSegmentsTaskMetadata.setNumSubtasksPending(pinotTaskConfigsForTable.size()); try { _clusterInfoAccessor .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, From 31b39607af4a2740f8b8c66ff56dbf3c0cfab6f5 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 9 Dec 2024 12:18:12 +0530 Subject: [PATCH 10/72] clean up --- .../apache/pinot/core/common/MinionConstants.java | 1 - .../RealtimeToOfflineSegmentsTaskGenerator.java | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java index e70fec055277..26e0bd79edc9 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java @@ -150,7 +150,6 @@ public static class RealtimeToOfflineSegmentsTask extends MergeTask { public static final String ROUND_BUCKET_TIME_PERIOD_KEY = "roundBucketTimePeriod"; public static final String MERGE_TYPE_KEY = "mergeType"; public static final String 
AGGREGATION_TYPE_KEY_SUFFIX = ".aggregationType"; - public static final String SEGMENT_ZK_METADATA_TIME_KEY = TASK_TYPE + TASK_TIME_SUFFIX; public final static EnumSet AVAILABLE_CORE_VALUE_AGGREGATORS = EnumSet.of(MIN, MAX, SUM, DISTINCTCOUNTHLL, DISTINCTCOUNTRAWHLL, DISTINCTCOUNTTHETASKETCH, diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index a988c1112c41..a72cecadc26e 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -176,6 +176,12 @@ public List generateTasks(List tableConfigs) { List> segmentNamesGroupList = new ArrayList<>(); List> downloadURLsGroupList = new ArrayList<>(); + int maxNumRecordsPerTask = + taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null + ? Integer.parseInt( + taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) + : DEFAULT_MAX_NUM_RECORDS_PER_TASK; + while (true) { // Check that execution window is older than bufferTime if (windowEndMs > System.currentTimeMillis() - bufferMs) { @@ -186,12 +192,6 @@ public List generateTasks(List tableConfigs) { break; } - int maxNumRecordsPerTask = - taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null - ? 
Integer.parseInt( - taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) - : DEFAULT_MAX_NUM_RECORDS_PER_TASK; - for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); segmentZkMetadataIndex++) { SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); @@ -211,7 +211,6 @@ public List generateTasks(List tableConfigs) { skipGenerate = true; break; } - segmentNames.add(segmentName); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); From aaa72e3c308a2392d3311c1181c904d6aaf0e7af Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 9 Dec 2024 12:22:05 +0530 Subject: [PATCH 11/72] nit --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index a72cecadc26e..5a32a3f83d9d 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -346,11 +346,6 @@ private RealtimeToOfflineSegmentsTaskMetadata getRTOTaskMetadata(String realtime // [20200813, 20200814) watermarkMs = (minStartTimeMs / bucketMs) * bucketMs; - // Create RealtimeToOfflineSegmentsTaskMetadata ZNode using watermark calculated above -// realtimeToOfflineSegmentsTaskMetadata = new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, watermarkMs); -// 
_clusterInfoAccessor.setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, -// MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, -1); - return new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, watermarkMs); } From 29925958442e3af0a4c6d0e23ddf2ad5c4bd05ee Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 9 Dec 2024 17:47:53 +0530 Subject: [PATCH 12/72] addresses PR comment --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 5a32a3f83d9d..b7dc11a1d642 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -87,7 +87,7 @@ public class RealtimeToOfflineSegmentsTaskGenerator extends BaseTaskGenerator { private static final String DEFAULT_BUCKET_PERIOD = "1d"; private static final String DEFAULT_BUFFER_PERIOD = "2d"; - private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = 50_000_000; + private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = 10; @Override public String getTaskType() { @@ -156,9 +156,13 @@ public List generateTasks(List tableConfigs) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); - 
Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() == 0, - "number of subtasks pending for moving realtime segments to offline should be zero while generating new RTO" - + " subtasks for table: {}", realtimeTableName); + if (realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() == 0) { + // this might happen in-case of any failure. + LOGGER.warn( + "No incomplete minion tasks exists in taskQueue, however num of pending subtasks are non zero for table: " + + "{}, taskType: {}. Overriding num of subtasks pending to zero.", realtimeTableName, taskType); + realtimeToOfflineSegmentsTaskMetadata.setNumSubtasksPending(0); + } // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. WindowEnd = // windowStart + bucket. From 58eb51ce3457bab2e8f52a1d0f6c6198c341d11a Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 9 Dec 2024 18:09:13 +0530 Subject: [PATCH 13/72] nit --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index b7dc11a1d642..efe132e3e903 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -157,7 +157,7 @@ public List generateTasks(List tableConfigs) { getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, 
realtimeToOfflineZNRecord); if (realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() == 0) { - // this might happen in-case of any failure. + // this might happen in some edge cases - (like minion server instance went offline, etc). LOGGER.warn( "No incomplete minion tasks exists in taskQueue, however num of pending subtasks are non zero for table: " + "{}, taskType: {}. Overriding num of subtasks pending to zero.", realtimeTableName, taskType); From f4ed4061e0b297683668a7e6506e619b994754f6 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 10 Dec 2024 01:53:34 +0530 Subject: [PATCH 14/72] Alternate solution --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 44 +++++++++++----- ...RealtimeToOfflineSegmentsTaskExecutor.java | 46 ++++++++++------- ...ealtimeToOfflineSegmentsTaskGenerator.java | 51 +++++++++++++++---- 3 files changed, 100 insertions(+), 41 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 709a6fbba9fb..cc3327c7e1a0 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -18,6 +18,12 @@ */ package org.apache.pinot.common.minion; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; import org.apache.helix.zookeeper.datamodel.ZNRecord; @@ -41,33 +47,31 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WATERMARK_KEY = "watermarkMs"; - private static final String SUBTASKS_KEY = "numSubtasks"; + private static final String SEGMENT_NAME_SEPARATOR = ","; private final String _tableNameWithType; private long 
_watermarkMs; - private int _numSubtasksPending; + private final Map> _realtimeSegmentVsCorrespondingOfflineSegmentMap; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; + _realtimeSegmentVsCorrespondingOfflineSegmentMap = new HashMap<>(); } - public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, int numSubtasksPending) { + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, + Map> realtimeSegmentVsCorrespondingOfflineSegment) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; - _numSubtasksPending = numSubtasksPending; + _realtimeSegmentVsCorrespondingOfflineSegmentMap = realtimeSegmentVsCorrespondingOfflineSegment; } public String getTableNameWithType() { return _tableNameWithType; } - public int getNumSubtasksPending() { - return _numSubtasksPending; - } - - public void setNumSubtasksPending(int numSubtasksPending) { - _numSubtasksPending = numSubtasksPending; + public Map> getRealtimeSegmentVsCorrespondingOfflineSegmentMap() { + return _realtimeSegmentVsCorrespondingOfflineSegmentMap; } public void setWatermarkMs(long watermarkMs) { @@ -83,14 +87,28 @@ public long getWatermarkMs() { public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long watermark = znRecord.getLongField(WATERMARK_KEY, 0); - int subtasksLeft = znRecord.getIntField(SUBTASKS_KEY, 0); - return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, subtasksLeft); + Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = new HashMap<>(); + Map fields = znRecord.getSimpleFields(); + for (Map.Entry entry : fields.entrySet()) { + String segmentFrom = entry.getKey(); + String segmentsTo = entry.getValue(); + List segmentsToList = + Arrays.stream(StringUtils.split(segmentsTo, SEGMENT_NAME_SEPARATOR)) + .map(String::trim).collect(Collectors.toList()); + 
realtimeSegmentVsCorrespondingOfflineSegmentMap.put(segmentFrom, segmentsToList); + } + return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, + realtimeSegmentVsCorrespondingOfflineSegmentMap); } public ZNRecord toZNRecord() { ZNRecord znRecord = new ZNRecord(_tableNameWithType); + for (Map.Entry> entry : _realtimeSegmentVsCorrespondingOfflineSegmentMap.entrySet()) { + String segmentFrom = entry.getKey(); + List segmentTo = entry.getValue(); + znRecord.setSimpleField(segmentFrom, StringUtils.join(segmentTo, SEGMENT_NAME_SEPARATOR)); + } znRecord.setLongField(WATERMARK_KEY, _watermarkMs); - znRecord.setIntField(SUBTASKS_KEY, _numSubtasksPending); return znRecord; } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index bc402f20125a..c5dc8b82a0a1 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -21,11 +21,14 @@ import com.google.common.base.Preconditions; import java.io.File; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; import org.apache.helix.zookeeper.datamodel.ZNRecord; -import org.apache.helix.zookeeper.zkclient.exception.ZkException; +import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import 
org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.core.common.MinionConstants; @@ -187,15 +190,15 @@ protected List convert(PinotTaskConfig pinotTaskConfig, /** * Fetches the RealtimeToOfflineSegmentsTask metadata ZNode for the realtime table. - * Update the number of subtasks pending atomically. If number of subtasks left are zero, proceeds to update - * watermark in the ZNode. - * TODO: Making the minion task update the ZK metadata is an anti-pattern, however cannot see another way to do it + * Before uploading the segments, updates the metadata with the expected results + * of the successful execution of current subtask. + * The expected result updated in metadata is read by the next iteration of Task Generator. */ @Override - public void postProcess(PinotTaskConfig pinotTaskConfig) { - Map configs = pinotTaskConfig.getConfigs(); - String realtimeTableName = configs.get(MinionConstants.TABLE_NAME_KEY); - + protected void preUploadSegments(SegmentUploadContext context) + throws Exception { + super.preUploadSegments(context); + String realtimeTableName = context.getTableNameWithType(); while (true) { ZNRecord realtimeToOfflineSegmentsTaskZNRecord = _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, @@ -205,22 +208,29 @@ public void postProcess(PinotTaskConfig pinotTaskConfig) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - int numSubtasksLeft = realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() - 1; - Preconditions.checkState(numSubtasksLeft >= 0, - "Num of minion subtasks pending for table: %s should be greater than equal to zero.", - realtimeTableName); - realtimeToOfflineSegmentsTaskMetadata.setNumSubtasksPending(numSubtasksLeft); + Map> 
realtimeSegmentVsCorrespondingOfflineSegmentMap = + realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap(); + Preconditions.checkState(realtimeSegmentVsCorrespondingOfflineSegmentMap != null); + + List segmentsFrom = + Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) + .map(String::trim).collect(Collectors.toList()); + + List segmentsTo = + context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) + .collect(Collectors.toList()); + + for (String segmentFrom : segmentsFrom) { + Preconditions.checkState(!realtimeSegmentVsCorrespondingOfflineSegmentMap.containsKey(segmentFrom)); + realtimeSegmentVsCorrespondingOfflineSegmentMap.put(segmentFrom, segmentsTo); + } try { - if (numSubtasksLeft == 0) { - long newWaterMarkMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY)); - realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWaterMarkMs); - } _minionTaskZkMetadataManager.setTaskMetadataZNRecord(realtimeToOfflineSegmentsTaskMetadata, RealtimeToOfflineSegmentsTask.TASK_TYPE, expectedVersion); break; - } catch (ZkException e) { + } catch (ZkBadVersionException e) { LOGGER.info( "Version changed while updating num of subtasks left in RTO task metadata for table: {}, Retrying.", realtimeTableName); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index efe132e3e903..01dacebacf2e 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -50,6 +50,7 @@ import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.utils.CommonConstants.Segment; import org.apache.pinot.spi.utils.TimeUtils; +import org.apache.pinot.spi.utils.builder.TableNameBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -136,6 +137,10 @@ public List generateTasks(List tableConfigs) { continue; } + String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(realtimeTableName); + Set offlineTableSegmentNames = + new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, false)); + TableTaskConfig tableTaskConfig = tableConfig.getTaskConfig(); Preconditions.checkState(tableTaskConfig != null); Map taskConfigs = tableTaskConfig.getConfigsForTaskType(taskType); @@ -155,14 +160,8 @@ public List generateTasks(List tableConfigs) { int expectedVersion = realtimeToOfflineZNRecord != null ? realtimeToOfflineZNRecord.getVersion() : -1; RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); - - if (realtimeToOfflineSegmentsTaskMetadata.getNumSubtasksPending() == 0) { - // this might happen in some edge cases - (like minion server instance went offline, etc). - LOGGER.warn( - "No incomplete minion tasks exists in taskQueue, however num of pending subtasks are non zero for table: " - + "{}, taskType: {}. Overriding num of subtasks pending to zero.", realtimeTableName, taskType); - realtimeToOfflineSegmentsTaskMetadata.setNumSubtasksPending(0); - } + Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = + realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap(); // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. 
WindowStart = watermark. WindowEnd = // windowStart + bucket. @@ -186,6 +185,8 @@ public List generateTasks(List tableConfigs) { taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) : DEFAULT_MAX_NUM_RECORDS_PER_TASK; + long minSegmentTime = Long.MAX_VALUE; + while (true) { // Check that execution window is older than bufferTime if (windowEndMs > System.currentTimeMillis() - bufferMs) { @@ -203,6 +204,30 @@ public List generateTasks(List tableConfigs) { String segmentName = segmentZKMetadata.getSegmentName(); long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); + boolean reProcessSegment = false; + + if (realtimeSegmentVsCorrespondingOfflineSegmentMap.containsKey(segmentName)) { + List expectedCorrespondingOfflineSegments = + realtimeSegmentVsCorrespondingOfflineSegmentMap.get(segmentName); + + for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { + if (!offlineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { + // If not all corresponding offline segments to a realtime segment exists, + // it means there was an issue with prev minion task. And segment needs + // to be re-processed. + reProcessSegment = true; + break; + } + } + realtimeSegmentVsCorrespondingOfflineSegmentMap.remove(segmentName); + if (reProcessSegment) { + // data is inconsistent, delete the corresponding offline segments immediately. 
+ _clusterInfoAccessor.getPinotHelixResourceManager() + .deleteSegments(offlineTableName, expectedCorrespondingOfflineSegments); + } else { + continue; + } + } // Check overlap with window if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { @@ -214,7 +239,10 @@ public List generateTasks(List tableConfigs) { + "generation: {}", segmentName, taskType); skipGenerate = true; break; + } else if (reProcessSegment) { + throw new RuntimeException("Segment needs to be reProcessed and shouldn't be skipped"); } + minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentName); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); @@ -227,6 +255,8 @@ public List generateTasks(List tableConfigs) { segmentNames = new ArrayList<>(); downloadURLs = new ArrayList<>(); } + } else if (reProcessSegment) { + throw new RuntimeException("Segment needs to be reProcessed and should lie under bucket range."); } if ((!segmentNames.isEmpty()) @@ -258,8 +288,9 @@ public List generateTasks(List tableConfigs) { createPinotTaskConfig(segmentNameList, downloadURLList, realtimeTableName, taskConfigs, tableConfig, windowStartMs, windowEndMs, taskType)); } - - realtimeToOfflineSegmentsTaskMetadata.setNumSubtasksPending(pinotTaskConfigsForTable.size()); + // update the watermark + long newWatermarkMs = (minSegmentTime / bucketMs) * bucketMs; + realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); try { _clusterInfoAccessor .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, From 07f831c7b0e4f962c71ed40d27ff086a704e4181 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 10 Dec 2024 02:17:53 +0530 Subject: [PATCH 15/72] fixes bugs and nits --- .../common/minion/RealtimeToOfflineSegmentsTaskMetadata.java | 3 +++ .../RealtimeToOfflineSegmentsTaskExecutor.java | 1 - .../RealtimeToOfflineSegmentsTaskGenerator.java | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index cc3327c7e1a0..fbe0d9050ecd 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -90,6 +90,9 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = new HashMap<>(); Map fields = znRecord.getSimpleFields(); for (Map.Entry entry : fields.entrySet()) { + if (entry.getKey().equals(WATERMARK_KEY)) { + continue; + } String segmentFrom = entry.getKey(); String segmentsTo = entry.getValue(); List segmentsToList = diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index c5dc8b82a0a1..b27850bd8f6d 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -210,7 +210,6 @@ protected void preUploadSegments(SegmentUploadContext context) Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap(); - Preconditions.checkState(realtimeSegmentVsCorrespondingOfflineSegmentMap != null); List segmentsFrom = 
Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 01dacebacf2e..814c499be88a 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -54,6 +54,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.pinot.spi.utils.builder.TableNameBuilder.extractRawTableName; + /** * A {@link PinotTaskGenerator} implementation for generating tasks of type {@link RealtimeToOfflineSegmentsTask} @@ -137,7 +139,7 @@ public List generateTasks(List tableConfigs) { continue; } - String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(realtimeTableName); + String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(extractRawTableName(realtimeTableName)); Set offlineTableSegmentNames = new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, false)); @@ -222,6 +224,7 @@ public List generateTasks(List tableConfigs) { realtimeSegmentVsCorrespondingOfflineSegmentMap.remove(segmentName); if (reProcessSegment) { // data is inconsistent, delete the corresponding offline segments immediately. 
+ // TODO: check if can do atomic push _clusterInfoAccessor.getPinotHelixResourceManager() .deleteSegments(offlineTableName, expectedCorrespondingOfflineSegments); } else { From 318e89eb6aa1eca04aac124b1eae4d99f3508547 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 10 Dec 2024 15:15:10 +0530 Subject: [PATCH 16/72] lint fix --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 814c499be88a..7a936b78f00a 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -54,8 +54,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.pinot.spi.utils.builder.TableNameBuilder.extractRawTableName; - /** * A {@link PinotTaskGenerator} implementation for generating tasks of type {@link RealtimeToOfflineSegmentsTask} @@ -90,7 +88,7 @@ public class RealtimeToOfflineSegmentsTaskGenerator extends BaseTaskGenerator { private static final String DEFAULT_BUCKET_PERIOD = "1d"; private static final String DEFAULT_BUFFER_PERIOD = "2d"; - private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = 10; + private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = 50_000_000; @Override public String getTaskType() { @@ -139,7 +137,8 @@ public List generateTasks(List tableConfigs) { continue; } - String 
offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(extractRawTableName(realtimeTableName)); + String offlineTableName = + TableNameBuilder.OFFLINE.tableNameWithType(TableNameBuilder.extractRawTableName(realtimeTableName)); Set offlineTableSegmentNames = new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, false)); From b8e0daf17b86efe09f817ee4a28dc80706c58725 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 10 Dec 2024 16:18:29 +0530 Subject: [PATCH 17/72] fix multiple consecutive failure scenario --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 7a936b78f00a..37d75df5d221 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -187,6 +187,7 @@ public List generateTasks(List tableConfigs) { : DEFAULT_MAX_NUM_RECORDS_PER_TASK; long minSegmentTime = Long.MAX_VALUE; + boolean prevMinionTaskSuccessful = true; while (true) { // Check that execution window is older than bufferTime @@ -217,10 +218,10 @@ public List generateTasks(List tableConfigs) { // it means there was an issue with prev minion task. And segment needs // to be re-processed. 
reProcessSegment = true; + prevMinionTaskSuccessful = false; break; } } - realtimeSegmentVsCorrespondingOfflineSegmentMap.remove(segmentName); if (reProcessSegment) { // data is inconsistent, delete the corresponding offline segments immediately. // TODO: check if can do atomic push @@ -293,6 +294,10 @@ public List generateTasks(List tableConfigs) { // update the watermark long newWatermarkMs = (minSegmentTime / bucketMs) * bucketMs; realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); + if (prevMinionTaskSuccessful) { + // if there were no segments which needed to be reProcessed, we can remove last minion run lineage state. + realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap().clear(); + } try { _clusterInfoAccessor .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, From 30a94590c02a45b26fdc02bf265814afd00c6758 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 13 Dec 2024 01:44:43 +0530 Subject: [PATCH 18/72] refactoring and clean up --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 95 +++++++++++++------ ...RealtimeToOfflineSegmentsTaskExecutor.java | 15 +-- ...ealtimeToOfflineSegmentsTaskGenerator.java | 40 ++++++-- 3 files changed, 110 insertions(+), 40 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index fbe0d9050ecd..cc133dc3860c 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -18,11 +18,11 @@ */ package org.apache.pinot.common.minion; +import com.google.common.base.Preconditions; +import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; -import 
java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.helix.zookeeper.datamodel.ZNRecord; @@ -47,31 +47,42 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WATERMARK_KEY = "watermarkMs"; - private static final String SEGMENT_NAME_SEPARATOR = ","; + private static final String TASK_ID_KEY = "taskID"; + private static final String COMMA_SEPARATOR = ","; private final String _tableNameWithType; private long _watermarkMs; - private final Map> _realtimeSegmentVsCorrespondingOfflineSegmentMap; + private final List _expectedRealtimeToOfflineSegmentsMapList; + private final String _taskId; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { + _watermarkMs = watermarkMs; + _tableNameWithType = tableNameWithType; + _taskId = null; + _expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); + } + + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, String taskId) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; - _realtimeSegmentVsCorrespondingOfflineSegmentMap = new HashMap<>(); + _expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); + _taskId = taskId; } public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, - Map> realtimeSegmentVsCorrespondingOfflineSegment) { + String taskId, List expectedRealtimeToOfflineSegmentsMapList) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; - _realtimeSegmentVsCorrespondingOfflineSegmentMap = realtimeSegmentVsCorrespondingOfflineSegment; + _expectedRealtimeToOfflineSegmentsMapList = expectedRealtimeToOfflineSegmentsMapList; + _taskId = taskId; } public String getTableNameWithType() { return _tableNameWithType; } - public Map> getRealtimeSegmentVsCorrespondingOfflineSegmentMap() { - return _realtimeSegmentVsCorrespondingOfflineSegmentMap; + public List 
getExpectedRealtimeToOfflineSegmentsMapList() { + return _expectedRealtimeToOfflineSegmentsMapList; } public void setWatermarkMs(long watermarkMs) { @@ -85,33 +96,63 @@ public long getWatermarkMs() { return _watermarkMs; } + public String getTaskId() { + return _taskId; + } + public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long watermark = znRecord.getLongField(WATERMARK_KEY, 0); - Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = new HashMap<>(); - Map fields = znRecord.getSimpleFields(); - for (Map.Entry entry : fields.entrySet()) { - if (entry.getKey().equals(WATERMARK_KEY)) { - continue; - } - String segmentFrom = entry.getKey(); - String segmentsTo = entry.getValue(); - List segmentsToList = - Arrays.stream(StringUtils.split(segmentsTo, SEGMENT_NAME_SEPARATOR)) - .map(String::trim).collect(Collectors.toList()); - realtimeSegmentVsCorrespondingOfflineSegmentMap.put(segmentFrom, segmentsToList); + String taskID = znRecord.getSimpleField(TASK_ID_KEY); + List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); + Map> listFields = znRecord.getListFields(); + for (Map.Entry> listField : listFields.entrySet()) { + String subtaskID = listField.getKey(); + List value = listField.getValue(); + Preconditions.checkState(value.size() == 2); + List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); + List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); + expectedRealtimeToOfflineSegmentsMapList.add( + new RealtimeToOfflineSegmentsMap(segmentsFrom, segmentsTo, subtaskID)); } return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, - realtimeSegmentVsCorrespondingOfflineSegmentMap); + taskID, expectedRealtimeToOfflineSegmentsMapList); } public ZNRecord toZNRecord() { ZNRecord znRecord = new ZNRecord(_tableNameWithType); - for (Map.Entry> entry : _realtimeSegmentVsCorrespondingOfflineSegmentMap.entrySet()) { - String segmentFrom = entry.getKey(); 
- List segmentTo = entry.getValue(); - znRecord.setSimpleField(segmentFrom, StringUtils.join(segmentTo, SEGMENT_NAME_SEPARATOR)); - } znRecord.setLongField(WATERMARK_KEY, _watermarkMs); + znRecord.setSimpleField(TASK_ID_KEY, _taskId); + for (RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap : _expectedRealtimeToOfflineSegmentsMapList) { + String segmentsFrom = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsFrom()); + String segmentsTo = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsTo()); + String subtaskID = realtimeToOfflineSegmentsMap.getSubtaskId(); + List listEntry = Arrays.asList(segmentsFrom, segmentsTo); + znRecord.setListField(subtaskID, listEntry); + } return znRecord; } + + public static class RealtimeToOfflineSegmentsMap { + private final List _segmentsFrom; + private final List _segmentsTo; + private final String _subtaskId; + + public RealtimeToOfflineSegmentsMap(List segmentsFrom, List segmentsTo, String subtaskId) { + _segmentsFrom = segmentsFrom; + _segmentsTo = segmentsTo; + _subtaskId = subtaskId; + } + + public String getSubtaskId() { + return _subtaskId; + } + + public List getSegmentsFrom() { + return _segmentsFrom; + } + + public List getSegmentsTo() { + return _segmentsTo; + } + } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 354ec1f15d9c..d0ff72f93aec 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -212,8 +212,9 @@ protected void preUploadSegments(SegmentUploadContext context) RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = - realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap(); + List + expectedRealtimeToOfflineSegmentsMapList = + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList(); List segmentsFrom = Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) @@ -223,10 +224,12 @@ protected void preUploadSegments(SegmentUploadContext context) context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) .collect(Collectors.toList()); - for (String segmentFrom : segmentsFrom) { - Preconditions.checkState(!realtimeSegmentVsCorrespondingOfflineSegmentMap.containsKey(segmentFrom)); - realtimeSegmentVsCorrespondingOfflineSegmentMap.put(segmentFrom, segmentsTo); - } + PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); + + RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap = + new RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap(segmentsFrom, segmentsTo, ""); + + expectedRealtimeToOfflineSegmentsMapList.add(realtimeToOfflineSegmentsMap); try { _minionTaskZkMetadataManager.setTaskMetadataZNRecord(realtimeToOfflineSegmentsTaskMetadata, diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 37d75df5d221..07d78eee3030 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -140,7 +140,7 @@ public List generateTasks(List tableConfigs) { String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(TableNameBuilder.extractRawTableName(realtimeTableName)); Set offlineTableSegmentNames = - new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, false)); + new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, true)); TableTaskConfig tableTaskConfig = tableConfig.getTaskConfig(); Preconditions.checkState(tableTaskConfig != null); @@ -161,8 +161,6 @@ public List generateTasks(List tableConfigs) { int expectedVersion = realtimeToOfflineZNRecord != null ? realtimeToOfflineZNRecord.getVersion() : -1; RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); - Map> realtimeSegmentVsCorrespondingOfflineSegmentMap = - realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap(); // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. WindowEnd = // windowStart + bucket. 
@@ -189,6 +187,13 @@ public List generateTasks(List tableConfigs) { long minSegmentTime = Long.MAX_VALUE; boolean prevMinionTaskSuccessful = true; + List + expectedRealtimeToOfflineSegmentsMapList = + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList(); + Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = + getRealtimeSegmentNameVsCorrespondingOfflineSegmentNames(realtimeToOfflineSegmentsTaskMetadata.getTaskId(), + expectedRealtimeToOfflineSegmentsMapList); + while (true) { // Check that execution window is older than bufferTime if (windowEndMs > System.currentTimeMillis() - bufferMs) { @@ -208,9 +213,9 @@ public List generateTasks(List tableConfigs) { long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); boolean reProcessSegment = false; - if (realtimeSegmentVsCorrespondingOfflineSegmentMap.containsKey(segmentName)) { + if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { List expectedCorrespondingOfflineSegments = - realtimeSegmentVsCorrespondingOfflineSegmentMap.get(segmentName); + realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { if (!offlineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { @@ -295,8 +300,9 @@ public List generateTasks(List tableConfigs) { long newWatermarkMs = (minSegmentTime / bucketMs) * bucketMs; realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); if (prevMinionTaskSuccessful) { - // if there were no segments which needed to be reProcessed, we can remove last minion run lineage state. - realtimeToOfflineSegmentsTaskMetadata.getRealtimeSegmentVsCorrespondingOfflineSegmentMap().clear(); + // if there were no segments which needed to be reProcessed, we can remove the previous minion run expected + // results. 
+ realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList().clear(); } try { _clusterInfoAccessor @@ -317,6 +323,26 @@ public List generateTasks(List tableConfigs) { return pinotTaskConfigs; } + private Map> getRealtimeSegmentNameVsCorrespondingOfflineSegmentNames( + String taskId, + List expectedRealtimeToOfflineSegmentsMapList) { + Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNames = new HashMap<>(); + + for (RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap : + expectedRealtimeToOfflineSegmentsMapList) { + List segmentsFrom = realtimeToOfflineSegmentsMap.getSegmentsFrom(); + List segmentsTo = realtimeToOfflineSegmentsMap.getSegmentsTo(); + for (String segmentFrom : segmentsFrom) { + Preconditions.checkState(!realtimeSegmentNameVsCorrespondingOfflineSegmentNames.containsKey(segmentFrom), + "Realtime segment: {} was picked by multiple subtasks in the previous minion run with task id: {}", + segmentFrom, taskId); + realtimeSegmentNameVsCorrespondingOfflineSegmentNames.put(segmentFrom, segmentsTo); + } + } + + return realtimeSegmentNameVsCorrespondingOfflineSegmentNames; + } + /** * Fetch completed (DONE/UPLOADED) segment and partition information * From d29af8d5d832bc1f4fcc6bea6019d046b115379e Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 13 Dec 2024 01:55:42 +0530 Subject: [PATCH 19/72] refactoring --- ...RealtimeToOfflineSegmentsTaskExecutor.java | 92 +++++++++++-------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index d0ff72f93aec..d9359643d80d 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -47,6 +47,8 @@ import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.RecordReader; import org.apache.pinot.spi.utils.builder.TableNameBuilder; +import org.apache.pinot.spi.utils.retry.RetryPolicies; +import org.apache.pinot.spi.utils.retry.RetryPolicy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -73,6 +75,7 @@ */ public class RealtimeToOfflineSegmentsTaskExecutor extends BaseMultipleSegmentsConversionExecutor { private static final Logger LOGGER = LoggerFactory.getLogger(RealtimeToOfflineSegmentsTaskExecutor.class); + private static final RetryPolicy DEFAULT_RETRY_POLICY = RetryPolicies.exponentialBackoffRetryPolicy(5, 1000L, 2.0f); private final MinionTaskZkMetadataManager _minionTaskZkMetadataManager; @@ -203,45 +206,58 @@ protected void preUploadSegments(SegmentUploadContext context) throws Exception { super.preUploadSegments(context); String realtimeTableName = context.getTableNameWithType(); - while (true) { - ZNRecord realtimeToOfflineSegmentsTaskZNRecord = - _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, - RealtimeToOfflineSegmentsTask.TASK_TYPE); - int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); - - RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = - RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - - List - expectedRealtimeToOfflineSegmentsMapList = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList(); - - List segmentsFrom = - Arrays.stream(StringUtils.split(context.getInputSegmentNames(), 
MinionConstants.SEGMENT_NAME_SEPARATOR)) - .map(String::trim).collect(Collectors.toList()); - - List segmentsTo = - context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) - .collect(Collectors.toList()); - - PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); - - RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap = - new RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap(segmentsFrom, segmentsTo, ""); - - expectedRealtimeToOfflineSegmentsMapList.add(realtimeToOfflineSegmentsMap); - - try { - _minionTaskZkMetadataManager.setTaskMetadataZNRecord(realtimeToOfflineSegmentsTaskMetadata, - RealtimeToOfflineSegmentsTask.TASK_TYPE, - expectedVersion); - break; - } catch (ZkBadVersionException e) { - LOGGER.info( - "Version changed while updating num of subtasks left in RTO task metadata for table: {}, Retrying.", - realtimeTableName); - } + int attemptCount; + try { + attemptCount = DEFAULT_RETRY_POLICY.attempt(() -> { + ZNRecord realtimeToOfflineSegmentsTaskZNRecord = + _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, + RealtimeToOfflineSegmentsTask.TASK_TYPE); + int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); + + List + expectedRealtimeToOfflineSegmentsMapList = + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList(); + + List segmentsFrom = + Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) + .map(String::trim).collect(Collectors.toList()); + + List segmentsTo = + context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) + .collect(Collectors.toList()); + + PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); + + 
RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap = + new RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap(segmentsFrom, segmentsTo, ""); + + expectedRealtimeToOfflineSegmentsMapList.add(realtimeToOfflineSegmentsMap); + + try { + _minionTaskZkMetadataManager.setTaskMetadataZNRecord(realtimeToOfflineSegmentsTaskMetadata, + RealtimeToOfflineSegmentsTask.TASK_TYPE, + expectedVersion); + return true; + } catch (ZkBadVersionException e) { + LOGGER.info( + "Version changed while updating num of subtasks left in RTO task metadata for table: {}, Retrying.", + realtimeTableName); + return false; + } + }); + } catch (Exception e) { + String errorMsg = + String.format("Failed to update the sRealtimeToOfflineSegmentsTaskMetadata during preUploadSegments. " + + "(tableName = %s)", realtimeTableName); + LOGGER.error(errorMsg, e); + throw new RuntimeException(errorMsg, e); } + LOGGER.info( + "Successfully updated the RealtimeToOfflineSegmentsTaskMetadata during preUploadSegments for table: {}", + realtimeTableName); } @Override From 8bcbe0f5c5c73984ef6a13c1211c951fd94714c7 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 13 Dec 2024 03:16:15 +0530 Subject: [PATCH 20/72] refactoring --- ...ExpectedRealtimeOfflineTaskResultInfo.java | 66 +++++++++++++++ ...RealtimeToOfflineSegmentsTaskMetadata.java | 83 ++++++------------- ...RealtimeToOfflineSegmentsTaskExecutor.java | 18 ++-- ...ealtimeToOfflineSegmentsTaskGenerator.java | 70 ++++++++++------ 4 files changed, 145 insertions(+), 92 deletions(-) create mode 100644 pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java new file mode 100644 index 000000000000..70b29293eb90 --- /dev/null +++ 
b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.minion; + +import java.util.List; +import java.util.UUID; + + +/** + * ExpectedRealtimeOfflineTaskResultInfo is created in + * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskExecutor} + * before uploading offline segment(s) to the offline table. 
+ */ +public class ExpectedRealtimeOfflineTaskResultInfo { + private final List _segmentsFrom; + private final List _segmentsTo; + private final String _id; + private final String _taskID; + + public ExpectedRealtimeOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, String taskID) { + _segmentsFrom = segmentsFrom; + _segmentsTo = segmentsTo; + _taskID = taskID; + _id = UUID.randomUUID().toString(); + } + + public ExpectedRealtimeOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, + String realtimeToOfflineSegmentsMapId, String taskID) { + _segmentsFrom = segmentsFrom; + _segmentsTo = segmentsTo; + _id = realtimeToOfflineSegmentsMapId; + _taskID = taskID; + } + + public String getTaskID() { + return _taskID; + } + + public String getId() { + return _id; + } + + public List getSegmentsFrom() { + return _segmentsFrom; + } + + public List getSegmentsTo() { + return _segmentsTo; + } +} diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index cc133dc3860c..15140f7ea70b 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -30,6 +30,10 @@ /** * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. * The watermarkMs denotes the time (exclusive) upto which tasks have been executed. + * The expectedRealtimeToOfflineSegmentsTaskResultList denotes the expected RTO tasks result info. + * This list can contain both completed and in-completed Tasks expected Results. This list is used by + * generator to validate whether a potential segment (for RTO task) has already been successfully + * processed as a RTO task in the past or not. 
* * This gets serialized and stored in zookeeper under the path * MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask @@ -41,48 +45,38 @@ * * PinotTaskExecutor: * The same watermark is used by the RealtimeToOfflineSegmentsTaskExecutor, to: - * - Verify that is is running the latest task scheduled by the task generator - * - Update the watermark as the end of the window that it executed for + * - Verify that it's running the latest task scheduled by the task generator. + * - The ExpectedRealtimeToOfflineSegmentsTaskResultList is updated before the offline segments + * are uploaded to the table. */ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WATERMARK_KEY = "watermarkMs"; - private static final String TASK_ID_KEY = "taskID"; private static final String COMMA_SEPARATOR = ","; private final String _tableNameWithType; private long _watermarkMs; - private final List _expectedRealtimeToOfflineSegmentsMapList; - private final String _taskId; + private final List _expectedRealtimeToOfflineSegmentsTaskResultList; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { _watermarkMs = watermarkMs; _tableNameWithType = tableNameWithType; - _taskId = null; - _expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); - } - - public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, String taskId) { - _tableNameWithType = tableNameWithType; - _watermarkMs = watermarkMs; - _expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); - _taskId = taskId; + _expectedRealtimeToOfflineSegmentsTaskResultList = new ArrayList<>(); } public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, - String taskId, List expectedRealtimeToOfflineSegmentsMapList) { + List expectedRealtimeToOfflineSegmentsMapList) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; - 
_expectedRealtimeToOfflineSegmentsMapList = expectedRealtimeToOfflineSegmentsMapList; - _taskId = taskId; + _expectedRealtimeToOfflineSegmentsTaskResultList = expectedRealtimeToOfflineSegmentsMapList; } public String getTableNameWithType() { return _tableNameWithType; } - public List getExpectedRealtimeToOfflineSegmentsMapList() { - return _expectedRealtimeToOfflineSegmentsMapList; + public List getExpectedRealtimeToOfflineSegmentsTaskResultList() { + return _expectedRealtimeToOfflineSegmentsTaskResultList; } public void setWatermarkMs(long watermarkMs) { @@ -96,63 +90,36 @@ public long getWatermarkMs() { return _watermarkMs; } - public String getTaskId() { - return _taskId; - } - public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long watermark = znRecord.getLongField(WATERMARK_KEY, 0); - String taskID = znRecord.getSimpleField(TASK_ID_KEY); - List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); + List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); Map> listFields = znRecord.getListFields(); for (Map.Entry> listField : listFields.entrySet()) { - String subtaskID = listField.getKey(); + String realtimeToOfflineSegmentsMapId = listField.getKey(); List value = listField.getValue(); - Preconditions.checkState(value.size() == 2); + Preconditions.checkState(value.size() == 3); List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); + String taskID = value.get(2); expectedRealtimeToOfflineSegmentsMapList.add( - new RealtimeToOfflineSegmentsMap(segmentsFrom, segmentsTo, subtaskID)); + new ExpectedRealtimeOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID)); } return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, - taskID, expectedRealtimeToOfflineSegmentsMapList); + expectedRealtimeToOfflineSegmentsMapList); } public ZNRecord toZNRecord() { 
ZNRecord znRecord = new ZNRecord(_tableNameWithType); znRecord.setLongField(WATERMARK_KEY, _watermarkMs); - znRecord.setSimpleField(TASK_ID_KEY, _taskId); - for (RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap : _expectedRealtimeToOfflineSegmentsMapList) { + for (ExpectedRealtimeOfflineTaskResultInfo realtimeToOfflineSegmentsMap + : _expectedRealtimeToOfflineSegmentsTaskResultList) { String segmentsFrom = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsFrom()); String segmentsTo = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsTo()); - String subtaskID = realtimeToOfflineSegmentsMap.getSubtaskId(); - List listEntry = Arrays.asList(segmentsFrom, segmentsTo); - znRecord.setListField(subtaskID, listEntry); + String taskId = realtimeToOfflineSegmentsMap.getTaskID(); + String realtimeToOfflineSegmentsMapId = realtimeToOfflineSegmentsMap.getId(); + List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId); + znRecord.setListField(realtimeToOfflineSegmentsMapId, listEntry); } return znRecord; } - - public static class RealtimeToOfflineSegmentsMap { - private final List _segmentsFrom; - private final List _segmentsTo; - private final String _subtaskId; - - public RealtimeToOfflineSegmentsMap(List segmentsFrom, List segmentsTo, String subtaskId) { - _segmentsFrom = segmentsFrom; - _segmentsTo = segmentsTo; - _subtaskId = subtaskId; - } - - public String getSubtaskId() { - return _subtaskId; - } - - public List getSegmentsFrom() { - return _segmentsFrom; - } - - public List getSegmentsTo() { - return _segmentsTo; - } - } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 
d9359643d80d..1b03dec5992d 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -30,6 +30,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; +import org.apache.pinot.common.minion.ExpectedRealtimeOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.core.common.MinionConstants; import org.apache.pinot.core.common.MinionConstants.RealtimeToOfflineSegmentsTask; @@ -199,7 +200,8 @@ protected List convert(PinotTaskConfig pinotTaskConfig, * Fetches the RealtimeToOfflineSegmentsTask metadata ZNode for the realtime table. * Before uploading the segments, updates the metadata with the expected results * of the successful execution of current subtask. - * The expected result updated in metadata is read by the next iteration of Task Generator. + * The expected result updated in metadata is read by the next iteration of Task Generator + * to ensure data correctness. 
*/ @Override protected void preUploadSegments(SegmentUploadContext context) @@ -217,9 +219,9 @@ protected void preUploadSegments(SegmentUploadContext context) RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - List + List expectedRealtimeToOfflineSegmentsMapList = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList(); + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); List segmentsFrom = Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) @@ -231,8 +233,9 @@ protected void preUploadSegments(SegmentUploadContext context) PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); - RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap = - new RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap(segmentsFrom, segmentsTo, ""); + ExpectedRealtimeOfflineTaskResultInfo realtimeToOfflineSegmentsMap = + new ExpectedRealtimeOfflineTaskResultInfo(segmentsFrom, segmentsTo, + pinotTaskConfig.getTaskId()); expectedRealtimeToOfflineSegmentsMapList.add(realtimeToOfflineSegmentsMap); @@ -256,8 +259,9 @@ protected void preUploadSegments(SegmentUploadContext context) throw new RuntimeException(errorMsg, e); } LOGGER.info( - "Successfully updated the RealtimeToOfflineSegmentsTaskMetadata during preUploadSegments for table: {}", - realtimeTableName); + "Successfully updated the RealtimeToOfflineSegmentsTaskMetadata during preUploadSegments for table: {}, " + + "attemptCount: {}", + realtimeTableName, attemptCount); } @Override diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 07d78eee3030..6dba9fca8f80 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -31,6 +31,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; +import org.apache.pinot.common.minion.ExpectedRealtimeOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.common.utils.LLCSegmentName; import org.apache.pinot.controller.helix.core.minion.generator.BaseTaskGenerator; @@ -187,12 +188,12 @@ public List generateTasks(List tableConfigs) { long minSegmentTime = Long.MAX_VALUE; boolean prevMinionTaskSuccessful = true; - List - expectedRealtimeToOfflineSegmentsMapList = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList(); + // get past minion task runs expected results. This list can have both successful and + // failed task's expected results. 
+ List expectedRealtimeToOfflineSegmentsMapList = + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = - getRealtimeSegmentNameVsCorrespondingOfflineSegmentNames(realtimeToOfflineSegmentsTaskMetadata.getTaskId(), - expectedRealtimeToOfflineSegmentsMapList); + getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineSegmentsMapList); while (true) { // Check that execution window is older than bufferTime @@ -217,22 +218,18 @@ public List generateTasks(List tableConfigs) { List expectedCorrespondingOfflineSegments = realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); - for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { - if (!offlineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { - // If not all corresponding offline segments to a realtime segment exists, - // it means there was an issue with prev minion task. And segment needs - // to be re-processed. - reProcessSegment = true; - prevMinionTaskSuccessful = false; - break; - } - } + reProcessSegment = + checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, offlineTableSegmentNames); + if (reProcessSegment) { + prevMinionTaskSuccessful = false; // data is inconsistent, delete the corresponding offline segments immediately. // TODO: check if can do atomic push _clusterInfoAccessor.getPinotHelixResourceManager() .deleteSegments(offlineTableName, expectedCorrespondingOfflineSegments); } else { + // segment has been picked in one of the past tasks and the task was successful. 
+ // move to next potential segment continue; } } @@ -248,7 +245,8 @@ public List generateTasks(List tableConfigs) { skipGenerate = true; break; } else if (reProcessSegment) { - throw new RuntimeException("Segment needs to be reProcessed and shouldn't be skipped"); + throw new RuntimeException( + "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); } minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentName); @@ -264,7 +262,8 @@ public List generateTasks(List tableConfigs) { downloadURLs = new ArrayList<>(); } } else if (reProcessSegment) { - throw new RuntimeException("Segment needs to be reProcessed and should lie under bucket range."); + throw new RuntimeException( + "Segment: " + segmentName + " needs to be reProcessed and should lie under bucket range."); } if ((!segmentNames.isEmpty()) @@ -300,9 +299,10 @@ public List generateTasks(List tableConfigs) { long newWatermarkMs = (minSegmentTime / bucketMs) * bucketMs; realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); if (prevMinionTaskSuccessful) { - // if there were no segments which needed to be reProcessed, we can remove the previous minion run expected - // results. - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsMapList().clear(); + // if there were no segments which needed to be reProcessed, we can remove the past minion runs expected + // results. The past minion runs expected result list should either be cleared all or none so that + // the same segment is not picked again in-case of consecutive minion task failures. 
+ realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList().clear(); } try { _clusterInfoAccessor @@ -323,19 +323,35 @@ public List generateTasks(List tableConfigs) { return pinotTaskConfigs; } - private Map> getRealtimeSegmentNameVsCorrespondingOfflineSegmentNames( - String taskId, - List expectedRealtimeToOfflineSegmentsMapList) { - Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNames = new HashMap<>(); + private boolean checkIfSegmentNeedsToBeReProcessed(List expectedCorrespondingOfflineSegments, + Set offlineTableSegmentNames) { + + for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { + if (!offlineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { + // If not all corresponding offline segments to a realtime segment exists, + // it means there was an issue with prev minion task. And segment needs + // to be re-processed. + return true; + } + } + + return false; + } + + private Map> getRealtimeVsCorrespondingOfflineSegmentNames( + List + expectedRealtimeToOfflineSegmentsMapList) { + Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNames + = new HashMap<>(); - for (RealtimeToOfflineSegmentsTaskMetadata.RealtimeToOfflineSegmentsMap realtimeToOfflineSegmentsMap : - expectedRealtimeToOfflineSegmentsMapList) { + for (ExpectedRealtimeOfflineTaskResultInfo realtimeToOfflineSegmentsMap + : expectedRealtimeToOfflineSegmentsMapList) { List segmentsFrom = realtimeToOfflineSegmentsMap.getSegmentsFrom(); List segmentsTo = realtimeToOfflineSegmentsMap.getSegmentsTo(); for (String segmentFrom : segmentsFrom) { Preconditions.checkState(!realtimeSegmentNameVsCorrespondingOfflineSegmentNames.containsKey(segmentFrom), "Realtime segment: {} was picked by multiple subtasks in the previous minion run with task id: {}", - segmentFrom, taskId); + segmentFrom, realtimeToOfflineSegmentsMap.getTaskID()); realtimeSegmentNameVsCorrespondingOfflineSegmentNames.put(segmentFrom, segmentsTo); } } 
From 01381a2cffeb52152236b4e058408c931cd2b998 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 13 Dec 2024 12:15:03 +0530 Subject: [PATCH 21/72] fixes tests --- ...imeToOfflineSegmentsTaskGeneratorTest.java | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 49a9fd8d57d3..c4d4bee144af 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -20,6 +20,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -61,6 +62,7 @@ public class RealtimeToOfflineSegmentsTaskGeneratorTest { private static final String RAW_TABLE_NAME = "testTable"; private static final String REALTIME_TABLE_NAME = "testTable_REALTIME"; + private static final String OFFLINE_TABLE_NAME = "testTable_OFFLINE"; private static final String TIME_COLUMN_NAME = "millisSinceEpoch"; private final Map _streamConfigs = new HashMap<>(); @@ -95,6 +97,11 @@ public void testGenerateTasksCheckConfigs() { when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)) .thenReturn(getIdealState(REALTIME_TABLE_NAME, Lists.newArrayList(segmentZKMetadata.getSegmentName()))); + PinotHelixResourceManager mockPinotHelixResourceManager = 
mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); generator.init(mockClusterInfoProvide); @@ -151,6 +158,11 @@ public void testGenerateTasksSimultaneousConstraints() { when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)) .thenReturn(getIdealState(REALTIME_TABLE_NAME, Lists.newArrayList(segmentZKMetadata.getSegmentName()))); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); generator.init(mockClusterInfoProvide); @@ -244,6 +256,11 @@ public void testGenerateTasksNoMinionMetadata() { when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)).thenReturn(getIdealState(REALTIME_TABLE_NAME, Lists.newArrayList(segmentZKMetadata1.getSegmentName(), segmentZKMetadata2.getSegmentName()))); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + // StartTime calculated using segment metadata Map> taskConfigsMap = new HashMap<>(); taskConfigsMap.put(RealtimeToOfflineSegmentsTask.TASK_TYPE, new HashMap<>()); @@ -305,6 +322,11 @@ public void testGenerateTasksWithMinionMetadata() { 
when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)).thenReturn(getIdealState(REALTIME_TABLE_NAME, Lists.newArrayList(segmentZKMetadata1.getSegmentName(), segmentZKMetadata2.getSegmentName()))); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + // Default configs Map> taskConfigsMap = new HashMap<>(); taskConfigsMap.put(RealtimeToOfflineSegmentsTask.TASK_TYPE, new HashMap<>()); @@ -403,6 +425,11 @@ public void testOverflowIntoConsuming() { RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); generator.init(mockClusterInfoProvide); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + // last COMPLETED segment's endTime is less than windowEnd time. CONSUMING segment overlap. 
Skip task List pinotTaskConfigs = generator.generateTasks(Lists.newArrayList(realtimeTableConfig)); assertTrue(pinotTaskConfigs.isEmpty()); @@ -453,6 +480,11 @@ public void testTimeGap() { when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)).thenReturn(getIdealState(REALTIME_TABLE_NAME, Lists.newArrayList(segmentZKMetadata.getSegmentName()))); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); generator.init(mockClusterInfoProvide); @@ -486,6 +518,11 @@ public void testBuffer() { when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)).thenReturn(getIdealState(REALTIME_TABLE_NAME, Lists.newArrayList(segmentZKMetadata.getSegmentName()))); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn(new ArrayList<>()); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); generator.init(mockClusterInfoProvide); @@ -659,9 +696,9 @@ private SegmentZKMetadata getSegmentZKMetadata(String segmentName, Status status private IdealState getIdealState(String tableName, List segmentNames) { IdealState idealState = new IdealState(tableName); idealState.setRebalanceMode(IdealState.RebalanceMode.CUSTOMIZED); - for (String segmentName: segmentNames) { + for (String segmentName : segmentNames) { idealState.setPartitionState(segmentName, "Server_0", "ONLINE"); } return idealState; } -} +} \ No newline at end of file From 
75fb4ba69ee7b3e560000506a9798ca56a5c0180 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 13 Dec 2024 12:38:17 +0530 Subject: [PATCH 22/72] nit --- .../RealtimeToOfflineSegmentsTaskGeneratorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index c4d4bee144af..76970a1cc990 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -701,4 +701,4 @@ private IdealState getIdealState(String tableName, List segmentNames) { } return idealState; } -} \ No newline at end of file +} From 546f27e94cd090980b15915148e26d94dd4230c4 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Fri, 13 Dec 2024 12:39:07 +0530 Subject: [PATCH 23/72] nit --- .../RealtimeToOfflineSegmentsTaskGeneratorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 76970a1cc990..17c90d42e3ad 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -696,7 +696,7 @@ private SegmentZKMetadata getSegmentZKMetadata(String segmentName, Status status private IdealState getIdealState(String tableName, List segmentNames) { IdealState idealState = new IdealState(tableName); idealState.setRebalanceMode(IdealState.RebalanceMode.CUSTOMIZED); - for (String segmentName : segmentNames) { + for (String segmentName: segmentNames) { idealState.setPartitionState(segmentName, "Server_0", "ONLINE"); } return idealState; From fae4aaa80873a0f057fc6857ce703a69a6d8df06 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sat, 14 Dec 2024 00:14:00 +0530 Subject: [PATCH 24/72] nit --- ...=> ExpectedRealtimeToOfflineTaskResultInfo.java} | 6 +++--- .../RealtimeToOfflineSegmentsTaskMetadata.java | 13 +++++++------ .../RealtimeToOfflineSegmentsTaskExecutor.java | 8 ++++---- .../RealtimeToOfflineSegmentsTaskGenerator.java | 8 ++++---- 4 files changed, 18 insertions(+), 17 deletions(-) rename pinot-common/src/main/java/org/apache/pinot/common/minion/{ExpectedRealtimeOfflineTaskResultInfo.java => ExpectedRealtimeToOfflineTaskResultInfo.java} (87%) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java similarity index 87% rename from pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java rename to pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java index 70b29293eb90..e8379ca137a7 100644 --- 
a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeOfflineTaskResultInfo.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java @@ -27,20 +27,20 @@ * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskExecutor} * before uploading offline segment(s) to the offline table. */ -public class ExpectedRealtimeOfflineTaskResultInfo { +public class ExpectedRealtimeToOfflineTaskResultInfo { private final List _segmentsFrom; private final List _segmentsTo; private final String _id; private final String _taskID; - public ExpectedRealtimeOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, String taskID) { + public ExpectedRealtimeToOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, String taskID) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; _taskID = taskID; _id = UUID.randomUUID().toString(); } - public ExpectedRealtimeOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, + public ExpectedRealtimeToOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, String realtimeToOfflineSegmentsMapId, String taskID) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 15140f7ea70b..3e833936d129 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -56,7 +56,7 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private final String _tableNameWithType; private long _watermarkMs; - private final List _expectedRealtimeToOfflineSegmentsTaskResultList; + private final List 
_expectedRealtimeToOfflineSegmentsTaskResultList; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { _watermarkMs = watermarkMs; @@ -65,7 +65,7 @@ public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long wate } public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, - List expectedRealtimeToOfflineSegmentsMapList) { + List expectedRealtimeToOfflineSegmentsMapList) { _tableNameWithType = tableNameWithType; _watermarkMs = watermarkMs; _expectedRealtimeToOfflineSegmentsTaskResultList = expectedRealtimeToOfflineSegmentsMapList; @@ -75,7 +75,7 @@ public String getTableNameWithType() { return _tableNameWithType; } - public List getExpectedRealtimeToOfflineSegmentsTaskResultList() { + public List getExpectedRealtimeToOfflineSegmentsTaskResultList() { return _expectedRealtimeToOfflineSegmentsTaskResultList; } @@ -92,7 +92,7 @@ public long getWatermarkMs() { public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long watermark = znRecord.getLongField(WATERMARK_KEY, 0); - List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); + List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); Map> listFields = znRecord.getListFields(); for (Map.Entry> listField : listFields.entrySet()) { String realtimeToOfflineSegmentsMapId = listField.getKey(); @@ -102,7 +102,8 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); String taskID = value.get(2); expectedRealtimeToOfflineSegmentsMapList.add( - new ExpectedRealtimeOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID)); + new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID) + ); } return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, 
expectedRealtimeToOfflineSegmentsMapList); @@ -111,7 +112,7 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco public ZNRecord toZNRecord() { ZNRecord znRecord = new ZNRecord(_tableNameWithType); znRecord.setLongField(WATERMARK_KEY, _watermarkMs); - for (ExpectedRealtimeOfflineTaskResultInfo realtimeToOfflineSegmentsMap + for (ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap : _expectedRealtimeToOfflineSegmentsTaskResultList) { String segmentsFrom = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsFrom()); String segmentsTo = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsTo()); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 1b03dec5992d..32595b931f26 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -30,7 +30,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; -import org.apache.pinot.common.minion.ExpectedRealtimeOfflineTaskResultInfo; +import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.core.common.MinionConstants; import 
org.apache.pinot.core.common.MinionConstants.RealtimeToOfflineSegmentsTask; @@ -219,7 +219,7 @@ protected void preUploadSegments(SegmentUploadContext context) RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - List + List expectedRealtimeToOfflineSegmentsMapList = realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); @@ -233,8 +233,8 @@ protected void preUploadSegments(SegmentUploadContext context) PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); - ExpectedRealtimeOfflineTaskResultInfo realtimeToOfflineSegmentsMap = - new ExpectedRealtimeOfflineTaskResultInfo(segmentsFrom, segmentsTo, + ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap = + new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, pinotTaskConfig.getTaskId()); expectedRealtimeToOfflineSegmentsMapList.add(realtimeToOfflineSegmentsMap); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 6dba9fca8f80..a9a507edde32 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -31,7 +31,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkException; import 
org.apache.pinot.common.metadata.segment.SegmentZKMetadata; -import org.apache.pinot.common.minion.ExpectedRealtimeOfflineTaskResultInfo; +import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.common.utils.LLCSegmentName; import org.apache.pinot.controller.helix.core.minion.generator.BaseTaskGenerator; @@ -190,7 +190,7 @@ public List generateTasks(List tableConfigs) { // get past minion task runs expected results. This list can have both successful and // failed task's expected results. - List expectedRealtimeToOfflineSegmentsMapList = + List expectedRealtimeToOfflineSegmentsMapList = realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineSegmentsMapList); @@ -339,12 +339,12 @@ private boolean checkIfSegmentNeedsToBeReProcessed(List expectedCorrespo } private Map> getRealtimeVsCorrespondingOfflineSegmentNames( - List + List expectedRealtimeToOfflineSegmentsMapList) { Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNames = new HashMap<>(); - for (ExpectedRealtimeOfflineTaskResultInfo realtimeToOfflineSegmentsMap + for (ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap : expectedRealtimeToOfflineSegmentsMapList) { List segmentsFrom = realtimeToOfflineSegmentsMap.getSegmentsFrom(); List segmentsTo = realtimeToOfflineSegmentsMap.getSegmentsTo(); From 97e8c498ac7ace7909836f44c8c26c889e03885e Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sat, 14 Dec 2024 02:24:23 +0530 Subject: [PATCH 25/72] Fixes time window bug --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index a9a507edde32..4c634b31a016 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -235,7 +235,7 @@ public List generateTasks(List tableConfigs) { } // Check overlap with window - if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { + if ((segmentStartTimeMs >= windowStartMs) && (segmentStartTimeMs < windowEndMs)) { // If last completed segment is being used, make sure that segment crosses over end of window. // In the absence of this check, CONSUMING segments could contain some portion of the window. That data // would be skipped forever. 
From 4ddfbb1a8871249622831fa6a05aecee6536ab31 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 15 Dec 2024 01:13:19 +0530 Subject: [PATCH 26/72] refactoring --- ...RealtimeToOfflineSegmentsTaskExecutor.java | 64 +++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 32595b931f26..ab47de57f29e 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -211,36 +211,16 @@ protected void preUploadSegments(SegmentUploadContext context) int attemptCount; try { attemptCount = DEFAULT_RETRY_POLICY.attempt(() -> { + ZNRecord realtimeToOfflineSegmentsTaskZNRecord = _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, RealtimeToOfflineSegmentsTask.TASK_TYPE); int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); - RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = - RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - - List - expectedRealtimeToOfflineSegmentsMapList = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); - - List segmentsFrom = - Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) - .map(String::trim).collect(Collectors.toList()); - - List segmentsTo = - 
context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) - .collect(Collectors.toList()); - - PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); - - ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap = - new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, - pinotTaskConfig.getTaskId()); - - expectedRealtimeToOfflineSegmentsMapList.add(realtimeToOfflineSegmentsMap); - + RealtimeToOfflineSegmentsTaskMetadata updatedRealtimeToOfflineSegmentsTaskMetadata = + getUpdatedTaskMetadata(context, realtimeToOfflineSegmentsTaskZNRecord); try { - _minionTaskZkMetadataManager.setTaskMetadataZNRecord(realtimeToOfflineSegmentsTaskMetadata, + _minionTaskZkMetadataManager.setTaskMetadataZNRecord(updatedRealtimeToOfflineSegmentsTaskMetadata, RealtimeToOfflineSegmentsTask.TASK_TYPE, expectedVersion); return true; @@ -253,7 +233,7 @@ protected void preUploadSegments(SegmentUploadContext context) }); } catch (Exception e) { String errorMsg = - String.format("Failed to update the sRealtimeToOfflineSegmentsTaskMetadata during preUploadSegments. " + String.format("Failed to update the RealtimeToOfflineSegmentsTaskMetadata during preUploadSegments. 
" + "(tableName = %s)", realtimeTableName); LOGGER.error(errorMsg, e); throw new RuntimeException(errorMsg, e); @@ -270,4 +250,38 @@ protected SegmentZKMetadataCustomMapModifier getSegmentZKMetadataCustomMapModifi return new SegmentZKMetadataCustomMapModifier(SegmentZKMetadataCustomMapModifier.ModifyMode.UPDATE, Collections.emptyMap()); } + + private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUploadContext context, + ZNRecord realtimeToOfflineSegmentsTaskZNRecord) { + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); + + List + expectedRealtimeToOfflineSegmentsMapList = + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); + + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + getExpectedRealtimeToOfflineTaskResultInfo(context); + + expectedRealtimeToOfflineSegmentsMapList.add(expectedRealtimeToOfflineTaskResultInfo); + return realtimeToOfflineSegmentsTaskMetadata; + } + + private ExpectedRealtimeToOfflineTaskResultInfo getExpectedRealtimeToOfflineTaskResultInfo( + SegmentUploadContext context) { + + PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); + String taskId = pinotTaskConfig.getTaskId(); + + List segmentsFrom = + Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) + .map(String::trim).collect(Collectors.toList()); + + List segmentsTo = + context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) + .collect(Collectors.toList()); + + return new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, taskId); + } } From 7d3fa68c3519684c0a57bbc4dea6fabbf1d89538 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 15 Dec 2024 02:21:37 +0530 Subject: [PATCH 27/72] adds conditions for edge cases --- 
...ealtimeToOfflineSegmentsTaskGenerator.java | 69 ++++++++++++++----- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 4c634b31a016..d61a76a1042e 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -29,7 +29,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.helix.task.TaskState; import org.apache.helix.zookeeper.datamodel.ZNRecord; -import org.apache.helix.zookeeper.zkclient.exception.ZkException; +import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; @@ -140,7 +140,7 @@ public List generateTasks(List tableConfigs) { String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(TableNameBuilder.extractRawTableName(realtimeTableName)); - Set offlineTableSegmentNames = + Set existingOfflineTableSegmentNames = new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, true)); TableTaskConfig tableTaskConfig = tableConfig.getTaskConfig(); @@ -186,7 +186,7 @@ public List generateTasks(List tableConfigs) { : DEFAULT_MAX_NUM_RECORDS_PER_TASK; long minSegmentTime = 
Long.MAX_VALUE; - boolean prevMinionTaskSuccessful = true; + boolean isPrevMinionTaskSuccessful = true; // get past minion task runs expected results. This list can have both successful and // failed task's expected results. @@ -212,29 +212,42 @@ public List generateTasks(List tableConfigs) { String segmentName = segmentZKMetadata.getSegmentName(); long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); + // reProcessSegment denotes whether to reschedule a previous segment which was a + // part of a failed task. boolean reProcessSegment = false; if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { + // segment has been picked previously, check if offline segments generated by this segment + // exists in offline table List expectedCorrespondingOfflineSegments = realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); reProcessSegment = - checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, offlineTableSegmentNames); + checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, + existingOfflineTableSegmentNames); if (reProcessSegment) { - prevMinionTaskSuccessful = false; + isPrevMinionTaskSuccessful = false; + // In-case of partial failure of segments upload, // data is inconsistent, delete the corresponding offline segments immediately. 
- // TODO: check if can do atomic push - _clusterInfoAccessor.getPinotHelixResourceManager() - .deleteSegments(offlineTableName, expectedCorrespondingOfflineSegments); + // TODO: check if can do atomic push in executor + List segmentsToDelete = + getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); + + if (!segmentsToDelete.isEmpty()) { + _clusterInfoAccessor.getPinotHelixResourceManager() + .deleteSegments(offlineTableName, segmentsToDelete); + // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the + // actual state. But there is no need to update existingOfflineTableSegmentNames. + } } else { // segment has been picked in one of the past tasks and the task was successful. - // move to next potential segment + // move to the next potential segment continue; } } - // Check overlap with window + // Check overlap with window. if ((segmentStartTimeMs >= windowStartMs) && (segmentStartTimeMs < windowEndMs)) { // If last completed segment is being used, make sure that segment crosses over end of window. // In the absence of this check, CONSUMING segments could contain some portion of the window. That data @@ -243,10 +256,14 @@ public List generateTasks(List tableConfigs) { LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. Skipping task " + "generation: {}", segmentName, taskType); skipGenerate = true; - break; - } else if (reProcessSegment) { - throw new RuntimeException( + // Note: There can be segments which needs to be reprocessed. + // In these cases the window bucket time range will always remain same, + // i.e. watermark will never advance and + // above condition of skipGenerate = true; will never happen if there + // is any segments which needs to be reprocessed. 
+ Preconditions.checkState(!reProcessSegment, "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); + break; } minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentName); @@ -276,6 +293,9 @@ public List generateTasks(List tableConfigs) { break; } + Preconditions.checkState(isPrevMinionTaskSuccessful, + "Prev minion task failed and bucket time window cannot be incremented"); + LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time bucket", taskType, windowStartMs, windowEndMs); windowStartMs = windowEndMs; @@ -283,6 +303,8 @@ public List generateTasks(List tableConfigs) { } if (skipGenerate) { + Preconditions.checkState(isPrevMinionTaskSuccessful, + "There are segment(s) which needs to be re-processed, current run can't be skipped."); continue; } @@ -296,20 +318,22 @@ public List generateTasks(List tableConfigs) { windowStartMs, windowEndMs, taskType)); } // update the watermark - long newWatermarkMs = (minSegmentTime / bucketMs) * bucketMs; - realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); - if (prevMinionTaskSuccessful) { + long newWatermarkMs = windowStartMs; + if (isPrevMinionTaskSuccessful) { + // It's certain that newWatermarkMs is equal to the watermark present in the zookeeper + Preconditions.checkState(newWatermarkMs == realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs()); // if there were no segments which needed to be reProcessed, we can remove the past minion runs expected // results. The past minion runs expected result list should either be cleared all or none so that // the same segment is not picked again in-case of consecutive minion task failures. 
realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList().clear(); } + realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); try { _clusterInfoAccessor .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, expectedVersion); - } catch (ZkException e) { + } catch (ZkBadVersionException e) { LOGGER.error( "Version changed while updating RTO task metadata for table: {}, skip scheduling. There are " + "multiple task schedulers for the same table, need to investigate!", realtimeTableName); @@ -323,6 +347,17 @@ public List generateTasks(List tableConfigs) { return pinotTaskConfigs; } + private List getSegmentsToDelete(List expectedCorrespondingOfflineSegments, + Set existingOfflineTableSegmentNames) { + List segmentsToDelete = new ArrayList<>(); + for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { + if (existingOfflineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { + segmentsToDelete.add(expectedCorrespondingOfflineSegment); + } + } + return segmentsToDelete; + } + private boolean checkIfSegmentNeedsToBeReProcessed(List expectedCorrespondingOfflineSegments, Set offlineTableSegmentNames) { From 19b83c6599f8ccbf7d84001d68d024ae564bab36 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 15 Dec 2024 02:33:19 +0530 Subject: [PATCH 28/72] nit --- .../RealtimeToOfflineSegmentsTaskGeneratorTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 17c90d42e3ad..6b11b041361e 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -308,6 +308,7 @@ public void testGenerateTasksNoMinionMetadata() { public void testGenerateTasksWithMinionMetadata() { ClusterInfoAccessor mockClusterInfoProvide = mock(ClusterInfoAccessor.class); when(mockClusterInfoProvide.getTaskStates(RealtimeToOfflineSegmentsTask.TASK_TYPE)).thenReturn(new HashMap<>()); + // why this test has watermark after min segment start time if it's a clod start? when(mockClusterInfoProvide .getMinionTaskMetadataZNRecord(RealtimeToOfflineSegmentsTask.TASK_TYPE, REALTIME_TABLE_NAME)).thenReturn( new RealtimeToOfflineSegmentsTaskMetadata(REALTIME_TABLE_NAME, 1590019200000L).toZNRecord()); // 21 May 2020 UTC From 68cc920b4a64573e50b0270b71b272a44644c582 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 15 Dec 2024 02:38:46 +0530 Subject: [PATCH 29/72] nit --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 -- .../RealtimeToOfflineSegmentsTaskGeneratorTest.java | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index d61a76a1042e..18fa9ddec9eb 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -320,8 +320,6 @@ public List generateTasks(List tableConfigs) { // update the watermark long newWatermarkMs = windowStartMs; if (isPrevMinionTaskSuccessful) { - // It's certain that newWatermarkMs is equal to the watermark present in the zookeeper - Preconditions.checkState(newWatermarkMs == realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs()); // if there were no segments which needed to be reProcessed, we can remove the past minion runs expected // results. The past minion runs expected result list should either be cleared all or none so that // the same segment is not picked again in-case of consecutive minion task failures. diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 6b11b041361e..5b3820c3cc26 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -304,7 +304,7 @@ public void testGenerateTasksNoMinionMetadata() { /** * Tests for subsequent runs after cold start */ - @Test +// @Test public void testGenerateTasksWithMinionMetadata() { ClusterInfoAccessor mockClusterInfoProvide = mock(ClusterInfoAccessor.class); 
when(mockClusterInfoProvide.getTaskStates(RealtimeToOfflineSegmentsTask.TASK_TYPE)).thenReturn(new HashMap<>()); From 84f471abf98c318f314db0c05dfffed2b92ea7e1 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 15 Dec 2024 02:50:46 +0530 Subject: [PATCH 30/72] update --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 18fa9ddec9eb..660c63e07d2c 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -320,9 +320,9 @@ public List generateTasks(List tableConfigs) { // update the watermark long newWatermarkMs = windowStartMs; if (isPrevMinionTaskSuccessful) { - // if there were no segments which needed to be reProcessed, we can remove the past minion runs expected - // results. The past minion runs expected result list should either be cleared all or none so that - // the same segment is not picked again in-case of consecutive minion task failures. + // if there were no segments which needed to be reProcessed, only then we can remove the past minion runs + // expected results. The past minion runs expected result list should either be cleared all or left as it is + // so that the successful subtask's segments are not picked again in-case of consecutive minion task failures. 
realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList().clear(); } realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); From e365e91d1605cd95746c6b5ca53ed748902507fb Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 02:20:28 +0530 Subject: [PATCH 31/72] revert time overlap change --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 660c63e07d2c..187f9b4ed44f 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -248,7 +248,7 @@ public List generateTasks(List tableConfigs) { } // Check overlap with window. - if ((segmentStartTimeMs >= windowStartMs) && (segmentStartTimeMs < windowEndMs)) { + if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { // If last completed segment is being used, make sure that segment crosses over end of window. // In the absence of this check, CONSUMING segments could contain some portion of the window. That data // would be skipped forever. 
From 1baf68e4897e65082b7c4ca6d0bc00bba0f3d628 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 03:39:22 +0530 Subject: [PATCH 32/72] fixes some edge cases --- ...eSegmentsMinionClusterIntegrationTest.java | 8 +- ...ealtimeToOfflineSegmentsTaskGenerator.java | 218 ++++++++++-------- ...imeToOfflineSegmentsTaskGeneratorTest.java | 3 +- 3 files changed, 133 insertions(+), 96 deletions(-) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index e6c8ce270030..6219d528f47b 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -228,7 +228,7 @@ public void testRealtimeToOfflineSegmentsTask() .map(ColumnPartitionConfig::getNumPartitions).reduce((a, b) -> a * b) .orElseThrow(() -> new RuntimeException("Expected accumulated result but not found.")) : 1; - long expectedWatermark = _dataSmallestTimeMs + 86400000; + long expectedWatermark = _dataSmallestTimeMs; for (int i = 0; i < 3; i++) { // Schedule task assertNotNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) @@ -245,7 +245,7 @@ public void testRealtimeToOfflineSegmentsTask() segmentsZKMetadata = _helixResourceManager.getSegmentsZKMetadata(_offlineTableName); assertEquals(segmentsZKMetadata.size(), (numOfflineSegmentsPerTask * (i + 1))); - long expectedOfflineSegmentTimeMs = expectedWatermark - 86400000; + long expectedOfflineSegmentTimeMs = expectedWatermark; for (int j = (numOfflineSegmentsPerTask * i); j < segmentsZKMetadata.size(); j++) { SegmentZKMetadata segmentZKMetadata = segmentsZKMetadata.get(j); 
assertEquals(segmentZKMetadata.getStartTimeMs(), expectedOfflineSegmentTimeMs); @@ -279,7 +279,7 @@ public void testRealtimeToOfflineSegmentsMetadataPushTask() .map(ColumnPartitionConfig::getNumPartitions).reduce((a, b) -> a * b) .orElseThrow(() -> new RuntimeException("Expected accumulated result but not found.")) : 1; - long expectedWatermark = _dataSmallestMetadataTableTimeMs + 86400000; + long expectedWatermark = _dataSmallestMetadataTableTimeMs; _taskManager.cleanUpTask(); for (int i = 0; i < 3; i++) { // Schedule task @@ -297,7 +297,7 @@ public void testRealtimeToOfflineSegmentsMetadataPushTask() segmentsZKMetadata = _helixResourceManager.getSegmentsZKMetadata(_offlineMetadataTableName); assertEquals(segmentsZKMetadata.size(), (numOfflineSegmentsPerTask * (i + 1))); - long expectedOfflineSegmentTimeMs = expectedWatermark - 86400000; + long expectedOfflineSegmentTimeMs = expectedWatermark; for (int j = (numOfflineSegmentsPerTask * i); j < segmentsZKMetadata.size(); j++) { SegmentZKMetadata segmentZKMetadata = segmentsZKMetadata.get(j); assertEquals(segmentZKMetadata.getStartTimeMs(), expectedOfflineSegmentTimeMs); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 187f9b4ed44f..dd0ed8c45963 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -195,116 +195,154 @@ public List generateTasks(List tableConfigs) { 
Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineSegmentsMapList); - while (true) { - // Check that execution window is older than bufferTime - if (windowEndMs > System.currentTimeMillis() - bufferMs) { - LOGGER.info( - "Window with start: {} and end: {} is not older than buffer time: {} configured as {} ago. Skipping task " - + "generation: {}", windowStartMs, windowEndMs, bufferMs, bufferTimePeriod, taskType); - skipGenerate = true; - break; + List segmentsToBeReProcessedList = new ArrayList<>(); + + for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { + String segmentName = segmentZKMetadata.getSegmentName(); + // reProcessSegment denotes whether to reschedule a previous segment which was a + // part of a failed task. + boolean reProcessSegment; + + if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { + // segment has been picked previously, check if offline segments generated by this segment + // exists in offline table + List expectedCorrespondingOfflineSegments = + realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); + + reProcessSegment = + checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, + existingOfflineTableSegmentNames); + + if (reProcessSegment) { + segmentsToBeReProcessedList.add(segmentZKMetadata); + isPrevMinionTaskSuccessful = false; + // In-case of partial failure of segments upload, + // data is inconsistent, delete the corresponding offline segments immediately. 
+ // TODO: check if can do atomic push in executor + List segmentsToDelete = + getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); + + if (!segmentsToDelete.isEmpty()) { + _clusterInfoAccessor.getPinotHelixResourceManager() + .deleteSegments(offlineTableName, segmentsToDelete); + // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the + // actual state. But there is no need to update existingOfflineTableSegmentNames. + } + } } + } - for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); - segmentZkMetadataIndex++) { - SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); - - String segmentName = segmentZKMetadata.getSegmentName(); - long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); - long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); - // reProcessSegment denotes whether to reschedule a previous segment which was a - // part of a failed task. - boolean reProcessSegment = false; - - if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { - // segment has been picked previously, check if offline segments generated by this segment - // exists in offline table - List expectedCorrespondingOfflineSegments = - realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); - - reProcessSegment = - checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, - existingOfflineTableSegmentNames); - - if (reProcessSegment) { - isPrevMinionTaskSuccessful = false; - // In-case of partial failure of segments upload, - // data is inconsistent, delete the corresponding offline segments immediately. 
- // TODO: check if can do atomic push in executor - List segmentsToDelete = - getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); - - if (!segmentsToDelete.isEmpty()) { - _clusterInfoAccessor.getPinotHelixResourceManager() - .deleteSegments(offlineTableName, segmentsToDelete); - // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the - // actual state. But there is no need to update existingOfflineTableSegmentNames. - } - } else { - // segment has been picked in one of the past tasks and the task was successful. - // move to the next potential segment - continue; - } + if (isPrevMinionTaskSuccessful) { + // if all offline segments of prev minion tasks were successfully uploaded, + // we can clear the state of prev minion tasks as now it's useless. + if (!expectedRealtimeToOfflineSegmentsMapList.isEmpty()) { + expectedRealtimeToOfflineSegmentsMapList.clear(); + // Todo: store windowEnd in metadata + windowStartMs = windowEndMs; + windowEndMs += bucketMs; + } + + while (true) { + // Check that execution window is older than bufferTime + if (windowEndMs > System.currentTimeMillis() - bufferMs) { + LOGGER.info( + "Window with start: {} and end: {} is not older than buffer time: {} configured as {} ago. Skipping " + + "task " + + "generation: {}", windowStartMs, windowEndMs, bufferMs, bufferTimePeriod, taskType); + skipGenerate = true; + break; } - // Check overlap with window. - if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { - // If last completed segment is being used, make sure that segment crosses over end of window. - // In the absence of this check, CONSUMING segments could contain some portion of the window. That data - // would be skipped forever. - if (lastLLCSegmentPerPartition.contains(segmentName) && segmentEndTimeMs < windowEndMs) { - LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. 
Skipping task " - + "generation: {}", segmentName, taskType); - skipGenerate = true; - // Note: There can be segments which needs to be reprocessed. - // In these cases the window bucket time range will always remain same, - // i.e. watermark will never advance and - // above condition of skipGenerate = true; will never happen if there - // is any segments which needs to be reprocessed. - Preconditions.checkState(!reProcessSegment, - "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); - break; + for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); + segmentZkMetadataIndex++) { + SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); + + String segmentName = segmentZKMetadata.getSegmentName(); + long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); + long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); + // reProcessSegment denotes whether to reschedule a previous segment which was a + // part of a failed task. +// boolean reProcessSegment = false; + + // Check overlap with window. + if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { + // If last completed segment is being used, make sure that segment crosses over end of window. + // In the absence of this check, CONSUMING segments could contain some portion of the window. That data + // would be skipped forever. + if (lastLLCSegmentPerPartition.contains(segmentName) && segmentEndTimeMs < windowEndMs) { + LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. Skipping task " + + "generation: {}", segmentName, taskType); + skipGenerate = true; + // Note: There can be segments which needs to be reprocessed. + // In these cases the window bucket time range will always remain same, + // i.e. 
watermark will never advance and + // above condition of skipGenerate = true; will never happen if there + // is any segments which needs to be reprocessed. +// Preconditions.checkState(!reProcessSegment, +// "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); + break; + } + minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); + segmentNames.add(segmentName); + downloadURLs.add(segmentZKMetadata.getDownloadUrl()); + + numRecordsPerTask += segmentZKMetadata.getTotalDocs(); + + if (numRecordsPerTask >= maxNumRecordsPerTask) { + segmentNamesGroupList.add(segmentNames); + downloadURLsGroupList.add(downloadURLs); + numRecordsPerTask = 0; + segmentNames = new ArrayList<>(); + downloadURLs = new ArrayList<>(); + } } - minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); - segmentNames.add(segmentName); - downloadURLs.add(segmentZKMetadata.getDownloadUrl()); - - numRecordsPerTask += segmentZKMetadata.getTotalDocs(); - if (numRecordsPerTask >= maxNumRecordsPerTask) { + if ((!segmentNames.isEmpty()) + && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { segmentNamesGroupList.add(segmentNames); downloadURLsGroupList.add(downloadURLs); - numRecordsPerTask = 0; - segmentNames = new ArrayList<>(); - downloadURLs = new ArrayList<>(); } - } else if (reProcessSegment) { - throw new RuntimeException( - "Segment: " + segmentName + " needs to be reProcessed and should lie under bucket range."); } + if (skipGenerate || !segmentNamesGroupList.isEmpty()) { + break; + } + + Preconditions.checkState(isPrevMinionTaskSuccessful, + "Prev minion task failed and bucket time window cannot be incremented"); + + LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time bucket", + taskType, windowStartMs, windowEndMs); + windowStartMs = windowEndMs; + windowEndMs += bucketMs; + } + } else { + for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < 
segmentsToBeReProcessedList.size(); + segmentZkMetadataIndex++) { + SegmentZKMetadata segmentZKMetadata = segmentsToBeReProcessedList.get(segmentZkMetadataIndex); + minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); + segmentNames.add(segmentZKMetadata.getSegmentName()); + downloadURLs.add(segmentZKMetadata.getDownloadUrl()); + + numRecordsPerTask += segmentZKMetadata.getTotalDocs(); + if (numRecordsPerTask >= maxNumRecordsPerTask) { + segmentNamesGroupList.add(segmentNames); + downloadURLsGroupList.add(downloadURLs); + numRecordsPerTask = 0; + segmentNames = new ArrayList<>(); + downloadURLs = new ArrayList<>(); + } if ((!segmentNames.isEmpty()) && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { segmentNamesGroupList.add(segmentNames); downloadURLsGroupList.add(downloadURLs); } } - if (skipGenerate || !segmentNamesGroupList.isEmpty()) { - break; - } - - Preconditions.checkState(isPrevMinionTaskSuccessful, - "Prev minion task failed and bucket time window cannot be incremented"); - - LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time bucket", - taskType, windowStartMs, windowEndMs); - windowStartMs = windowEndMs; - windowEndMs += bucketMs; } if (skipGenerate) { - Preconditions.checkState(isPrevMinionTaskSuccessful, - "There are segment(s) which needs to be re-processed, current run can't be skipped."); +// Preconditions.checkState(true, +// "There are segment(s) which needs to be re-processed, current run can't be skipped."); continue; } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 5b3820c3cc26..17c90d42e3ad 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -304,11 +304,10 @@ public void testGenerateTasksNoMinionMetadata() { /** * Tests for subsequent runs after cold start */ -// @Test + @Test public void testGenerateTasksWithMinionMetadata() { ClusterInfoAccessor mockClusterInfoProvide = mock(ClusterInfoAccessor.class); when(mockClusterInfoProvide.getTaskStates(RealtimeToOfflineSegmentsTask.TASK_TYPE)).thenReturn(new HashMap<>()); - // why this test has watermark after min segment start time if it's a clod start? when(mockClusterInfoProvide .getMinionTaskMetadataZNRecord(RealtimeToOfflineSegmentsTask.TASK_TYPE, REALTIME_TABLE_NAME)).thenReturn( new RealtimeToOfflineSegmentsTaskMetadata(REALTIME_TABLE_NAME, 1590019200000L).toZNRecord()); // 21 May 2020 UTC From 1754dbc5dfadaf05602aac16bef9bda40777adbc Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 18:51:13 +0530 Subject: [PATCH 33/72] refactoring --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 39 +- ...timeToOfflineSegmentsTaskMetadataTest.java | 2 +- ...eSegmentsMinionClusterIntegrationTest.java | 2 +- ...RealtimeToOfflineSegmentsTaskExecutor.java | 4 +- ...ealtimeToOfflineSegmentsTaskGenerator.java | 468 ++++++++++++------ 5 files changed, 339 insertions(+), 176 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 3e833936d129..d610603ffdd6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ 
b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -51,24 +51,27 @@ */ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { - private static final String WATERMARK_KEY = "watermarkMs"; + private static final String WINDOW_START_KEY = "windowStartMs"; + private static final String WINDOW_END_KEY = "windowEndMs"; private static final String COMMA_SEPARATOR = ","; private final String _tableNameWithType; - private long _watermarkMs; + private long _windowStartMs; private final List _expectedRealtimeToOfflineSegmentsTaskResultList; + private long _windowEndMs; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { - _watermarkMs = watermarkMs; + _windowStartMs = watermarkMs; _tableNameWithType = tableNameWithType; _expectedRealtimeToOfflineSegmentsTaskResultList = new ArrayList<>(); } - public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs, - List expectedRealtimeToOfflineSegmentsMapList) { + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs, + long windowEndMs, List expectedRealtimeToOfflineSegmentsMapList) { _tableNameWithType = tableNameWithType; - _watermarkMs = watermarkMs; + _windowStartMs = windowStartMs; _expectedRealtimeToOfflineSegmentsTaskResultList = expectedRealtimeToOfflineSegmentsMapList; + _windowEndMs = windowEndMs; } public String getTableNameWithType() { @@ -79,19 +82,28 @@ public List getExpectedRealtimeToOfflin return _expectedRealtimeToOfflineSegmentsTaskResultList; } - public void setWatermarkMs(long watermarkMs) { - _watermarkMs = watermarkMs; + public void setWindowStartMs(long windowStartMs) { + _windowStartMs = windowStartMs; } /** * Get the watermark in millis */ - public long getWatermarkMs() { - return _watermarkMs; + public long getWindowStartMs() { + return _windowStartMs; + } + + public long getWindowEndMs() { + return _windowEndMs; + } + + public 
void setWindowEndMs(long windowEndMs) { + _windowEndMs = windowEndMs; } public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { - long watermark = znRecord.getLongField(WATERMARK_KEY, 0); + long windowStartMs = znRecord.getLongField(WINDOW_START_KEY, 0); + long windowEndMs = znRecord.getLongField(WINDOW_END_KEY, 0); List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); Map> listFields = znRecord.getListFields(); for (Map.Entry> listField : listFields.entrySet()) { @@ -105,13 +117,14 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID) ); } - return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), watermark, + return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), windowStartMs, windowEndMs, expectedRealtimeToOfflineSegmentsMapList); } public ZNRecord toZNRecord() { ZNRecord znRecord = new ZNRecord(_tableNameWithType); - znRecord.setLongField(WATERMARK_KEY, _watermarkMs); + znRecord.setLongField(WINDOW_START_KEY, _windowStartMs); + znRecord.setLongField(WINDOW_END_KEY, _windowEndMs); for (ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap : _expectedRealtimeToOfflineSegmentsTaskResultList) { String segmentsFrom = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsFrom()); diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 4f109940fa2e..6d74091b3d9f 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -41,6 +41,6 @@ public void testToFromZNRecord() { 
RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord); assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), "testTable_REALTIME"); - assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), 1000); + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), 1000); } } diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index 6219d528f47b..a0b42740385e 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -332,7 +332,7 @@ private void waitForTaskToComplete(long expectedWatermark, String realtimeTableN RealtimeToOfflineSegmentsTaskMetadata minionTaskMetadata = znRecord != null ? 
RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord) : null; assertNotNull(minionTaskMetadata); - assertEquals(minionTaskMetadata.getWatermarkMs(), expectedWatermark); + assertEquals(minionTaskMetadata.getWindowStartMs(), expectedWatermark); } @AfterClass diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index ab47de57f29e..63577466bcb2 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -105,10 +105,10 @@ public void preProcess(PinotTaskConfig pinotTaskConfig) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); long windowStartMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY)); - Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs() <= windowStartMs, + Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs() <= windowStartMs, "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" + " configs for table: %s. 
ZNode may have been modified by another task", - realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(), windowStartMs, realtimeTableName); + realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), windowStartMs, realtimeTableName); } @Override diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index dd0ed8c45963..9f34e41122c4 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -138,11 +138,6 @@ public List generateTasks(List tableConfigs) { continue; } - String offlineTableName = - TableNameBuilder.OFFLINE.tableNameWithType(TableNameBuilder.extractRawTableName(realtimeTableName)); - Set existingOfflineTableSegmentNames = - new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, true)); - TableTaskConfig tableTaskConfig = tableConfig.getTaskConfig(); Preconditions.checkState(tableTaskConfig != null); Map taskConfigs = tableTaskConfig.getConfigsForTaskType(taskType); @@ -163,19 +158,21 @@ public List generateTasks(List tableConfigs) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); + long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); + long windowEndMs = windowStartMs + bucketMs; + // Get watermark from 
RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. WindowEnd = // windowStart + bucket. - long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWatermarkMs(); - long windowEndMs = windowStartMs + bucketMs; +// long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); +// long windowEndMs = windowStartMs + bucketMs; // Find all COMPLETED segments with data overlapping execution window: windowStart (inclusive) to windowEnd // (exclusive) - List segmentNames = new ArrayList<>(); - List downloadURLs = new ArrayList<>(); +// List segmentNames = new ArrayList<>(); +// List downloadURLs = new ArrayList<>(); Set lastLLCSegmentPerPartition = new HashSet<>(partitionToLatestLLCSegmentName.values()); boolean skipGenerate = false; - long numRecordsPerTask = 0; List> segmentNamesGroupList = new ArrayList<>(); List> downloadURLsGroupList = new ArrayList<>(); @@ -185,8 +182,8 @@ public List generateTasks(List tableConfigs) { taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) : DEFAULT_MAX_NUM_RECORDS_PER_TASK; - long minSegmentTime = Long.MAX_VALUE; - boolean isPrevMinionTaskSuccessful = true; +// long minSegmentTime = Long.MAX_VALUE; +// boolean prevMinionTaskSuccessful = true; // get past minion task runs expected results. This list can have both successful and // failed task's expected results. 
@@ -195,152 +192,127 @@ public List generateTasks(List tableConfigs) { Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineSegmentsMapList); - List segmentsToBeReProcessedList = new ArrayList<>(); + String offlineTableName = + TableNameBuilder.OFFLINE.tableNameWithType(TableNameBuilder.extractRawTableName(realtimeTableName)); + Set existingOfflineTableSegmentNames = + new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, true)); - for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { - String segmentName = segmentZKMetadata.getSegmentName(); - // reProcessSegment denotes whether to reschedule a previous segment which was a - // part of a failed task. - boolean reProcessSegment; - - if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { - // segment has been picked previously, check if offline segments generated by this segment - // exists in offline table - List expectedCorrespondingOfflineSegments = - realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); - - reProcessSegment = - checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, - existingOfflineTableSegmentNames); - - if (reProcessSegment) { - segmentsToBeReProcessedList.add(segmentZKMetadata); - isPrevMinionTaskSuccessful = false; - // In-case of partial failure of segments upload, - // data is inconsistent, delete the corresponding offline segments immediately. 
- // TODO: check if can do atomic push in executor - List segmentsToDelete = - getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); - - if (!segmentsToDelete.isEmpty()) { - _clusterInfoAccessor.getPinotHelixResourceManager() - .deleteSegments(offlineTableName, segmentsToDelete); - // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the - // actual state. But there is no need to update existingOfflineTableSegmentNames. - } - } - } - } + List segmentsToBeReProcessedList = + getSegmentsToBeReprocessed(completedSegmentsZKMetadata, + realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask, existingOfflineTableSegmentNames); + + boolean prevMinionTaskSuccessful = segmentsToBeReProcessedList.isEmpty(); - if (isPrevMinionTaskSuccessful) { + if (!prevMinionTaskSuccessful) { + deleteInvalidOfflineSegments(offlineTableName, segmentsToBeReProcessedList, existingOfflineTableSegmentNames, + realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask); + divideSegmentsAmongSubtasks(segmentsToBeReProcessedList, segmentNamesGroupList, downloadURLsGroupList, + maxNumRecordsPerTask); + } else { // if all offline segments of prev minion tasks were successfully uploaded, // we can clear the state of prev minion tasks as now it's useless. if (!expectedRealtimeToOfflineSegmentsMapList.isEmpty()) { expectedRealtimeToOfflineSegmentsMapList.clear(); - // Todo: store windowEnd in metadata - windowStartMs = windowEndMs; - windowEndMs += bucketMs; - } - - while (true) { - // Check that execution window is older than bufferTime - if (windowEndMs > System.currentTimeMillis() - bufferMs) { - LOGGER.info( - "Window with start: {} and end: {} is not older than buffer time: {} configured as {} ago. 
Skipping " - + "task " - + "generation: {}", windowStartMs, windowEndMs, bufferMs, bufferTimePeriod, taskType); - skipGenerate = true; - break; - } - - for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); - segmentZkMetadataIndex++) { - SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); - - String segmentName = segmentZKMetadata.getSegmentName(); - long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); - long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); - // reProcessSegment denotes whether to reschedule a previous segment which was a - // part of a failed task. -// boolean reProcessSegment = false; - - // Check overlap with window. - if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { - // If last completed segment is being used, make sure that segment crosses over end of window. - // In the absence of this check, CONSUMING segments could contain some portion of the window. That data - // would be skipped forever. - if (lastLLCSegmentPerPartition.contains(segmentName) && segmentEndTimeMs < windowEndMs) { - LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. Skipping task " - + "generation: {}", segmentName, taskType); - skipGenerate = true; - // Note: There can be segments which needs to be reprocessed. - // In these cases the window bucket time range will always remain same, - // i.e. watermark will never advance and - // above condition of skipGenerate = true; will never happen if there - // is any segments which needs to be reprocessed. 
-// Preconditions.checkState(!reProcessSegment, -// "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); - break; - } - minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); - segmentNames.add(segmentName); - downloadURLs.add(segmentZKMetadata.getDownloadUrl()); - - numRecordsPerTask += segmentZKMetadata.getTotalDocs(); - - if (numRecordsPerTask >= maxNumRecordsPerTask) { - segmentNamesGroupList.add(segmentNames); - downloadURLsGroupList.add(downloadURLs); - numRecordsPerTask = 0; - segmentNames = new ArrayList<>(); - downloadURLs = new ArrayList<>(); - } - } - - if ((!segmentNames.isEmpty()) - && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { - segmentNamesGroupList.add(segmentNames); - downloadURLsGroupList.add(downloadURLs); - } - } - if (skipGenerate || !segmentNamesGroupList.isEmpty()) { - break; - } - - Preconditions.checkState(isPrevMinionTaskSuccessful, - "Prev minion task failed and bucket time window cannot be incremented"); - - LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time bucket", - taskType, windowStartMs, windowEndMs); - windowStartMs = windowEndMs; - windowEndMs += bucketMs; - } - } else { - for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < segmentsToBeReProcessedList.size(); - segmentZkMetadataIndex++) { - SegmentZKMetadata segmentZKMetadata = segmentsToBeReProcessedList.get(segmentZkMetadataIndex); - minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); - segmentNames.add(segmentZKMetadata.getSegmentName()); - downloadURLs.add(segmentZKMetadata.getDownloadUrl()); - - numRecordsPerTask += segmentZKMetadata.getTotalDocs(); - - if (numRecordsPerTask >= maxNumRecordsPerTask) { - segmentNamesGroupList.add(segmentNames); - downloadURLsGroupList.add(downloadURLs); - numRecordsPerTask = 0; - segmentNames = new ArrayList<>(); - downloadURLs = new ArrayList<>(); - } - if 
((!segmentNames.isEmpty()) - && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { - segmentNamesGroupList.add(segmentNames); - downloadURLsGroupList.add(downloadURLs); - } + windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(); } + windowEndMs = windowStartMs + bucketMs; + List segmentZKMetadataList = + generateNewSegmentsToProcess(completedSegmentsZKMetadata, windowStartMs, windowEndMs, bucketMs, bufferMs, + bufferTimePeriod, + lastLLCSegmentPerPartition, realtimeToOfflineSegmentsTaskMetadata); + divideSegmentsAmongSubtasks(segmentZKMetadataList, segmentNamesGroupList, downloadURLsGroupList, + maxNumRecordsPerTask); } - if (skipGenerate) { +// if (isPrevMinionTaskSuccessful) { +// // if all offline segments of prev minion tasks were successfully uploaded, +// // we can clear the state of prev minion tasks as now it's useless. +// if (!expectedRealtimeToOfflineSegmentsMapList.isEmpty()) { +// expectedRealtimeToOfflineSegmentsMapList.clear(); +// windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(); +// windowEndMs = windowStartMs + bucketMs; +// } +// +//// while (true) { +//// // Check that execution window is older than bufferTime +//// if (windowEndMs > System.currentTimeMillis() - bufferMs) { +//// LOGGER.info( +//// "Window with start: {} and end: {} is not older than buffer time: {} configured as {} ago. 
+// Skipping " +//// + "task " +//// + "generation: {}", windowStartMs, windowEndMs, bufferMs, bufferTimePeriod, taskType); +//// skipGenerate = true; +//// break; +//// } +//// +//// for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); +//// segmentZkMetadataIndex++) { +//// SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); +//// +//// String segmentName = segmentZKMetadata.getSegmentName(); +//// long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); +//// long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); +//// // reProcessSegment denotes whether to reschedule a previous segment which was a +//// // part of a failed task. +////// boolean reProcessSegment = false; +//// +//// // Check overlap with window. +//// if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { +//// // If last completed segment is being used, make sure that segment crosses over end of window. +//// // In the absence of this check, CONSUMING segments could contain some portion of the window. That +// data +//// // would be skipped forever. +//// if (lastLLCSegmentPerPartition.contains(segmentName) && segmentEndTimeMs < windowEndMs) { +//// LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. Skipping +//// task " +//// + "generation: {}", segmentName, taskType); +//// skipGenerate = true; +//// // Note: There can be segments which needs to be reprocessed. +//// // In these cases the window bucket time range will always remain same, +//// // i.e. watermark will never advance and +//// // above condition of skipGenerate = true; will never happen if there +//// // is any segments which needs to be reprocessed. 
+////// Preconditions.checkState(!reProcessSegment, +////// "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); +//// break; +//// } +//// minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); +//// segmentNames.add(segmentName); +//// downloadURLs.add(segmentZKMetadata.getDownloadUrl()); +//// +//// numRecordsPerTask += segmentZKMetadata.getTotalDocs(); +//// +//// if (numRecordsPerTask >= maxNumRecordsPerTask) { +//// segmentNamesGroupList.add(segmentNames); +//// downloadURLsGroupList.add(downloadURLs); +//// numRecordsPerTask = 0; +//// segmentNames = new ArrayList<>(); +//// downloadURLs = new ArrayList<>(); +//// } +//// } +//// +//// if ((!segmentNames.isEmpty()) +//// && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { +//// segmentNamesGroupList.add(segmentNames); +//// downloadURLsGroupList.add(downloadURLs); +//// } +//// } +//// if (skipGenerate || !segmentNamesGroupList.isEmpty()) { +//// break; +//// } +//// +//// Preconditions.checkState(isPrevMinionTaskSuccessful, +//// "Prev minion task failed and bucket time window cannot be incremented"); +//// +//// LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time +// bucket", +//// taskType, windowStartMs, windowEndMs); +//// windowStartMs = windowEndMs; +//// windowEndMs += bucketMs; +//// } +// } + + if (segmentNamesGroupList.isEmpty()) { // Preconditions.checkState(true, // "There are segment(s) which needs to be re-processed, current run can't be skipped."); continue; @@ -353,17 +325,17 @@ public List generateTasks(List tableConfigs) { List downloadURLList = downloadURLsGroupList.get(segmentNameListIndex); pinotTaskConfigsForTable.add( createPinotTaskConfig(segmentNameList, downloadURLList, realtimeTableName, taskConfigs, tableConfig, - windowStartMs, windowEndMs, taskType)); - } - // update the watermark - long newWatermarkMs = windowStartMs; - if (isPrevMinionTaskSuccessful) { 
- // if there were no segments which needed to be reProcessed, only then we can remove the past minion runs - // expected results. The past minion runs expected result list should either be cleared all or left as it is - // so that the successful subtask's segments are not picked again in-case of consecutive minion task failures. - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList().clear(); + realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), + realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(), taskType)); } - realtimeToOfflineSegmentsTaskMetadata.setWatermarkMs(newWatermarkMs); +// if (prevMinionTaskSuccessful) { +// // if there were no segments which needed to be reProcessed, only then we can remove the past minion runs +// // expected results. The past minion runs expected result list should either be cleared all or left as it is +// // so that the successful subtask's segments are not picked again in-case of consecutive minion task failures. 
+// realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList().clear(); +// realtimeToOfflineSegmentsTaskMetadata.setWindowStartMs(windowStartMs); +// realtimeToOfflineSegmentsTaskMetadata.setWindowEndMs(windowEndMs); +// } try { _clusterInfoAccessor .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, @@ -383,6 +355,184 @@ public List generateTasks(List tableConfigs) { return pinotTaskConfigs; } + private void deleteInvalidOfflineSegments(String offlineTableName, + List segmentsToBeReProcessedList, + Set existingOfflineTableSegmentNames, + Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask) { + Set segmentsToBeDeleted = new HashSet<>(); + for (SegmentZKMetadata segmentZKMetadata : segmentsToBeReProcessedList) { + String segmentName = segmentZKMetadata.getSegmentName(); + List expectedCorrespondingOfflineSegments = + realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); + segmentsToBeDeleted.addAll( + getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames)); + } + if (!segmentsToBeDeleted.isEmpty()) { + _clusterInfoAccessor.getPinotHelixResourceManager() + .deleteSegments(offlineTableName, new ArrayList<>(segmentsToBeDeleted)); + // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the + // actual state. But there is no need to update existingOfflineTableSegmentNames. + } + } + + private List getSegmentsToBeReprocessed(List completedSegmentsZKMetadata, + Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask, + Set existingOfflineTableSegmentNames) { + List segmentsToBeReProcessedList = new ArrayList<>(); + + for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { + String segmentName = segmentZKMetadata.getSegmentName(); + // reProcessSegment denotes whether to reschedule a previous segment which was a + // part of a failed task. 
+ boolean reProcessSegment; + + if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { + // segment has been picked previously, check if offline segments generated by this segment + // exists in offline table + List expectedCorrespondingOfflineSegments = + realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); + + reProcessSegment = + checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, + existingOfflineTableSegmentNames); + + if (reProcessSegment) { + segmentsToBeReProcessedList.add(segmentZKMetadata); + // In-case of partial failure of segments upload, + // data is inconsistent, delete the corresponding offline segments immediately. + // TODO: check if can do atomic push in executor +// List segmentsToDelete = +// getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); +// +// if (!segmentsToDelete.isEmpty()) { +// _clusterInfoAccessor.getPinotHelixResourceManager() +// .deleteSegments(offlineTableName, segmentsToDelete); +// // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the +// // actual state. But there is no need to update existingOfflineTableSegmentNames. +// } + } + } + } + return segmentsToBeReProcessedList; + } + + private List generateNewSegmentsToProcess(List completedSegmentsZKMetadata, + long windowStartMs, + long windowEndMs, long bucketMs, long bufferMs, String bufferTimePeriod, Set lastLLCSegmentPerPartition, + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata) { + + String taskType = RealtimeToOfflineSegmentsTask.TASK_TYPE; + List segmentZKMetadataList = new ArrayList<>(); + + boolean skipGenerate = false; + + while (true) { + // Check that execution window is older than bufferTime + if (windowEndMs > System.currentTimeMillis() - bufferMs) { + LOGGER.info( + "Window with start: {} and end: {} is not older than buffer time: {} configured as {} ago. 
Skipping " + + "task " + + "generation: {}", windowStartMs, windowEndMs, bufferMs, bufferTimePeriod, taskType); + return new ArrayList<>(); + } + + for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { + String segmentName = segmentZKMetadata.getSegmentName(); + long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); + long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); + // reProcessSegment denotes whether to reschedule a previous segment which was a + // part of a failed task. +// boolean reProcessSegment = false; + + // Check overlap with window. + if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { + // If last completed segment is being used, make sure that segment crosses over end of window. + // In the absence of this check, CONSUMING segments could contain some portion of the window. That data + // would be skipped forever. + if (lastLLCSegmentPerPartition.contains(segmentName) && segmentEndTimeMs < windowEndMs) { + LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. Skipping task " + + "generation: {}", segmentName, taskType); + skipGenerate = true; + return new ArrayList<>(); + // Note: There can be segments which needs to be reprocessed. + // In these cases the window bucket time range will always remain same, + // i.e. watermark will never advance and + // above condition of skipGenerate = true; will never happen if there + // is any segments which needs to be reprocessed. 
+// Preconditions.checkState(!reProcessSegment, +// "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); +// break; + } +// minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); + segmentZKMetadataList.add(segmentZKMetadata); +// segmentNames.add(segmentName); +// downloadURLs.add(segmentZKMetadata.getDownloadUrl()); + +// numRecordsPerTask += segmentZKMetadata.getTotalDocs(); +// +// if (numRecordsPerTask >= maxNumRecordsPerTask) { +// segmentNamesGroupList.add(segmentNames); +// downloadURLsGroupList.add(downloadURLs); +// numRecordsPerTask = 0; +// segmentNames = new ArrayList<>(); +// downloadURLs = new ArrayList<>(); +// } + } + +// if ((!segmentNames.isEmpty()) +// && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { +// segmentNamesGroupList.add(segmentNames); +// downloadURLsGroupList.add(downloadURLs); +// } + } + if (!segmentZKMetadataList.isEmpty()) { + break; + } + +// Preconditions.checkState(isPrevMinionTaskSuccessful, +// "Prev minion task failed and bucket time window cannot be incremented"); + + LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time bucket", + taskType, windowStartMs, windowEndMs); + windowStartMs = windowEndMs; + windowEndMs += bucketMs; + } + realtimeToOfflineSegmentsTaskMetadata.setWindowStartMs(windowStartMs); + realtimeToOfflineSegmentsTaskMetadata.setWindowEndMs(windowEndMs); + return segmentZKMetadataList; + } + + private void divideSegmentsAmongSubtasks(List segmentsToBeReProcessedList, + List> segmentNamesGroupList, List> downloadURLsGroupList, int maxNumRecordsPerTask) { + + long numRecordsPerTask = 0; + List segmentNames = new ArrayList<>(); + List downloadURLs = new ArrayList<>(); + + for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < segmentsToBeReProcessedList.size(); + segmentZkMetadataIndex++) { + SegmentZKMetadata segmentZKMetadata = 
segmentsToBeReProcessedList.get(segmentZkMetadataIndex); +// minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); + segmentNames.add(segmentZKMetadata.getSegmentName()); + downloadURLs.add(segmentZKMetadata.getDownloadUrl()); + + numRecordsPerTask += segmentZKMetadata.getTotalDocs(); + + if (numRecordsPerTask >= maxNumRecordsPerTask) { + segmentNamesGroupList.add(segmentNames); + downloadURLsGroupList.add(downloadURLs); + numRecordsPerTask = 0; + segmentNames = new ArrayList<>(); + downloadURLs = new ArrayList<>(); + } + if ((!segmentNames.isEmpty()) + && (segmentZkMetadataIndex == (segmentsToBeReProcessedList.size() - 1))) { + segmentNamesGroupList.add(segmentNames); + downloadURLsGroupList.add(downloadURLs); + } + } + } + private List getSegmentsToDelete(List expectedCorrespondingOfflineSegments, Set existingOfflineTableSegmentNames) { List segmentsToDelete = new ArrayList<>(); From ff4017e051983ea2cafc9edcb8f3bd9f6fa5e40c Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 20:05:45 +0530 Subject: [PATCH 34/72] fixes test --- .../metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 6d74091b3d9f..43a4844b5554 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -36,7 +36,7 @@ public void testToFromZNRecord() { new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000); ZNRecord znRecord = metadata.toZNRecord(); assertEquals(znRecord.getId(), "testTable_REALTIME"); - assertEquals(znRecord.getSimpleField("watermarkMs"), "1000"); + 
assertEquals(znRecord.getSimpleField("windowStartMs"), "1000"); RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord); From 9d329bcc32f79e5ed0b9545f9a5f2eca042e0580 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 21:37:36 +0530 Subject: [PATCH 35/72] Adds code docs and clean up --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 192 ++++-------------- 1 file changed, 37 insertions(+), 155 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 9f34e41122c4..8d9b57558017 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -158,52 +158,51 @@ public List generateTasks(List tableConfigs) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); - long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); - long windowEndMs = windowStartMs + bucketMs; - // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. WindowEnd = // windowStart + bucket. 
-// long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); -// long windowEndMs = windowStartMs + bucketMs; + long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); // Find all COMPLETED segments with data overlapping execution window: windowStart (inclusive) to windowEnd // (exclusive) -// List segmentNames = new ArrayList<>(); -// List downloadURLs = new ArrayList<>(); Set lastLLCSegmentPerPartition = new HashSet<>(partitionToLatestLLCSegmentName.values()); - boolean skipGenerate = false; List> segmentNamesGroupList = new ArrayList<>(); List> downloadURLsGroupList = new ArrayList<>(); + // max maxNumRecordsPerTask is used to divide a minion tasks among + // multiple subtasks to improve performance. int maxNumRecordsPerTask = taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null ? Integer.parseInt( taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) : DEFAULT_MAX_NUM_RECORDS_PER_TASK; -// long minSegmentTime = Long.MAX_VALUE; -// boolean prevMinionTaskSuccessful = true; - // get past minion task runs expected results. This list can have both successful and // failed task's expected results. - List expectedRealtimeToOfflineSegmentsMapList = + List expectedRealtimeToOfflineTaskResultInfoList = realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = - getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineSegmentsMapList); + getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineTaskResultInfoList); + // Get all offline table segments. 
+ // These are used to validate if previous minion task was successful or not String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(TableNameBuilder.extractRawTableName(realtimeTableName)); Set existingOfflineTableSegmentNames = new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, true)); + // In-case of previous minion task failures, get all segments + // of failed minion subtasks. They need to be reprocessed. List segmentsToBeReProcessedList = getSegmentsToBeReprocessed(completedSegmentsZKMetadata, realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask, existingOfflineTableSegmentNames); + // if no failure, no segment to be reprocessed boolean prevMinionTaskSuccessful = segmentsToBeReProcessedList.isEmpty(); if (!prevMinionTaskSuccessful) { + // In-case of partial failure of segments upload in prev minion task run, + // data is inconsistent, delete the corresponding offline segments immediately. deleteInvalidOfflineSegments(offlineTableName, segmentsToBeReProcessedList, existingOfflineTableSegmentNames, realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask); divideSegmentsAmongSubtasks(segmentsToBeReProcessedList, segmentNamesGroupList, downloadURLsGroupList, @@ -211,11 +210,15 @@ public List generateTasks(List tableConfigs) { } else { // if all offline segments of prev minion tasks were successfully uploaded, // we can clear the state of prev minion tasks as now it's useless. - if (!expectedRealtimeToOfflineSegmentsMapList.isEmpty()) { - expectedRealtimeToOfflineSegmentsMapList.clear(); + if (!expectedRealtimeToOfflineTaskResultInfoList.isEmpty()) { + expectedRealtimeToOfflineTaskResultInfoList.clear(); + // windowEndTime of prev minion task needs to be re-used for picking up the + // next windowStartTime. This is useful for case where user changes minion config + // after a minion task run was complete. 
So windowStartTime cannot be watermark + bucketMs windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(); } - windowEndMs = windowStartMs + bucketMs; + long windowEndMs = windowStartMs + bucketMs; + // since window changed, pick new segments. List segmentZKMetadataList = generateNewSegmentsToProcess(completedSegmentsZKMetadata, windowStartMs, windowEndMs, bucketMs, bufferMs, bufferTimePeriod, @@ -224,118 +227,27 @@ public List generateTasks(List tableConfigs) { maxNumRecordsPerTask); } -// if (isPrevMinionTaskSuccessful) { -// // if all offline segments of prev minion tasks were successfully uploaded, -// // we can clear the state of prev minion tasks as now it's useless. -// if (!expectedRealtimeToOfflineSegmentsMapList.isEmpty()) { -// expectedRealtimeToOfflineSegmentsMapList.clear(); -// windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(); -// windowEndMs = windowStartMs + bucketMs; -// } -// -//// while (true) { -//// // Check that execution window is older than bufferTime -//// if (windowEndMs > System.currentTimeMillis() - bufferMs) { -//// LOGGER.info( -//// "Window with start: {} and end: {} is not older than buffer time: {} configured as {} ago. -// Skipping " -//// + "task " -//// + "generation: {}", windowStartMs, windowEndMs, bufferMs, bufferTimePeriod, taskType); -//// skipGenerate = true; -//// break; -//// } -//// -//// for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < completedSegmentsZKMetadata.size(); -//// segmentZkMetadataIndex++) { -//// SegmentZKMetadata segmentZKMetadata = completedSegmentsZKMetadata.get(segmentZkMetadataIndex); -//// -//// String segmentName = segmentZKMetadata.getSegmentName(); -//// long segmentStartTimeMs = segmentZKMetadata.getStartTimeMs(); -//// long segmentEndTimeMs = segmentZKMetadata.getEndTimeMs(); -//// // reProcessSegment denotes whether to reschedule a previous segment which was a -//// // part of a failed task. 
-////// boolean reProcessSegment = false; -//// -//// // Check overlap with window. -//// if (windowStartMs <= segmentEndTimeMs && segmentStartTimeMs < windowEndMs) { -//// // If last completed segment is being used, make sure that segment crosses over end of window. -//// // In the absence of this check, CONSUMING segments could contain some portion of the window. That -// data -//// // would be skipped forever. -//// if (lastLLCSegmentPerPartition.contains(segmentName) && segmentEndTimeMs < windowEndMs) { -//// LOGGER.info("Window data overflows into CONSUMING segments for partition of segment: {}. Skipping -//// task " -//// + "generation: {}", segmentName, taskType); -//// skipGenerate = true; -//// // Note: There can be segments which needs to be reprocessed. -//// // In these cases the window bucket time range will always remain same, -//// // i.e. watermark will never advance and -//// // above condition of skipGenerate = true; will never happen if there -//// // is any segments which needs to be reprocessed. 
-////// Preconditions.checkState(!reProcessSegment, -////// "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); -//// break; -//// } -//// minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); -//// segmentNames.add(segmentName); -//// downloadURLs.add(segmentZKMetadata.getDownloadUrl()); -//// -//// numRecordsPerTask += segmentZKMetadata.getTotalDocs(); -//// -//// if (numRecordsPerTask >= maxNumRecordsPerTask) { -//// segmentNamesGroupList.add(segmentNames); -//// downloadURLsGroupList.add(downloadURLs); -//// numRecordsPerTask = 0; -//// segmentNames = new ArrayList<>(); -//// downloadURLs = new ArrayList<>(); -//// } -//// } -//// -//// if ((!segmentNames.isEmpty()) -//// && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { -//// segmentNamesGroupList.add(segmentNames); -//// downloadURLsGroupList.add(downloadURLs); -//// } -//// } -//// if (skipGenerate || !segmentNamesGroupList.isEmpty()) { -//// break; -//// } -//// -//// Preconditions.checkState(isPrevMinionTaskSuccessful, -//// "Prev minion task failed and bucket time window cannot be incremented"); -//// -//// LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time -// bucket", -//// taskType, windowStartMs, windowEndMs); -//// windowStartMs = windowEndMs; -//// windowEndMs += bucketMs; -//// } -// } - if (segmentNamesGroupList.isEmpty()) { -// Preconditions.checkState(true, -// "There are segment(s) which needs to be re-processed, current run can't be skipped."); continue; } List pinotTaskConfigsForTable = new ArrayList<>(); + long newWindowStartTime = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); + long newWindowEndTime = realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(); + + LOGGER.info( + "generating tasks for: {} with window start time: {}, window end time: {}, table: {}", taskType, + windowStartMs, + newWindowEndTime, realtimeTableName); for (int 
segmentNameListIndex = 0; segmentNameListIndex < segmentNamesGroupList.size(); segmentNameListIndex++) { List segmentNameList = segmentNamesGroupList.get(segmentNameListIndex); List downloadURLList = downloadURLsGroupList.get(segmentNameListIndex); pinotTaskConfigsForTable.add( createPinotTaskConfig(segmentNameList, downloadURLList, realtimeTableName, taskConfigs, tableConfig, - realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), - realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(), taskType)); + newWindowStartTime, + newWindowEndTime, taskType)); } -// if (prevMinionTaskSuccessful) { -// // if there were no segments which needed to be reProcessed, only then we can remove the past minion runs -// // expected results. The past minion runs expected result list should either be cleared all or left as it is -// // so that the successful subtask's segments are not picked again in-case of consecutive minion task failures. -// realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList().clear(); -// realtimeToOfflineSegmentsTaskMetadata.setWindowStartMs(windowStartMs); -// realtimeToOfflineSegmentsTaskMetadata.setWindowEndMs(windowEndMs); -// } try { _clusterInfoAccessor .setMinionTaskMetadata(realtimeToOfflineSegmentsTaskMetadata, @@ -345,6 +257,7 @@ public List generateTasks(List tableConfigs) { LOGGER.error( "Version changed while updating RTO task metadata for table: {}, skip scheduling. 
There are " + "multiple task schedulers for the same table, need to investigate!", realtimeTableName); + // skip this table for this minion run continue; } @@ -359,7 +272,9 @@ private void deleteInvalidOfflineSegments(String offlineTableName, List segmentsToBeReProcessedList, Set existingOfflineTableSegmentNames, Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask) { + Set segmentsToBeDeleted = new HashSet<>(); + for (SegmentZKMetadata segmentZKMetadata : segmentsToBeReProcessedList) { String segmentName = segmentZKMetadata.getSegmentName(); List expectedCorrespondingOfflineSegments = @@ -417,15 +332,13 @@ private List getSegmentsToBeReprocessed(List generateNewSegmentsToProcess(List completedSegmentsZKMetadata, - long windowStartMs, - long windowEndMs, long bucketMs, long bufferMs, String bufferTimePeriod, Set lastLLCSegmentPerPartition, + long windowStartMs, long windowEndMs, long bucketMs, long bufferMs, String bufferTimePeriod, + Set lastLLCSegmentPerPartition, RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata) { String taskType = RealtimeToOfflineSegmentsTask.TASK_TYPE; List segmentZKMetadataList = new ArrayList<>(); - boolean skipGenerate = false; - while (true) { // Check that execution window is older than bufferTime if (windowEndMs > System.currentTimeMillis() - bufferMs) { @@ -440,9 +353,6 @@ private List generateNewSegmentsToProcess(List generateNewSegmentsToProcess(List(); - // Note: There can be segments which needs to be reprocessed. - // In these cases the window bucket time range will always remain same, - // i.e. watermark will never advance and - // above condition of skipGenerate = true; will never happen if there - // is any segments which needs to be reprocessed. 
-// Preconditions.checkState(!reProcessSegment, -// "Segment: " + segmentName + " needs to be reProcessed and shouldn't be skipped."); -// break; } -// minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); segmentZKMetadataList.add(segmentZKMetadata); -// segmentNames.add(segmentName); -// downloadURLs.add(segmentZKMetadata.getDownloadUrl()); - -// numRecordsPerTask += segmentZKMetadata.getTotalDocs(); -// -// if (numRecordsPerTask >= maxNumRecordsPerTask) { -// segmentNamesGroupList.add(segmentNames); -// downloadURLsGroupList.add(downloadURLs); -// numRecordsPerTask = 0; -// segmentNames = new ArrayList<>(); -// downloadURLs = new ArrayList<>(); -// } } - -// if ((!segmentNames.isEmpty()) -// && (segmentZkMetadataIndex == (completedSegmentsZKMetadata.size() - 1))) { -// segmentNamesGroupList.add(segmentNames); -// downloadURLsGroupList.add(downloadURLs); -// } } + if (!segmentZKMetadataList.isEmpty()) { break; } -// Preconditions.checkState(isPrevMinionTaskSuccessful, -// "Prev minion task failed and bucket time window cannot be incremented"); - LOGGER.info("Found no eligible segments for task: {} with window [{} - {}), moving to the next time bucket", taskType, windowStartMs, windowEndMs); windowStartMs = windowEndMs; windowEndMs += bucketMs; } + + // At this point, there will be some segment which needs to be processed for RTO. + // Since we have input segments, we can update metadata to new window. 
realtimeToOfflineSegmentsTaskMetadata.setWindowStartMs(windowStartMs); realtimeToOfflineSegmentsTaskMetadata.setWindowEndMs(windowEndMs); return segmentZKMetadataList; @@ -512,7 +395,6 @@ private void divideSegmentsAmongSubtasks(List segmentsToBeReP for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < segmentsToBeReProcessedList.size(); segmentZkMetadataIndex++) { SegmentZKMetadata segmentZKMetadata = segmentsToBeReProcessedList.get(segmentZkMetadataIndex); -// minSegmentTime = Math.min(minSegmentTime, segmentZKMetadata.getStartTimeMs()); segmentNames.add(segmentZKMetadata.getSegmentName()); downloadURLs.add(segmentZKMetadata.getDownloadUrl()); From 21185e6bdaef4a72a90b51745e93e7fa5c8ff000 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 21:38:41 +0530 Subject: [PATCH 36/72] nit --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 8d9b57558017..b6f17bba5ce4 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -313,18 +313,6 @@ private List getSegmentsToBeReprocessed(List segmentsToDelete = -// getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); -// -// if (!segmentsToDelete.isEmpty()) { -// _clusterInfoAccessor.getPinotHelixResourceManager() -// 
.deleteSegments(offlineTableName, segmentsToDelete); -// // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the -// // actual state. But there is no need to update existingOfflineTableSegmentNames. -// } } } } From 324a4cf0847bb5bea38125267ae9c0b8d1674929 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 16 Dec 2024 21:49:42 +0530 Subject: [PATCH 37/72] updates code docs of metadata --- .../RealtimeToOfflineSegmentsTaskMetadata.java | 15 +++++++++------ .../RealtimeToOfflineSegmentsTaskExecutor.java | 9 +++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index d610603ffdd6..7b895fa99261 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -29,19 +29,22 @@ /** * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. - * The watermarkMs denotes the time (exclusive) upto which tasks have been executed. + * The _windowStartMs denotes the time (exclusive) until which it's certain that tasks have been + * completed successfully. * The expectedRealtimeToOfflineSegmentsTaskResultList denotes the expected RTO tasks result info. - * This list can contain both completed and in-completed Tasks expected Results. This list is used by - * generator to validate whether a potential segment (for RTO task) has already been successfully - * processed as a RTO task in the past or not. + * This list can contain both completed and in-completed Tasks expected Results. This list is used by + * generator to validate whether a potential segment (for RTO task) has already been successfully + * processed as a RTO task in the past or not. 
+ * The _windowStartMs and _windowEndMs denote the window bucket time + * of currently not successfully completed minion task. + * The window is updated by generator when it's certain that prev minon task run is successful. * * This gets serialized and stored in zookeeper under the path * MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask * * PinotTaskGenerator: * The watermarkMs> is used by the RealtimeToOfflineSegmentsTaskGenerator, - * to determine the window of execution for the task it is generating. - * The window of execution will be [watermarkMs, watermarkMs + bucketSize) + * to determine the window of execution of the prev task based on which it generates new task. * * PinotTaskExecutor: * The same watermark is used by the RealtimeToOfflineSegmentsTaskExecutor, to: diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 63577466bcb2..1a706bc15212 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -68,11 +68,12 @@ * Before beginning the task, the watermarkMs is checked in the minion task metadata ZNode, * located at MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask * It should match the windowStartMs. - * The version of the znode is cached. * - * After the segments are uploaded, this task updates the watermarkMs in the minion task metadata ZNode. 
- * The znode version is checked during update, - * and update only succeeds if version matches with the previously cached version + * Before the segments are uploaded, this task updates the ExpectedRealtimeToOfflineTaskResultInfoList + * in the minion task metadata ZNode. + * The znode version is checked during update, retrying until max attempts and version of znode is equal to expected. + * Reason for above is that, since multiple subtasks run in parallel, there can be race condition + * with updating the znode. */ public class RealtimeToOfflineSegmentsTaskExecutor extends BaseMultipleSegmentsConversionExecutor { private static final Logger LOGGER = LoggerFactory.getLogger(RealtimeToOfflineSegmentsTaskExecutor.class); From 0f27067e0d9b38ba55ad1984d5730c3e984a94b2 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 17 Dec 2024 01:02:41 +0530 Subject: [PATCH 38/72] Adds test --- ...pectedRealtimeToOfflineTaskResultInfo.java | 19 +++ ...RealtimeToOfflineSegmentsTaskMetadata.java | 4 +- ...timeToOfflineSegmentsTaskMetadataTest.java | 75 ++++++++++ ...ealtimeToOfflineSegmentsTaskGenerator.java | 141 ++++++++++-------- 4 files changed, 172 insertions(+), 67 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java index e8379ca137a7..954108838da6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java @@ -19,6 +19,7 @@ package org.apache.pinot.common.minion; import java.util.List; +import java.util.Objects; import java.util.UUID; @@ -63,4 +64,22 @@ public List getSegmentsFrom() { public List getSegmentsTo() { return _segmentsTo; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o 
instanceof ExpectedRealtimeToOfflineTaskResultInfo)) { + return false; + } + ExpectedRealtimeToOfflineTaskResultInfo that = (ExpectedRealtimeToOfflineTaskResultInfo) o; + return Objects.equals(_segmentsFrom, that._segmentsFrom) && Objects.equals(_segmentsTo, + that._segmentsTo) && Objects.equals(_id, that._id) && Objects.equals(_taskID, that._taskID); + } + + @Override + public int hashCode() { + return Objects.hash(_segmentsFrom, _segmentsTo, _id, _taskID); + } } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 7b895fa99261..abc73ea10dde 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -63,8 +63,8 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private final List _expectedRealtimeToOfflineSegmentsTaskResultList; private long _windowEndMs; - public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long watermarkMs) { - _windowStartMs = watermarkMs; + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs) { + _windowStartMs = windowStartMs; _tableNameWithType = tableNameWithType; _expectedRealtimeToOfflineSegmentsTaskResultList = new ArrayList<>(); } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 43a4844b5554..c03e48687985 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ 
-18,7 +18,12 @@ */ package org.apache.pinot.common.metadata; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; import org.apache.helix.zookeeper.datamodel.ZNRecord; +import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.testng.annotations.Test; @@ -43,4 +48,74 @@ public void testToFromZNRecord() { assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), "testTable_REALTIME"); assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), 1000); } + + @Test + public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { + List expectedRealtimeToOfflineTaskResultInfoList = new ArrayList<>(); + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + new ExpectedRealtimeToOfflineTaskResultInfo( + Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), + Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), + "1"); + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo1 = + new ExpectedRealtimeToOfflineTaskResultInfo( + Arrays.asList("githubEvents__0__0__20241213T2102Z", "githubEvents__0__0__20241213T2203Z"), + Arrays.asList("githubEventsOffline__0__0__20241213T2032Z", "githubEventsOffline__0__0__20241213T2403Z"), + "2"); + expectedRealtimeToOfflineTaskResultInfoList.add(expectedRealtimeToOfflineTaskResultInfo); + expectedRealtimeToOfflineTaskResultInfoList.add(expectedRealtimeToOfflineTaskResultInfo1); + + RealtimeToOfflineSegmentsTaskMetadata metadata = + new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000, 2000, + expectedRealtimeToOfflineTaskResultInfoList); + ZNRecord znRecord = metadata.toZNRecord(); + assertEquals(znRecord.getId(), "testTable_REALTIME"); + assertEquals(znRecord.getSimpleField("windowStartMs"), 
"1000"); + assertEquals(znRecord.getSimpleField("windowEndMs"), "2000"); + Map> listFields = znRecord.getListFields(); + + for (String id : listFields.keySet()) { + List fields = listFields.get(id); + assertEquals(fields.size(), 3); + String taskID = fields.get(2); + switch (taskID) { + case "1": + assertEquals(fields.get(0), "githubEvents__0__0__20241213T2002Z,githubEvents__0__0__20241213T2003Z"); + assertEquals(fields.get(1), + "githubEventsOffline__0__0__20241213T2002Z,githubEventsOffline__0__0__20241213T2003Z"); + break; + case "2": + assertEquals(fields.get(0), "githubEvents__0__0__20241213T2102Z,githubEvents__0__0__20241213T2203Z"); + assertEquals(fields.get(1), + "githubEventsOffline__0__0__20241213T2032Z,githubEventsOffline__0__0__20241213T2403Z"); + break; + default: + throw new RuntimeException("invalid taskID"); + } + } + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord); + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), "testTable_REALTIME"); + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), 1000); + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(), 2000); + List expectedRealtimeToOfflineTaskResultInfoList1 = + realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); + + for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo2 + : expectedRealtimeToOfflineTaskResultInfoList1) { + String taskID = expectedRealtimeToOfflineTaskResultInfo2.getTaskID(); + + switch (taskID) { + case "1": + assertEquals(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo); + break; + case "2": + assertEquals(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo1); + break; + default: + throw new RuntimeException("invalid taskID"); + } + } + } } diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index b6f17bba5ce4..6071f3c2f44f 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -82,6 +82,9 @@ * Such segments will be checked for segment endTime, to ensure there's no overflow into CONSUMING segments * * - A PinotTaskConfig is created, with segment information, execution window, and any config specific to the task + * + * Generator owns the responsibility to ensure prev minion tasks were successful and only then watermark + * can be updated. */ @TaskGenerator public class RealtimeToOfflineSegmentsTaskGenerator extends BaseTaskGenerator { @@ -111,6 +114,8 @@ public List generateTasks(List tableConfigs) { LOGGER.info("Start generating task configs for table: {} for task: {}", realtimeTableName, taskType); // Only schedule 1 task of this type, per table + // Still there can be scenario where generator can generate additional task, while previous task + // is just about to be enqueued in the helix queue. 
Map incompleteTasks = TaskGeneratorUtils.getIncompleteTasks(taskType, realtimeTableName, _clusterInfoAccessor); if (!incompleteTasks.isEmpty()) { @@ -166,17 +171,6 @@ public List generateTasks(List tableConfigs) { // (exclusive) Set lastLLCSegmentPerPartition = new HashSet<>(partitionToLatestLLCSegmentName.values()); - List> segmentNamesGroupList = new ArrayList<>(); - List> downloadURLsGroupList = new ArrayList<>(); - - // max maxNumRecordsPerTask is used to divide a minion tasks among - // multiple subtasks to improve performance. - int maxNumRecordsPerTask = - taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null - ? Integer.parseInt( - taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) - : DEFAULT_MAX_NUM_RECORDS_PER_TASK; - // get past minion task runs expected results. This list can have both successful and // failed task's expected results. List expectedRealtimeToOfflineTaskResultInfoList = @@ -200,12 +194,23 @@ public List generateTasks(List tableConfigs) { // if no failure, no segment to be reprocessed boolean prevMinionTaskSuccessful = segmentsToBeReProcessedList.isEmpty(); + List> segmentNamesGroupList = new ArrayList<>(); + Map segmentNameVsDownloadURL = new HashMap<>(); + + // max maxNumRecordsPerTask is used to divide a minion tasks among + // multiple subtasks to improve performance. + int maxNumRecordsPerTask = + taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null + ? Integer.parseInt( + taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) + : DEFAULT_MAX_NUM_RECORDS_PER_TASK; + if (!prevMinionTaskSuccessful) { // In-case of partial failure of segments upload in prev minion task run, // data is inconsistent, delete the corresponding offline segments immediately. 
deleteInvalidOfflineSegments(offlineTableName, segmentsToBeReProcessedList, existingOfflineTableSegmentNames, realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask); - divideSegmentsAmongSubtasks(segmentsToBeReProcessedList, segmentNamesGroupList, downloadURLsGroupList, + divideSegmentsAmongSubtasks(segmentsToBeReProcessedList, segmentNamesGroupList, segmentNameVsDownloadURL, maxNumRecordsPerTask); } else { // if all offline segments of prev minion tasks were successfully uploaded, @@ -223,7 +228,7 @@ public List generateTasks(List tableConfigs) { generateNewSegmentsToProcess(completedSegmentsZKMetadata, windowStartMs, windowEndMs, bucketMs, bufferMs, bufferTimePeriod, lastLLCSegmentPerPartition, realtimeToOfflineSegmentsTaskMetadata); - divideSegmentsAmongSubtasks(segmentZKMetadataList, segmentNamesGroupList, downloadURLsGroupList, + divideSegmentsAmongSubtasks(segmentZKMetadataList, segmentNamesGroupList, segmentNameVsDownloadURL, maxNumRecordsPerTask); } @@ -240,9 +245,9 @@ public List generateTasks(List tableConfigs) { windowStartMs, newWindowEndTime, realtimeTableName); - for (int segmentNameListIndex = 0; segmentNameListIndex < segmentNamesGroupList.size(); segmentNameListIndex++) { - List segmentNameList = segmentNamesGroupList.get(segmentNameListIndex); - List downloadURLList = downloadURLsGroupList.get(segmentNameListIndex); + for (List segmentNameList : segmentNamesGroupList) { + List downloadURLList = getDownloadURLList(segmentNameList, segmentNameVsDownloadURL); + Preconditions.checkState(segmentNameList.size() == downloadURLList.size()); pinotTaskConfigsForTable.add( createPinotTaskConfig(segmentNameList, downloadURLList, realtimeTableName, taskConfigs, tableConfig, newWindowStartTime, @@ -268,6 +273,55 @@ public List generateTasks(List tableConfigs) { return pinotTaskConfigs; } + @Override + public void validateTaskConfigs(TableConfig tableConfig, Map taskConfigs) { + // check table is not upsert + 
Preconditions.checkState(tableConfig.getUpsertMode() == UpsertConfig.Mode.NONE, + "RealtimeToOfflineTask doesn't support upsert table!"); + // check no malformed period + TimeUtils.convertPeriodToMillis( + taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.BUFFER_TIME_PERIOD_KEY, "2d")); + TimeUtils.convertPeriodToMillis( + taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.BUCKET_TIME_PERIOD_KEY, "1d")); + TimeUtils.convertPeriodToMillis( + taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY, "1s")); + // check mergeType is correct + Preconditions.checkState(ImmutableSet.of(MergeType.CONCAT.name(), MergeType.ROLLUP.name(), MergeType.DEDUP.name()) + .contains(taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY, MergeType.CONCAT.name()) + .toUpperCase()), "MergeType must be one of [CONCAT, ROLLUP, DEDUP]!"); + + Schema schema = _clusterInfoAccessor.getPinotHelixResourceManager().getSchemaForTableConfig(tableConfig); + // check no mis-configured columns + Set columnNames = schema.getColumnNames(); + for (Map.Entry entry : taskConfigs.entrySet()) { + if (entry.getKey().endsWith(".aggregationType")) { + Preconditions.checkState(columnNames.contains( + StringUtils.removeEnd(entry.getKey(), RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX)), + String.format("Column \"%s\" not found in schema!", entry.getKey())); + try { + // check that it's a valid aggregation function type + AggregationFunctionType aft = AggregationFunctionType.getAggregationFunctionType(entry.getValue()); + // check that a value aggregator is available + if (!MinionConstants.RealtimeToOfflineSegmentsTask.AVAILABLE_CORE_VALUE_AGGREGATORS.contains(aft)) { + throw new IllegalArgumentException("ValueAggregator not enabled for type: " + aft.toString()); + } + } catch (IllegalArgumentException e) { + String err = + String.format("Column \"%s\" has invalid aggregate type: %s", entry.getKey(), entry.getValue()); + throw new 
IllegalStateException(err); + } + } + } + } + + private List getDownloadURLList(List segmentNameList, Map segmentNameVsDownloadURL) { + List downloadURLList = new ArrayList<>(); + for (String segmentName : segmentNameList) { + downloadURLList.add(segmentNameVsDownloadURL.get(segmentName)); + } + return downloadURLList; + } + private void deleteInvalidOfflineSegments(String offlineTableName, List segmentsToBeReProcessedList, Set existingOfflineTableSegmentNames, @@ -285,8 +339,6 @@ private void deleteInvalidOfflineSegments(String offlineTableName, if (!segmentsToBeDeleted.isEmpty()) { _clusterInfoAccessor.getPinotHelixResourceManager() .deleteSegments(offlineTableName, new ArrayList<>(segmentsToBeDeleted)); - // Note: after deleting above segments existingOfflineTableSegmentNames won't be equal to the - // actual state. But there is no need to update existingOfflineTableSegmentNames. } } @@ -374,31 +426,28 @@ private List generateNewSegmentsToProcess(List segmentsToBeReProcessedList, - List> segmentNamesGroupList, List> downloadURLsGroupList, int maxNumRecordsPerTask) { + List> segmentNamesGroupList, Map segmentNameVsDownloadURL, + int maxNumRecordsPerTask) { long numRecordsPerTask = 0; List segmentNames = new ArrayList<>(); - List downloadURLs = new ArrayList<>(); for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < segmentsToBeReProcessedList.size(); segmentZkMetadataIndex++) { SegmentZKMetadata segmentZKMetadata = segmentsToBeReProcessedList.get(segmentZkMetadataIndex); segmentNames.add(segmentZKMetadata.getSegmentName()); - downloadURLs.add(segmentZKMetadata.getDownloadUrl()); + segmentNameVsDownloadURL.put(segmentZKMetadata.getSegmentName(), segmentZKMetadata.getDownloadUrl()); numRecordsPerTask += segmentZKMetadata.getTotalDocs(); if (numRecordsPerTask >= maxNumRecordsPerTask) { segmentNamesGroupList.add(segmentNames); - downloadURLsGroupList.add(downloadURLs); numRecordsPerTask = 0; segmentNames = new ArrayList<>(); - downloadURLs = new 
ArrayList<>(); } if ((!segmentNames.isEmpty()) && (segmentZkMetadataIndex == (segmentsToBeReProcessedList.size() - 1))) { segmentNamesGroupList.add(segmentNames); - downloadURLsGroupList.add(downloadURLs); } } } @@ -406,6 +455,9 @@ private void divideSegmentsAmongSubtasks(List segmentsToBeReP private List getSegmentsToDelete(List expectedCorrespondingOfflineSegments, Set existingOfflineTableSegmentNames) { List segmentsToDelete = new ArrayList<>(); + + // Iterate on all expectedCorrespondingOfflineSegments of realtime segments to be reprocessed. + // delete any offline segment present. for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { if (existingOfflineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { segmentsToDelete.add(expectedCorrespondingOfflineSegment); @@ -524,47 +576,6 @@ private RealtimeToOfflineSegmentsTaskMetadata getRTOTaskMetadata(String realtime return new RealtimeToOfflineSegmentsTaskMetadata(realtimeTableName, watermarkMs); } - @Override - public void validateTaskConfigs(TableConfig tableConfig, Map taskConfigs) { - // check table is not upsert - Preconditions.checkState(tableConfig.getUpsertMode() == UpsertConfig.Mode.NONE, - "RealtimeToOfflineTask doesn't support upsert table!"); - // check no malformed period - TimeUtils.convertPeriodToMillis( - taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.BUFFER_TIME_PERIOD_KEY, "2d")); - TimeUtils.convertPeriodToMillis( - taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.BUCKET_TIME_PERIOD_KEY, "1d")); - TimeUtils.convertPeriodToMillis( - taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.ROUND_BUCKET_TIME_PERIOD_KEY, "1s")); - // check mergeType is correct - Preconditions.checkState(ImmutableSet.of(MergeType.CONCAT.name(), MergeType.ROLLUP.name(), MergeType.DEDUP.name()) - .contains(taskConfigs.getOrDefault(RealtimeToOfflineSegmentsTask.MERGE_TYPE_KEY, MergeType.CONCAT.name()) - .toUpperCase()), "MergeType must be one of [CONCAT, 
ROLLUP, DEDUP]!"); - - Schema schema = _clusterInfoAccessor.getPinotHelixResourceManager().getSchemaForTableConfig(tableConfig); - // check no mis-configured columns - Set columnNames = schema.getColumnNames(); - for (Map.Entry entry : taskConfigs.entrySet()) { - if (entry.getKey().endsWith(".aggregationType")) { - Preconditions.checkState(columnNames.contains( - StringUtils.removeEnd(entry.getKey(), RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX)), - String.format("Column \"%s\" not found in schema!", entry.getKey())); - try { - // check that it's a valid aggregation function type - AggregationFunctionType aft = AggregationFunctionType.getAggregationFunctionType(entry.getValue()); - // check that a value aggregator is available - if (!MinionConstants.RealtimeToOfflineSegmentsTask.AVAILABLE_CORE_VALUE_AGGREGATORS.contains(aft)) { - throw new IllegalArgumentException("ValueAggregator not enabled for type: " + aft.toString()); - } - } catch (IllegalArgumentException e) { - String err = - String.format("Column \"%s\" has invalid aggregate type: %s", entry.getKey(), entry.getValue()); - throw new IllegalStateException(err); - } - } - } - } - private PinotTaskConfig createPinotTaskConfig(List segmentNameList, List downloadURLList, String realtimeTableName, Map taskConfigs, TableConfig tableConfig, long windowStartMs, long windowEndMs, String taskType) { From 0eba7cc9e91209d53330b07decbb391583951b3b Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 17 Dec 2024 13:14:01 +0530 Subject: [PATCH 39/72] clean up --- ...pectedRealtimeToOfflineTaskResultInfo.java | 5 ++--- ...RealtimeToOfflineSegmentsTaskMetadata.java | 21 ++++++++----------- ...timeToOfflineSegmentsTaskMetadataTest.java | 16 ++++++++++++-- ...RealtimeToOfflineSegmentsTaskExecutor.java | 6 +++--- ...ealtimeToOfflineSegmentsTaskGenerator.java | 15 +++++++------ 5 files changed, 35 insertions(+), 28 deletions(-) diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java index 954108838da6..0561e2694e96 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java @@ -74,12 +74,11 @@ public boolean equals(Object o) { return false; } ExpectedRealtimeToOfflineTaskResultInfo that = (ExpectedRealtimeToOfflineTaskResultInfo) o; - return Objects.equals(_segmentsFrom, that._segmentsFrom) && Objects.equals(_segmentsTo, - that._segmentsTo) && Objects.equals(_id, that._id) && Objects.equals(_taskID, that._taskID); + return Objects.equals(_id, that._id); } @Override public int hashCode() { - return Objects.hash(_segmentsFrom, _segmentsTo, _id, _taskID); + return Objects.hashCode(_id); } } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index abc73ea10dde..e2443c21cf4c 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -31,23 +31,23 @@ * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. * The _windowStartMs denotes the time (exclusive) until which it's certain that tasks have been * completed successfully. - * The expectedRealtimeToOfflineSegmentsTaskResultList denotes the expected RTO tasks result info. + * The _expectedRealtimeToOfflineSegmentsTaskResultList denotes the expected RTO tasks result info. * This list can contain both completed and in-completed Tasks expected Results. 
This list is used by * generator to validate whether a potential segment (for RTO task) has already been successfully * processed as a RTO task in the past or not. * The _windowStartMs and _windowEndMs denote the window bucket time - * of currently not successfully completed minion task. + * of currently not successfully completed minion task. bucket: [_windowStartMs, _windowEndMs) * The window is updated by generator when it's certain that prev minon task run is successful. * * This gets serialized and stored in zookeeper under the path * MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask * * PinotTaskGenerator: - * The watermarkMs> is used by the RealtimeToOfflineSegmentsTaskGenerator, + * The _windowStartMs> is used by the RealtimeToOfflineSegmentsTaskGenerator, * to determine the window of execution of the prev task based on which it generates new task. * * PinotTaskExecutor: - * The same watermark is used by the RealtimeToOfflineSegmentsTaskExecutor, to: + * The same windowStartMs is used by the RealtimeToOfflineSegmentsTaskExecutor, to: * - Verify that it's running the latest task scheduled by the task generator. * - The ExpectedRealtimeToOfflineSegmentsTaskResultList is updated before the offline segments * are uploaded to the table. 
@@ -70,10 +70,10 @@ public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long wind } public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs, - long windowEndMs, List expectedRealtimeToOfflineSegmentsMapList) { + long windowEndMs, List expectedRealtimeToOfflineTaskResultInfoList) { _tableNameWithType = tableNameWithType; _windowStartMs = windowStartMs; - _expectedRealtimeToOfflineSegmentsTaskResultList = expectedRealtimeToOfflineSegmentsMapList; + _expectedRealtimeToOfflineSegmentsTaskResultList = expectedRealtimeToOfflineTaskResultInfoList; _windowEndMs = windowEndMs; } @@ -89,9 +89,6 @@ public void setWindowStartMs(long windowStartMs) { _windowStartMs = windowStartMs; } - /** - * Get the watermark in millis - */ public long getWindowStartMs() { return _windowStartMs; } @@ -107,7 +104,7 @@ public void setWindowEndMs(long windowEndMs) { public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long windowStartMs = znRecord.getLongField(WINDOW_START_KEY, 0); long windowEndMs = znRecord.getLongField(WINDOW_END_KEY, 0); - List expectedRealtimeToOfflineSegmentsMapList = new ArrayList<>(); + List expectedRealtimeToOfflineTaskResultInfoList = new ArrayList<>(); Map> listFields = znRecord.getListFields(); for (Map.Entry> listField : listFields.entrySet()) { String realtimeToOfflineSegmentsMapId = listField.getKey(); @@ -116,12 +113,12 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); String taskID = value.get(2); - expectedRealtimeToOfflineSegmentsMapList.add( + expectedRealtimeToOfflineTaskResultInfoList.add( new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID) ); } return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), 
windowStartMs, windowEndMs, - expectedRealtimeToOfflineSegmentsMapList); + expectedRealtimeToOfflineTaskResultInfoList); } public ZNRecord toZNRecord() { diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index c03e48687985..35b6f5f1ddb1 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; @@ -108,14 +109,25 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { switch (taskID) { case "1": - assertEquals(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo); + assert isEqual(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo); break; case "2": - assertEquals(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo1); + assert isEqual(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo1); break; default: throw new RuntimeException("invalid taskID"); } } } + + private boolean isEqual(ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo1, + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo2) { + return Objects.equals(expectedRealtimeToOfflineTaskResultInfo1.getSegmentsFrom(), + expectedRealtimeToOfflineTaskResultInfo2.getSegmentsFrom()) && Objects.equals( + 
expectedRealtimeToOfflineTaskResultInfo1.getSegmentsTo(), + expectedRealtimeToOfflineTaskResultInfo2.getSegmentsTo()) && Objects.equals( + expectedRealtimeToOfflineTaskResultInfo1.getId(), expectedRealtimeToOfflineTaskResultInfo2.getId()) + && Objects.equals( + expectedRealtimeToOfflineTaskResultInfo1.getTaskID(), expectedRealtimeToOfflineTaskResultInfo2.getTaskID()); + } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 1a706bc15212..615509ba0db4 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -106,7 +106,7 @@ public void preProcess(PinotTaskConfig pinotTaskConfig) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); long windowStartMs = Long.parseLong(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY)); - Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs() <= windowStartMs, + Preconditions.checkState(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs() == windowStartMs, "watermarkMs in RealtimeToOfflineSegmentsTask metadata: %s shouldn't be larger than windowStartMs: %d in task" + " configs for table: %s. 
ZNode may have been modified by another task", realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), windowStartMs, realtimeTableName); @@ -259,13 +259,13 @@ private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUplo RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); List - expectedRealtimeToOfflineSegmentsMapList = + expectedRealtimeToOfflineSegmentsTaskResultList = realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = getExpectedRealtimeToOfflineTaskResultInfo(context); - expectedRealtimeToOfflineSegmentsMapList.add(expectedRealtimeToOfflineTaskResultInfo); + expectedRealtimeToOfflineSegmentsTaskResultList.add(expectedRealtimeToOfflineTaskResultInfo); return realtimeToOfflineSegmentsTaskMetadata; } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 6071f3c2f44f..b5a356f77e2d 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -163,8 +163,7 @@ public List generateTasks(List tableConfigs) { RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); - // Get watermark from RealtimeToOfflineSegmentsTaskMetadata 
ZNode. WindowStart = watermark. WindowEnd = - // windowStart + bucket. + // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); // Find all COMPLETED segments with data overlapping execution window: windowStart (inclusive) to windowEnd @@ -483,18 +482,18 @@ private boolean checkIfSegmentNeedsToBeReProcessed(List expectedCorrespo private Map> getRealtimeVsCorrespondingOfflineSegmentNames( List - expectedRealtimeToOfflineSegmentsMapList) { + expectedRealtimeToOfflineTaskResultInfoList) { Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNames = new HashMap<>(); - for (ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap - : expectedRealtimeToOfflineSegmentsMapList) { - List segmentsFrom = realtimeToOfflineSegmentsMap.getSegmentsFrom(); - List segmentsTo = realtimeToOfflineSegmentsMap.getSegmentsTo(); + for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo + : expectedRealtimeToOfflineTaskResultInfoList) { + List segmentsFrom = expectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom(); + List segmentsTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); for (String segmentFrom : segmentsFrom) { Preconditions.checkState(!realtimeSegmentNameVsCorrespondingOfflineSegmentNames.containsKey(segmentFrom), "Realtime segment: {} was picked by multiple subtasks in the previous minion run with task id: {}", - segmentFrom, realtimeToOfflineSegmentsMap.getTaskID()); + segmentFrom, expectedRealtimeToOfflineTaskResultInfo.getTaskID()); realtimeSegmentNameVsCorrespondingOfflineSegmentNames.put(segmentFrom, segmentsTo); } } From 02b94fe813912a0e7e958799cd6c222b9070479d Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 17 Dec 2024 17:28:55 +0530 Subject: [PATCH 40/72] handles edge case --- ...pectedRealtimeToOfflineTaskResultInfo.java | 12 +- ...RealtimeToOfflineSegmentsTaskMetadata.java | 101 +++++++++--- 
...timeToOfflineSegmentsTaskMetadataTest.java | 78 ++++++---- ...RealtimeToOfflineSegmentsTaskExecutor.java | 27 ++-- ...ealtimeToOfflineSegmentsTaskGenerator.java | 146 ++++++++++++------ 5 files changed, 257 insertions(+), 107 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java index 0561e2694e96..02a2e62c8103 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java @@ -33,6 +33,7 @@ public class ExpectedRealtimeToOfflineTaskResultInfo { private final List _segmentsTo; private final String _id; private final String _taskID; + private boolean _taskFailure = false; public ExpectedRealtimeToOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, String taskID) { _segmentsFrom = segmentsFrom; @@ -42,11 +43,12 @@ public ExpectedRealtimeToOfflineTaskResultInfo(List segmentsFrom, List segmentsFrom, List segmentsTo, - String realtimeToOfflineSegmentsMapId, String taskID) { + String realtimeToOfflineSegmentsMapId, String taskID, boolean taskFailure) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; _id = realtimeToOfflineSegmentsMapId; _taskID = taskID; + _taskFailure = taskFailure; } public String getTaskID() { @@ -65,6 +67,14 @@ public List getSegmentsTo() { return _segmentsTo; } + public boolean isTaskFailure() { + return _taskFailure; + } + + public void setTaskFailure() { + _taskFailure = true; + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index e2443c21cf4c..549833b4e463 100644 
--- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -19,8 +19,8 @@ package org.apache.pinot.common.minion; import com.google.common.base.Preconditions; -import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; @@ -57,34 +57,36 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WINDOW_START_KEY = "windowStartMs"; private static final String WINDOW_END_KEY = "windowEndMs"; private static final String COMMA_SEPARATOR = ","; + private static final String SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY = "segmentVsExpectedRTOResultId"; private final String _tableNameWithType; private long _windowStartMs; - private final List _expectedRealtimeToOfflineSegmentsTaskResultList; private long _windowEndMs; + private final Map _idVsExpectedRealtimeToOfflineTaskResultInfo; + private final Map _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs) { _windowStartMs = windowStartMs; _tableNameWithType = tableNameWithType; - _expectedRealtimeToOfflineSegmentsTaskResultList = new ArrayList<>(); + _idVsExpectedRealtimeToOfflineTaskResultInfo = new HashMap<>(); + _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = new HashMap<>(); } public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs, - long windowEndMs, List expectedRealtimeToOfflineTaskResultInfoList) { + long windowEndMs, + Map idVsExpectedRealtimeToOfflineTaskResultInfo, + Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId) { _tableNameWithType = tableNameWithType; _windowStartMs = windowStartMs; - _expectedRealtimeToOfflineSegmentsTaskResultList = 
expectedRealtimeToOfflineTaskResultInfoList; + _idVsExpectedRealtimeToOfflineTaskResultInfo = idVsExpectedRealtimeToOfflineTaskResultInfo; _windowEndMs = windowEndMs; + _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId; } public String getTableNameWithType() { return _tableNameWithType; } - public List getExpectedRealtimeToOfflineSegmentsTaskResultList() { - return _expectedRealtimeToOfflineSegmentsTaskResultList; - } - public void setWindowStartMs(long windowStartMs) { _windowStartMs = windowStartMs; } @@ -101,39 +103,94 @@ public void setWindowEndMs(long windowEndMs) { _windowEndMs = windowEndMs; } + public Map getIdVsExpectedRealtimeToOfflineTaskResultInfo() { + return _idVsExpectedRealtimeToOfflineTaskResultInfo; + } + + public Map getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId() { + return _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId; + } + + public void addExpectedRealtimeToOfflineSegmentsTaskResultInfo( + ExpectedRealtimeToOfflineTaskResultInfo newExpectedRealtimeToOfflineTaskResultInfo) { + + List segmentsFrom = newExpectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom(); + + for (String segmentName : segmentsFrom) { + if (_segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.containsKey(segmentName)) { + String prevExpectedRealtimeToOfflineTaskResultInfoId = + _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); + + ExpectedRealtimeToOfflineTaskResultInfo prevExpectedRealtimeToOfflineTaskResultInfo = + _idVsExpectedRealtimeToOfflineTaskResultInfo.get(prevExpectedRealtimeToOfflineTaskResultInfoId); + + if (prevExpectedRealtimeToOfflineTaskResultInfo != null) { + Preconditions.checkState(prevExpectedRealtimeToOfflineTaskResultInfo.isTaskFailure(), + "ExpectedRealtimeToOfflineSegmentsTaskResult can only be replaced if it's of a failed task"); + _idVsExpectedRealtimeToOfflineTaskResultInfo.remove(prevExpectedRealtimeToOfflineTaskResultInfoId); + } + } + 
+ _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.put(segmentName, + newExpectedRealtimeToOfflineTaskResultInfo.getId()); + _idVsExpectedRealtimeToOfflineTaskResultInfo.put(newExpectedRealtimeToOfflineTaskResultInfo.getId(), + newExpectedRealtimeToOfflineTaskResultInfo); + } + } + public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long windowStartMs = znRecord.getLongField(WINDOW_START_KEY, 0); long windowEndMs = znRecord.getLongField(WINDOW_END_KEY, 0); - List expectedRealtimeToOfflineTaskResultInfoList = new ArrayList<>(); + Map idVExpectedRealtimeToOfflineTaskResultInfoList = + new HashMap<>(); Map> listFields = znRecord.getListFields(); + for (Map.Entry> listField : listFields.entrySet()) { String realtimeToOfflineSegmentsMapId = listField.getKey(); + List value = listField.getValue(); - Preconditions.checkState(value.size() == 3); + Preconditions.checkState(value.size() == 4); + List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); String taskID = value.get(2); - expectedRealtimeToOfflineTaskResultInfoList.add( - new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID) + boolean taskFailure = Boolean.parseBoolean(value.get(3)); + + idVExpectedRealtimeToOfflineTaskResultInfoList.put(realtimeToOfflineSegmentsMapId, + new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID, + taskFailure) ); } + + Map> mapFields = znRecord.getMapFields(); + Map segmentNameVsExpectedRTOIDResult = mapFields.get(SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY); + return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), windowStartMs, windowEndMs, - expectedRealtimeToOfflineTaskResultInfoList); + idVExpectedRealtimeToOfflineTaskResultInfoList, segmentNameVsExpectedRTOIDResult); } public ZNRecord toZNRecord() { ZNRecord 
znRecord = new ZNRecord(_tableNameWithType); znRecord.setLongField(WINDOW_START_KEY, _windowStartMs); znRecord.setLongField(WINDOW_END_KEY, _windowEndMs); - for (ExpectedRealtimeToOfflineTaskResultInfo realtimeToOfflineSegmentsMap - : _expectedRealtimeToOfflineSegmentsTaskResultList) { - String segmentsFrom = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsFrom()); - String segmentsTo = String.join(COMMA_SEPARATOR, realtimeToOfflineSegmentsMap.getSegmentsTo()); - String taskId = realtimeToOfflineSegmentsMap.getTaskID(); - String realtimeToOfflineSegmentsMapId = realtimeToOfflineSegmentsMap.getId(); - List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId); - znRecord.setListField(realtimeToOfflineSegmentsMapId, listEntry); + + for (String expectedRealtimeToOfflineTaskResultInfoId : _idVsExpectedRealtimeToOfflineTaskResultInfo.keySet()) { + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + _idVsExpectedRealtimeToOfflineTaskResultInfo.get(expectedRealtimeToOfflineTaskResultInfoId); + + String segmentsFrom = String.join(COMMA_SEPARATOR, expectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom()); + String segmentsTo = String.join(COMMA_SEPARATOR, expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo()); + String taskId = expectedRealtimeToOfflineTaskResultInfo.getTaskID(); + boolean taskFailure = expectedRealtimeToOfflineTaskResultInfo.isTaskFailure(); + + List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId, Boolean.toString(taskFailure)); + + String id = expectedRealtimeToOfflineTaskResultInfo.getId(); + znRecord.setListField(id, listEntry); } + + znRecord.setMapField(SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY, + _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId); return znRecord; } } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java 
b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 35b6f5f1ddb1..5ac476854a7e 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -18,8 +18,9 @@ */ package org.apache.pinot.common.metadata; -import java.util.ArrayList; +import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -52,7 +53,8 @@ public void testToFromZNRecord() { @Test public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { - List expectedRealtimeToOfflineTaskResultInfoList = new ArrayList<>(); + Map idVsExpectedRealtimeToOfflineTaskResultInfo = + new HashMap<>(); ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = new ExpectedRealtimeToOfflineTaskResultInfo( Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), @@ -63,22 +65,36 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { Arrays.asList("githubEvents__0__0__20241213T2102Z", "githubEvents__0__0__20241213T2203Z"), Arrays.asList("githubEventsOffline__0__0__20241213T2032Z", "githubEventsOffline__0__0__20241213T2403Z"), "2"); - expectedRealtimeToOfflineTaskResultInfoList.add(expectedRealtimeToOfflineTaskResultInfo); - expectedRealtimeToOfflineTaskResultInfoList.add(expectedRealtimeToOfflineTaskResultInfo1); + idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedRealtimeToOfflineTaskResultInfo.getId(), + expectedRealtimeToOfflineTaskResultInfo); + idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedRealtimeToOfflineTaskResultInfo1.getId(), + expectedRealtimeToOfflineTaskResultInfo1); - RealtimeToOfflineSegmentsTaskMetadata metadata = + ImmutableMap segmentNameVsId = ImmutableMap.of( + 
"githubEvents__0__0__20241213T2002Z", expectedRealtimeToOfflineTaskResultInfo.getId(), + "githubEvents__0__0__20241213T2003Z", expectedRealtimeToOfflineTaskResultInfo.getId(), + "githubEvents__0__0__20241213T2102Z", expectedRealtimeToOfflineTaskResultInfo1.getId(), + "githubEvents__0__0__20241213T2203Z", expectedRealtimeToOfflineTaskResultInfo1.getId() + ); + + RealtimeToOfflineSegmentsTaskMetadata originalMetadata = new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000, 2000, - expectedRealtimeToOfflineTaskResultInfoList); - ZNRecord znRecord = metadata.toZNRecord(); + idVsExpectedRealtimeToOfflineTaskResultInfo, segmentNameVsId); + + ZNRecord znRecord = originalMetadata.toZNRecord(); assertEquals(znRecord.getId(), "testTable_REALTIME"); assertEquals(znRecord.getSimpleField("windowStartMs"), "1000"); assertEquals(znRecord.getSimpleField("windowEndMs"), "2000"); Map> listFields = znRecord.getListFields(); + Map> mapFields = znRecord.getMapFields(); for (String id : listFields.keySet()) { List fields = listFields.get(id); - assertEquals(fields.size(), 3); + assertEquals(fields.size(), 4); String taskID = fields.get(2); + boolean taskFailure = Boolean.parseBoolean(fields.get(3)); + assert !taskFailure; + switch (taskID) { case "1": assertEquals(fields.get(0), "githubEvents__0__0__20241213T2002Z,githubEvents__0__0__20241213T2003Z"); @@ -95,29 +111,39 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { } } + Map map = mapFields.get("segmentVsExpectedRTOResultId"); + assertEquals(map, segmentNameVsId); + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord); - assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), "testTable_REALTIME"); - assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), 1000); - assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(), 2000); - List 
expectedRealtimeToOfflineTaskResultInfoList1 = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); - for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo2 - : expectedRealtimeToOfflineTaskResultInfoList1) { - String taskID = expectedRealtimeToOfflineTaskResultInfo2.getTaskID(); + assert isEqual(realtimeToOfflineSegmentsTaskMetadata, originalMetadata); + } - switch (taskID) { - case "1": - assert isEqual(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo); - break; - case "2": - assert isEqual(expectedRealtimeToOfflineTaskResultInfo2, expectedRealtimeToOfflineTaskResultInfo1); - break; - default: - throw new RuntimeException("invalid taskID"); - } + private boolean isEqual(RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, + RealtimeToOfflineSegmentsTaskMetadata originalMetadata) { + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(), originalMetadata.getWindowEndMs()); + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), originalMetadata.getWindowStartMs()); + assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), originalMetadata.getTableNameWithType()); + + Map idVsExpectedRealtimeToOfflineTaskResultInfo = + realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); + Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); + + for (String id : idVsExpectedRealtimeToOfflineTaskResultInfo.keySet()) { + ExpectedRealtimeToOfflineTaskResultInfo actualExpectedRealtimeToOfflineTaskResultInfo = + idVsExpectedRealtimeToOfflineTaskResultInfo.get(id); + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + originalMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo().get(id); + assert expectedRealtimeToOfflineTaskResultInfo 
!= null; + assert isEqual(actualExpectedRealtimeToOfflineTaskResultInfo, expectedRealtimeToOfflineTaskResultInfo); } + + assertEquals(segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId, + originalMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId()); + + return true; } private boolean isEqual(ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo1, diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 615509ba0db4..7366378bc841 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -212,15 +212,19 @@ protected void preUploadSegments(SegmentUploadContext context) int attemptCount; try { attemptCount = DEFAULT_RETRY_POLICY.attempt(() -> { - - ZNRecord realtimeToOfflineSegmentsTaskZNRecord = - _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, - RealtimeToOfflineSegmentsTask.TASK_TYPE); - int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); - - RealtimeToOfflineSegmentsTaskMetadata updatedRealtimeToOfflineSegmentsTaskMetadata = - getUpdatedTaskMetadata(context, realtimeToOfflineSegmentsTaskZNRecord); try { + ZNRecord realtimeToOfflineSegmentsTaskZNRecord = + _minionTaskZkMetadataManager.getTaskMetadataZNRecord(realtimeTableName, + RealtimeToOfflineSegmentsTask.TASK_TYPE); + int expectedVersion = 
realtimeToOfflineSegmentsTaskZNRecord.getVersion(); + + // Adding ExpectedRealtimeToOfflineSegmentsTaskResultInfo might fail. + // In-case of failure there will be runtime exception thrown + RealtimeToOfflineSegmentsTaskMetadata updatedRealtimeToOfflineSegmentsTaskMetadata = + getUpdatedTaskMetadata(context, realtimeToOfflineSegmentsTaskZNRecord); + + // Setting to zookeeper might fail due to version mismatch, but in this case + // the exception is caught and retried. _minionTaskZkMetadataManager.setTaskMetadataZNRecord(updatedRealtimeToOfflineSegmentsTaskMetadata, RealtimeToOfflineSegmentsTask.TASK_TYPE, expectedVersion); @@ -258,14 +262,11 @@ private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUplo RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - List - expectedRealtimeToOfflineSegmentsTaskResultList = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = getExpectedRealtimeToOfflineTaskResultInfo(context); - expectedRealtimeToOfflineSegmentsTaskResultList.add(expectedRealtimeToOfflineTaskResultInfo); + realtimeToOfflineSegmentsTaskMetadata.addExpectedRealtimeToOfflineSegmentsTaskResultInfo( + expectedRealtimeToOfflineTaskResultInfo); return realtimeToOfflineSegmentsTaskMetadata; } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index b5a356f77e2d..46b96e751fc0 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -170,12 +171,12 @@ public List generateTasks(List tableConfigs) { // (exclusive) Set lastLLCSegmentPerPartition = new HashSet<>(partitionToLatestLLCSegmentName.values()); - // get past minion task runs expected results. This list can have both successful and + // get past minion task runs expected results. This Map can have both successful and // failed task's expected results. - List expectedRealtimeToOfflineTaskResultInfoList = - realtimeToOfflineSegmentsTaskMetadata.getExpectedRealtimeToOfflineSegmentsTaskResultList(); - Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask = - getRealtimeVsCorrespondingOfflineSegmentNames(expectedRealtimeToOfflineTaskResultInfoList); + Map idVsExpectedRealtimeToOfflineTaskResultInfoList = + realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); + Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); // Get all offline table segments. 
// These are used to validate if previous minion task was successful or not @@ -184,14 +185,14 @@ public List generateTasks(List tableConfigs) { Set existingOfflineTableSegmentNames = new HashSet<>(_clusterInfoAccessor.getPinotHelixResourceManager().getSegmentsFor(offlineTableName, true)); - // In-case of previous minion task failures, get all segments + // In-case of previous minion task failures, get info // of failed minion subtasks. They need to be reprocessed. - List segmentsToBeReProcessedList = - getSegmentsToBeReprocessed(completedSegmentsZKMetadata, - realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask, existingOfflineTableSegmentNames); + List failedTasks = + getFailedTasks(realtimeToOfflineSegmentsTaskMetadata, + existingOfflineTableSegmentNames); // if no failure, no segment to be reprocessed - boolean prevMinionTaskSuccessful = segmentsToBeReProcessedList.isEmpty(); + boolean prevMinionTaskSuccessful = failedTasks.isEmpty(); List> segmentNamesGroupList = new ArrayList<>(); Map segmentNameVsDownloadURL = new HashMap<>(); @@ -207,15 +208,20 @@ public List generateTasks(List tableConfigs) { if (!prevMinionTaskSuccessful) { // In-case of partial failure of segments upload in prev minion task run, // data is inconsistent, delete the corresponding offline segments immediately. 
- deleteInvalidOfflineSegments(offlineTableName, segmentsToBeReProcessedList, existingOfflineTableSegmentNames, - realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask); - divideSegmentsAmongSubtasks(segmentsToBeReProcessedList, segmentNamesGroupList, segmentNameVsDownloadURL, + deleteInvalidOfflineSegments(offlineTableName, failedTasks, existingOfflineTableSegmentNames); + + List segmentZKMetadataList = + getSegmentsToBeReProcessed(failedTasks, completedSegmentsZKMetadata); + + divideSegmentsAmongSubtasks(segmentZKMetadataList, segmentNamesGroupList, segmentNameVsDownloadURL, maxNumRecordsPerTask); } else { // if all offline segments of prev minion tasks were successfully uploaded, // we can clear the state of prev minion tasks as now it's useless. - if (!expectedRealtimeToOfflineTaskResultInfoList.isEmpty()) { - expectedRealtimeToOfflineTaskResultInfoList.clear(); + if (!realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(). + isEmpty()) { + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId().clear(); + realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo().clear(); // windowEndTime of prev minion task needs to be re-used for picking up the // next windowStartTime. This is useful for case where user changes minion config // after a minion task run was complete. 
So windowStartTime cannot be watermark + bucketMs @@ -322,52 +328,102 @@ private List getDownloadURLList(List segmentNameList, Map segmentsToBeReProcessedList, - Set existingOfflineTableSegmentNames, - Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask) { + List failedTasks, + Set existingOfflineTableSegmentNames) { Set segmentsToBeDeleted = new HashSet<>(); - for (SegmentZKMetadata segmentZKMetadata : segmentsToBeReProcessedList) { - String segmentName = segmentZKMetadata.getSegmentName(); - List expectedCorrespondingOfflineSegments = - realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); + for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo : failedTasks) { + List expectedCorrespondingOfflineSegments = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); segmentsToBeDeleted.addAll( getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames)); } + if (!segmentsToBeDeleted.isEmpty()) { _clusterInfoAccessor.getPinotHelixResourceManager() .deleteSegments(offlineTableName, new ArrayList<>(segmentsToBeDeleted)); } } - private List getSegmentsToBeReprocessed(List completedSegmentsZKMetadata, - Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask, + private List getFailedTasks( + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, Set existingOfflineTableSegmentNames) { - List segmentsToBeReProcessedList = new ArrayList<>(); + List failedTasks = new ArrayList<>(); - for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { + Map idVsExpectedRealtimeToOfflineTaskResultInfoList = + realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); + + Collection expectedRealtimeToOfflineTaskResultInfoList = + idVsExpectedRealtimeToOfflineTaskResultInfoList.values(); + + for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo + : 
expectedRealtimeToOfflineTaskResultInfoList) { + List segmentTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); + + boolean reProcessTask = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); + if (reProcessTask) { + expectedRealtimeToOfflineTaskResultInfo.setTaskFailure(); + failedTasks.add(expectedRealtimeToOfflineTaskResultInfo); + } + } + +// for (String segmentName : segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.keySet()) { +// if (!currentSegmentNames.contains(segmentName)) { +// // looks like segment now does not exist in realtime table +// continue; +// } +// } + +// for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { +// String segmentName = segmentZKMetadata.getSegmentName(); +// // reProcessSegment denotes whether to reschedule a previous segment which was a +// // part of a failed task. +// boolean reProcessSegment; +// +// if (segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.containsKey(segmentName)) { +// +// String expectedRealtimeToOfflineTaskResultInfoId = +// segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); +// +// ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = +// idVsExpectedRealtimeToOfflineTaskResultInfoList.get(expectedRealtimeToOfflineTaskResultInfoId); +// +// Preconditions.checkNotNull(expectedRealtimeToOfflineTaskResultInfo); +// +// // segment has been picked previously, check if offline segments generated by this segment +// // exists in offline table +// List expectedCorrespondingOfflineSegments = +// expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); +// +// reProcessSegment = +// checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, +// existingOfflineTableSegmentNames); +// +// if (reProcessSegment) { +// segmentsToBeReProcessedList.add(segmentZKMetadata); +// } +// } +// } + return failedTasks; + } + + private List getSegmentsToBeReProcessed(List failedTasks, + List currentSegments) 
{ + + List segmentZKMetadataList = new ArrayList<>(); + Set segmentsFrom = new HashSet<>(); + + for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo : failedTasks) { + segmentsFrom.addAll(expectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom()); + } + + for (SegmentZKMetadata segmentZKMetadata : currentSegments) { String segmentName = segmentZKMetadata.getSegmentName(); - // reProcessSegment denotes whether to reschedule a previous segment which was a - // part of a failed task. - boolean reProcessSegment; - - if (realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.containsKey(segmentName)) { - // segment has been picked previously, check if offline segments generated by this segment - // exists in offline table - List expectedCorrespondingOfflineSegments = - realtimeSegmentNameVsCorrespondingOfflineSegmentNamesOfPrevTask.get(segmentName); - - reProcessSegment = - checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, - existingOfflineTableSegmentNames); - - if (reProcessSegment) { - segmentsToBeReProcessedList.add(segmentZKMetadata); - } + if (segmentsFrom.contains(segmentName)) { + segmentZKMetadataList.add(segmentZKMetadata); } } - return segmentsToBeReProcessedList; + return segmentZKMetadataList; } private List generateNewSegmentsToProcess(List completedSegmentsZKMetadata, @@ -465,7 +521,7 @@ private List getSegmentsToDelete(List expectedCorrespondingOffli return segmentsToDelete; } - private boolean checkIfSegmentNeedsToBeReProcessed(List expectedCorrespondingOfflineSegments, + private boolean checkIfAllSegmentsExists(List expectedCorrespondingOfflineSegments, Set offlineTableSegmentNames) { for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { From 6405c35cc150e3c3f8d1e4bd790670e444df1233 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 00:38:41 +0530 Subject: [PATCH 41/72] Adds tests and clean up --- ...RealtimeToOfflineSegmentsTaskMetadata.java 
| 4 + ...ealtimeToOfflineSegmentsTaskGenerator.java | 73 +++++-------------- ...imeToOfflineSegmentsTaskGeneratorTest.java | 71 +++++++++++++++++- 3 files changed, 92 insertions(+), 56 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 549833b4e463..79269a6e4d5f 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -124,9 +124,13 @@ public void addExpectedRealtimeToOfflineSegmentsTaskResultInfo( ExpectedRealtimeToOfflineTaskResultInfo prevExpectedRealtimeToOfflineTaskResultInfo = _idVsExpectedRealtimeToOfflineTaskResultInfo.get(prevExpectedRealtimeToOfflineTaskResultInfoId); + // check if prevExpectedRealtimeToOfflineTaskResultInfo is not null, since it could + // have been removed in the same minion run previously. if (prevExpectedRealtimeToOfflineTaskResultInfo != null) { Preconditions.checkState(prevExpectedRealtimeToOfflineTaskResultInfo.isTaskFailure(), "ExpectedRealtimeToOfflineSegmentsTaskResult can only be replaced if it's of a failed task"); + // prevExpectedRealtimeToOfflineTaskResultInfoId is related to a failed task. And all the realtime segments + // were re-scheduled. PrevExpectedRealtimeToOfflineTaskResultInfoId can be removed from metadata. 
_idVsExpectedRealtimeToOfflineTaskResultInfo.remove(prevExpectedRealtimeToOfflineTaskResultInfoId); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 46b96e751fc0..04d30d688bb5 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -171,13 +171,6 @@ public List generateTasks(List tableConfigs) { // (exclusive) Set lastLLCSegmentPerPartition = new HashSet<>(partitionToLatestLLCSegmentName.values()); - // get past minion task runs expected results. This Map can have both successful and - // failed task's expected results. - Map idVsExpectedRealtimeToOfflineTaskResultInfoList = - realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); - Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); - // Get all offline table segments. // These are used to validate if previous minion task was successful or not String offlineTableName = @@ -188,8 +181,7 @@ public List generateTasks(List tableConfigs) { // In-case of previous minion task failures, get info // of failed minion subtasks. They need to be reprocessed. 
List failedTasks = - getFailedTasks(realtimeToOfflineSegmentsTaskMetadata, - existingOfflineTableSegmentNames); + getFailedTasks(realtimeToOfflineSegmentsTaskMetadata, existingOfflineTableSegmentNames); // if no failure, no segment to be reprocessed boolean prevMinionTaskSuccessful = failedTasks.isEmpty(); @@ -197,7 +189,7 @@ public List generateTasks(List tableConfigs) { List> segmentNamesGroupList = new ArrayList<>(); Map segmentNameVsDownloadURL = new HashMap<>(); - // max maxNumRecordsPerTask is used to divide a minion tasks among + // maxNumRecordsPerTask is used to divide a minion tasks among // multiple subtasks to improve performance. int maxNumRecordsPerTask = taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null @@ -350,60 +342,30 @@ private List getFailedTasks( Set existingOfflineTableSegmentNames) { List failedTasks = new ArrayList<>(); + // Get all the ExpectedRealtimeToOfflineTaskResultInfo of prev minion task Map idVsExpectedRealtimeToOfflineTaskResultInfoList = realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); - Collection expectedRealtimeToOfflineTaskResultInfoList = idVsExpectedRealtimeToOfflineTaskResultInfoList.values(); + // Check what all offline segments are present currently for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo : expectedRealtimeToOfflineTaskResultInfoList) { - List segmentTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); - boolean reProcessTask = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); - if (reProcessTask) { + // get offline segments + List segmentTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); + boolean taskSuccessful = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); + + if (!taskSuccessful) { + // The expectedRealtimeToOfflineTaskResultInfo is confirmed to be + // related to a failed task. 
Mark it as a failure, since executor will + // then only replace expectedRealtimeToOfflineTaskResultInfo for the + // segments to be reprocessed. This is to avoid having multiple + // expectedRealtimeToOfflineTaskResultInfo for same segment. expectedRealtimeToOfflineTaskResultInfo.setTaskFailure(); failedTasks.add(expectedRealtimeToOfflineTaskResultInfo); } } - -// for (String segmentName : segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.keySet()) { -// if (!currentSegmentNames.contains(segmentName)) { -// // looks like segment now does not exist in realtime table -// continue; -// } -// } - -// for (SegmentZKMetadata segmentZKMetadata : completedSegmentsZKMetadata) { -// String segmentName = segmentZKMetadata.getSegmentName(); -// // reProcessSegment denotes whether to reschedule a previous segment which was a -// // part of a failed task. -// boolean reProcessSegment; -// -// if (segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.containsKey(segmentName)) { -// -// String expectedRealtimeToOfflineTaskResultInfoId = -// segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); -// -// ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = -// idVsExpectedRealtimeToOfflineTaskResultInfoList.get(expectedRealtimeToOfflineTaskResultInfoId); -// -// Preconditions.checkNotNull(expectedRealtimeToOfflineTaskResultInfo); -// -// // segment has been picked previously, check if offline segments generated by this segment -// // exists in offline table -// List expectedCorrespondingOfflineSegments = -// expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); -// -// reProcessSegment = -// checkIfSegmentNeedsToBeReProcessed(expectedCorrespondingOfflineSegments, -// existingOfflineTableSegmentNames); -// -// if (reProcessSegment) { -// segmentsToBeReProcessedList.add(segmentZKMetadata); -// } -// } -// } return failedTasks; } @@ -417,6 +379,7 @@ private List getSegmentsToBeReProcessed(List getSegmentsToDelete(List 
expectedCorrespondingOffli List segmentsToDelete = new ArrayList<>(); // Iterate on all expectedCorrespondingOfflineSegments of realtime segments to be reprocessed. - // delete any offline segment present. + // check which segments exists. They need to be deleted. for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { if (existingOfflineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { segmentsToDelete.add(expectedCorrespondingOfflineSegment); @@ -529,11 +492,11 @@ private boolean checkIfAllSegmentsExists(List expectedCorrespondingOffli // If not all corresponding offline segments to a realtime segment exists, // it means there was an issue with prev minion task. And segment needs // to be re-processed. - return true; + return false; } } - return false; + return true; } private Map> getRealtimeVsCorrespondingOfflineSegmentNames( diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 17c90d42e3ad..3c1a154f8cd5 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -29,6 +30,7 @@ import org.apache.helix.model.IdealState; 
import org.apache.helix.task.TaskState; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; +import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.controller.helix.core.minion.ClusterInfoAccessor; @@ -398,6 +400,73 @@ public void testGenerateTasksWithMinionMetadata() { assertEquals(configs.get("m1" + RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX), "MAX"); } + @Test + public void testSegmentUploadFailure() { + // store partial offline segments in Zk metadata. + ClusterInfoAccessor mockClusterInfoProvide = mock(ClusterInfoAccessor.class); + when(mockClusterInfoProvide.getTaskStates(RealtimeToOfflineSegmentsTask.TASK_TYPE)).thenReturn(new HashMap<>()); + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + getRealtimeToOfflineSegmentsTaskMetadata(); + when(mockClusterInfoProvide + .getMinionTaskMetadataZNRecord(RealtimeToOfflineSegmentsTask.TASK_TYPE, REALTIME_TABLE_NAME)).thenReturn( + realtimeToOfflineSegmentsTaskMetadata.toZNRecord()); // 21 May 2020 UTC + SegmentZKMetadata segmentZKMetadata1 = + getSegmentZKMetadata("githubEvents__0__0__20241213T2002Z", Status.DONE, 1589972400000L, 1590048000000L, + TimeUnit.MILLISECONDS, "download1"); // 05-20-2020T11:00:00 to 05-21-2020T08:00:00 UTC + SegmentZKMetadata segmentZKMetadata2 = + getSegmentZKMetadata("githubEvents__0__0__20241213T2003Z", Status.DONE, 1590048000000L, 1590134400000L, + TimeUnit.MILLISECONDS, "download2"); // 05-21-2020T08:00:00 UTC to 05-22-2020T08:00:00 UTC + when(mockClusterInfoProvide.getSegmentsZKMetadata(REALTIME_TABLE_NAME)) + .thenReturn(Lists.newArrayList(segmentZKMetadata1, segmentZKMetadata2)); + when(mockClusterInfoProvide.getIdealState(REALTIME_TABLE_NAME)).thenReturn(getIdealState(REALTIME_TABLE_NAME, + 
Lists.newArrayList(segmentZKMetadata1.getSegmentName(), segmentZKMetadata2.getSegmentName()))); + + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockPinotHelixResourceManager.getSegmentsFor(OFFLINE_TABLE_NAME, true)).thenReturn( + List.of("githubEventsOffline__0__0__20241213T2002Z")); + + when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + + // Default configs + Map> taskConfigsMap = new HashMap<>(); + taskConfigsMap.put(RealtimeToOfflineSegmentsTask.TASK_TYPE, new HashMap<>()); + TableConfig realtimeTableConfig = getRealtimeTableConfig(taskConfigsMap); + + RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); + generator.init(mockClusterInfoProvide); + List pinotTaskConfigs = generator.generateTasks(Lists.newArrayList(realtimeTableConfig)); + + assertEquals(pinotTaskConfigs.size(), 1); + assertEquals(pinotTaskConfigs.get(0).getTaskType(), RealtimeToOfflineSegmentsTask.TASK_TYPE); + Map configs = pinotTaskConfigs.get(0).getConfigs(); + assertEquals(configs.get(MinionConstants.TABLE_NAME_KEY), REALTIME_TABLE_NAME); + assertEquals(configs.get(MinionConstants.SEGMENT_NAME_KEY), + "githubEvents__0__0__20241213T2002Z,githubEvents__0__0__20241213T2003Z"); + assertEquals(configs.get(MinionConstants.DOWNLOAD_URL_KEY), "download1,download2"); + assertEquals(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_START_MS_KEY), "1589972400000"); + assertEquals(configs.get(RealtimeToOfflineSegmentsTask.WINDOW_END_MS_KEY), "1590058800000"); + } + + private RealtimeToOfflineSegmentsTaskMetadata getRealtimeToOfflineSegmentsTaskMetadata() { + Map idVsExpectedRealtimeToOfflineTaskResultInfo = + new HashMap<>(); + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + new ExpectedRealtimeToOfflineTaskResultInfo( + Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), + 
Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), + "1"); + idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedRealtimeToOfflineTaskResultInfo.getId(), + expectedRealtimeToOfflineTaskResultInfo); + + ImmutableMap segmentNameVsId = ImmutableMap.of( + "githubEvents__0__0__20241213T2002Z", expectedRealtimeToOfflineTaskResultInfo.getId(), + "githubEvents__0__0__20241213T2003Z", expectedRealtimeToOfflineTaskResultInfo.getId() + ); + + return new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1589972400000L, 1590058800000L, + idVsExpectedRealtimeToOfflineTaskResultInfo, segmentNameVsId); + } + /** * Tests for skipping task generation due to CONSUMING segments overlap with window */ @@ -696,7 +765,7 @@ private SegmentZKMetadata getSegmentZKMetadata(String segmentName, Status status private IdealState getIdealState(String tableName, List segmentNames) { IdealState idealState = new IdealState(tableName); idealState.setRebalanceMode(IdealState.RebalanceMode.CUSTOMIZED); - for (String segmentName: segmentNames) { + for (String segmentName : segmentNames) { idealState.setPartitionState(segmentName, "Server_0", "ONLINE"); } return idealState; From f77e8172a5df1d616856d9a2601777db841af271 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 00:39:17 +0530 Subject: [PATCH 42/72] clean up --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 04d30d688bb5..6e8b3fa6cde1 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -499,27 +499,6 @@ private boolean checkIfAllSegmentsExists(List expectedCorrespondingOffli return true; } - private Map> getRealtimeVsCorrespondingOfflineSegmentNames( - List - expectedRealtimeToOfflineTaskResultInfoList) { - Map> realtimeSegmentNameVsCorrespondingOfflineSegmentNames - = new HashMap<>(); - - for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo - : expectedRealtimeToOfflineTaskResultInfoList) { - List segmentsFrom = expectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom(); - List segmentsTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); - for (String segmentFrom : segmentsFrom) { - Preconditions.checkState(!realtimeSegmentNameVsCorrespondingOfflineSegmentNames.containsKey(segmentFrom), - "Realtime segment: {} was picked by multiple subtasks in the previous minion run with task id: {}", - segmentFrom, expectedRealtimeToOfflineTaskResultInfo.getTaskID()); - realtimeSegmentNameVsCorrespondingOfflineSegmentNames.put(segmentFrom, segmentsTo); - } - } - - return realtimeSegmentNameVsCorrespondingOfflineSegmentNames; - } - /** * Fetch completed (DONE/UPLOADED) segment and partition information * From 592f65caaf0192c11bc98a0e2b30c40b08ede65c Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 01:41:00 +0530 Subject: [PATCH 43/72] Updates integration test --- ...eSegmentsMinionClusterIntegrationTest.java | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index a0b42740385e..f39d6e069b0e 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -31,6 +31,7 @@ import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; import org.apache.pinot.common.minion.MinionTaskMetadataUtils; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; +import org.apache.pinot.controller.helix.core.PinotResourceManagerResponse; import org.apache.pinot.controller.helix.core.minion.PinotHelixTaskResourceManager; import org.apache.pinot.controller.helix.core.minion.PinotTaskManager; import org.apache.pinot.core.common.MinionConstants; @@ -262,6 +263,42 @@ public void testRealtimeToOfflineSegmentsTask() } testHardcodedQueries(); + + // delete all offline segments to test how generator handles prev minion task failure + List allOfflineSegments = _helixResourceManager.getSegmentsFor(_offlineTableName, true); + PinotResourceManagerResponse response = _helixResourceManager.deleteSegments(_offlineTableName, allOfflineSegments); + assert response.isSuccessful(); + expectedWatermark -= 86400000; + + // Schedule task + assertNotNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertTrue(_taskResourceManager.getTaskQueues().contains( + PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE))); + // Should not generate more tasks + assertNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + + // Wait at most 600 seconds for all tasks COMPLETED + 
waitForTaskToComplete(expectedWatermark, _realtimeTableName); + // check segment is in offline + segmentsZKMetadata = _helixResourceManager.getSegmentsZKMetadata(_offlineTableName); + assertEquals(segmentsZKMetadata.size(), (numOfflineSegmentsPerTask)); + + long expectedOfflineSegmentTimeMs = expectedWatermark; + for (SegmentZKMetadata segmentZKMetadata : segmentsZKMetadata) { + assertEquals(segmentZKMetadata.getStartTimeMs(), expectedOfflineSegmentTimeMs); + assertEquals(segmentZKMetadata.getEndTimeMs(), expectedOfflineSegmentTimeMs); + if (segmentPartitionConfig != null) { + assertEquals(segmentZKMetadata.getPartitionMetadata().getColumnPartitionMap().keySet(), + segmentPartitionConfig.getColumnPartitionMap().keySet()); + for (String partitionColumn : segmentPartitionConfig.getColumnPartitionMap().keySet()) { + assertEquals(segmentZKMetadata.getPartitionMetadata().getPartitions(partitionColumn).size(), 1); + } + } + } + + testHardcodedQueries(); } @Test From cf97b6ef4b404fbeec873768bf5140161d449c95 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 01:58:46 +0530 Subject: [PATCH 44/72] adds code docs --- .../ExpectedRealtimeToOfflineTaskResultInfo.java | 10 ++++++++++ .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 +- .../RealtimeToOfflineSegmentsTaskGeneratorTest.java | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java index 02a2e62c8103..ab22b60dff91 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java @@ -27,6 +27,16 @@ * ExpectedRealtimeOfflineTaskResultInfo is created in * {@link 
org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskExecutor} * before uploading offline segment(s) to the offline table. + * + * The _segmentsFrom denotes the input RealtimeSegments. + * The _segmentsTo denotes the expected offline segemnts. + * The _id denotes the unique identifier of object. + * The _taskID denotes the minion taskId. + * The _taskFailure denotes the status of minion task handling the + * current ExpectedResult. This is modified in + * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskGenerator} + * when a prev minion task is failed. + * */ public class ExpectedRealtimeToOfflineTaskResultInfo { private final List _segmentsFrom; diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 6e8b3fa6cde1..12da9e9fc2a4 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -84,7 +84,7 @@ * * - A PinotTaskConfig is created, with segment information, execution window, and any config specific to the task * - * Generator owns the responsibility to ensure prev minion tasks were successful and only then watermark + * - Generator owns the responsibility to ensure prev minion tasks were successful and only then watermark * can be updated. 
*/ @TaskGenerator diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 3c1a154f8cd5..4587ac02507a 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -401,7 +401,7 @@ public void testGenerateTasksWithMinionMetadata() { } @Test - public void testSegmentUploadFailure() { + public void testGenerateTasksWithSegmentUploadFailure() { // store partial offline segments in Zk metadata. 
ClusterInfoAccessor mockClusterInfoProvide = mock(ClusterInfoAccessor.class); when(mockClusterInfoProvide.getTaskStates(RealtimeToOfflineSegmentsTask.TASK_TYPE)).thenReturn(new HashMap<>()); From f2dcef278a46fbe87b44624897185ac4d4193a23 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 02:25:23 +0530 Subject: [PATCH 45/72] handle edge case --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 12da9e9fc2a4..51bb72b719b3 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -197,15 +197,18 @@ public List generateTasks(List tableConfigs) { taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) : DEFAULT_MAX_NUM_RECORDS_PER_TASK; + List segmentsToBeReProcessed = new ArrayList<>(); if (!prevMinionTaskSuccessful) { + // prev minion tasks could have failed but there might be no segments to be processed. + segmentsToBeReProcessed = getSegmentsToBeReProcessed(failedTasks, completedSegmentsZKMetadata); + } + + if (!segmentsToBeReProcessed.isEmpty()) { // In-case of partial failure of segments upload in prev minion task run, // data is inconsistent, delete the corresponding offline segments immediately. 
deleteInvalidOfflineSegments(offlineTableName, failedTasks, existingOfflineTableSegmentNames); - List segmentZKMetadataList = - getSegmentsToBeReProcessed(failedTasks, completedSegmentsZKMetadata); - - divideSegmentsAmongSubtasks(segmentZKMetadataList, segmentNamesGroupList, segmentNameVsDownloadURL, + divideSegmentsAmongSubtasks(segmentsToBeReProcessed, segmentNamesGroupList, segmentNameVsDownloadURL, maxNumRecordsPerTask); } else { // if all offline segments of prev minion tasks were successfully uploaded, From b3cebb49da0f5c8862b852bcc7b7f9b95ab505ba Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 03:38:10 +0530 Subject: [PATCH 46/72] handle edge case --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 3 - ...ealtimeToOfflineSegmentsTaskGenerator.java | 113 ++++++++++++------ 2 files changed, 74 insertions(+), 42 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 79269a6e4d5f..9aaa129957f2 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -129,9 +129,6 @@ public void addExpectedRealtimeToOfflineSegmentsTaskResultInfo( if (prevExpectedRealtimeToOfflineTaskResultInfo != null) { Preconditions.checkState(prevExpectedRealtimeToOfflineTaskResultInfo.isTaskFailure(), "ExpectedRealtimeToOfflineSegmentsTaskResult can only be replaced if it's of a failed task"); - // prevExpectedRealtimeToOfflineTaskResultInfoId is related to a failed task. And all the realtime segments - // were re-scheduled. PrevExpectedRealtimeToOfflineTaskResultInfoId can be removed from metadata. 
- _idVsExpectedRealtimeToOfflineTaskResultInfo.remove(prevExpectedRealtimeToOfflineTaskResultInfoId); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 51bb72b719b3..9898f9c77318 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -180,11 +180,21 @@ public List generateTasks(List tableConfigs) { // In-case of previous minion task failures, get info // of failed minion subtasks. They need to be reprocessed. - List failedTasks = - getFailedTasks(realtimeToOfflineSegmentsTaskMetadata, existingOfflineTableSegmentNames); + Set failedTaskSegments = + getFailedTaskSegments(realtimeToOfflineSegmentsTaskMetadata, existingOfflineTableSegmentNames); + + // In-case of partial failure of segments upload in prev minion task run, + // data is inconsistent, delete the corresponding offline segments immediately. 
+ if (!failedTaskSegments.isEmpty()) { + deleteInvalidOfflineSegments(offlineTableName, failedTaskSegments, existingOfflineTableSegmentNames, + realtimeToOfflineSegmentsTaskMetadata); + } + + List segmentsToBeReProcessed = + filterOutRemovedSegments(failedTaskSegments, completedSegmentsZKMetadata); - // if no failure, no segment to be reprocessed - boolean prevMinionTaskSuccessful = failedTasks.isEmpty(); + // if no segment to be reprocessed, no failure + boolean prevMinionTaskSuccessful = segmentsToBeReProcessed.isEmpty(); List> segmentNamesGroupList = new ArrayList<>(); Map segmentNameVsDownloadURL = new HashMap<>(); @@ -197,19 +207,10 @@ public List generateTasks(List tableConfigs) { taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) : DEFAULT_MAX_NUM_RECORDS_PER_TASK; - List segmentsToBeReProcessed = new ArrayList<>(); - if (!prevMinionTaskSuccessful) { - // prev minion tasks could have failed but there might be no segments to be processed. - segmentsToBeReProcessed = getSegmentsToBeReProcessed(failedTasks, completedSegmentsZKMetadata); - } + List segmentsToBeScheduled; - if (!segmentsToBeReProcessed.isEmpty()) { - // In-case of partial failure of segments upload in prev minion task run, - // data is inconsistent, delete the corresponding offline segments immediately. - deleteInvalidOfflineSegments(offlineTableName, failedTasks, existingOfflineTableSegmentNames); - - divideSegmentsAmongSubtasks(segmentsToBeReProcessed, segmentNamesGroupList, segmentNameVsDownloadURL, - maxNumRecordsPerTask); + if (!prevMinionTaskSuccessful) { + segmentsToBeScheduled = segmentsToBeReProcessed; } else { // if all offline segments of prev minion tasks were successfully uploaded, // we can clear the state of prev minion tasks as now it's useless. @@ -224,14 +225,15 @@ public List generateTasks(List tableConfigs) { } long windowEndMs = windowStartMs + bucketMs; // since window changed, pick new segments. 
- List segmentZKMetadataList = + segmentsToBeScheduled = generateNewSegmentsToProcess(completedSegmentsZKMetadata, windowStartMs, windowEndMs, bucketMs, bufferMs, bufferTimePeriod, lastLLCSegmentPerPartition, realtimeToOfflineSegmentsTaskMetadata); - divideSegmentsAmongSubtasks(segmentZKMetadataList, segmentNamesGroupList, segmentNameVsDownloadURL, - maxNumRecordsPerTask); } + divideSegmentsAmongSubtasks(segmentsToBeScheduled, segmentNamesGroupList, segmentNameVsDownloadURL, + maxNumRecordsPerTask); + if (segmentNamesGroupList.isEmpty()) { continue; } @@ -323,15 +325,35 @@ private List getDownloadURLList(List segmentNameList, Map failedTasks, - Set existingOfflineTableSegmentNames) { + Set realtimeSegmentsToBeReProcessed, + Set existingOfflineTableSegmentNames, + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata) { + + Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); + Map idVsExpectedRealtimeToOfflineTaskResultInfo = + realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); Set segmentsToBeDeleted = new HashSet<>(); - for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo : failedTasks) { + for (String realtimeSegment : realtimeSegmentsToBeReProcessed) { + String id = segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(realtimeSegment); + Preconditions.checkNotNull(id); + ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + idVsExpectedRealtimeToOfflineTaskResultInfo.get(id); + // if already marked as failure, no need to delete again. 
+ if (expectedRealtimeToOfflineTaskResultInfo.isTaskFailure()) { + continue; + } List expectedCorrespondingOfflineSegments = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); segmentsToBeDeleted.addAll( getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames)); + // The expectedRealtimeToOfflineTaskResultInfo is confirmed to be + // related to a failed task. Mark it as a failure, since executor will + // then only replace expectedRealtimeToOfflineTaskResultInfo for the + // segments to be reprocessed. This is to avoid having multiple + // expectedRealtimeToOfflineTaskResultInfo for same segment. + expectedRealtimeToOfflineTaskResultInfo.setTaskFailure(); } if (!segmentsToBeDeleted.isEmpty()) { @@ -340,10 +362,10 @@ private void deleteInvalidOfflineSegments(String offlineTableName, } } - private List getFailedTasks( + private Set getFailedTaskSegments( RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, Set existingOfflineTableSegmentNames) { - List failedTasks = new ArrayList<>(); + Set failedTaskIds = new HashSet<>(); // Get all the ExpectedRealtimeToOfflineTaskResultInfo of prev minion task Map idVsExpectedRealtimeToOfflineTaskResultInfoList = @@ -351,41 +373,54 @@ private List getFailedTasks( Collection expectedRealtimeToOfflineTaskResultInfoList = idVsExpectedRealtimeToOfflineTaskResultInfoList.values(); + Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); + Set expectedRealtimeToOfflineTaskResultInfoIds = + new HashSet<>(segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.values()); + + Set segmentNamesToReprocess = new HashSet<>(); + // Check what all offline segments are present currently for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo : expectedRealtimeToOfflineTaskResultInfoList) { + if 
(expectedRealtimeToOfflineTaskResultInfo.isTaskFailure()) { + // if task is failure and is referenced by any segment, only then add to failed task. + if (expectedRealtimeToOfflineTaskResultInfoIds.contains(expectedRealtimeToOfflineTaskResultInfo.getId())) { + failedTaskIds.add(expectedRealtimeToOfflineTaskResultInfo.getTaskID()); + } + continue; + } + // get offline segments List segmentTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); boolean taskSuccessful = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); if (!taskSuccessful) { - // The expectedRealtimeToOfflineTaskResultInfo is confirmed to be - // related to a failed task. Mark it as a failure, since executor will - // then only replace expectedRealtimeToOfflineTaskResultInfo for the - // segments to be reprocessed. This is to avoid having multiple - // expectedRealtimeToOfflineTaskResultInfo for same segment. - expectedRealtimeToOfflineTaskResultInfo.setTaskFailure(); - failedTasks.add(expectedRealtimeToOfflineTaskResultInfo); + failedTaskIds.add(expectedRealtimeToOfflineTaskResultInfo.getTaskID()); } } - return failedTasks; + + for (String segmentName : segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.keySet()) { + String expectedRealtimeToOfflineTaskResultInfoId = + segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); + if (failedTaskIds.contains(expectedRealtimeToOfflineTaskResultInfoId)) { + segmentNamesToReprocess.add(segmentName); + } + } + + return segmentNamesToReprocess; } - private List getSegmentsToBeReProcessed(List failedTasks, + private List filterOutRemovedSegments(Set failedSegmentNames, List currentSegments) { List segmentZKMetadataList = new ArrayList<>(); - Set segmentsFrom = new HashSet<>(); - - for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo : failedTasks) { - segmentsFrom.addAll(expectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom()); - } // filter out deleted/removed segments. 
for (SegmentZKMetadata segmentZKMetadata : currentSegments) { String segmentName = segmentZKMetadata.getSegmentName(); - if (segmentsFrom.contains(segmentName)) { + if (failedSegmentNames.contains(segmentName)) { segmentZKMetadataList.add(segmentZKMetadata); } } From e8c1b9b900086431692b7f643ebc2c41fd57097b Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 04:05:50 +0530 Subject: [PATCH 47/72] fixes bug --- ...timeToOfflineSegmentsMinionClusterIntegrationTest.java | 2 -- .../RealtimeToOfflineSegmentsTaskGenerator.java | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index f39d6e069b0e..37aed89b7bdf 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -297,8 +297,6 @@ public void testRealtimeToOfflineSegmentsTask() } } } - - testHardcodedQueries(); } @Test diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 9898f9c77318..4b9ab432d0b8 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -365,7 +365,7 @@ private void deleteInvalidOfflineSegments(String offlineTableName, private Set getFailedTaskSegments( RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, Set existingOfflineTableSegmentNames) { - Set failedTaskIds = new HashSet<>(); + Set failedIds = new HashSet<>(); // Get all the ExpectedRealtimeToOfflineTaskResultInfo of prev minion task Map idVsExpectedRealtimeToOfflineTaskResultInfoList = @@ -387,7 +387,7 @@ private Set getFailedTaskSegments( if (expectedRealtimeToOfflineTaskResultInfo.isTaskFailure()) { // if task is failure and is referenced by any segment, only then add to failed task. if (expectedRealtimeToOfflineTaskResultInfoIds.contains(expectedRealtimeToOfflineTaskResultInfo.getId())) { - failedTaskIds.add(expectedRealtimeToOfflineTaskResultInfo.getTaskID()); + failedIds.add(expectedRealtimeToOfflineTaskResultInfo.getId()); } continue; } @@ -397,14 +397,14 @@ private Set getFailedTaskSegments( boolean taskSuccessful = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); if (!taskSuccessful) { - failedTaskIds.add(expectedRealtimeToOfflineTaskResultInfo.getTaskID()); + failedIds.add(expectedRealtimeToOfflineTaskResultInfo.getId()); } } for (String segmentName : segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.keySet()) { String expectedRealtimeToOfflineTaskResultInfoId = segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); - if (failedTaskIds.contains(expectedRealtimeToOfflineTaskResultInfoId)) { + if (failedIds.contains(expectedRealtimeToOfflineTaskResultInfoId)) { segmentNamesToReprocess.add(segmentName); } } From 9eacf765019fc0851cdbbd71da97eb086922e3d1 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 14:06:26 +0530 Subject: [PATCH 48/72] test --- 
.../RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index 37aed89b7bdf..54d19bf10925 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -268,6 +268,8 @@ public void testRealtimeToOfflineSegmentsTask() List allOfflineSegments = _helixResourceManager.getSegmentsFor(_offlineTableName, true); PinotResourceManagerResponse response = _helixResourceManager.deleteSegments(_offlineTableName, allOfflineSegments); assert response.isSuccessful(); + allOfflineSegments = _helixResourceManager.getSegmentsFor(_offlineTableName, true); + assertEquals(allOfflineSegments.size(), 0); expectedWatermark -= 86400000; // Schedule task From 9a2eb80446c029259e6a9fca992a59c0806b8173 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 15:20:50 +0530 Subject: [PATCH 49/72] handles edge case --- .../RealtimeToOfflineSegmentsTaskExecutor.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 7366378bc841..e4d8636a9af4 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -44,6 +44,7 @@ import org.apache.pinot.plugin.minion.tasks.MergeTaskUtils; import org.apache.pinot.plugin.minion.tasks.SegmentConversionResult; import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader; +import org.apache.pinot.segment.spi.creator.name.SimpleSegmentNameGenerator; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.RecordReader; @@ -161,6 +162,11 @@ protected List convert(PinotTaskConfig pinotTaskConfig, // Segment config segmentProcessorConfigBuilder.setSegmentConfig(MergeTaskUtils.getSegmentConfig(configs)); + // Since multiple subtasks run in parallel, there shouldn't be a name conflict. 
+ // Append uuid + segmentProcessorConfigBuilder.setSegmentNameGenerator( + new SimpleSegmentNameGenerator(offlineTableName, null, true, false)); + // Progress observer segmentProcessorConfigBuilder.setProgressObserver(p -> _eventObserver.notifyProgress(_pinotTaskConfig, p)); From 32628d0ddb171dd2464a17900ccf6ea111a9d7f4 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 15:33:27 +0530 Subject: [PATCH 50/72] nit --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 4b9ab432d0b8..8709b402a716 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -351,8 +351,7 @@ private void deleteInvalidOfflineSegments(String offlineTableName, // The expectedRealtimeToOfflineTaskResultInfo is confirmed to be // related to a failed task. Mark it as a failure, since executor will // then only replace expectedRealtimeToOfflineTaskResultInfo for the - // segments to be reprocessed. This is to avoid having multiple - // expectedRealtimeToOfflineTaskResultInfo for same segment. + // segments to be reprocessed. 
expectedRealtimeToOfflineTaskResultInfo.setTaskFailure(); } @@ -394,6 +393,10 @@ private Set getFailedTaskSegments( // get offline segments List segmentTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); + + // If not all corresponding offline segments to a realtime segment exists, + // it means there was an issue with prev minion task. And segment needs + // to be re-processed. boolean taskSuccessful = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); if (!taskSuccessful) { @@ -401,6 +404,9 @@ private Set getFailedTaskSegments( } } + // source of truth for re-processing task is segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId map. + // consider edge case where multiple segments were re-scheduled among multiple subtasks, but again + // one of the subtask failed. for (String segmentName : segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.keySet()) { String expectedRealtimeToOfflineTaskResultInfoId = segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); @@ -522,18 +528,13 @@ private List getSegmentsToDelete(List expectedCorrespondingOffli return segmentsToDelete; } - private boolean checkIfAllSegmentsExists(List expectedCorrespondingOfflineSegments, - Set offlineTableSegmentNames) { - - for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { - if (!offlineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { - // If not all corresponding offline segments to a realtime segment exists, - // it means there was an issue with prev minion task. And segment needs - // to be re-processed. 
+ private boolean checkIfAllSegmentsExists(List expectedSegments, + Set currentTableSegments) { + for (String expectedSegment : expectedSegments) { + if (!currentTableSegments.contains(expectedSegment)) { return false; } } - return true; } From cc70645824f6fe4806b53175b8f61cb3f1892ca5 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 15:39:39 +0530 Subject: [PATCH 51/72] clean up --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 8709b402a716..20a738f50251 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -126,12 +126,12 @@ public List generateTasks(List tableConfigs) { } // Get all segment metadata for completed segments (DONE/UPLOADED status). 
- List completedSegmentsZKMetadata = new ArrayList<>(); + List completedRealtimeSegmentsZKMetadata = new ArrayList<>(); Map partitionToLatestLLCSegmentName = new HashMap<>(); Set allPartitions = new HashSet<>(); - getCompletedSegmentsInfo(realtimeTableName, completedSegmentsZKMetadata, partitionToLatestLLCSegmentName, + getCompletedSegmentsInfo(realtimeTableName, completedRealtimeSegmentsZKMetadata, partitionToLatestLLCSegmentName, allPartitions); - if (completedSegmentsZKMetadata.isEmpty()) { + if (completedRealtimeSegmentsZKMetadata.isEmpty()) { LOGGER.info("No realtime-completed segments found for table: {}, skipping task generation: {}", realtimeTableName, taskType); continue; @@ -162,7 +162,8 @@ public List generateTasks(List tableConfigs) { realtimeTableName); int expectedVersion = realtimeToOfflineZNRecord != null ? realtimeToOfflineZNRecord.getVersion() : -1; RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = - getRTOTaskMetadata(realtimeTableName, completedSegmentsZKMetadata, bucketMs, realtimeToOfflineZNRecord); + getRTOTaskMetadata(realtimeTableName, completedRealtimeSegmentsZKMetadata, bucketMs, + realtimeToOfflineZNRecord); // Get watermark from RealtimeToOfflineSegmentsTaskMetadata ZNode. WindowStart = watermark. long windowStartMs = realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); @@ -180,18 +181,18 @@ public List generateTasks(List tableConfigs) { // In-case of previous minion task failures, get info // of failed minion subtasks. They need to be reprocessed. - Set failedTaskSegments = + Set failedTaskInputSegments = getFailedTaskSegments(realtimeToOfflineSegmentsTaskMetadata, existingOfflineTableSegmentNames); // In-case of partial failure of segments upload in prev minion task run, // data is inconsistent, delete the corresponding offline segments immediately. 
- if (!failedTaskSegments.isEmpty()) { - deleteInvalidOfflineSegments(offlineTableName, failedTaskSegments, existingOfflineTableSegmentNames, + if (!failedTaskInputSegments.isEmpty()) { + deleteInvalidOfflineSegments(offlineTableName, failedTaskInputSegments, existingOfflineTableSegmentNames, realtimeToOfflineSegmentsTaskMetadata); } List segmentsToBeReProcessed = - filterOutRemovedSegments(failedTaskSegments, completedSegmentsZKMetadata); + filterOutRemovedSegments(failedTaskInputSegments, completedRealtimeSegmentsZKMetadata); // if no segment to be reprocessed, no failure boolean prevMinionTaskSuccessful = segmentsToBeReProcessed.isEmpty(); @@ -226,9 +227,8 @@ public List generateTasks(List tableConfigs) { long windowEndMs = windowStartMs + bucketMs; // since window changed, pick new segments. segmentsToBeScheduled = - generateNewSegmentsToProcess(completedSegmentsZKMetadata, windowStartMs, windowEndMs, bucketMs, bufferMs, - bufferTimePeriod, - lastLLCSegmentPerPartition, realtimeToOfflineSegmentsTaskMetadata); + generateNewSegmentsToProcess(completedRealtimeSegmentsZKMetadata, windowStartMs, windowEndMs, bucketMs, + bufferMs, bufferTimePeriod, lastLLCSegmentPerPartition, realtimeToOfflineSegmentsTaskMetadata); } divideSegmentsAmongSubtasks(segmentsToBeScheduled, segmentNamesGroupList, segmentNameVsDownloadURL, @@ -418,15 +418,15 @@ private Set getFailedTaskSegments( return segmentNamesToReprocess; } - private List filterOutRemovedSegments(Set failedSegmentNames, - List currentSegments) { + private List filterOutRemovedSegments(Set segmentNames, + List currentTableSegments) { List segmentZKMetadataList = new ArrayList<>(); // filter out deleted/removed segments. 
- for (SegmentZKMetadata segmentZKMetadata : currentSegments) { + for (SegmentZKMetadata segmentZKMetadata : currentTableSegments) { String segmentName = segmentZKMetadata.getSegmentName(); - if (failedSegmentNames.contains(segmentName)) { + if (segmentNames.contains(segmentName)) { segmentZKMetadataList.add(segmentZKMetadata); } } From 903d519ff1365eec0eb0068359b5aa32096d9d8a Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 22:40:48 +0530 Subject: [PATCH 52/72] addresses PR comment --- .../common/minion/RealtimeToOfflineSegmentsTaskMetadata.java | 2 +- .../metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 9aaa129957f2..1fc75dc6de73 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -54,7 +54,7 @@ */ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { - private static final String WINDOW_START_KEY = "windowStartMs"; + private static final String WINDOW_START_KEY = "watermarkMs"; private static final String WINDOW_END_KEY = "windowEndMs"; private static final String COMMA_SEPARATOR = ","; private static final String SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY = "segmentVsExpectedRTOResultId"; diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 5ac476854a7e..d8ee6f564ceb 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ 
b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -43,7 +43,7 @@ public void testToFromZNRecord() { new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000); ZNRecord znRecord = metadata.toZNRecord(); assertEquals(znRecord.getId(), "testTable_REALTIME"); - assertEquals(znRecord.getSimpleField("windowStartMs"), "1000"); + assertEquals(znRecord.getSimpleField("watermarkMs"), "1000"); RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord); @@ -83,7 +83,7 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { ZNRecord znRecord = originalMetadata.toZNRecord(); assertEquals(znRecord.getId(), "testTable_REALTIME"); - assertEquals(znRecord.getSimpleField("windowStartMs"), "1000"); + assertEquals(znRecord.getSimpleField("watermarkMs"), "1000"); assertEquals(znRecord.getSimpleField("windowEndMs"), "2000"); Map> listFields = znRecord.getListFields(); Map> mapFields = znRecord.getMapFields(); From 066d925aa06bd939f41cfcc0fa0c5e3feba496ef Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Wed, 18 Dec 2024 23:26:15 +0530 Subject: [PATCH 53/72] nit --- .../RealtimeToOfflineSegmentsTaskExecutor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index e4d8636a9af4..c91ec7e56022 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -165,7 +165,7 @@ protected List convert(PinotTaskConfig pinotTaskConfig, // Since multiple subtasks run in parallel, there shouldn't be a name conflict. // Append uuid segmentProcessorConfigBuilder.setSegmentNameGenerator( - new SimpleSegmentNameGenerator(offlineTableName, null, true, false)); + new SimpleSegmentNameGenerator(rawTableName, null, true, false)); // Progress observer segmentProcessorConfigBuilder.setProgressObserver(p -> _eventObserver.notifyProgress(_pinotTaskConfig, p)); From 66ff5adcb60b4002da948e7794990e43af90ce5f Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 24 Dec 2024 00:05:47 +0530 Subject: [PATCH 54/72] Refactor var names --- ...ltInfo.java => ExpectedSubtaskResult.java} | 10 +-- ...RealtimeToOfflineSegmentsTaskMetadata.java | 85 ++++++++++--------- ...timeToOfflineSegmentsTaskMetadataTest.java | 64 +++++++------- ...RealtimeToOfflineSegmentsTaskExecutor.java | 12 +-- ...ealtimeToOfflineSegmentsTaskGenerator.java | 66 +++++++------- ...imeToOfflineSegmentsTaskGeneratorTest.java | 16 ++-- 6 files changed, 127 insertions(+), 126 deletions(-) rename pinot-common/src/main/java/org/apache/pinot/common/minion/{ExpectedRealtimeToOfflineTaskResultInfo.java => ExpectedSubtaskResult.java} (86%) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java similarity index 86% rename from pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java rename to pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java index ab22b60dff91..e3d718f01382 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedRealtimeToOfflineTaskResultInfo.java 
+++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java @@ -38,21 +38,21 @@ * when a prev minion task is failed. * */ -public class ExpectedRealtimeToOfflineTaskResultInfo { +public class ExpectedSubtaskResult { private final List _segmentsFrom; private final List _segmentsTo; private final String _id; private final String _taskID; private boolean _taskFailure = false; - public ExpectedRealtimeToOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, String taskID) { + public ExpectedSubtaskResult(List segmentsFrom, List segmentsTo, String taskID) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; _taskID = taskID; _id = UUID.randomUUID().toString(); } - public ExpectedRealtimeToOfflineTaskResultInfo(List segmentsFrom, List segmentsTo, + public ExpectedSubtaskResult(List segmentsFrom, List segmentsTo, String realtimeToOfflineSegmentsMapId, String taskID, boolean taskFailure) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; @@ -90,10 +90,10 @@ public boolean equals(Object o) { if (this == o) { return true; } - if (!(o instanceof ExpectedRealtimeToOfflineTaskResultInfo)) { + if (!(o instanceof ExpectedSubtaskResult)) { return false; } - ExpectedRealtimeToOfflineTaskResultInfo that = (ExpectedRealtimeToOfflineTaskResultInfo) o; + ExpectedSubtaskResult that = (ExpectedSubtaskResult) o; return Objects.equals(_id, that._id); } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 1fc75dc6de73..1a5ad6448adc 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -57,30 +57,30 @@ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String 
WINDOW_START_KEY = "watermarkMs"; private static final String WINDOW_END_KEY = "windowEndMs"; private static final String COMMA_SEPARATOR = ","; - private static final String SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY = "segmentVsExpectedRTOResultId"; + private static final String SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY = "segmentToExpectedSubtaskResultId"; private final String _tableNameWithType; private long _windowStartMs; private long _windowEndMs; - private final Map _idVsExpectedRealtimeToOfflineTaskResultInfo; - private final Map _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId; + private final Map _expectedSubtaskResultMap; + private final Map _segmentNameToExpectedSubtaskResultID; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs) { _windowStartMs = windowStartMs; _tableNameWithType = tableNameWithType; - _idVsExpectedRealtimeToOfflineTaskResultInfo = new HashMap<>(); - _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = new HashMap<>(); + _expectedSubtaskResultMap = new HashMap<>(); + _segmentNameToExpectedSubtaskResultID = new HashMap<>(); } public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs, long windowEndMs, - Map idVsExpectedRealtimeToOfflineTaskResultInfo, - Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId) { + Map expectedSubtaskResultMap, + Map segmentNameToExpectedSubtaskResultID) { _tableNameWithType = tableNameWithType; _windowStartMs = windowStartMs; - _idVsExpectedRealtimeToOfflineTaskResultInfo = idVsExpectedRealtimeToOfflineTaskResultInfo; + _expectedSubtaskResultMap = expectedSubtaskResultMap; _windowEndMs = windowEndMs; - _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId; + _segmentNameToExpectedSubtaskResultID = segmentNameToExpectedSubtaskResultID; } public String getTableNameWithType() { @@ -103,51 +103,51 @@ public void setWindowEndMs(long windowEndMs) { _windowEndMs 
= windowEndMs; } - public Map getIdVsExpectedRealtimeToOfflineTaskResultInfo() { - return _idVsExpectedRealtimeToOfflineTaskResultInfo; + public Map getExpectedSubtaskResultMap() { + return _expectedSubtaskResultMap; } - public Map getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId() { - return _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId; + public Map getSegmentNameToExpectedSubtaskResultID() { + return _segmentNameToExpectedSubtaskResultID; } - public void addExpectedRealtimeToOfflineSegmentsTaskResultInfo( - ExpectedRealtimeToOfflineTaskResultInfo newExpectedRealtimeToOfflineTaskResultInfo) { + public void addExpectedSubTaskResult( + ExpectedSubtaskResult newExpectedSubtaskResult) { - List segmentsFrom = newExpectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom(); + List segmentsFrom = newExpectedSubtaskResult.getSegmentsFrom(); for (String segmentName : segmentsFrom) { - if (_segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.containsKey(segmentName)) { + if (_segmentNameToExpectedSubtaskResultID.containsKey(segmentName)) { String prevExpectedRealtimeToOfflineTaskResultInfoId = - _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); + _segmentNameToExpectedSubtaskResultID.get(segmentName); - ExpectedRealtimeToOfflineTaskResultInfo prevExpectedRealtimeToOfflineTaskResultInfo = - _idVsExpectedRealtimeToOfflineTaskResultInfo.get(prevExpectedRealtimeToOfflineTaskResultInfoId); + ExpectedSubtaskResult prevExpectedSubtaskResult = + _expectedSubtaskResultMap.get(prevExpectedRealtimeToOfflineTaskResultInfoId); // check if prevExpectedRealtimeToOfflineTaskResultInfo is not null, since it could // have been removed in the same minion run previously. 
- if (prevExpectedRealtimeToOfflineTaskResultInfo != null) { - Preconditions.checkState(prevExpectedRealtimeToOfflineTaskResultInfo.isTaskFailure(), + if (prevExpectedSubtaskResult != null) { + Preconditions.checkState(prevExpectedSubtaskResult.isTaskFailure(), "ExpectedRealtimeToOfflineSegmentsTaskResult can only be replaced if it's of a failed task"); } } - _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.put(segmentName, - newExpectedRealtimeToOfflineTaskResultInfo.getId()); - _idVsExpectedRealtimeToOfflineTaskResultInfo.put(newExpectedRealtimeToOfflineTaskResultInfo.getId(), - newExpectedRealtimeToOfflineTaskResultInfo); + _segmentNameToExpectedSubtaskResultID.put(segmentName, + newExpectedSubtaskResult.getId()); + _expectedSubtaskResultMap.put(newExpectedSubtaskResult.getId(), + newExpectedSubtaskResult); } } public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long windowStartMs = znRecord.getLongField(WINDOW_START_KEY, 0); long windowEndMs = znRecord.getLongField(WINDOW_END_KEY, 0); - Map idVExpectedRealtimeToOfflineTaskResultInfoList = + Map expectedSubtaskResultMap = new HashMap<>(); Map> listFields = znRecord.getListFields(); for (Map.Entry> listField : listFields.entrySet()) { - String realtimeToOfflineSegmentsMapId = listField.getKey(); + String expectedSubtaskResultId = listField.getKey(); List value = listField.getValue(); Preconditions.checkState(value.size() == 4); @@ -157,17 +157,18 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco String taskID = value.get(2); boolean taskFailure = Boolean.parseBoolean(value.get(3)); - idVExpectedRealtimeToOfflineTaskResultInfoList.put(realtimeToOfflineSegmentsMapId, - new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, realtimeToOfflineSegmentsMapId, taskID, + expectedSubtaskResultMap.put(expectedSubtaskResultId, + new ExpectedSubtaskResult(segmentsFrom, segmentsTo, expectedSubtaskResultId, taskID, taskFailure) ); } Map> 
mapFields = znRecord.getMapFields(); - Map segmentNameVsExpectedRTOIDResult = mapFields.get(SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY); + Map segmentNameToExpectedSubtaskResultID = mapFields.get( + SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY); return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), windowStartMs, windowEndMs, - idVExpectedRealtimeToOfflineTaskResultInfoList, segmentNameVsExpectedRTOIDResult); + expectedSubtaskResultMap, segmentNameToExpectedSubtaskResultID); } public ZNRecord toZNRecord() { @@ -175,23 +176,23 @@ public ZNRecord toZNRecord() { znRecord.setLongField(WINDOW_START_KEY, _windowStartMs); znRecord.setLongField(WINDOW_END_KEY, _windowEndMs); - for (String expectedRealtimeToOfflineTaskResultInfoId : _idVsExpectedRealtimeToOfflineTaskResultInfo.keySet()) { - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = - _idVsExpectedRealtimeToOfflineTaskResultInfo.get(expectedRealtimeToOfflineTaskResultInfoId); + for (String expectedRealtimeToOfflineTaskResultInfoId : _expectedSubtaskResultMap.keySet()) { + ExpectedSubtaskResult expectedSubtaskResult = + _expectedSubtaskResultMap.get(expectedRealtimeToOfflineTaskResultInfoId); - String segmentsFrom = String.join(COMMA_SEPARATOR, expectedRealtimeToOfflineTaskResultInfo.getSegmentsFrom()); - String segmentsTo = String.join(COMMA_SEPARATOR, expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo()); - String taskId = expectedRealtimeToOfflineTaskResultInfo.getTaskID(); - boolean taskFailure = expectedRealtimeToOfflineTaskResultInfo.isTaskFailure(); + String segmentsFrom = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsFrom()); + String segmentsTo = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsTo()); + String taskId = expectedSubtaskResult.getTaskID(); + boolean taskFailure = expectedSubtaskResult.isTaskFailure(); List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId, Boolean.toString(taskFailure)); - String id = 
expectedRealtimeToOfflineTaskResultInfo.getId(); + String id = expectedSubtaskResult.getId(); znRecord.setListField(id, listEntry); } - znRecord.setMapField(SEGMENT_NAME_VS_EXPECTED_RTO_RESULT_ID_KEY, - _segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId); + znRecord.setMapField(SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY, + _segmentNameToExpectedSubtaskResultID); return znRecord; } } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index d8ee6f564ceb..4947a89c34cc 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -25,7 +25,7 @@ import java.util.Map; import java.util.Objects; import org.apache.helix.zookeeper.datamodel.ZNRecord; -import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; +import org.apache.pinot.common.minion.ExpectedSubtaskResult; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.testng.annotations.Test; @@ -53,28 +53,28 @@ public void testToFromZNRecord() { @Test public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { - Map idVsExpectedRealtimeToOfflineTaskResultInfo = + Map idVsExpectedRealtimeToOfflineTaskResultInfo = new HashMap<>(); - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = - new ExpectedRealtimeToOfflineTaskResultInfo( + ExpectedSubtaskResult expectedSubtaskResult = + new ExpectedSubtaskResult( Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), "1"); - ExpectedRealtimeToOfflineTaskResultInfo 
expectedRealtimeToOfflineTaskResultInfo1 = - new ExpectedRealtimeToOfflineTaskResultInfo( + ExpectedSubtaskResult expectedSubtaskResult1 = + new ExpectedSubtaskResult( Arrays.asList("githubEvents__0__0__20241213T2102Z", "githubEvents__0__0__20241213T2203Z"), Arrays.asList("githubEventsOffline__0__0__20241213T2032Z", "githubEventsOffline__0__0__20241213T2403Z"), "2"); - idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedRealtimeToOfflineTaskResultInfo.getId(), - expectedRealtimeToOfflineTaskResultInfo); - idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedRealtimeToOfflineTaskResultInfo1.getId(), - expectedRealtimeToOfflineTaskResultInfo1); + idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedSubtaskResult.getId(), + expectedSubtaskResult); + idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedSubtaskResult1.getId(), + expectedSubtaskResult1); ImmutableMap segmentNameVsId = ImmutableMap.of( - "githubEvents__0__0__20241213T2002Z", expectedRealtimeToOfflineTaskResultInfo.getId(), - "githubEvents__0__0__20241213T2003Z", expectedRealtimeToOfflineTaskResultInfo.getId(), - "githubEvents__0__0__20241213T2102Z", expectedRealtimeToOfflineTaskResultInfo1.getId(), - "githubEvents__0__0__20241213T2203Z", expectedRealtimeToOfflineTaskResultInfo1.getId() + "githubEvents__0__0__20241213T2002Z", expectedSubtaskResult.getId(), + "githubEvents__0__0__20241213T2003Z", expectedSubtaskResult.getId(), + "githubEvents__0__0__20241213T2102Z", expectedSubtaskResult1.getId(), + "githubEvents__0__0__20241213T2203Z", expectedSubtaskResult1.getId() ); RealtimeToOfflineSegmentsTaskMetadata originalMetadata = @@ -111,7 +111,7 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { } } - Map map = mapFields.get("segmentVsExpectedRTOResultId"); + Map map = mapFields.get("segmentToExpectedSubtaskResultId"); assertEquals(map, segmentNameVsId); RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = @@ -126,34 +126,34 @@ private boolean 
isEqual(RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineS assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), originalMetadata.getWindowStartMs()); assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), originalMetadata.getTableNameWithType()); - Map idVsExpectedRealtimeToOfflineTaskResultInfo = - realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); + Map idVsExpectedRealtimeToOfflineTaskResultInfo = + realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); for (String id : idVsExpectedRealtimeToOfflineTaskResultInfo.keySet()) { - ExpectedRealtimeToOfflineTaskResultInfo actualExpectedRealtimeToOfflineTaskResultInfo = + ExpectedSubtaskResult actualExpectedRealtimeToOfflineTaskResultInfo = idVsExpectedRealtimeToOfflineTaskResultInfo.get(id); - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = - originalMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo().get(id); - assert expectedRealtimeToOfflineTaskResultInfo != null; - assert isEqual(actualExpectedRealtimeToOfflineTaskResultInfo, expectedRealtimeToOfflineTaskResultInfo); + ExpectedSubtaskResult expectedSubtaskResult = + originalMetadata.getExpectedSubtaskResultMap().get(id); + assert expectedSubtaskResult != null; + assert isEqual(actualExpectedRealtimeToOfflineTaskResultInfo, expectedSubtaskResult); } assertEquals(segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId, - originalMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId()); + originalMetadata.getSegmentNameToExpectedSubtaskResultID()); return true; } - private boolean isEqual(ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo1, - 
ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo2) { - return Objects.equals(expectedRealtimeToOfflineTaskResultInfo1.getSegmentsFrom(), - expectedRealtimeToOfflineTaskResultInfo2.getSegmentsFrom()) && Objects.equals( - expectedRealtimeToOfflineTaskResultInfo1.getSegmentsTo(), - expectedRealtimeToOfflineTaskResultInfo2.getSegmentsTo()) && Objects.equals( - expectedRealtimeToOfflineTaskResultInfo1.getId(), expectedRealtimeToOfflineTaskResultInfo2.getId()) + private boolean isEqual(ExpectedSubtaskResult expectedSubtaskResult1, + ExpectedSubtaskResult expectedSubtaskResult2) { + return Objects.equals(expectedSubtaskResult1.getSegmentsFrom(), + expectedSubtaskResult2.getSegmentsFrom()) && Objects.equals( + expectedSubtaskResult1.getSegmentsTo(), + expectedSubtaskResult2.getSegmentsTo()) && Objects.equals( + expectedSubtaskResult1.getId(), expectedSubtaskResult2.getId()) && Objects.equals( - expectedRealtimeToOfflineTaskResultInfo1.getTaskID(), expectedRealtimeToOfflineTaskResultInfo2.getTaskID()); + expectedSubtaskResult1.getTaskID(), expectedSubtaskResult2.getTaskID()); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index c91ec7e56022..21f64febe920 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -30,7 +30,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import 
org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; -import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; +import org.apache.pinot.common.minion.ExpectedSubtaskResult; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.core.common.MinionConstants; import org.apache.pinot.core.common.MinionConstants.RealtimeToOfflineSegmentsTask; @@ -268,15 +268,15 @@ private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUplo RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = + ExpectedSubtaskResult expectedSubtaskResult = getExpectedRealtimeToOfflineTaskResultInfo(context); - realtimeToOfflineSegmentsTaskMetadata.addExpectedRealtimeToOfflineSegmentsTaskResultInfo( - expectedRealtimeToOfflineTaskResultInfo); + realtimeToOfflineSegmentsTaskMetadata.addExpectedSubTaskResult( + expectedSubtaskResult); return realtimeToOfflineSegmentsTaskMetadata; } - private ExpectedRealtimeToOfflineTaskResultInfo getExpectedRealtimeToOfflineTaskResultInfo( + private ExpectedSubtaskResult getExpectedRealtimeToOfflineTaskResultInfo( SegmentUploadContext context) { PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); @@ -290,6 +290,6 @@ private ExpectedRealtimeToOfflineTaskResultInfo getExpectedRealtimeToOfflineTask context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) .collect(Collectors.toList()); - return new ExpectedRealtimeToOfflineTaskResultInfo(segmentsFrom, segmentsTo, taskId); + return new ExpectedSubtaskResult(segmentsFrom, segmentsTo, taskId); } } diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 20a738f50251..9d3d3344e222 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -32,7 +32,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; -import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; +import org.apache.pinot.common.minion.ExpectedSubtaskResult; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.common.utils.LLCSegmentName; import org.apache.pinot.controller.helix.core.minion.generator.BaseTaskGenerator; @@ -215,10 +215,10 @@ public List generateTasks(List tableConfigs) { } else { // if all offline segments of prev minion tasks were successfully uploaded, // we can clear the state of prev minion tasks as now it's useless. - if (!realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(). + if (!realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(). 
isEmpty()) { - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId().clear(); - realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo().clear(); + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID().clear(); + realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap().clear(); // windowEndTime of prev minion task needs to be re-used for picking up the // next windowStartTime. This is useful for case where user changes minion config // after a minion task run was complete. So windowStartTime cannot be watermark + bucketMs @@ -329,30 +329,30 @@ private void deleteInvalidOfflineSegments(String offlineTableName, Set existingOfflineTableSegmentNames, RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata) { - Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); - Map idVsExpectedRealtimeToOfflineTaskResultInfo = - realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); + Map segmentNameToExpectedSubtaskResultID = + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); + Map expectedSubtaskResultMap = + realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); Set segmentsToBeDeleted = new HashSet<>(); - for (String realtimeSegment : realtimeSegmentsToBeReProcessed) { - String id = segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(realtimeSegment); + for (String realtimeSegmentName : realtimeSegmentsToBeReProcessed) { + String id = segmentNameToExpectedSubtaskResultID.get(realtimeSegmentName); Preconditions.checkNotNull(id); - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = - idVsExpectedRealtimeToOfflineTaskResultInfo.get(id); + ExpectedSubtaskResult expectedSubtaskResult = + expectedSubtaskResultMap.get(id); // if 
already marked as failure, no need to delete again. - if (expectedRealtimeToOfflineTaskResultInfo.isTaskFailure()) { + if (expectedSubtaskResult.isTaskFailure()) { continue; } - List expectedCorrespondingOfflineSegments = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); + List expectedCorrespondingOfflineSegments = expectedSubtaskResult.getSegmentsTo(); segmentsToBeDeleted.addAll( getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames)); // The expectedRealtimeToOfflineTaskResultInfo is confirmed to be // related to a failed task. Mark it as a failure, since executor will // then only replace expectedRealtimeToOfflineTaskResultInfo for the // segments to be reprocessed. - expectedRealtimeToOfflineTaskResultInfo.setTaskFailure(); + expectedSubtaskResult.setTaskFailure(); } if (!segmentsToBeDeleted.isEmpty()) { @@ -367,32 +367,32 @@ private Set getFailedTaskSegments( Set failedIds = new HashSet<>(); // Get all the ExpectedRealtimeToOfflineTaskResultInfo of prev minion task - Map idVsExpectedRealtimeToOfflineTaskResultInfoList = - realtimeToOfflineSegmentsTaskMetadata.getIdVsExpectedRealtimeToOfflineTaskResultInfo(); - Collection expectedRealtimeToOfflineTaskResultInfoList = - idVsExpectedRealtimeToOfflineTaskResultInfoList.values(); + Map expectedSubtaskResultMap = + realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); + Collection expectedSubtaskResultList = + expectedSubtaskResultMap.values(); - Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameVsExpectedRealtimeToOfflineTaskResultInfoId(); - Set expectedRealtimeToOfflineTaskResultInfoIds = - new HashSet<>(segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.values()); + Map segmentNameToExpectedSubtaskResultID = + realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); + Set expectedSubtaskResultIds = + new HashSet<>(segmentNameToExpectedSubtaskResultID.values()); 
Set segmentNamesToReprocess = new HashSet<>(); // Check what all offline segments are present currently - for (ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo - : expectedRealtimeToOfflineTaskResultInfoList) { + for (ExpectedSubtaskResult expectedSubtaskResult + : expectedSubtaskResultList) { - if (expectedRealtimeToOfflineTaskResultInfo.isTaskFailure()) { + if (expectedSubtaskResult.isTaskFailure()) { // if task is failure and is referenced by any segment, only then add to failed task. - if (expectedRealtimeToOfflineTaskResultInfoIds.contains(expectedRealtimeToOfflineTaskResultInfo.getId())) { - failedIds.add(expectedRealtimeToOfflineTaskResultInfo.getId()); + if (expectedSubtaskResultIds.contains(expectedSubtaskResult.getId())) { + failedIds.add(expectedSubtaskResult.getId()); } continue; } // get offline segments - List segmentTo = expectedRealtimeToOfflineTaskResultInfo.getSegmentsTo(); + List segmentTo = expectedSubtaskResult.getSegmentsTo(); // If not all corresponding offline segments to a realtime segment exists, // it means there was an issue with prev minion task. And segment needs @@ -400,16 +400,16 @@ private Set getFailedTaskSegments( boolean taskSuccessful = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); if (!taskSuccessful) { - failedIds.add(expectedRealtimeToOfflineTaskResultInfo.getId()); + failedIds.add(expectedSubtaskResult.getId()); } } - // source of truth for re-processing task is segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId map. + // source of truth for re-processing task is segmentNameToExpectedSubtaskResultID map. // consider edge case where multiple segments were re-scheduled among multiple subtasks, but again // one of the subtask failed. 
- for (String segmentName : segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.keySet()) { + for (String segmentName : segmentNameToExpectedSubtaskResultID.keySet()) { String expectedRealtimeToOfflineTaskResultInfoId = - segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId.get(segmentName); + segmentNameToExpectedSubtaskResultID.get(segmentName); if (failedIds.contains(expectedRealtimeToOfflineTaskResultInfoId)) { segmentNamesToReprocess.add(segmentName); } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 4587ac02507a..fd16ace773fc 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -30,7 +30,7 @@ import org.apache.helix.model.IdealState; import org.apache.helix.task.TaskState; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; -import org.apache.pinot.common.minion.ExpectedRealtimeToOfflineTaskResultInfo; +import org.apache.pinot.common.minion.ExpectedSubtaskResult; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.controller.helix.core.minion.ClusterInfoAccessor; @@ -448,19 +448,19 @@ public void testGenerateTasksWithSegmentUploadFailure() { } private RealtimeToOfflineSegmentsTaskMetadata getRealtimeToOfflineSegmentsTaskMetadata() { - Map 
idVsExpectedRealtimeToOfflineTaskResultInfo = + Map idVsExpectedRealtimeToOfflineTaskResultInfo = new HashMap<>(); - ExpectedRealtimeToOfflineTaskResultInfo expectedRealtimeToOfflineTaskResultInfo = - new ExpectedRealtimeToOfflineTaskResultInfo( + ExpectedSubtaskResult expectedSubtaskResult = + new ExpectedSubtaskResult( Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), "1"); - idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedRealtimeToOfflineTaskResultInfo.getId(), - expectedRealtimeToOfflineTaskResultInfo); + idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedSubtaskResult.getId(), + expectedSubtaskResult); ImmutableMap segmentNameVsId = ImmutableMap.of( - "githubEvents__0__0__20241213T2002Z", expectedRealtimeToOfflineTaskResultInfo.getId(), - "githubEvents__0__0__20241213T2003Z", expectedRealtimeToOfflineTaskResultInfo.getId() + "githubEvents__0__0__20241213T2002Z", expectedSubtaskResult.getId(), + "githubEvents__0__0__20241213T2003Z", expectedSubtaskResult.getId() ); return new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1589972400000L, 1590058800000L, From b307458e559f4bc500de03a22c6c154d5b46f838 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 24 Dec 2024 00:07:22 +0530 Subject: [PATCH 55/72] nit --- .../org/apache/pinot/common/minion/ExpectedSubtaskResult.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java index e3d718f01382..10366586e323 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java @@ -32,7 +32,7 @@ * The _segmentsTo denotes the expected offline segemnts. 
* The _id denotes the unique identifier of object. * The _taskID denotes the minion taskId. - * The _taskFailure denotes the status of minion task handling the + * The _taskFailure denotes the failure status of minion task handling the * current ExpectedResult. This is modified in * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskGenerator} * when a prev minion task is failed. From 2d1f0868a3f44b12dff5120126b66d11dbed6922 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 24 Dec 2024 00:19:58 +0530 Subject: [PATCH 56/72] nit --- .../common/minion/ExpectedSubtaskResult.java | 6 ++--- ...RealtimeToOfflineSegmentsTaskMetadata.java | 18 +++++++-------- ...timeToOfflineSegmentsTaskMetadataTest.java | 22 +++++++++---------- ...RealtimeToOfflineSegmentsTaskExecutor.java | 8 +++---- ...ealtimeToOfflineSegmentsTaskGenerator.java | 10 ++++----- ...imeToOfflineSegmentsTaskGeneratorTest.java | 6 ++--- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java index 10366586e323..77eae267a34e 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java @@ -24,7 +24,7 @@ /** - * ExpectedRealtimeOfflineTaskResultInfo is created in + * ExpectedRealtimeOfflineTaskResult is created in * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskExecutor} * before uploading offline segment(s) to the offline table. 
* @@ -53,10 +53,10 @@ public ExpectedSubtaskResult(List segmentsFrom, List segmentsTo, } public ExpectedSubtaskResult(List segmentsFrom, List segmentsTo, - String realtimeToOfflineSegmentsMapId, String taskID, boolean taskFailure) { + String id, String taskID, boolean taskFailure) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; - _id = realtimeToOfflineSegmentsMapId; + _id = id; _taskID = taskID; _taskFailure = taskFailure; } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 1a5ad6448adc..a40b9380ecd8 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -31,8 +31,8 @@ * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. * The _windowStartMs denotes the time (exclusive) until which it's certain that tasks have been * completed successfully. - * The _expectedRealtimeToOfflineSegmentsTaskResultList denotes the expected RTO tasks result info. - * This list can contain both completed and in-completed Tasks expected Results. This list is used by + * The _expectedSubtaskResultMap contains the expected RTO tasks result info. + * This map can contain both completed and in-completed Tasks expected Results. This map is used by * generator to validate whether a potential segment (for RTO task) has already been successfully * processed as a RTO task in the past or not. * The _windowStartMs and _windowEndMs denote the window bucket time @@ -49,7 +49,7 @@ * PinotTaskExecutor: * The same windowStartMs is used by the RealtimeToOfflineSegmentsTaskExecutor, to: * - Verify that it's running the latest task scheduled by the task generator. 
- * - The ExpectedRealtimeToOfflineSegmentsTaskResultList is updated before the offline segments + * - The _expectedSubtaskResultMap is updated before the offline segments * are uploaded to the table. */ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { @@ -118,17 +118,17 @@ public void addExpectedSubTaskResult( for (String segmentName : segmentsFrom) { if (_segmentNameToExpectedSubtaskResultID.containsKey(segmentName)) { - String prevExpectedRealtimeToOfflineTaskResultInfoId = + String prevExpectedSubtaskResultID = _segmentNameToExpectedSubtaskResultID.get(segmentName); ExpectedSubtaskResult prevExpectedSubtaskResult = - _expectedSubtaskResultMap.get(prevExpectedRealtimeToOfflineTaskResultInfoId); + _expectedSubtaskResultMap.get(prevExpectedSubtaskResultID); - // check if prevExpectedRealtimeToOfflineTaskResultInfo is not null, since it could + // check if prevExpectedRealtimeToOfflineSubtaskResult is not null, since it could // have been removed in the same minion run previously. 
if (prevExpectedSubtaskResult != null) { Preconditions.checkState(prevExpectedSubtaskResult.isTaskFailure(), - "ExpectedRealtimeToOfflineSegmentsTaskResult can only be replaced if it's of a failed task"); + "ExpectedSubtaskResult can only be replaced if it's of a failed task"); } } @@ -176,9 +176,9 @@ public ZNRecord toZNRecord() { znRecord.setLongField(WINDOW_START_KEY, _windowStartMs); znRecord.setLongField(WINDOW_END_KEY, _windowEndMs); - for (String expectedRealtimeToOfflineTaskResultInfoId : _expectedSubtaskResultMap.keySet()) { + for (String expectedSubtaskResultID : _expectedSubtaskResultMap.keySet()) { ExpectedSubtaskResult expectedSubtaskResult = - _expectedSubtaskResultMap.get(expectedRealtimeToOfflineTaskResultInfoId); + _expectedSubtaskResultMap.get(expectedSubtaskResultID); String segmentsFrom = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsFrom()); String segmentsTo = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsTo()); diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 4947a89c34cc..2e1ae291d02b 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -53,7 +53,7 @@ public void testToFromZNRecord() { @Test public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { - Map idVsExpectedRealtimeToOfflineTaskResultInfo = + Map idVsExpectedRealtimeToOfflineTaskResult = new HashMap<>(); ExpectedSubtaskResult expectedSubtaskResult = new ExpectedSubtaskResult( @@ -65,9 +65,9 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { Arrays.asList("githubEvents__0__0__20241213T2102Z", "githubEvents__0__0__20241213T2203Z"), 
Arrays.asList("githubEventsOffline__0__0__20241213T2032Z", "githubEventsOffline__0__0__20241213T2403Z"), "2"); - idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedSubtaskResult.getId(), + idVsExpectedRealtimeToOfflineTaskResult.put(expectedSubtaskResult.getId(), expectedSubtaskResult); - idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedSubtaskResult1.getId(), + idVsExpectedRealtimeToOfflineTaskResult.put(expectedSubtaskResult1.getId(), expectedSubtaskResult1); ImmutableMap segmentNameVsId = ImmutableMap.of( @@ -79,7 +79,7 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { RealtimeToOfflineSegmentsTaskMetadata originalMetadata = new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000, 2000, - idVsExpectedRealtimeToOfflineTaskResultInfo, segmentNameVsId); + idVsExpectedRealtimeToOfflineTaskResult, segmentNameVsId); ZNRecord znRecord = originalMetadata.toZNRecord(); assertEquals(znRecord.getId(), "testTable_REALTIME"); @@ -126,21 +126,21 @@ private boolean isEqual(RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineS assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), originalMetadata.getWindowStartMs()); assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), originalMetadata.getTableNameWithType()); - Map idVsExpectedRealtimeToOfflineTaskResultInfo = + Map idVsExpectedRealtimeToOfflineTaskResult = realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); - Map segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId = + Map segmentNameVsExpectedRealtimeToOfflineTaskResultId = realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); - for (String id : idVsExpectedRealtimeToOfflineTaskResultInfo.keySet()) { - ExpectedSubtaskResult actualExpectedRealtimeToOfflineTaskResultInfo = - idVsExpectedRealtimeToOfflineTaskResultInfo.get(id); + for (String id : idVsExpectedRealtimeToOfflineTaskResult.keySet()) { + ExpectedSubtaskResult 
actualExpectedRealtimeToOfflineTaskResult = + idVsExpectedRealtimeToOfflineTaskResult.get(id); ExpectedSubtaskResult expectedSubtaskResult = originalMetadata.getExpectedSubtaskResultMap().get(id); assert expectedSubtaskResult != null; - assert isEqual(actualExpectedRealtimeToOfflineTaskResultInfo, expectedSubtaskResult); + assert isEqual(actualExpectedRealtimeToOfflineTaskResult, expectedSubtaskResult); } - assertEquals(segmentNameVsExpectedRealtimeToOfflineTaskResultInfoId, + assertEquals(segmentNameVsExpectedRealtimeToOfflineTaskResultId, originalMetadata.getSegmentNameToExpectedSubtaskResultID()); return true; diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 21f64febe920..969cafa897b3 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -70,7 +70,7 @@ * located at MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask * It should match the windowStartMs. * - * Before the segments are uploaded, this task updates the ExpectedRealtimeToOfflineTaskResultInfoList + * Before the segments are uploaded, this task updates the ExpectedRealtimeToOfflineTaskResultList * in the minion task metadata ZNode. * The znode version is checked during update, retrying until max attempts and version of znode is equal to expected. 
* Reason for above is that, since multiple subtasks run in parallel, there can be race condition @@ -224,7 +224,7 @@ protected void preUploadSegments(SegmentUploadContext context) RealtimeToOfflineSegmentsTask.TASK_TYPE); int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); - // Adding ExpectedRealtimeToOfflineSegmentsTaskResultInfo might fail. + // Adding ExpectedRealtimeToOfflineSegmentsTaskResult might fail. // In-case of failure there will be runtime exception thrown RealtimeToOfflineSegmentsTaskMetadata updatedRealtimeToOfflineSegmentsTaskMetadata = getUpdatedTaskMetadata(context, realtimeToOfflineSegmentsTaskZNRecord); @@ -269,14 +269,14 @@ private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUplo RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); ExpectedSubtaskResult expectedSubtaskResult = - getExpectedRealtimeToOfflineTaskResultInfo(context); + getExpectedRealtimeToOfflineTaskResult(context); realtimeToOfflineSegmentsTaskMetadata.addExpectedSubTaskResult( expectedSubtaskResult); return realtimeToOfflineSegmentsTaskMetadata; } - private ExpectedSubtaskResult getExpectedRealtimeToOfflineTaskResultInfo( + private ExpectedSubtaskResult getExpectedRealtimeToOfflineTaskResult( SegmentUploadContext context) { PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 9d3d3344e222..3adacf9a95dd 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -348,9 +348,9 @@ private void deleteInvalidOfflineSegments(String offlineTableName, List expectedCorrespondingOfflineSegments = expectedSubtaskResult.getSegmentsTo(); segmentsToBeDeleted.addAll( getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames)); - // The expectedRealtimeToOfflineTaskResultInfo is confirmed to be + // The expectedRealtimeToOfflineTaskResult is confirmed to be // related to a failed task. Mark it as a failure, since executor will - // then only replace expectedRealtimeToOfflineTaskResultInfo for the + // then only replace expectedRealtimeToOfflineTaskResult for the // segments to be reprocessed. expectedSubtaskResult.setTaskFailure(); } @@ -366,7 +366,7 @@ private Set getFailedTaskSegments( Set existingOfflineTableSegmentNames) { Set failedIds = new HashSet<>(); - // Get all the ExpectedRealtimeToOfflineTaskResultInfo of prev minion task + // Get all the ExpectedRealtimeToOfflineTaskResult of prev minion task Map expectedSubtaskResultMap = realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); Collection expectedSubtaskResultList = @@ -408,9 +408,9 @@ private Set getFailedTaskSegments( // consider edge case where multiple segments were re-scheduled among multiple subtasks, but again // one of the subtask failed. 
for (String segmentName : segmentNameToExpectedSubtaskResultID.keySet()) { - String expectedRealtimeToOfflineTaskResultInfoId = + String expectedRealtimeToOfflineTaskResultId = segmentNameToExpectedSubtaskResultID.get(segmentName); - if (failedIds.contains(expectedRealtimeToOfflineTaskResultInfoId)) { + if (failedIds.contains(expectedRealtimeToOfflineTaskResultId)) { segmentNamesToReprocess.add(segmentName); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index fd16ace773fc..fac76b32b7d3 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -448,14 +448,14 @@ public void testGenerateTasksWithSegmentUploadFailure() { } private RealtimeToOfflineSegmentsTaskMetadata getRealtimeToOfflineSegmentsTaskMetadata() { - Map idVsExpectedRealtimeToOfflineTaskResultInfo = + Map idVsExpectedRealtimeToOfflineTaskResult = new HashMap<>(); ExpectedSubtaskResult expectedSubtaskResult = new ExpectedSubtaskResult( Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), "1"); - idVsExpectedRealtimeToOfflineTaskResultInfo.put(expectedSubtaskResult.getId(), + idVsExpectedRealtimeToOfflineTaskResult.put(expectedSubtaskResult.getId(), expectedSubtaskResult); ImmutableMap segmentNameVsId 
= ImmutableMap.of( @@ -464,7 +464,7 @@ private RealtimeToOfflineSegmentsTaskMetadata getRealtimeToOfflineSegmentsTaskMe ); return new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1589972400000L, 1590058800000L, - idVsExpectedRealtimeToOfflineTaskResultInfo, segmentNameVsId); + idVsExpectedRealtimeToOfflineTaskResult, segmentNameVsId); } /** From f183b85f3fd2301d618df0aba87b725803fa5fad Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 24 Dec 2024 00:23:26 +0530 Subject: [PATCH 57/72] nit --- .../RealtimeToOfflineSegmentsTaskExecutor.java | 6 +++--- .../RealtimeToOfflineSegmentsTaskGenerator.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index 969cafa897b3..c0e2874dc22b 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -70,7 +70,7 @@ * located at MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask * It should match the windowStartMs. * - * Before the segments are uploaded, this task updates the ExpectedRealtimeToOfflineTaskResultList + * Before the segments are uploaded, this task updates the _expectedSubtaskResultMap * in the minion task metadata ZNode. * The znode version is checked during update, retrying until max attempts and version of znode is equal to expected. 
* Reason for above is that, since multiple subtasks run in parallel, there can be race condition @@ -269,14 +269,14 @@ private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUplo RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); ExpectedSubtaskResult expectedSubtaskResult = - getExpectedRealtimeToOfflineTaskResult(context); + getExpectedSubtaskResult(context); realtimeToOfflineSegmentsTaskMetadata.addExpectedSubTaskResult( expectedSubtaskResult); return realtimeToOfflineSegmentsTaskMetadata; } - private ExpectedSubtaskResult getExpectedRealtimeToOfflineTaskResult( + private ExpectedSubtaskResult getExpectedSubtaskResult( SegmentUploadContext context) { PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 3adacf9a95dd..482cdc4f2e60 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -408,9 +408,9 @@ private Set getFailedTaskSegments( // consider edge case where multiple segments were re-scheduled among multiple subtasks, but again // one of the subtask failed. 
for (String segmentName : segmentNameToExpectedSubtaskResultID.keySet()) { - String expectedRealtimeToOfflineTaskResultId = + String expectedSubtaskResultID = segmentNameToExpectedSubtaskResultID.get(segmentName); - if (failedIds.contains(expectedRealtimeToOfflineTaskResultId)) { + if (failedIds.contains(expectedSubtaskResultID)) { segmentNamesToReprocess.add(segmentName); } } From 46bbd201a0382a16ade769a5f599b5dc65205dc6 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 30 Dec 2024 12:39:25 +0530 Subject: [PATCH 58/72] minor edge cases --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 68 +++++++++---------- ...ealtimeToOfflineSegmentsTaskGenerator.java | 2 +- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index a40b9380ecd8..296c43f8ad2b 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -28,29 +28,25 @@ /** - * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. - * The _windowStartMs denotes the time (exclusive) until which it's certain that tasks have been - * completed successfully. - * The _expectedSubtaskResultMap contains the expected RTO tasks result info. - * This map can contain both completed and in-completed Tasks expected Results. This map is used by - * generator to validate whether a potential segment (for RTO task) has already been successfully - * processed as a RTO task in the past or not. - * The _windowStartMs and _windowEndMs denote the window bucket time - * of currently not successfully completed minion task. 
bucket: [_windowStartMs, _windowEndMs) - * The window is updated by generator when it's certain that prev minon task run is successful. - * + * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. The _windowStartMs + * denotes the time (exclusive) until which it's certain that tasks have been completed successfully. The + * _expectedSubtaskResultMap contains the expected RTO tasks result info. This map can contain both + * completed and in-completed Tasks expected Results. This map is used by generator to validate whether a potential + * segment (for RTO task) has already been successfully processed as a RTO task in the past or not. The + * _windowStartMs and _windowEndMs denote the window bucket time of currently not + * successfully completed minion task. bucket: [_windowStartMs, _windowEndMs) The window is updated by generator when + * it's certain that prev minon task run is successful. + *

* This gets serialized and stored in zookeeper under the path * MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask - * - * PinotTaskGenerator: - * The _windowStartMs> is used by the RealtimeToOfflineSegmentsTaskGenerator, - * to determine the window of execution of the prev task based on which it generates new task. - * - * PinotTaskExecutor: - * The same windowStartMs is used by the RealtimeToOfflineSegmentsTaskExecutor, to: + *

+ * PinotTaskGenerator: The _windowStartMs is used by the + * RealtimeToOfflineSegmentsTaskGenerator, to determine the window of execution of the prev task based on + * which it generates new task. + *

+ * PinotTaskExecutor: The same windowStartMs is used by the RealtimeToOfflineSegmentsTaskExecutor, to: * - Verify that it's running the latest task scheduled by the task generator. - * - The _expectedSubtaskResultMap is updated before the offline segments - * are uploaded to the table. + * - The _expectedSubtaskResultMap is updated before the offline segments are uploaded to the table. */ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { @@ -146,26 +142,30 @@ public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znReco new HashMap<>(); Map> listFields = znRecord.getListFields(); - for (Map.Entry> listField : listFields.entrySet()) { - String expectedSubtaskResultId = listField.getKey(); + if (listFields != null) { + for (Map.Entry> listField : listFields.entrySet()) { + String expectedSubtaskResultId = listField.getKey(); - List value = listField.getValue(); - Preconditions.checkState(value.size() == 4); + List value = listField.getValue(); + Preconditions.checkState(value.size() == 4); - List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); - List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); - String taskID = value.get(2); - boolean taskFailure = Boolean.parseBoolean(value.get(3)); + List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); + List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); + String taskID = value.get(2); + boolean taskFailure = Boolean.parseBoolean(value.get(3)); - expectedSubtaskResultMap.put(expectedSubtaskResultId, - new ExpectedSubtaskResult(segmentsFrom, segmentsTo, expectedSubtaskResultId, taskID, - taskFailure) - ); + expectedSubtaskResultMap.put(expectedSubtaskResultId, + new ExpectedSubtaskResult(segmentsFrom, segmentsTo, expectedSubtaskResultId, taskID, + taskFailure) + ); + } } Map> mapFields = znRecord.getMapFields(); - Map segmentNameToExpectedSubtaskResultID = 
mapFields.get( - SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY); + Map segmentNameToExpectedSubtaskResultID = new HashMap<>(); + if (mapFields != null) { + segmentNameToExpectedSubtaskResultID = mapFields.get(SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY); + } return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), windowStartMs, windowEndMs, expectedSubtaskResultMap, segmentNameToExpectedSubtaskResultID); diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 482cdc4f2e60..fde0b080c67c 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -93,7 +93,7 @@ public class RealtimeToOfflineSegmentsTaskGenerator extends BaseTaskGenerator { private static final String DEFAULT_BUCKET_PERIOD = "1d"; private static final String DEFAULT_BUFFER_PERIOD = "2d"; - private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = 50_000_000; + private static final int DEFAULT_MAX_NUM_RECORDS_PER_TASK = Integer.MAX_VALUE; @Override public String getTaskType() { From 00cc1bc63856efe19ee238e4f721b78c448cf530 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 30 Dec 2024 12:43:41 +0530 Subject: [PATCH 59/72] nit --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 296c43f8ad2b..719fb1274407 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -176,19 +176,21 @@ public ZNRecord toZNRecord() { znRecord.setLongField(WINDOW_START_KEY, _windowStartMs); znRecord.setLongField(WINDOW_END_KEY, _windowEndMs); - for (String expectedSubtaskResultID : _expectedSubtaskResultMap.keySet()) { - ExpectedSubtaskResult expectedSubtaskResult = - _expectedSubtaskResultMap.get(expectedSubtaskResultID); + if (_expectedSubtaskResultMap != null) { + for (String expectedSubtaskResultID : _expectedSubtaskResultMap.keySet()) { + ExpectedSubtaskResult expectedSubtaskResult = + _expectedSubtaskResultMap.get(expectedSubtaskResultID); - String segmentsFrom = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsFrom()); - String segmentsTo = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsTo()); - String taskId = expectedSubtaskResult.getTaskID(); - boolean taskFailure = expectedSubtaskResult.isTaskFailure(); + String segmentsFrom = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsFrom()); + String segmentsTo = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsTo()); + String taskId = expectedSubtaskResult.getTaskID(); + boolean taskFailure = expectedSubtaskResult.isTaskFailure(); - List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId, Boolean.toString(taskFailure)); + List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId, Boolean.toString(taskFailure)); - String id = expectedSubtaskResult.getId(); - znRecord.setListField(id, listEntry); + String id = expectedSubtaskResult.getId(); + znRecord.setListField(id, 
listEntry); + } } znRecord.setMapField(SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY, From dca57364d3c3e4f9aa0364cb80d64bbedf225034 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 30 Dec 2024 12:44:22 +0530 Subject: [PATCH 60/72] fixes lintg --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 719fb1274407..255758f162a9 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -1,20 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.pinot.common.minion; From 6040793768effa43072030ab8a721ea9126a9ccb Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 30 Dec 2024 12:46:19 +0530 Subject: [PATCH 61/72] nit --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 255758f162a9..719fb1274407 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -1,14 +1,20 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE - * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.pinot.common.minion; From 7380368afef6dc3dcf3d71fa89b2c76e56b6ea5b Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Thu, 23 Jan 2025 01:07:12 +0530 Subject: [PATCH 62/72] minor refactoring --- ...RealtimeToOfflineSegmentsTaskMetadata.java | 8 ++----- ...ealtimeToOfflineSegmentsTaskGenerator.java | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 719fb1274407..51f675f3d630 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -120,12 +120,8 @@ public void addExpectedSubTaskResult( ExpectedSubtaskResult prevExpectedSubtaskResult = _expectedSubtaskResultMap.get(prevExpectedSubtaskResultID); - // check if prevExpectedRealtimeToOfflineSubtaskResult is not null, since it could - // have been removed in the same minion run previously. 
- if (prevExpectedSubtaskResult != null) { - Preconditions.checkState(prevExpectedSubtaskResult.isTaskFailure(), - "ExpectedSubtaskResult can only be replaced if it's of a failed task"); - } + Preconditions.checkState(prevExpectedSubtaskResult.isTaskFailure(), + "ExpectedSubtaskResult can only be replaced if it's of a failed task"); } _segmentNameToExpectedSubtaskResultID.put(segmentName, diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 8e65e0e9e75a..a03ccfe091de 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -35,6 +35,7 @@ import org.apache.pinot.common.minion.ExpectedSubtaskResult; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.common.utils.LLCSegmentName; +import org.apache.pinot.controller.helix.core.PinotResourceManagerResponse; import org.apache.pinot.controller.helix.core.minion.generator.BaseTaskGenerator; import org.apache.pinot.controller.helix.core.minion.generator.PinotTaskGenerator; import org.apache.pinot.controller.helix.core.minion.generator.TaskGeneratorUtils; @@ -192,7 +193,7 @@ public List generateTasks(List tableConfigs) { } List segmentsToBeReProcessed = - filterOutRemovedSegments(failedTaskInputSegments, completedRealtimeSegmentsZKMetadata); + filterOutDeletedSegments(failedTaskInputSegments, completedRealtimeSegmentsZKMetadata); // 
if no segment to be reprocessed, no failure boolean prevMinionTaskSuccessful = segmentsToBeReProcessed.isEmpty(); @@ -202,7 +203,7 @@ public List generateTasks(List tableConfigs) { // maxNumRecordsPerTask is used to divide a minion tasks among // multiple subtasks to improve performance. - int maxNumRecordsPerTask = + int maxNumRecordsPerSubTask = taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY) != null ? Integer.parseInt( taskConfigs.get(MinionConstants.RealtimeToOfflineSegmentsTask.MAX_NUM_RECORDS_PER_TASK_KEY)) @@ -232,7 +233,7 @@ public List generateTasks(List tableConfigs) { } divideSegmentsAmongSubtasks(segmentsToBeScheduled, segmentNamesGroupList, segmentNameVsDownloadURL, - maxNumRecordsPerTask); + maxNumRecordsPerSubTask); if (segmentNamesGroupList.isEmpty()) { continue; @@ -336,6 +337,7 @@ private void deleteInvalidOfflineSegments(String offlineTableName, realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); Set segmentsToBeDeleted = new HashSet<>(); + List subtasksToBeMarkedAsFailed = new ArrayList<>(); for (String realtimeSegmentName : realtimeSegmentsToBeReProcessed) { String id = segmentNameToExpectedSubtaskResultID.get(realtimeSegmentName); @@ -353,12 +355,19 @@ private void deleteInvalidOfflineSegments(String offlineTableName, // related to a failed task. Mark it as a failure, since executor will // then only replace expectedRealtimeToOfflineTaskResult for the // segments to be reprocessed. - expectedSubtaskResult.setTaskFailure(); + subtasksToBeMarkedAsFailed.add(expectedSubtaskResult); } if (!segmentsToBeDeleted.isEmpty()) { - _clusterInfoAccessor.getPinotHelixResourceManager() + PinotResourceManagerResponse pinotResourceManagerResponse = _clusterInfoAccessor.getPinotHelixResourceManager() .deleteSegments(offlineTableName, new ArrayList<>(segmentsToBeDeleted)); + + if (pinotResourceManagerResponse.isSuccessful()) { + // Invalid segments are deleted, set expectedSubtaskResults as failed. 
+ for (ExpectedSubtaskResult expectedSubtaskResult : subtasksToBeMarkedAsFailed) { + expectedSubtaskResult.setTaskFailure(); + } + } } } @@ -419,7 +428,7 @@ private Set getFailedTaskSegments( return segmentNamesToReprocess; } - private List filterOutRemovedSegments(Set segmentNames, + private List filterOutDeletedSegments(Set segmentNames, List currentTableSegments) { List segmentZKMetadataList = new ArrayList<>(); From f4371c2538c889898e5ac674af2906bacddcae68 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Thu, 23 Jan 2025 01:13:05 +0530 Subject: [PATCH 63/72] throws exception if failed to delete invalid segment --- .../RealtimeToOfflineSegmentsTaskGenerator.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index a03ccfe091de..d13499cd0e0f 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -367,6 +367,8 @@ private void deleteInvalidOfflineSegments(String offlineTableName, for (ExpectedSubtaskResult expectedSubtaskResult : subtasksToBeMarkedAsFailed) { expectedSubtaskResult.setTaskFailure(); } + } else { + throw new RuntimeException(String.format("unable to delete invalid offline segments: %s", segmentsToBeDeleted)); } } } From d00ea279cc0051e7f2b8f2664a88442e00b08ed6 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Thu, 23 Jan 2025 01:47:13 +0530 Subject: [PATCH 64/72] Adds logs 
--- .../RealtimeToOfflineSegmentsTaskExecutor.java | 15 +++++++++------ .../RealtimeToOfflineSegmentsTaskGenerator.java | 6 ++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index c0e2874dc22b..fc1e2424bf84 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -215,7 +215,9 @@ protected void preUploadSegments(SegmentUploadContext context) throws Exception { super.preUploadSegments(context); String realtimeTableName = context.getTableNameWithType(); - int attemptCount; + PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); + String taskId = pinotTaskConfig.getTaskId(); + int attemptCount = 0; try { attemptCount = DEFAULT_RETRY_POLICY.attempt(() -> { try { @@ -237,22 +239,23 @@ protected void preUploadSegments(SegmentUploadContext context) return true; } catch (ZkBadVersionException e) { LOGGER.info( - "Version changed while updating num of subtasks left in RTO task metadata for table: {}, Retrying.", - realtimeTableName); + "Version changed while updating num of subtasks left in RTO task metadata for table: {}, taskId: {}, " + + "Retrying.", + realtimeTableName, taskId); return false; } }); } catch (Exception e) { String errorMsg = String.format("Failed to update the RealtimeToOfflineSegmentsTaskMetadata during preUploadSegments. 
" - + "(tableName = %s)", realtimeTableName); + + "(tableName = %s), (attemptCount = %d), (taskId = %s)", realtimeTableName, attemptCount, taskId); LOGGER.error(errorMsg, e); throw new RuntimeException(errorMsg, e); } LOGGER.info( "Successfully updated the RealtimeToOfflineSegmentsTaskMetadata during preUploadSegments for table: {}, " - + "attemptCount: {}", - realtimeTableName, attemptCount); + + "attemptCount: {}, taskId: {}", + realtimeTableName, attemptCount, taskId); } @Override diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index d13499cd0e0f..d1d0d71436ab 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -188,6 +188,8 @@ public List generateTasks(List tableConfigs) { // In-case of partial failure of segments upload in prev minion task run, // data is inconsistent, delete the corresponding offline segments immediately. if (!failedTaskInputSegments.isEmpty()) { + LOGGER.warn("found prev minion task failures for table: {}. 
failedTaskInputSegments: {}", realtimeTableName, + failedTaskInputSegments); deleteInvalidOfflineSegments(offlineTableName, failedTaskInputSegments, existingOfflineTableSegmentNames, realtimeToOfflineSegmentsTaskMetadata); } @@ -212,6 +214,9 @@ public List generateTasks(List tableConfigs) { List segmentsToBeScheduled; if (!prevMinionTaskSuccessful) { + LOGGER.warn( + "Found prev minion task failures. Re-Scheduling previously failed task input segments: {} of table: {}", + segmentsToBeReProcessed, realtimeTableName); segmentsToBeScheduled = segmentsToBeReProcessed; } else { // if all offline segments of prev minion tasks were successfully uploaded, @@ -359,6 +364,7 @@ private void deleteInvalidOfflineSegments(String offlineTableName, } if (!segmentsToBeDeleted.isEmpty()) { + LOGGER.warn("Deleting invalid offline segments: {} of table: {}", segmentsToBeDeleted, offlineTableName); PinotResourceManagerResponse pinotResourceManagerResponse = _clusterInfoAccessor.getPinotHelixResourceManager() .deleteSegments(offlineTableName, new ArrayList<>(segmentsToBeDeleted)); From 1dda7caa53942229ce930a0a18fb451f6c9429f6 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 26 Jan 2025 00:31:23 +0530 Subject: [PATCH 65/72] simplifies code --- ...ealtimeToOfflineCheckpointCheckPoint.java} | 56 ++---- ...RealtimeToOfflineSegmentsTaskMetadata.java | 116 +++++------- ...timeToOfflineSegmentsTaskMetadataTest.java | 98 ++++------ ...RealtimeToOfflineSegmentsTaskExecutor.java | 26 +-- ...ealtimeToOfflineSegmentsTaskGenerator.java | 174 +++++++----------- ...imeToOfflineSegmentsTaskGeneratorTest.java | 25 +-- 6 files changed, 193 insertions(+), 302 deletions(-) rename pinot-common/src/main/java/org/apache/pinot/common/minion/{ExpectedSubtaskResult.java => RealtimeToOfflineCheckpointCheckPoint.java} (61%) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java 
b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineCheckpointCheckPoint.java similarity index 61% rename from pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java rename to pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineCheckpointCheckPoint.java index 77eae267a34e..0db0992a566a 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/ExpectedSubtaskResult.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineCheckpointCheckPoint.java @@ -18,47 +18,48 @@ */ package org.apache.pinot.common.minion; -import java.util.List; -import java.util.Objects; +import java.util.Set; import java.util.UUID; /** - * ExpectedRealtimeOfflineTaskResult is created in + * RealtimeToOfflineCheckpointCheckPoint is created in * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskExecutor} * before uploading offline segment(s) to the offline table. * + * RealtimeToOfflineCheckpointCheckPoint is ExpectedSubtaskResult. + * * The _segmentsFrom denotes the input RealtimeSegments. * The _segmentsTo denotes the expected offline segemnts. * The _id denotes the unique identifier of object. * The _taskID denotes the minion taskId. - * The _taskFailure denotes the failure status of minion task handling the - * current ExpectedResult. This is modified in + * The _failed denotes the failure status of minion subtask handling the + * checkpoint. This is modified in * {@link org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments.RealtimeToOfflineSegmentsTaskGenerator} * when a prev minion task is failed. 
* */ -public class ExpectedSubtaskResult { - private final List _segmentsFrom; - private final List _segmentsTo; +public class RealtimeToOfflineCheckpointCheckPoint { + private final Set _segmentsFrom; + private final Set _segmentsTo; private final String _id; private final String _taskID; - private boolean _taskFailure = false; + private boolean _failed = false; - public ExpectedSubtaskResult(List segmentsFrom, List segmentsTo, String taskID) { + public RealtimeToOfflineCheckpointCheckPoint(Set segmentsFrom, Set segmentsTo, String taskID) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; _taskID = taskID; _id = UUID.randomUUID().toString(); } - public ExpectedSubtaskResult(List segmentsFrom, List segmentsTo, - String id, String taskID, boolean taskFailure) { + public RealtimeToOfflineCheckpointCheckPoint(Set segmentsFrom, Set segmentsTo, + String id, String taskID, boolean failed) { _segmentsFrom = segmentsFrom; _segmentsTo = segmentsTo; _id = id; _taskID = taskID; - _taskFailure = taskFailure; + _failed = failed; } public String getTaskID() { @@ -69,36 +70,19 @@ public String getId() { return _id; } - public List getSegmentsFrom() { + public Set getSegmentsFrom() { return _segmentsFrom; } - public List getSegmentsTo() { + public Set getSegmentsTo() { return _segmentsTo; } - public boolean isTaskFailure() { - return _taskFailure; - } - - public void setTaskFailure() { - _taskFailure = true; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof ExpectedSubtaskResult)) { - return false; - } - ExpectedSubtaskResult that = (ExpectedSubtaskResult) o; - return Objects.equals(_id, that._id); + public boolean isFailed() { + return _failed; } - @Override - public int hashCode() { - return Objects.hashCode(_id); + public void setFailed() { + _failed = true; } } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java 
b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 51f675f3d630..4fbfe68e4a0e 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -19,10 +19,12 @@ package org.apache.pinot.common.minion; import com.google.common.base.Preconditions; +import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.StringUtils; import org.apache.helix.zookeeper.datamodel.ZNRecord; @@ -30,7 +32,7 @@ /** * Metadata for the minion task of type RealtimeToOfflineSegmentsTask. The _windowStartMs * denotes the time (exclusive) until which it's certain that tasks have been completed successfully. The - * _expectedSubtaskResultMap contains the expected RTO tasks result info. This map can contain both + * _checkPoints contains the expected RTO tasks result info. This map can contain both * completed and in-completed Tasks expected Results. This map is used by generator to validate whether a potential * segment (for RTO task) has already been successfully processed as a RTO task in the past or not. The * _windowStartMs and _windowEndMs denote the window bucket time of currently not @@ -46,37 +48,31 @@ *

* PinotTaskExecutor: The same windowStartMs is used by the RealtimeToOfflineSegmentsTaskExecutor, to: * - Verify that it's running the latest task scheduled by the task generator. - * - The _expectedSubtaskResultMap is updated before the offline segments are uploaded to the table. + * - The _checkPoints is updated before the offline segments are uploaded to the table. */ public class RealtimeToOfflineSegmentsTaskMetadata extends BaseTaskMetadata { private static final String WINDOW_START_KEY = "watermarkMs"; private static final String WINDOW_END_KEY = "windowEndMs"; private static final String COMMA_SEPARATOR = ","; - private static final String SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY = "segmentToExpectedSubtaskResultId"; private final String _tableNameWithType; private long _windowStartMs; private long _windowEndMs; - private final Map _expectedSubtaskResultMap; - private final Map _segmentNameToExpectedSubtaskResultID; + private final List _checkPoints; public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs) { _windowStartMs = windowStartMs; _tableNameWithType = tableNameWithType; - _expectedSubtaskResultMap = new HashMap<>(); - _segmentNameToExpectedSubtaskResultID = new HashMap<>(); + _checkPoints = new ArrayList<>(); } - public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs, - long windowEndMs, - Map expectedSubtaskResultMap, - Map segmentNameToExpectedSubtaskResultID) { - _tableNameWithType = tableNameWithType; + public RealtimeToOfflineSegmentsTaskMetadata(String tableNameWithType, long windowStartMs, long windowEndMs, + List checkPoints) { _windowStartMs = windowStartMs; - _expectedSubtaskResultMap = expectedSubtaskResultMap; _windowEndMs = windowEndMs; - _segmentNameToExpectedSubtaskResultID = segmentNameToExpectedSubtaskResultID; + _tableNameWithType = tableNameWithType; + _checkPoints = checkPoints; } public String getTableNameWithType() { @@ -99,72 +95,56 @@ public void 
setWindowEndMs(long windowEndMs) { _windowEndMs = windowEndMs; } - public Map getExpectedSubtaskResultMap() { - return _expectedSubtaskResultMap; + public List getCheckPoints() { + return _checkPoints; } - public Map getSegmentNameToExpectedSubtaskResultID() { - return _segmentNameToExpectedSubtaskResultID; + public void addCheckpoint(RealtimeToOfflineCheckpointCheckPoint newCheckPoint) { + if (canAddCheckpoint(newCheckPoint)) { + _checkPoints.add(newCheckPoint); + } } - public void addExpectedSubTaskResult( - ExpectedSubtaskResult newExpectedSubtaskResult) { - - List segmentsFrom = newExpectedSubtaskResult.getSegmentsFrom(); - + private boolean canAddCheckpoint(RealtimeToOfflineCheckpointCheckPoint newCheckPoint) { + Set segmentsFrom = newCheckPoint.getSegmentsFrom(); for (String segmentName : segmentsFrom) { - if (_segmentNameToExpectedSubtaskResultID.containsKey(segmentName)) { - String prevExpectedSubtaskResultID = - _segmentNameToExpectedSubtaskResultID.get(segmentName); - - ExpectedSubtaskResult prevExpectedSubtaskResult = - _expectedSubtaskResultMap.get(prevExpectedSubtaskResultID); - - Preconditions.checkState(prevExpectedSubtaskResult.isTaskFailure(), - "ExpectedSubtaskResult can only be replaced if it's of a failed task"); + for (RealtimeToOfflineCheckpointCheckPoint checkPoint : _checkPoints) { + if (checkPoint.isFailed()) { + continue; + } + Set prevSegmentsFrom = checkPoint.getSegmentsFrom(); + Preconditions.checkState(!prevSegmentsFrom.contains(segmentName), + "Checkpoint can only be replaced if it's of a failed task"); } - - _segmentNameToExpectedSubtaskResultID.put(segmentName, - newExpectedSubtaskResult.getId()); - _expectedSubtaskResultMap.put(newExpectedSubtaskResult.getId(), - newExpectedSubtaskResult); } + return true; } public static RealtimeToOfflineSegmentsTaskMetadata fromZNRecord(ZNRecord znRecord) { long windowStartMs = znRecord.getLongField(WINDOW_START_KEY, 0); long windowEndMs = znRecord.getLongField(WINDOW_END_KEY, 0); - Map 
expectedSubtaskResultMap = - new HashMap<>(); + Map> listFields = znRecord.getListFields(); + List checkPoints = new ArrayList<>(); if (listFields != null) { for (Map.Entry> listField : listFields.entrySet()) { - String expectedSubtaskResultId = listField.getKey(); + String checkpointID = listField.getKey(); List value = listField.getValue(); Preconditions.checkState(value.size() == 4); - List segmentsFrom = Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR)); - List segmentsTo = Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR)); + Set segmentsFrom = new HashSet<>(Arrays.asList(StringUtils.split(value.get(0), COMMA_SEPARATOR))); + Set segmentsTo = new HashSet<>(Arrays.asList(StringUtils.split(value.get(1), COMMA_SEPARATOR))); String taskID = value.get(2); - boolean taskFailure = Boolean.parseBoolean(value.get(3)); + boolean isFailedCheckpoint = Boolean.parseBoolean(value.get(3)); - expectedSubtaskResultMap.put(expectedSubtaskResultId, - new ExpectedSubtaskResult(segmentsFrom, segmentsTo, expectedSubtaskResultId, taskID, - taskFailure) - ); + checkPoints.add(new RealtimeToOfflineCheckpointCheckPoint(segmentsFrom, segmentsTo, checkpointID, taskID, + isFailedCheckpoint)); } } - Map> mapFields = znRecord.getMapFields(); - Map segmentNameToExpectedSubtaskResultID = new HashMap<>(); - if (mapFields != null) { - segmentNameToExpectedSubtaskResultID = mapFields.get(SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY); - } - - return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), windowStartMs, windowEndMs, - expectedSubtaskResultMap, segmentNameToExpectedSubtaskResultID); + return new RealtimeToOfflineSegmentsTaskMetadata(znRecord.getId(), windowStartMs, windowEndMs, checkPoints); } public ZNRecord toZNRecord() { @@ -172,25 +152,17 @@ public ZNRecord toZNRecord() { znRecord.setLongField(WINDOW_START_KEY, _windowStartMs); znRecord.setLongField(WINDOW_END_KEY, _windowEndMs); - if (_expectedSubtaskResultMap != null) { - for (String 
expectedSubtaskResultID : _expectedSubtaskResultMap.keySet()) { - ExpectedSubtaskResult expectedSubtaskResult = - _expectedSubtaskResultMap.get(expectedSubtaskResultID); + for (RealtimeToOfflineCheckpointCheckPoint checkPoint : _checkPoints) { + String segmentsFrom = String.join(COMMA_SEPARATOR, checkPoint.getSegmentsFrom()); + String segmentsTo = String.join(COMMA_SEPARATOR, checkPoint.getSegmentsTo()); + String taskId = checkPoint.getTaskID(); + boolean isFailedCheckpoint = checkPoint.isFailed(); + String id = checkPoint.getId(); - String segmentsFrom = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsFrom()); - String segmentsTo = String.join(COMMA_SEPARATOR, expectedSubtaskResult.getSegmentsTo()); - String taskId = expectedSubtaskResult.getTaskID(); - boolean taskFailure = expectedSubtaskResult.isTaskFailure(); - - List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId, Boolean.toString(taskFailure)); - - String id = expectedSubtaskResult.getId(); - znRecord.setListField(id, listEntry); - } + List listEntry = Arrays.asList(segmentsFrom, segmentsTo, taskId, Boolean.toString(isFailedCheckpoint)); + znRecord.setListField(id, listEntry); } - znRecord.setMapField(SEGMENT_NAME_TO_EXPECTED_SUBTASK_RESULT_ID_KEY, - _segmentNameToExpectedSubtaskResultID); return znRecord; } } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java index 2e1ae291d02b..80a6d37f33ad 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metadata/RealtimeToOfflineSegmentsTaskMetadataTest.java @@ -18,14 +18,15 @@ */ package org.apache.pinot.common.metadata; -import com.google.common.collect.ImmutableMap; +import java.util.ArrayList; import java.util.Arrays; -import 
java.util.HashMap; +import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import org.apache.helix.zookeeper.datamodel.ZNRecord; -import org.apache.pinot.common.minion.ExpectedSubtaskResult; +import org.apache.pinot.common.minion.RealtimeToOfflineCheckpointCheckPoint; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.testng.annotations.Test; @@ -53,40 +54,31 @@ public void testToFromZNRecord() { @Test public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { - Map idVsExpectedRealtimeToOfflineTaskResult = - new HashMap<>(); - ExpectedSubtaskResult expectedSubtaskResult = - new ExpectedSubtaskResult( - Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), - Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), + List checkPoints = new ArrayList<>(); + RealtimeToOfflineCheckpointCheckPoint checkPoint = + new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z")), + new HashSet<>(Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", + "githubEventsOffline__0__0__20241213T2003Z")), "1"); - ExpectedSubtaskResult expectedSubtaskResult1 = - new ExpectedSubtaskResult( - Arrays.asList("githubEvents__0__0__20241213T2102Z", "githubEvents__0__0__20241213T2203Z"), - Arrays.asList("githubEventsOffline__0__0__20241213T2032Z", "githubEventsOffline__0__0__20241213T2403Z"), + RealtimeToOfflineCheckpointCheckPoint checkPoint1 = + new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("githubEvents__0__0__20241213T2102Z", "githubEvents__0__0__20241213T2203Z")), + new HashSet<>(Arrays.asList("githubEventsOffline__0__0__20241213T2032Z", + "githubEventsOffline__0__0__20241213T2403Z")), "2"); - idVsExpectedRealtimeToOfflineTaskResult.put(expectedSubtaskResult.getId(), - 
expectedSubtaskResult); - idVsExpectedRealtimeToOfflineTaskResult.put(expectedSubtaskResult1.getId(), - expectedSubtaskResult1); - - ImmutableMap segmentNameVsId = ImmutableMap.of( - "githubEvents__0__0__20241213T2002Z", expectedSubtaskResult.getId(), - "githubEvents__0__0__20241213T2003Z", expectedSubtaskResult.getId(), - "githubEvents__0__0__20241213T2102Z", expectedSubtaskResult1.getId(), - "githubEvents__0__0__20241213T2203Z", expectedSubtaskResult1.getId() - ); + + checkPoints.add(checkPoint); + checkPoints.add(checkPoint1); RealtimeToOfflineSegmentsTaskMetadata originalMetadata = - new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000, 2000, - idVsExpectedRealtimeToOfflineTaskResult, segmentNameVsId); + new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1000, 2000, checkPoints); ZNRecord znRecord = originalMetadata.toZNRecord(); assertEquals(znRecord.getId(), "testTable_REALTIME"); assertEquals(znRecord.getSimpleField("watermarkMs"), "1000"); assertEquals(znRecord.getSimpleField("windowEndMs"), "2000"); Map> listFields = znRecord.getListFields(); - Map> mapFields = znRecord.getMapFields(); for (String id : listFields.keySet()) { List fields = listFields.get(id); @@ -97,23 +89,18 @@ public void testToFromZNRecordWithWindowIntervalAndExpectedResults() { switch (taskID) { case "1": - assertEquals(fields.get(0), "githubEvents__0__0__20241213T2002Z,githubEvents__0__0__20241213T2003Z"); - assertEquals(fields.get(1), - "githubEventsOffline__0__0__20241213T2002Z,githubEventsOffline__0__0__20241213T2003Z"); + assertEquals(fields.get(0), String.join(",", checkPoint.getSegmentsFrom())); + assertEquals(fields.get(1), String.join(",", checkPoint.getSegmentsTo())); break; case "2": - assertEquals(fields.get(0), "githubEvents__0__0__20241213T2102Z,githubEvents__0__0__20241213T2203Z"); - assertEquals(fields.get(1), - "githubEventsOffline__0__0__20241213T2032Z,githubEventsOffline__0__0__20241213T2403Z"); + assertEquals(fields.get(0), 
String.join(",", checkPoint1.getSegmentsFrom())); + assertEquals(fields.get(1), String.join(",", checkPoint1.getSegmentsTo())); break; default: throw new RuntimeException("invalid taskID"); } } - Map map = mapFields.get("segmentToExpectedSubtaskResultId"); - assertEquals(map, segmentNameVsId); - RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(znRecord); @@ -126,34 +113,25 @@ private boolean isEqual(RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineS assertEquals(realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(), originalMetadata.getWindowStartMs()); assertEquals(realtimeToOfflineSegmentsTaskMetadata.getTableNameWithType(), originalMetadata.getTableNameWithType()); - Map idVsExpectedRealtimeToOfflineTaskResult = - realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); - Map segmentNameVsExpectedRealtimeToOfflineTaskResultId = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); - - for (String id : idVsExpectedRealtimeToOfflineTaskResult.keySet()) { - ExpectedSubtaskResult actualExpectedRealtimeToOfflineTaskResult = - idVsExpectedRealtimeToOfflineTaskResult.get(id); - ExpectedSubtaskResult expectedSubtaskResult = - originalMetadata.getExpectedSubtaskResultMap().get(id); - assert expectedSubtaskResult != null; - assert isEqual(actualExpectedRealtimeToOfflineTaskResult, expectedSubtaskResult); - } - - assertEquals(segmentNameVsExpectedRealtimeToOfflineTaskResultId, - originalMetadata.getSegmentNameToExpectedSubtaskResultID()); + originalMetadata.getCheckPoints().sort(Comparator.comparing(RealtimeToOfflineCheckpointCheckPoint::getId)); + realtimeToOfflineSegmentsTaskMetadata.getCheckPoints() + .sort(Comparator.comparing(RealtimeToOfflineCheckpointCheckPoint::getId)); + for (int checkpointIndex = 0; checkpointIndex < originalMetadata.getCheckPoints().size(); checkpointIndex++) { + assert 
isEqual((originalMetadata.getCheckPoints().get(checkpointIndex)), + realtimeToOfflineSegmentsTaskMetadata.getCheckPoints().get(checkpointIndex)); + } return true; } - private boolean isEqual(ExpectedSubtaskResult expectedSubtaskResult1, - ExpectedSubtaskResult expectedSubtaskResult2) { - return Objects.equals(expectedSubtaskResult1.getSegmentsFrom(), - expectedSubtaskResult2.getSegmentsFrom()) && Objects.equals( - expectedSubtaskResult1.getSegmentsTo(), - expectedSubtaskResult2.getSegmentsTo()) && Objects.equals( - expectedSubtaskResult1.getId(), expectedSubtaskResult2.getId()) + private boolean isEqual(RealtimeToOfflineCheckpointCheckPoint checkPoint1, + RealtimeToOfflineCheckpointCheckPoint checkPoint2) { + return Objects.equals(checkPoint1.getSegmentsFrom(), + checkPoint2.getSegmentsFrom()) && Objects.equals( + checkPoint1.getSegmentsTo(), + checkPoint2.getSegmentsTo()) && Objects.equals( + checkPoint1.getId(), checkPoint2.getId()) && Objects.equals( - expectedSubtaskResult1.getTaskID(), expectedSubtaskResult2.getTaskID()); + checkPoint1.getTaskID(), checkPoint2.getTaskID()); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java index fc1e2424bf84..49fb5dcb99d2 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskExecutor.java @@ -25,12 +25,13 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import 
java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadataCustomMapModifier; -import org.apache.pinot.common.minion.ExpectedSubtaskResult; +import org.apache.pinot.common.minion.RealtimeToOfflineCheckpointCheckPoint; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.core.common.MinionConstants; import org.apache.pinot.core.common.MinionConstants.RealtimeToOfflineSegmentsTask; @@ -70,7 +71,7 @@ * located at MINION_TASK_METADATA/${tableNameWithType}/RealtimeToOfflineSegmentsTask * It should match the windowStartMs. * - * Before the segments are uploaded, this task updates the _expectedSubtaskResultMap + * Before the segments are uploaded, this task updates the _checkpoints * in the minion task metadata ZNode. * The znode version is checked during update, retrying until max attempts and version of znode is equal to expected. * Reason for above is that, since multiple subtasks run in parallel, there can be race condition @@ -226,7 +227,7 @@ protected void preUploadSegments(SegmentUploadContext context) RealtimeToOfflineSegmentsTask.TASK_TYPE); int expectedVersion = realtimeToOfflineSegmentsTaskZNRecord.getVersion(); - // Adding ExpectedRealtimeToOfflineSegmentsTaskResult might fail. + // Adding Checkpoint might fail. 
// In-case of failure there will be runtime exception thrown RealtimeToOfflineSegmentsTaskMetadata updatedRealtimeToOfflineSegmentsTaskMetadata = getUpdatedTaskMetadata(context, realtimeToOfflineSegmentsTaskZNRecord); @@ -271,28 +272,27 @@ private RealtimeToOfflineSegmentsTaskMetadata getUpdatedTaskMetadata(SegmentUplo RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = RealtimeToOfflineSegmentsTaskMetadata.fromZNRecord(realtimeToOfflineSegmentsTaskZNRecord); - ExpectedSubtaskResult expectedSubtaskResult = - getExpectedSubtaskResult(context); + RealtimeToOfflineCheckpointCheckPoint checkPoint = getExpectedSubtaskResult(context); + + realtimeToOfflineSegmentsTaskMetadata.addCheckpoint(checkPoint); - realtimeToOfflineSegmentsTaskMetadata.addExpectedSubTaskResult( - expectedSubtaskResult); return realtimeToOfflineSegmentsTaskMetadata; } - private ExpectedSubtaskResult getExpectedSubtaskResult( + private RealtimeToOfflineCheckpointCheckPoint getExpectedSubtaskResult( SegmentUploadContext context) { PinotTaskConfig pinotTaskConfig = context.getPinotTaskConfig(); String taskId = pinotTaskConfig.getTaskId(); - List segmentsFrom = + Set segmentsFrom = Arrays.stream(StringUtils.split(context.getInputSegmentNames(), MinionConstants.SEGMENT_NAME_SEPARATOR)) - .map(String::trim).collect(Collectors.toList()); + .map(String::trim).collect(Collectors.toSet()); - List segmentsTo = + Set segmentsTo = context.getSegmentConversionResults().stream().map(SegmentConversionResult::getSegmentName) - .collect(Collectors.toList()); + .collect(Collectors.toSet()); - return new ExpectedSubtaskResult(segmentsFrom, segmentsTo, taskId); + return new RealtimeToOfflineCheckpointCheckPoint(segmentsFrom, segmentsTo, taskId); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index d1d0d71436ab..f6f830e20d64 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -21,7 +21,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import java.util.ArrayList; -import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -32,7 +31,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.zookeeper.zkclient.exception.ZkBadVersionException; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; -import org.apache.pinot.common.minion.ExpectedSubtaskResult; +import org.apache.pinot.common.minion.RealtimeToOfflineCheckpointCheckPoint; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.common.utils.LLCSegmentName; import org.apache.pinot.controller.helix.core.PinotResourceManagerResponse; @@ -182,20 +181,25 @@ public List generateTasks(List tableConfigs) { // In-case of previous minion task failures, get info // of failed minion subtasks. They need to be reprocessed. - Set failedTaskInputSegments = - getFailedTaskSegments(realtimeToOfflineSegmentsTaskMetadata, existingOfflineTableSegmentNames); + List failedTaskCheckpoints = + getFailedCheckpoints(realtimeToOfflineSegmentsTaskMetadata, existingOfflineTableSegmentNames); // In-case of partial failure of segments upload in prev minion task run, // data is inconsistent, delete the corresponding offline segments immediately. 
- if (!failedTaskInputSegments.isEmpty()) { - LOGGER.warn("found prev minion task failures for table: {}. failedTaskInputSegments: {}", realtimeTableName, - failedTaskInputSegments); - deleteInvalidOfflineSegments(offlineTableName, failedTaskInputSegments, existingOfflineTableSegmentNames, - realtimeToOfflineSegmentsTaskMetadata); - } + Set failedRealtimeSegments; + List segmentsToBeReProcessed = new ArrayList<>(); + + if (!failedTaskCheckpoints.isEmpty()) { + failedRealtimeSegments = new HashSet<>(); + for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { + failedRealtimeSegments.addAll(checkPoint.getSegmentsFrom()); + } + LOGGER.warn("found prev minion task failures for table: {}, failed task RealtimeSegments: {}", + realtimeTableName, failedRealtimeSegments); - List segmentsToBeReProcessed = - filterOutDeletedSegments(failedTaskInputSegments, completedRealtimeSegmentsZKMetadata); + deleteInvalidOfflineSegments(offlineTableName, existingOfflineTableSegmentNames, failedTaskCheckpoints); + segmentsToBeReProcessed = filterOutDeletedSegments(failedRealtimeSegments, completedRealtimeSegmentsZKMetadata); + } // if no segment to be reprocessed, no failure boolean prevMinionTaskSuccessful = segmentsToBeReProcessed.isEmpty(); @@ -221,10 +225,9 @@ public List generateTasks(List tableConfigs) { } else { // if all offline segments of prev minion tasks were successfully uploaded, // we can clear the state of prev minion tasks as now it's useless. - if (!realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(). + if (!realtimeToOfflineSegmentsTaskMetadata.getCheckPoints(). isEmpty()) { - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID().clear(); - realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap().clear(); + realtimeToOfflineSegmentsTaskMetadata.getCheckPoints().clear(); // windowEndTime of prev minion task needs to be re-used for picking up the // next windowStartTime. 
This is useful for case where user changes minion config // after a minion task run was complete. So windowStartTime cannot be watermark + bucketMs @@ -331,109 +334,74 @@ private List getDownloadURLList(List segmentNameList, Map realtimeSegmentsToBeReProcessed, - Set existingOfflineTableSegmentNames, - RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata) { + private void deleteInvalidOfflineSegments(String offlineTableName, Set existingOfflineTableSegmentNames, + List failedTaskCheckpoints) { - Map segmentNameToExpectedSubtaskResultID = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); - Map expectedSubtaskResultMap = - realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); - - Set segmentsToBeDeleted = new HashSet<>(); - List subtasksToBeMarkedAsFailed = new ArrayList<>(); - - for (String realtimeSegmentName : realtimeSegmentsToBeReProcessed) { - String id = segmentNameToExpectedSubtaskResultID.get(realtimeSegmentName); - Preconditions.checkNotNull(id); - ExpectedSubtaskResult expectedSubtaskResult = - expectedSubtaskResultMap.get(id); - // if already marked as failure, no need to delete again. - if (expectedSubtaskResult.isTaskFailure()) { - continue; + List invalidOfflineSegments = new ArrayList<>(); + + for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { + Set expectedCorrespondingOfflineSegments = checkPoint.getSegmentsTo(); + List segmentsToDelete = + getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); + + if (!segmentsToDelete.isEmpty()) { + invalidOfflineSegments.addAll(segmentsToDelete); } - List expectedCorrespondingOfflineSegments = expectedSubtaskResult.getSegmentsTo(); - segmentsToBeDeleted.addAll( - getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames)); - // The expectedRealtimeToOfflineTaskResult is confirmed to be - // related to a failed task. 
Mark it as a failure, since executor will - // then only replace expectedRealtimeToOfflineTaskResult for the - // segments to be reprocessed. - subtasksToBeMarkedAsFailed.add(expectedSubtaskResult); } - if (!segmentsToBeDeleted.isEmpty()) { - LOGGER.warn("Deleting invalid offline segments: {} of table: {}", segmentsToBeDeleted, offlineTableName); + if (!invalidOfflineSegments.isEmpty()) { + LOGGER.warn("Deleting invalid offline segments: {} of table: {}", invalidOfflineSegments, offlineTableName); PinotResourceManagerResponse pinotResourceManagerResponse = _clusterInfoAccessor.getPinotHelixResourceManager() - .deleteSegments(offlineTableName, new ArrayList<>(segmentsToBeDeleted)); + .deleteSegments(offlineTableName, invalidOfflineSegments); - if (pinotResourceManagerResponse.isSuccessful()) { - // Invalid segments are deleted, set expectedSubtaskResults as failed. - for (ExpectedSubtaskResult expectedSubtaskResult : subtasksToBeMarkedAsFailed) { - expectedSubtaskResult.setTaskFailure(); - } - } else { - throw new RuntimeException(String.format("unable to delete invalid offline segments: %s", segmentsToBeDeleted)); - } + Preconditions.checkState(pinotResourceManagerResponse.isSuccessful(), + String.format("unable to delete invalid offline segments: %s", invalidOfflineSegments)); + } + + // All Invalid segments have been sent to Controller for deletion. + // Now we can mark these checkpoints as failed. 
+ for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { + checkPoint.setFailed(); } } - private Set getFailedTaskSegments( + private List getFailedCheckpoints( RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, Set existingOfflineTableSegmentNames) { - Set failedIds = new HashSet<>(); - - // Get all the ExpectedRealtimeToOfflineTaskResult of prev minion task - Map expectedSubtaskResultMap = - realtimeToOfflineSegmentsTaskMetadata.getExpectedSubtaskResultMap(); - Collection expectedSubtaskResultList = - expectedSubtaskResultMap.values(); - - Map segmentNameToExpectedSubtaskResultID = - realtimeToOfflineSegmentsTaskMetadata.getSegmentNameToExpectedSubtaskResultID(); - Set expectedSubtaskResultIds = - new HashSet<>(segmentNameToExpectedSubtaskResultID.values()); - Set segmentNamesToReprocess = new HashSet<>(); + List checkPoints = + realtimeToOfflineSegmentsTaskMetadata.getCheckPoints(); - // Check what all offline segments are present currently - for (ExpectedSubtaskResult expectedSubtaskResult - : expectedSubtaskResultList) { + Set failedCheckpointSegments = new HashSet<>(); + List failedCheckPoints = new ArrayList<>(); - if (expectedSubtaskResult.isTaskFailure()) { - // if task is failure and is referenced by any segment, only then add to failed task. - if (expectedSubtaskResultIds.contains(expectedSubtaskResult.getId())) { - failedIds.add(expectedSubtaskResult.getId()); - } + for (RealtimeToOfflineCheckpointCheckPoint checkPoint : checkPoints) { + if (checkPoint.isFailed()) { + // checkpoint is marked as failed only when its invalid offline segments + // of the checkpoints are deleted. This checkpoint has been already + // marked as failed. + // it's safe to skip them here. 
continue; } - // get offline segments - List segmentTo = expectedSubtaskResult.getSegmentsTo(); - + Set segmentTo = checkPoint.getSegmentsTo(); // If not all corresponding offline segments to a realtime segment exists, // it means there was an issue with prev minion task. And segment needs // to be re-processed. boolean taskSuccessful = checkIfAllSegmentsExists(segmentTo, existingOfflineTableSegmentNames); if (!taskSuccessful) { - failedIds.add(expectedSubtaskResult.getId()); - } - } - - // source of truth for re-processing task is segmentNameToExpectedSubtaskResultID map. - // consider edge case where multiple segments were re-scheduled among multiple subtasks, but again - // one of the subtask failed. - for (String segmentName : segmentNameToExpectedSubtaskResultID.keySet()) { - String expectedSubtaskResultID = - segmentNameToExpectedSubtaskResultID.get(segmentName); - if (failedIds.contains(expectedSubtaskResultID)) { - segmentNamesToReprocess.add(segmentName); + Set segmentsFrom = checkPoint.getSegmentsFrom(); + for (String segmentFrom : segmentsFrom) { + Preconditions.checkState(!failedCheckpointSegments.contains(segmentFrom), + "Multiple live checkpoints found for the segment"); + failedCheckpointSegments.add(segmentFrom); + } + failedCheckPoints.add(checkPoint); } } - return segmentNamesToReprocess; + return failedCheckPoints; } private List filterOutDeletedSegments(Set segmentNames, @@ -532,7 +500,7 @@ private void divideSegmentsAmongSubtasks(List segmentsToBeReP } } - private List getSegmentsToDelete(List expectedCorrespondingOfflineSegments, + private List getSegmentsToDelete(Set expectedCorrespondingOfflineSegments, Set existingOfflineTableSegmentNames) { List segmentsToDelete = new ArrayList<>(); @@ -546,23 +514,18 @@ private List getSegmentsToDelete(List expectedCorrespondingOffli return segmentsToDelete; } - private boolean checkIfAllSegmentsExists(List expectedSegments, + private boolean checkIfAllSegmentsExists(Set expectedSegments, Set 
currentTableSegments) { - for (String expectedSegment : expectedSegments) { - if (!currentTableSegments.contains(expectedSegment)) { - return false; - } - } - return true; + return currentTableSegments.containsAll(expectedSegments); } /** * Fetch completed (DONE/UPLOADED) segment and partition information * - * @param realtimeTableName the realtime table name - * @param completedSegmentsZKMetadata list for collecting the completed (DONE/UPLOADED) segments ZK metadata + * @param realtimeTableName the realtime table name + * @param completedSegmentsZKMetadata list for collecting the completed (DONE/UPLOADED) segments ZK metadata * @param partitionToLatestLLCSegmentName map for collecting the partitionId to the latest LLC segment name - * @param allPartitions set for collecting all partition ids + * @param allPartitions set for collecting all partition ids */ private void getCompletedSegmentsInfo(String realtimeTableName, List completedSegmentsZKMetadata, Map partitionToLatestLLCSegmentName, Set allPartitions) { @@ -599,9 +562,8 @@ private void getCompletedSegmentsInfo(String realtimeTableName, List completedSegmentsZKMetadata, diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 8645008d1b58..ab0d2bdc3efe 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -24,13 +24,14 @@ import java.util.Arrays; 
import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.helix.model.IdealState; import org.apache.helix.task.TaskState; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; -import org.apache.pinot.common.minion.ExpectedSubtaskResult; +import org.apache.pinot.common.minion.RealtimeToOfflineCheckpointCheckPoint; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.controller.helix.core.minion.ClusterInfoAccessor; @@ -448,23 +449,17 @@ public void testGenerateTasksWithSegmentUploadFailure() { } private RealtimeToOfflineSegmentsTaskMetadata getRealtimeToOfflineSegmentsTaskMetadata() { - Map idVsExpectedRealtimeToOfflineTaskResult = - new HashMap<>(); - ExpectedSubtaskResult expectedSubtaskResult = - new ExpectedSubtaskResult( - Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z"), - Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", "githubEventsOffline__0__0__20241213T2003Z"), + List checkPoints = new ArrayList<>(); + RealtimeToOfflineCheckpointCheckPoint checkPoint = + new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("githubEvents__0__0__20241213T2002Z", "githubEvents__0__0__20241213T2003Z")), + new HashSet<>(Arrays.asList("githubEventsOffline__0__0__20241213T2002Z", + "githubEventsOffline__0__0__20241213T2003Z")), "1"); - idVsExpectedRealtimeToOfflineTaskResult.put(expectedSubtaskResult.getId(), - expectedSubtaskResult); - - ImmutableMap segmentNameVsId = ImmutableMap.of( - "githubEvents__0__0__20241213T2002Z", expectedSubtaskResult.getId(), - "githubEvents__0__0__20241213T2003Z", expectedSubtaskResult.getId() - ); + checkPoints.add(checkPoint); return new RealtimeToOfflineSegmentsTaskMetadata("testTable_REALTIME", 1589972400000L, 
1590058800000L, - idVsExpectedRealtimeToOfflineTaskResult, segmentNameVsId); + checkPoints); } /** From 97846ffc561701a3fe8f8f7a7b78e2b5b4237058 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 26 Jan 2025 00:44:10 +0530 Subject: [PATCH 66/72] nit --- .../common/minion/RealtimeToOfflineSegmentsTaskMetadata.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java index 4fbfe68e4a0e..964126f89ccd 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/minion/RealtimeToOfflineSegmentsTaskMetadata.java @@ -114,7 +114,7 @@ private boolean canAddCheckpoint(RealtimeToOfflineCheckpointCheckPoint newCheckP } Set prevSegmentsFrom = checkPoint.getSegmentsFrom(); Preconditions.checkState(!prevSegmentsFrom.contains(segmentName), - "Checkpoint can only be replaced if it's of a failed task"); + "A live Checkpoints already exists for segment: " + segmentName); } } return true; From 97bf14697bff01b283f9594fce8f107717caa881 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Sun, 26 Jan 2025 01:40:37 +0530 Subject: [PATCH 67/72] Adds unit tests --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 111 +++++++++--------- ...imeToOfflineSegmentsTaskGeneratorTest.java | 60 +++++++++- 2 files changed, 116 insertions(+), 55 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index f6f830e20d64..bc4f830d65a9 100644 --- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import java.util.ArrayList; @@ -334,37 +335,6 @@ private List getDownloadURLList(List segmentNameList, Map existingOfflineTableSegmentNames, - List failedTaskCheckpoints) { - - List invalidOfflineSegments = new ArrayList<>(); - - for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { - Set expectedCorrespondingOfflineSegments = checkPoint.getSegmentsTo(); - List segmentsToDelete = - getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); - - if (!segmentsToDelete.isEmpty()) { - invalidOfflineSegments.addAll(segmentsToDelete); - } - } - - if (!invalidOfflineSegments.isEmpty()) { - LOGGER.warn("Deleting invalid offline segments: {} of table: {}", invalidOfflineSegments, offlineTableName); - PinotResourceManagerResponse pinotResourceManagerResponse = _clusterInfoAccessor.getPinotHelixResourceManager() - .deleteSegments(offlineTableName, invalidOfflineSegments); - - Preconditions.checkState(pinotResourceManagerResponse.isSuccessful(), - String.format("unable to delete invalid offline segments: %s", invalidOfflineSegments)); - } - - // All Invalid segments have been sent to Controller for deletion. - // Now we can mark these checkpoints as failed. 
- for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { - checkPoint.setFailed(); - } - } - private List getFailedCheckpoints( RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, Set existingOfflineTableSegmentNames) { @@ -404,19 +374,71 @@ private List getFailedCheckpoints( return failedCheckPoints; } - private List filterOutDeletedSegments(Set segmentNames, + private boolean checkIfAllSegmentsExists(Set expectedSegments, + Set currentTableSegments) { + return currentTableSegments.containsAll(expectedSegments); + } + + @VisibleForTesting + void deleteInvalidOfflineSegments(String offlineTableName, Set existingOfflineTableSegmentNames, + List failedTaskCheckpoints) { + + List invalidOfflineSegments = new ArrayList<>(); + + for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { + Set expectedCorrespondingOfflineSegments = checkPoint.getSegmentsTo(); + List segmentsToDelete = + getSegmentsToDelete(expectedCorrespondingOfflineSegments, existingOfflineTableSegmentNames); + + if (!segmentsToDelete.isEmpty()) { + invalidOfflineSegments.addAll(segmentsToDelete); + } + } + + if (!invalidOfflineSegments.isEmpty()) { + LOGGER.warn("Deleting invalid offline segments: {} of table: {}", invalidOfflineSegments, offlineTableName); + PinotResourceManagerResponse pinotResourceManagerResponse = _clusterInfoAccessor.getPinotHelixResourceManager() + .deleteSegments(offlineTableName, invalidOfflineSegments); + + Preconditions.checkState(pinotResourceManagerResponse.isSuccessful(), + String.format("unable to delete invalid offline segments: %s", invalidOfflineSegments)); + } + + // All Invalid segments have been sent to Controller for deletion. + // Now we can mark these checkpoints as failed. 
+ for (RealtimeToOfflineCheckpointCheckPoint checkPoint : failedTaskCheckpoints) { + checkPoint.setFailed(); + } + } + + private List getSegmentsToDelete(Set expectedCorrespondingOfflineSegments, + Set existingOfflineTableSegmentNames) { + List segmentsToDelete = new ArrayList<>(); + + // Iterate on all expectedCorrespondingOfflineSegments of realtime segments to be reprocessed. + // check which segments exists. They need to be deleted. + for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { + if (existingOfflineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { + segmentsToDelete.add(expectedCorrespondingOfflineSegment); + } + } + return segmentsToDelete; + } + + @VisibleForTesting + List filterOutDeletedSegments(Set segmentNames, List currentTableSegments) { - List segmentZKMetadataList = new ArrayList<>(); + List segmentZKMetadataListToRet = new ArrayList<>(); // filter out deleted/removed segments. for (SegmentZKMetadata segmentZKMetadata : currentTableSegments) { String segmentName = segmentZKMetadata.getSegmentName(); if (segmentNames.contains(segmentName)) { - segmentZKMetadataList.add(segmentZKMetadata); + segmentZKMetadataListToRet.add(segmentZKMetadata); } } - return segmentZKMetadataList; + return segmentZKMetadataListToRet; } private List generateNewSegmentsToProcess(List completedSegmentsZKMetadata, @@ -500,25 +522,6 @@ private void divideSegmentsAmongSubtasks(List segmentsToBeReP } } - private List getSegmentsToDelete(Set expectedCorrespondingOfflineSegments, - Set existingOfflineTableSegmentNames) { - List segmentsToDelete = new ArrayList<>(); - - // Iterate on all expectedCorrespondingOfflineSegments of realtime segments to be reprocessed. - // check which segments exists. They need to be deleted. 
- for (String expectedCorrespondingOfflineSegment : expectedCorrespondingOfflineSegments) { - if (existingOfflineTableSegmentNames.contains(expectedCorrespondingOfflineSegment)) { - segmentsToDelete.add(expectedCorrespondingOfflineSegment); - } - } - return segmentsToDelete; - } - - private boolean checkIfAllSegmentsExists(Set expectedSegments, - Set currentTableSegments) { - return currentTableSegments.containsAll(expectedSegments); - } - /** * Fetch completed (DONE/UPLOADED) segment and partition information * diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index ab0d2bdc3efe..b21fe82128a6 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -27,6 +27,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import org.apache.helix.model.IdealState; import org.apache.helix.task.TaskState; @@ -34,6 +35,7 @@ import org.apache.pinot.common.minion.RealtimeToOfflineCheckpointCheckPoint; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; +import org.apache.pinot.controller.helix.core.PinotResourceManagerResponse; import org.apache.pinot.controller.helix.core.minion.ClusterInfoAccessor; import 
org.apache.pinot.core.common.MinionConstants; import org.apache.pinot.core.common.MinionConstants.RealtimeToOfflineSegmentsTask; @@ -47,11 +49,13 @@ import org.apache.pinot.spi.stream.StreamConfigProperties; import org.apache.pinot.spi.utils.CommonConstants.Segment.Realtime.Status; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; @@ -626,7 +630,7 @@ public void testRealtimeToOfflineSegmentsTaskConfig() { .addDateTime(TIME_COLUMN_NAME, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") .setPrimaryKeyColumns(Lists.newArrayList("myCol")).build(); - when(mockPinotHelixResourceManager.getSchemaForTableConfig(Mockito.any())).thenReturn(schema); + when(mockPinotHelixResourceManager.getSchemaForTableConfig(any())).thenReturn(schema); RealtimeToOfflineSegmentsTaskGenerator taskGenerator = new RealtimeToOfflineSegmentsTaskGenerator(); taskGenerator.init(mockClusterInfoAccessor); @@ -746,6 +750,60 @@ public void testRealtimeToOfflineSegmentsTaskConfig() { taskGenerator.validateTaskConfigs(tableConfig, schema, validAgg2Config); } + @Test + public void testFilterOutDeletedSegments() { + RealtimeToOfflineSegmentsTaskGenerator taskGenerator = new RealtimeToOfflineSegmentsTaskGenerator(); + Set segmentNames = new HashSet<>(Arrays.asList("seg_1", "seg_2", "seg_3", "seg_4")); + List currentTableSegments = + Arrays.asList(new SegmentZKMetadata("seg_1"), new SegmentZKMetadata("seg_3"), new SegmentZKMetadata("seg_4")); + List segmentZKMetadataList = + taskGenerator.filterOutDeletedSegments(segmentNames, currentTableSegments); + assert segmentZKMetadataList.size() == 3; + StringBuilder liveSegmentNames = new 
StringBuilder(); + for (SegmentZKMetadata segmentZKMetadata: segmentZKMetadataList) { + liveSegmentNames.append(segmentZKMetadata.getSegmentName()).append(","); + } + assert "seg_1,seg_3,seg_4,".contentEquals(liveSegmentNames); + } + + @Test + public void testDeleteInvalidOfflineSegments() { + Set existingOfflineSegmentNames = new HashSet<>(Arrays.asList("seg_1", "seg_2", "seg_3", "seg_4")); + + List checkPoints = new ArrayList<>(); + checkPoints.add(new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("seg_realtime_1", "seg_realtime_2")), + new HashSet<>(Arrays.asList("seg_1", "seg_4", "seg_5")), + "1") + ); + checkPoints.add(new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("seg_realtime_3", "seg_realtime_4")), + new HashSet<>(Arrays.asList("seg_2")), + "1") + ); + + ClusterInfoAccessor mockClusterInfoAccessor = mock(ClusterInfoAccessor.class); + PinotHelixResourceManager mockPinotHelixResourceManager = mock(PinotHelixResourceManager.class); + when(mockClusterInfoAccessor.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + RealtimeToOfflineSegmentsTaskGenerator taskGenerator = new RealtimeToOfflineSegmentsTaskGenerator(); + taskGenerator.init(mockClusterInfoAccessor); + ArgumentCaptor captor = ArgumentCaptor.forClass(List.class); + when(mockPinotHelixResourceManager.deleteSegments(Mockito.eq("test_OFFLINE"), captor.capture())).thenReturn( + PinotResourceManagerResponse.success("")); + + taskGenerator.deleteInvalidOfflineSegments("test_OFFLINE", existingOfflineSegmentNames, checkPoints); + List capturedList = captor.getValue(); + + assert checkPoints.get(0).isFailed(); + assert checkPoints.get(1).isFailed(); + + StringBuilder segmentNames = new StringBuilder(); + for (String segmentName: capturedList) { + segmentNames.append(segmentName).append(","); + } + assert "seg_1,seg_4,seg_2,".contentEquals(segmentNames); + } + private SegmentZKMetadata getSegmentZKMetadata(String segmentName, Status 
status, long startTime, long endTime, TimeUnit timeUnit, String downloadURL) { SegmentZKMetadata realtimeSegmentZKMetadata = new SegmentZKMetadata(segmentName); From b12980b4e98fc91c5ba05618708488a1c7c500f0 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 27 Jan 2025 00:46:53 +0530 Subject: [PATCH 68/72] Adds unit tests --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 32 +++---- ...imeToOfflineSegmentsTaskGeneratorTest.java | 96 +++++++++++++++++++ 2 files changed, 111 insertions(+), 17 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index bc4f830d65a9..6f8aa617bf2f 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -335,12 +335,12 @@ private List getDownloadURLList(List segmentNameList, Map getFailedCheckpoints( + @VisibleForTesting + List getFailedCheckpoints( RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata, Set existingOfflineTableSegmentNames) { - List checkPoints = - realtimeToOfflineSegmentsTaskMetadata.getCheckPoints(); + List checkPoints = realtimeToOfflineSegmentsTaskMetadata.getCheckPoints(); Set failedCheckpointSegments = new HashSet<>(); List failedCheckPoints = new ArrayList<>(); @@ -495,30 +495,28 @@ private List generateNewSegmentsToProcess(List segmentsToBeReProcessedList, + @VisibleForTesting + void divideSegmentsAmongSubtasks(List segmentsToBeScheduled, 
List> segmentNamesGroupList, Map segmentNameVsDownloadURL, - int maxNumRecordsPerTask) { - - long numRecordsPerTask = 0; + int maxNumRecordsPerSubTask) { + long numRecordsAdded = 0; List segmentNames = new ArrayList<>(); - for (int segmentZkMetadataIndex = 0; segmentZkMetadataIndex < segmentsToBeReProcessedList.size(); - segmentZkMetadataIndex++) { - SegmentZKMetadata segmentZKMetadata = segmentsToBeReProcessedList.get(segmentZkMetadataIndex); + for (SegmentZKMetadata segmentZKMetadata: segmentsToBeScheduled) { segmentNames.add(segmentZKMetadata.getSegmentName()); segmentNameVsDownloadURL.put(segmentZKMetadata.getSegmentName(), segmentZKMetadata.getDownloadUrl()); - numRecordsPerTask += segmentZKMetadata.getTotalDocs(); + numRecordsAdded += segmentZKMetadata.getTotalDocs(); - if (numRecordsPerTask >= maxNumRecordsPerTask) { + if (numRecordsAdded >= maxNumRecordsPerSubTask) { segmentNamesGroupList.add(segmentNames); - numRecordsPerTask = 0; segmentNames = new ArrayList<>(); + numRecordsAdded = 0; } - if ((!segmentNames.isEmpty()) - && (segmentZkMetadataIndex == (segmentsToBeReProcessedList.size() - 1))) { - segmentNamesGroupList.add(segmentNames); - } + } + + if (!segmentNames.isEmpty()) { + segmentNamesGroupList.add(segmentNames); } } diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index b21fe82128a6..ac2d3f845da9 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.plugin.minion.tasks.realtimetoofflinesegments; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import java.util.ArrayList; @@ -31,6 +32,7 @@ import java.util.concurrent.TimeUnit; import org.apache.helix.model.IdealState; import org.apache.helix.task.TaskState; +import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.pinot.common.metadata.segment.SegmentZKMetadata; import org.apache.pinot.common.minion.RealtimeToOfflineCheckpointCheckPoint; import org.apache.pinot.common.minion.RealtimeToOfflineSegmentsTaskMetadata; @@ -47,6 +49,7 @@ import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.stream.StreamConfigProperties; +import org.apache.pinot.spi.utils.CommonConstants; import org.apache.pinot.spi.utils.CommonConstants.Segment.Realtime.Status; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; import org.mockito.ArgumentCaptor; @@ -750,6 +753,99 @@ public void testRealtimeToOfflineSegmentsTaskConfig() { taskGenerator.validateTaskConfigs(tableConfig, schema, validAgg2Config); } + @Test + public void testDivideSegmentsAmongSubtasks() { + RealtimeToOfflineSegmentsTaskGenerator taskGenerator = new RealtimeToOfflineSegmentsTaskGenerator(); + + ZNRecord znRecord1 = new ZNRecord("seg_1"); + znRecord1.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "70"); + znRecord1.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_1.tar"); + + ZNRecord znRecord2 = new ZNRecord("seg_2"); + znRecord2.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "30"); + znRecord2.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_2.tar"); + + ZNRecord znRecord3 = new ZNRecord("seg_3"); 
+ znRecord3.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "101"); + znRecord3.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_3.tar"); + + ZNRecord znRecord4 = new ZNRecord("seg_4"); + znRecord4.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "1"); + znRecord4.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_4.tar"); + + ZNRecord znRecord5 = new ZNRecord("seg_5"); + znRecord5.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "98"); + znRecord5.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_5.tar"); + + ZNRecord znRecord6 = new ZNRecord("seg_6"); + znRecord6.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "123"); + znRecord6.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_6.tar"); + + ZNRecord znRecord7 = new ZNRecord("seg_7"); + znRecord7.setSimpleField(CommonConstants.Segment.TOTAL_DOCS, "1"); + znRecord7.setSimpleField(CommonConstants.Segment.DOWNLOAD_URL, "seg_7.tar"); + + List znRecordList = + ImmutableList.of(znRecord1, znRecord2, znRecord3, znRecord4, znRecord5, znRecord6, znRecord7); + + List segmentsToBeScheduled = new ArrayList<>(); + List> segmentNamesGroupList = new ArrayList<>(); + Map segmentNameVsDownloadURL = new HashMap<>(); + int maxNumRecordsPerSubTask = 100; + + for (ZNRecord znRecord: znRecordList) { + segmentsToBeScheduled.add(new SegmentZKMetadata(znRecord)); + } + + taskGenerator.divideSegmentsAmongSubtasks(segmentsToBeScheduled, segmentNamesGroupList, segmentNameVsDownloadURL, + maxNumRecordsPerSubTask); + + assert segmentNamesGroupList.size() == 4; + assert "seg_1,seg_2".equals(String.join(",", segmentNamesGroupList.get(0))); + assert "seg_3".equals(String.join(",", segmentNamesGroupList.get(1))); + assert "seg_4,seg_5,seg_6".equals(String.join(",", segmentNamesGroupList.get(2))); + assert "seg_7".equals(String.join(",", segmentNamesGroupList.get(3))); + + assert segmentNameVsDownloadURL.size() == 7; + for (String segmentName: segmentNameVsDownloadURL.keySet()) { + assert (segmentName + 
".tar").equals(segmentNameVsDownloadURL.get(segmentName)); + } + } + + @Test + public void testGetFailedCheckpoints() { + RealtimeToOfflineSegmentsTaskGenerator taskGenerator = new RealtimeToOfflineSegmentsTaskGenerator(); + Set segmentsPresentInOfflineTable = + new HashSet<>(Arrays.asList("seg_1", "seg_2", "seg_3", "seg_4", "seg_5", "seg_6", "seg_7")); + + List checkPoints = new ArrayList<>(); + checkPoints.add(new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("seg_realtime_1", "seg_realtime_2")), + new HashSet<>(Arrays.asList("seg_1", "seg_4", "seg_5")), + "1", "task_1", true) + ); + checkPoints.add(new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("seg_realtime_3", "seg_realtime_4")), + new HashSet<>(Arrays.asList("seg_2")), + "2") + ); + RealtimeToOfflineCheckpointCheckPoint checkPoint = new RealtimeToOfflineCheckpointCheckPoint( + new HashSet<>(Arrays.asList("seg_realtime_5", "seg_realtime_6")), + new HashSet<>(Arrays.asList("seg_6", "seg_8")), + "2"); + checkPoints.add(checkPoint); + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + new RealtimeToOfflineSegmentsTaskMetadata("test_REALTIME", System.currentTimeMillis() - 100000, + System.currentTimeMillis() - 1000, checkPoints); + List failedCheckpoints = + taskGenerator.getFailedCheckpoints(realtimeToOfflineSegmentsTaskMetadata, segmentsPresentInOfflineTable); + + assert failedCheckpoints.size() == 1; + assert !failedCheckpoints.get(0).isFailed(); + assert failedCheckpoints.get(0).getId().equals(checkPoint.getId()); + } + @Test public void testFilterOutDeletedSegments() { RealtimeToOfflineSegmentsTaskGenerator taskGenerator = new RealtimeToOfflineSegmentsTaskGenerator(); From 2cdc946209369b6e416bc473ae49ee69a6a6c486 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 27 Jan 2025 00:58:18 +0530 Subject: [PATCH 69/72] Fixes unit test --- .../RealtimeToOfflineSegmentsTaskGeneratorTest.java | 7 +++++++ 1 file changed, 7 insertions(+) diff 
--git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index ac2d3f845da9..1a430343df9a 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -434,6 +434,9 @@ public void testGenerateTasksWithSegmentUploadFailure() { List.of("githubEventsOffline__0__0__20241213T2002Z")); when(mockClusterInfoProvide.getPinotHelixResourceManager()).thenReturn(mockPinotHelixResourceManager); + ArgumentCaptor captor = ArgumentCaptor.forClass(List.class); + when(mockPinotHelixResourceManager.deleteSegments(Mockito.eq(OFFLINE_TABLE_NAME), captor.capture())).thenReturn( + PinotResourceManagerResponse.success("")); // Default configs Map> taskConfigsMap = new HashMap<>(); @@ -444,6 +447,10 @@ public void testGenerateTasksWithSegmentUploadFailure() { generator.init(mockClusterInfoProvide); List pinotTaskConfigs = generator.generateTasks(Lists.newArrayList(realtimeTableConfig)); + List capturedList = captor.getValue(); + assert capturedList.size() == 1; + assert capturedList.get(0).equals("githubEventsOffline__0__0__20241213T2002Z"); + assertEquals(pinotTaskConfigs.size(), 1); assertEquals(pinotTaskConfigs.get(0).getTaskType(), RealtimeToOfflineSegmentsTask.TASK_TYPE); Map configs = pinotTaskConfigs.get(0).getConfigs(); From 279a33951acea70de02661666de76cac8c175a09 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Mon, 27 Jan 2025 01:04:25 +0530 
Subject: [PATCH 70/72] fixes integration test --- .../RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index 0d5e06ecaee7..e6da066b051b 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -278,7 +278,7 @@ public void testRealtimeToOfflineSegmentsTask() assertTrue(_taskResourceManager.getTaskQueues().contains( PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE))); // Should not generate more tasks - assertNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) + MinionTaskTestUtils.assertNoTaskSchedule(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); // Wait at most 600 seconds for all tasks COMPLETED From bfad067a34a1021ba84ee2b7b255218b69090b14 Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Tue, 28 Jan 2025 02:02:37 +0530 Subject: [PATCH 71/72] fixes log format output --- ...ealtimeToOfflineSegmentsTaskGenerator.java | 9 +++- ...imeToOfflineSegmentsTaskGeneratorTest.java | 48 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java index 6f8aa617bf2f..b9d9f98b1d35 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGenerator.java @@ -219,9 +219,13 @@ public List generateTasks(List tableConfigs) { List segmentsToBeScheduled; if (!prevMinionTaskSuccessful) { + List segmentsNamesToBeReprocessed = new ArrayList<>(); + for (SegmentZKMetadata segmentZKMetadata : segmentsToBeReProcessed) { + segmentsNamesToBeReprocessed.add(segmentZKMetadata.getSegmentName()); + } LOGGER.warn( "Found prev minion task failures. Re-Scheduling previously failed task input segments: {} of table: {}", - segmentsToBeReProcessed, realtimeTableName); + segmentsNamesToBeReprocessed, realtimeTableName); segmentsToBeScheduled = segmentsToBeReProcessed; } else { // if all offline segments of prev minion tasks were successfully uploaded, @@ -441,7 +445,8 @@ List filterOutDeletedSegments(Set segmentNames, return segmentZKMetadataListToRet; } - private List generateNewSegmentsToProcess(List completedSegmentsZKMetadata, + @VisibleForTesting + List generateNewSegmentsToProcess(List completedSegmentsZKMetadata, long windowStartMs, long windowEndMs, long bucketMs, long bufferMs, String bufferTimePeriod, Set lastLLCSegmentPerPartition, RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata) { diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java index 1a430343df9a..fa1daac1baea 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/test/java/org/apache/pinot/plugin/minion/tasks/realtimetoofflinesegments/RealtimeToOfflineSegmentsTaskGeneratorTest.java @@ -408,6 +408,54 @@ public void testGenerateTasksWithMinionMetadata() { assertEquals(configs.get("m1" + RealtimeToOfflineSegmentsTask.AGGREGATION_TYPE_KEY_SUFFIX), "MAX"); } + @Test + public void testGenerateNewSegmentsToProcess() { + List completedSegmentsZKMetadata = new ArrayList<>(); + + long hourMillis = 3600 * 1000; + long pastTime = System.currentTimeMillis() - (2 * 24 * hourMillis); + + ZNRecord znRecord1 = new ZNRecord("seg_1"); + znRecord1.setSimpleField(CommonConstants.Segment.START_TIME, String.valueOf(pastTime + hourMillis)); + znRecord1.setSimpleField(CommonConstants.Segment.END_TIME, String.valueOf(pastTime + 2 * hourMillis)); + + ZNRecord znRecord2 = new ZNRecord("seg_2"); + znRecord2.setSimpleField(CommonConstants.Segment.START_TIME, String.valueOf(pastTime + hourMillis + 1)); + znRecord2.setSimpleField(CommonConstants.Segment.END_TIME, String.valueOf(pastTime + 2 * hourMillis - 90)); + + ZNRecord znRecord3 = new ZNRecord("seg_3"); + znRecord3.setSimpleField(CommonConstants.Segment.START_TIME, String.valueOf(pastTime + 6 * hourMillis + 1)); + znRecord3.setSimpleField(CommonConstants.Segment.END_TIME, String.valueOf(pastTime + 8 * hourMillis)); + + ZNRecord znRecord4 = new ZNRecord("seg_4"); + znRecord4.setSimpleField(CommonConstants.Segment.START_TIME, String.valueOf(pastTime + 6 * hourMillis + 90)); + 
znRecord4.setSimpleField(CommonConstants.Segment.END_TIME, String.valueOf(pastTime + 8 * hourMillis + 12)); + + List znRecordList = ImmutableList.of(znRecord1, znRecord2, znRecord3, znRecord4); + for (ZNRecord znRecord : znRecordList) { + znRecord.setSimpleField(CommonConstants.Segment.TIME_UNIT, TimeUnit.MILLISECONDS.toString()); + completedSegmentsZKMetadata.add(new SegmentZKMetadata(znRecord)); + } + + Set lastLLCSegmentPerPartition = new HashSet<>(); + lastLLCSegmentPerPartition.add("seg_4"); + + RealtimeToOfflineSegmentsTaskMetadata realtimeToOfflineSegmentsTaskMetadata = + new RealtimeToOfflineSegmentsTaskMetadata("test_REALTIME", 1); + + RealtimeToOfflineSegmentsTaskGenerator generator = new RealtimeToOfflineSegmentsTaskGenerator(); + List segmentZKMetadataList = + generator.generateNewSegmentsToProcess(completedSegmentsZKMetadata, pastTime, pastTime + hourMillis, hourMillis, + (24 * hourMillis), "1d", lastLLCSegmentPerPartition, + realtimeToOfflineSegmentsTaskMetadata); + + assert segmentZKMetadataList.size() == 2; + assert "seg_1".equals(segmentZKMetadataList.get(0).getSegmentName()); + assert "seg_2".equals(segmentZKMetadataList.get(1).getSegmentName()); + assert (pastTime + hourMillis) == realtimeToOfflineSegmentsTaskMetadata.getWindowStartMs(); + assert (pastTime + 2 * hourMillis) == realtimeToOfflineSegmentsTaskMetadata.getWindowEndMs(); + } + @Test public void testGenerateTasksWithSegmentUploadFailure() { // store partial offline segments in Zk metadata. 
From 986e8bf657172fe1734127235065d306892722ff Mon Sep 17 00:00:00 2001 From: Harnoor7 Date: Thu, 30 Jan 2025 01:10:18 +0530 Subject: [PATCH 72/72] Attempts to fix test --- ...timeToOfflineSegmentsMinionClusterIntegrationTest.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index e6da066b051b..2d305709e98c 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -270,6 +270,14 @@ public void testRealtimeToOfflineSegmentsTask() assert response.isSuccessful(); allOfflineSegments = _helixResourceManager.getSegmentsFor(_offlineTableName, true); assertEquals(allOfflineSegments.size(), 0); + _helixResourceManager.getSegmentDeletionManager().removeSegmentsFromStore(_offlineTableName, allOfflineSegments); + TestUtils.waitForCondition(k -> { + try { + return _helixResourceManager.getSegmentsZKMetadata(_offlineTableName).isEmpty(); + } catch (Exception e) { + return false; + } + }, 90000L, "Unable to delete all offline segments before timeout!"); expectedWatermark -= 86400000; // Schedule task