fix lost first message (#363)
(cherry picked from commit cf0ec23)
jianyun8023 committed Jun 29, 2021
1 parent 3a4cb8f commit 1bcfb14
Showing 6 changed files with 40 additions and 52 deletions.
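
The change in summary: an "exclude start message id" flag is threaded from FlinkPulsarSource through PulsarFetcher into ReaderThread, so a reader resuming from checkpointed state starts after the stored message id, while a fresh start keeps the seed id inclusive and the first message is no longer dropped. Below is a minimal stand-alone sketch of that start-position rule against the plain Pulsar Reader API; the service URL, topic name, and the restoredFromCheckpoint flag are illustrative stand-ins, not the connector's actual wiring.

import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.client.api.PulsarClient;
import org.apache.pulsar.client.api.Reader;
import org.apache.pulsar.client.api.ReaderBuilder;

import java.util.concurrent.TimeUnit;

public class StartMessageIdSketch {
    public static void main(String[] args) throws Exception {
        PulsarClient client = PulsarClient.builder()
                .serviceUrl("pulsar://localhost:6650")        // illustrative
                .build();

        MessageId start = MessageId.earliest;                 // stand-in for a seed or checkpointed id
        boolean restoredFromCheckpoint = false;               // true when the id comes from Flink state

        ReaderBuilder<byte[]> builder = client.newReader()
                .topic("persistent://public/default/demo")    // illustrative
                .startMessageId(start);

        // Core idea of the fix: a checkpointed id was already emitted before the failure,
        // so resume after it; a fresh start must include the seed message, otherwise the
        // very first message is lost.
        if (!restoredFromCheckpoint) {
            builder.startMessageIdInclusive();
        }

        try (Reader<byte[]> reader = builder.create()) {
            Message<byte[]> first = reader.readNext(5, TimeUnit.SECONDS);
            System.out.println(first == null ? "no message within 5s" : first.getMessageId());
        } finally {
            client.close();
        }
    }
}

In the connector itself this decision is made per topic range: as the diffs below show, the fetcher passes excludeStartMessageIds.contains(state.getTopicRange()) down to each ReaderThread.
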
File 1 of 6

@@ -33,6 +33,7 @@
 
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 
 import static org.apache.flink.streaming.connectors.pulsar.internal.PulsarOptions.USE_EXTEND_FIELD;
 
@@ -100,7 +101,8 @@ protected PulsarFetcher<Row> createFetcher(
 ProcessingTimeService processingTimeProvider,
 long autoWatermarkInterval,
 ClassLoader userCodeClassLoader,
-StreamingRuntimeContext streamingRuntime) throws Exception {
+StreamingRuntimeContext streamingRuntime,
+Set<TopicRange> excludeStartMessageIds) throws Exception {
 boolean useExtendField = Boolean.parseBoolean((String) properties.get(USE_EXTEND_FIELD));
 return new PulsarRowFetcher(
 sourceContext,

File 2 of 6

@@ -66,6 +66,7 @@
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -189,6 +190,7 @@ public class FlinkPulsarSource<T>
 * to seed the partition discoverer.
 */
 private transient volatile TreeMap<TopicRange, MessageId> restoredState;
+private transient volatile Set<TopicRange> excludeStartMessageIds;
 
 /**
 * Accessor for state in the operator state backend.
@@ -386,6 +388,7 @@ public void open(Configuration parameters) throws Exception {
 this.metadataReader = createMetadataReader();
 
 ownedTopicStarts = new HashMap<>();
+excludeStartMessageIds = new HashSet<>();
 Set<TopicRange> allTopics = metadataReader.discoverTopicChanges();
 
 if (specificStartupOffsets == null && specificStartupOffsetsAsBytes != null) {
@@ -406,7 +409,10 @@ public void open(Configuration parameters) throws Exception {
 
 restoredState.entrySet().stream()
 .filter(e -> SourceSinkUtils.belongsTo(e.getKey(), numParallelTasks, taskIndex))
-.forEach(e -> ownedTopicStarts.put(e.getKey(), e.getValue()));
+.forEach(e -> {
+ownedTopicStarts.put(e.getKey(), e.getValue());
+excludeStartMessageIds.add(e.getKey());
+});
 
 Set<TopicRange> goneTopics = Sets.difference(restoredState.keySet(), allTopics).stream()
 .filter(k -> SourceSinkUtils.belongsTo(k, numParallelTasks, taskIndex))
@@ -502,7 +508,8 @@ public void onException(Throwable cause) {
 streamingRuntime.getProcessingTimeService(),
 streamingRuntime.getExecutionConfig().getAutoWatermarkInterval(),
 getRuntimeContext().getUserCodeClassLoader(),
-streamingRuntime);
+streamingRuntime,
+excludeStartMessageIds);
 
 if (!running) {
 return;
@@ -523,13 +530,15 @@ protected PulsarFetcher<T> createFetcher(
 ProcessingTimeService processingTimeProvider,
 long autoWatermarkInterval,
 ClassLoader userCodeClassLoader,
-StreamingRuntimeContext streamingRuntime) throws Exception {
+StreamingRuntimeContext streamingRuntime,
+Set<TopicRange> excludeStartMessageIds) throws Exception {
 
 //readerConf.putIfAbsent(PulsarOptions.SUBSCRIPTION_ROLE_OPTION_KEY, getSubscriptionName());
 
 return new PulsarFetcher(
 sourceContext,
 seedTopicsWithInitialOffsets,
+excludeStartMessageIds,
 watermarksPeriodic,
 watermarksPunctuated,
 processingTimeProvider,

File 3 of 6

@@ -29,6 +29,7 @@
 import org.apache.pulsar.shade.com.google.common.collect.ImmutableList;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -58,6 +59,7 @@ public class PulsarFetcher<T> {
 protected final SourceContext<T> sourceContext;
 
 protected final Map<TopicRange, MessageId> seedTopicsWithInitialOffsets;
+protected final Set<TopicRange> excludeStartMessageIds;
 
 /** The lock that guarantees that record emission and state updates are atomic,
 * from the view of taking a checkpoint. */
@@ -139,6 +141,7 @@ public PulsarFetcher(
 this(
 sourceContext,
 seedTopicsWithInitialOffsets,
+Collections.emptySet(),
 watermarksPeriodic,
 watermarksPunctuated,
 processingTimeProvider,
@@ -156,6 +159,7 @@ public PulsarFetcher(
 public PulsarFetcher(
 SourceContext<T> sourceContext,
 Map<TopicRange, MessageId> seedTopicsWithInitialOffsets,
+Set<TopicRange> excludeStartMessageIds,
 SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
 SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
 ProcessingTimeService processingTimeProvider,
@@ -171,6 +175,7 @@ public PulsarFetcher(
 
 this.sourceContext = sourceContext;
 this.seedTopicsWithInitialOffsets = seedTopicsWithInitialOffsets;
+this.excludeStartMessageIds = excludeStartMessageIds;
 this.checkpointLock = sourceContext.getCheckpointLock();
 this.userCodeClassLoader = userCodeClassLoader;
 this.runtimeContext = runtimeContext;
@@ -558,7 +563,8 @@ protected ReaderThread createReaderThread(ExceptionProxy exceptionProxy, PulsarT
 pollTimeoutMs,
 exceptionProxy,
 failOnDataLoss,
-useEarliestWhenDataLoss);
+useEarliestWhenDataLoss,
+excludeStartMessageIds.contains(state.getTopicRange()));
 }
 
 /**

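In the hunks above, the pre-existing PulsarFetcher constructor stays source-compatible by delegating to the new one with Collections.emptySet(), so nothing is excluded unless the source explicitly asks for it, and the per-partition decision is then just a set-membership check. A small self-contained illustration of that overloading pattern (class and field names here are invented for the example, not the connector's real types):

import java.util.Collections;
import java.util.Set;

class FetcherSketch {
    private final Set<String> excludeStartIds;

    // Old signature: existing callers keep the previous (nothing-excluded) behaviour.
    FetcherSketch() {
        this(Collections.emptySet());
    }

    // New signature: a restored source passes the topic ranges whose checkpointed
    // start id was already emitted and must not be re-read.
    FetcherSketch(Set<String> excludeStartIds) {
        this.excludeStartIds = excludeStartIds;
    }

    // Mirrors excludeStartMessageIds.contains(state.getTopicRange()) in the diff above.
    boolean excludes(String topicRange) {
        return excludeStartIds.contains(topicRange);
    }
}
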
File 4 of 6

@@ -44,6 +44,7 @@ public class ReaderThread<T> extends Thread {
 protected final ExceptionProxy exceptionProxy;
 protected final TopicRange topicRange;
 protected final MessageId startMessageId;
+protected boolean excludeMessageId = false;
 private boolean failOnDataLoss = true;
 private boolean useEarliestWhenDataLoss = false;
 
@@ -83,10 +84,12 @@ public ReaderThread(
 int pollTimeoutMs,
 ExceptionProxy exceptionProxy,
 boolean failOnDataLoss,
-boolean useEarliestWhenDataLoss) {
+boolean useEarliestWhenDataLoss,
+boolean excludeMessageId) {
 this(owner, state, clientConf, readerConf, deserializer, pollTimeoutMs, exceptionProxy);
 this.failOnDataLoss = failOnDataLoss;
 this.useEarliestWhenDataLoss = useEarliestWhenDataLoss;
+this.excludeMessageId = excludeMessageId;
 }
 
 @Override
@@ -96,7 +99,7 @@ public void run() {
 try {
 createActualReader();
 
-skipFirstMessageIfNeeded();
+handleUnAvailedCursor();
 
 log.info("Starting to read {} with reader thread {}", topicRange, getName());
 
@@ -125,20 +128,20 @@ protected void createActualReader() throws PulsarClientException {
 .newReader()
 .topic(topicRange.getTopic())
 .startMessageId(startMessageId)
-.startMessageIdInclusive()
 .loadConf(readerConf);
 log.info("Create a reader at topic {} starting from message {} (inclusive) : config = {}",
 topicRange, startMessageId, readerConf);
+if (!excludeMessageId){
+readerBuilder.startMessageIdInclusive();
+}
 if (!topicRange.isFullRange()) {
 readerBuilder.keyHashRange(topicRange.getPulsarRange());
 }
 
 reader = readerBuilder.create();
 }
 
-protected void skipFirstMessageIfNeeded() throws PulsarClientException {
-Message<?> currentMessage = null;
-MessageId currentId;
+protected void handleUnAvailedCursor() throws PulsarClientException {
 boolean failOnDataLoss = this.failOnDataLoss;
 if (!startMessageId.equals(MessageId.earliest)
 && !startMessageId.equals(MessageId.latest)
@@ -152,7 +155,7 @@ protected void skipFirstMessageIfNeeded() throws PulsarClientException {
 if (failOnDataLoss) {
 log.error("the start message id is beyond the last commit message id, with topic:{}", this.topicRange);
 throw new RuntimeException("start message id beyond the last commit");
-} else if (useEarliestWhenDataLoss){
+} else if (useEarliestWhenDataLoss) {
 log.info("reset message to earliest");
 reader.seek(MessageId.earliest);
 metaDataReader.resetCursor(this.topicRange, MessageId.earliest);
@@ -164,40 +167,6 @@
 }
 
 }
-while (currentMessage == null && running) {
-currentMessage = reader.readNext(pollTimeoutMs, TimeUnit.MILLISECONDS);
-if (failOnDataLoss) {
-break;
-}
-}
-if (currentMessage == null) {
-reportDataLoss(String.format("Cannot read data at offset %s from topic: %s",
-startMessageId.toString(),
-topicRange));
-} else {
-currentId = currentMessage.getMessageId();
-state.setOffset(currentId);
-if (!messageIdRoughEquals(currentId, startMessageId) && failOnDataLoss) {
-reportDataLoss(
-String.format(
-"Potential Data Loss in reading %s: intended to start at %s, actually we get %s",
-topicRange, startMessageId.toString(), currentId.toString()));
-}
-
-if (startMessageId instanceof BatchMessageIdImpl && currentId instanceof BatchMessageIdImpl) {
-// we seek using a batch message id, we can read next directly later
-} else if (startMessageId instanceof MessageIdImpl && currentId instanceof BatchMessageIdImpl) {
-// we seek using a message id, this is supposed to be read by previous task since it's
-// inclusive for the checkpoint, so we skip this batch
-BatchMessageIdImpl cbmid = (BatchMessageIdImpl) currentId;
-
-MessageIdImpl newStart =
-new MessageIdImpl(cbmid.getLedgerId(), cbmid.getEntryId() + 1, cbmid.getPartitionIndex());
-reader.seek(newStart);
-} else if (startMessageId instanceof MessageIdImpl && currentId instanceof MessageIdImpl) {
-// current entry is a non-batch entry, we can read next directly later
-}
-}
 }
 }
 

File 5 of 6

@@ -535,7 +535,7 @@ public void testStartFromSpecific() throws Exception {
 -20, -21, -22, 1, 2, 3, 10, 11, 12), Optional.empty());
 
 Map<String, Set<Integer>> expectedData = new HashMap<>();
-expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));
+expectedData.put(topic, new HashSet<>(Arrays.asList(1, 2, 3, 10, 11, 12)));
 
 Map<String, MessageId> offset = new HashMap<>();
 offset.put(topic, mids.get(3));
@@ -550,7 +550,7 @@
 DataStream stream = see.addSource(
 new FlinkPulsarSource<>(serviceUrl, adminUrl, new AtomicRowDeserializationSchemaWrapper(
 new AtomicRowDeserializationSchema(Integer.class.getCanonicalName(), true)), sourceProps).setStartFromSpecificOffsets(offset));
-stream.flatMap(new CheckAllMessageExist(expectedData, 5, 2)).setParallelism(1);
+stream.flatMap(new CheckAllMessageExist(expectedData, 6, 2)).setParallelism(1);
 
 TestUtils.tryExecute(see, "start from specific");
 }
@@ -569,7 +569,7 @@ public void testStartFromExternalSubscription() throws Exception {
 admin.topics().createSubscription(TopicName.get(topic).toString(), subName, mids.get(3));
 
 Map<String, Set<Integer>> expectedData = new HashMap<>();
-expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));
+expectedData.put(topic, new HashSet<>(Arrays.asList(1, 2, 3, 10, 11, 12)));
 
 StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
 see.setParallelism(1);
@@ -580,7 +580,7 @@
 
 DataStream stream = see.addSource(new FlinkPulsarSource<Row>(serviceUrl, adminUrl, new AtomicRowDeserializationSchemaWrapper(
 new AtomicRowDeserializationSchema(Integer.class.getCanonicalName(), true)), sourceProps).setStartFromSubscription(subName));
-stream.flatMap(new CheckAllMessageExist(expectedData, 5, 2)).setParallelism(1);
+stream.flatMap(new CheckAllMessageExist(expectedData, 6, 2)).setParallelism(1);
 
 TestUtils.tryExecute(see, "start from specific");
 

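For orientation on the two test updates above (assuming, as the surrounding test code suggests, that mids holds the produced message ids in publish order): the tests publish -20, -21, -22, 1, 2, 3, 10, 11, 12 and start from mids.get(3), the id of the fourth message, which carries the value 1. Since that first message is no longer dropped, the expected set gains the value 1 and CheckAllMessageExist now expects 6 distinct values instead of 5. A tiny self-contained check of that arithmetic:

import java.util.Arrays;
import java.util.List;

public class ExpectedSetSketch {
    public static void main(String[] args) {
        // Values published by the test, in publish order.
        List<Integer> produced = Arrays.asList(-20, -21, -22, 1, 2, 3, 10, 11, 12);
        // Starting at the 4th message id (mids.get(3), value 1) and keeping that
        // element yields 6 expected values instead of 5.
        List<Integer> expected = produced.subList(3, produced.size());
        System.out.println(expected);          // [1, 2, 3, 10, 11, 12]
        System.out.println(expected.size());   // 6
    }
}
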
File 6 of 6

@@ -614,7 +614,8 @@ protected PulsarFetcher<T> createFetcher(
 ProcessingTimeService processingTimeProvider,
 long autoWatermarkInterval,
 ClassLoader userCodeClassLoader,
-StreamingRuntimeContext streamingRuntime) throws Exception {
+StreamingRuntimeContext streamingRuntime,
+Set<TopicRange> inclusiveStartMessageIds) throws Exception {
 return new TestingFetcher<>(sourceContext, seedTopicsWithInitialOffsets, watermarksPeriodic,
 watermarksPunctuated, processingTimeProvider, autoWatermarkInterval);
 }
@@ -664,7 +665,8 @@ protected PulsarFetcher<T> createFetcher(
 ProcessingTimeService processingTimeProvider,
 long autoWatermarkInterval,
 ClassLoader userCodeClassLoader,
-StreamingRuntimeContext streamingRuntime) throws Exception {
+StreamingRuntimeContext streamingRuntime,
+Set<TopicRange> inclusiveStartMessageIds) throws Exception {
 return testFetcherSupplier.get();
 }
 
