Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(proposer): split proofs that failed multiple times #346

Merged
merged 1 commit into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions proposer/op/proposer/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,19 @@ func (db *ProofDB) GetConsecutiveSpanProofs(start, end uint64) ([][]byte, error)

return result, nil
}

// GetProofRequestsWithBlockRangeAndStatus looks up proof requests through the
// read client, filtering on proof type, start block, end block and status.
// It returns the matching requests, or a nil slice and a wrapped error when
// the query fails.
func (db *ProofDB) GetProofRequestsWithBlockRangeAndStatus(proofType proofrequest.Type, startBlock, endBlock uint64, status proofrequest.Status) ([]*ent.ProofRequest, error) {
	// Build the filtered query first so that executing it is a separate step.
	query := db.readClient.ProofRequest.Query().Where(
		proofrequest.TypeEQ(proofType),
		proofrequest.StartBlockEQ(startBlock),
		proofrequest.EndBlockEQ(endBlock),
		proofrequest.StatusEQ(status),
	)

	matches, err := query.All(context.Background())
	if err != nil {
		return nil, fmt.Errorf("failed to query proofs with block range and status: %w", err)
	}

	return matches, nil
}
31 changes: 25 additions & 6 deletions proposer/op/proposer/prove.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func (l *L2OutputSubmitter) ProcessWitnessgenRequests() error {

// Retry a proof request. Sets the status of a proof to FAILED and retries the proof based on the optional proof status response.
// If an error response is received:
// - Range Proof: Split in two if the block range is > 1. Retry the same request if range is 1 block.
// - Range Proof: Split in two if the block range is > 1 AND (the proof is unexecutable OR has failed before). Retry the same request if range is 1 block.
// - Agg Proof: Retry the same request.
func (l *L2OutputSubmitter) RetryRequest(req *ent.ProofRequest, status ProofStatusResponse) error {
err := l.db.UpdateProofStatus(req.ID, proofrequest.StatusFAILED)
Expand All @@ -90,11 +90,30 @@ func (l *L2OutputSubmitter) RetryRequest(req *ent.ProofRequest, status ProofStat
return err
}

// If there's an execution error AND the request is a SPAN proof AND the block range is > 1, split the request into two requests.
// This is likely caused by an SP1 OOM due to a large block range with many transactions.
// TODO: This solution can be removed once the embedded allocator is used, because then the programs
// will never OOM.
if req.Type == proofrequest.TypeSPAN && status.ExecutionStatus == SP1ExecutionStatusUnexecutable && req.EndBlock-req.StartBlock > 1 {
unexecutable := status.ExecutionStatus == SP1ExecutionStatusUnexecutable
spanProof := req.Type == proofrequest.TypeSPAN
multiBlockRange := req.EndBlock-req.StartBlock > 1

// Fetch the FAILED requests of the same type with the same block range.
prevFailedReq, err := l.db.GetProofRequestsWithBlockRangeAndStatus(req.Type, req.StartBlock, req.EndBlock, proofrequest.StatusFAILED)
if err != nil {
l.Log.Error("failed to check for previous failures", "err", err)
return err
}

// Check if there is another proof (besides the one marked as failed above) with the same block range that also failed.
severalFailedRequests := len(prevFailedReq) > 1

// If the request is a SPAN proof AND the block range is > 1 AND (there's an execution error OR several failed requests),
// split the request into two requests.
//
// If the embedded allocator is enabled, the proof will never be unexecutable. Instead, failures are caused by the limit on the number
// of shards in V4. This will be fixed in V5 when the cycle limit is removed.
//
// If the embedded allocator is not enabled, the trigger for unexecutable is the SP1 OOM.
//
// The reason why we only split with multiple failed requests is to avoid transient errors causing unnecessary splits.
if spanProof && (unexecutable || severalFailedRequests) && multiBlockRange {
// Split the request into two requests.
midBlock := (req.StartBlock + req.EndBlock) / 2
err = l.db.NewEntry(req.Type, req.StartBlock, midBlock)
Expand Down
Loading