Fix up chain resync deadlock during cascade failure
This commit is contained in:
@@ -754,6 +754,8 @@ bool SideChain::add_external_block(PoolBlock& block, std::vector<hash>& missing_
|
||||
if (added && block.m_verified) {
|
||||
if (block.m_invalid) {
|
||||
on_block_rejected(&block, "external block validation failed");
|
||||
// Check for deferred recovery after on_block_rejected (we're outside the lock here)
|
||||
check_and_run_deferred_recovery();
|
||||
} else {
|
||||
on_block_accepted();
|
||||
}
|
||||
@@ -791,6 +793,8 @@ bool SideChain::add_block(const PoolBlock& block)
|
||||
prune_seen_data();
|
||||
}
|
||||
|
||||
// Scope the lock so we can check for deferred recovery after releasing it
|
||||
{
|
||||
WriteLock lock(m_sidechainLock);
|
||||
|
||||
auto result = m_blocksById.insert({ new_block->m_sidechainId, new_block });
|
||||
@@ -832,6 +836,11 @@ bool SideChain::add_block(const PoolBlock& block)
|
||||
else {
|
||||
verify_loop(new_block);
|
||||
}
|
||||
} // Lock released here
|
||||
|
||||
// Check if recovery was triggered during verify_loop and execute it now
|
||||
// (must be done outside the lock to avoid deadlock)
|
||||
check_and_run_deferred_recovery();
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -2716,6 +2725,8 @@ void SideChain::get_missing_blocks(unordered_set<hash>& missing_blocks) const
|
||||
|
||||
void SideChain::retry_unverified_blocks()
|
||||
{
|
||||
// Scope the lock so we can check for deferred recovery after releasing it
|
||||
{
|
||||
WriteLock lock(m_sidechainLock);
|
||||
|
||||
// Scan for unverified blocks and retry them
|
||||
@@ -2756,6 +2767,10 @@ void SideChain::retry_unverified_blocks()
|
||||
if (verified_count > 0) {
|
||||
LOGINFO(3, "Verified " << verified_count << " blocks after mainchain update");
|
||||
}
|
||||
} // Lock released here
|
||||
|
||||
// Check if recovery was triggered during verify_loop and execute it now
|
||||
check_and_run_deferred_recovery();
|
||||
}
|
||||
|
||||
bool SideChain::consider_peer_genesis(const hash& genesis_id, uint64_t timestamp, uint64_t height)
|
||||
@@ -3255,8 +3270,21 @@ void SideChain::trigger_recovery(uint64_t failure_height)
|
||||
|
||||
m_pendingRecoveryHeight = recovery_checkpoint;
|
||||
|
||||
// Request checkpoint validation from peers via P2P server
|
||||
// NOTE: Don't call request_checkpoint_validation() here!
|
||||
// We may be called while holding m_sidechainLock (from verify_loop -> on_block_rejected).
|
||||
// The actual recovery will be executed by check_and_run_deferred_recovery() after
|
||||
// the lock is released.
|
||||
}
|
||||
|
||||
void SideChain::check_and_run_deferred_recovery()
|
||||
{
|
||||
// This function should be called AFTER releasing m_sidechainLock
|
||||
// It checks if recovery was triggered and executes it
|
||||
uint64_t checkpoint_height = m_pendingRecoveryHeight.load();
|
||||
if (checkpoint_height > 0 && m_recoveryMode.load()) {
|
||||
LOGINFO(0, "Executing deferred recovery to checkpoint " << checkpoint_height);
|
||||
request_checkpoint_validation();
|
||||
}
|
||||
}
|
||||
|
||||
void SideChain::request_checkpoint_validation()
|
||||
|
||||
@@ -110,6 +110,7 @@ public:
|
||||
// Recovery
|
||||
void trigger_recovery(uint64_t failure_height);
|
||||
void reset_to_checkpoint(uint64_t checkpoint_height);
|
||||
void check_and_run_deferred_recovery();
|
||||
bool is_in_recovery() const { return m_recoveryMode.load(); }
|
||||
|
||||
[[nodiscard]] FORCEINLINE difficulty_type difficulty() const { ReadLock lock(m_curDifficultyLock); return m_curDifficulty; }
|
||||
|
||||
Reference in New Issue
Block a user