fix: Disconnect stalling peers after timeout of header sync

This commit is contained in:
p-j01 2021-06-13 23:33:42 +00:00
parent 51aca8e38d
commit 4c68245cf7
2 changed files with 40 additions and 3 deletions

View file

@ -169,6 +169,8 @@ struct CNodeState {
int nUnconnectingHeaders;
//! Whether we've started headers synchronization with this peer.
bool fSyncStarted;
//! When to potentially disconnect peer for stalling headers download
int64_t nHeadersSyncTimeout;
//! Since when we're stalling block download progress (in microseconds), or 0.
int64_t nStallingSince;
std::list<QueuedBlock> vBlocksInFlight;
@ -208,6 +210,7 @@ struct CNodeState {
pindexBestHeaderSent = NULL;
nUnconnectingHeaders = 0;
fSyncStarted = false;
nHeadersSyncTimeout = 0;
nStallingSince = 0;
nDownloadingSince = 0;
nBlocksInFlight = 0;
@ -2899,6 +2902,7 @@ bool SendMessages(CNode* pto, CConnman& connman, const std::atomic<bool>& interr
// Only actively request headers from a single peer, unless we're close to today.
if ((nSyncStarted == 0 && fFetch) || pindexBestHeader->GetBlockTime() > GetAdjustedTime() - 24 * 60 * 60) {
state.fSyncStarted = true;
state.nHeadersSyncTimeout = GetTimeMicros() + HEADERS_DOWNLOAD_TIMEOUT_BASE + HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER * (GetAdjustedTime() - pindexBestHeader->GetBlockTime())/(consensusParams.nPowTargetSpacing);
nSyncStarted++;
const CBlockIndex *pindexStart = pindexBestHeader;
/* If possible, start at the block preceding the currently
@ -3197,7 +3201,6 @@ bool SendMessages(CNode* pto, CConnman& connman, const std::atomic<bool>& interr
}
if (!vInv.empty())
connman.PushMessage(pto, msgMaker.Make(NetMsgType::INV, vInv));
// Detect whether we're stalling
nNow = GetTimeMicros();
if (state.nStallingSince && state.nStallingSince < nNow - 1000000 * BLOCK_STALLING_TIMEOUT) {
@ -3222,7 +3225,38 @@ bool SendMessages(CNode* pto, CConnman& connman, const std::atomic<bool>& interr
return true;
}
}
// Check for headers sync timeouts
if (state.fSyncStarted && state.nHeadersSyncTimeout < std::numeric_limits<int64_t>::max()) {
// Detect whether this is a stalling initial-headers-sync peer
if (pindexBestHeader->GetBlockTime() <= GetAdjustedTime() - 24*60*60) {
if (nNow > state.nHeadersSyncTimeout && nSyncStarted == 1 && (nPreferredDownload - state.fPreferredDownload >= 1)) {
// Disconnect a (non-whitelisted) peer if it is our only sync peer,
// and we have others we could be using instead.
// Note: If all our peers are inbound, then we won't
// disconnect our sync peer for stalling; we have bigger
// problems if we can't get any outbound peers.
if (!pto->fWhitelisted) {
LogPrintf("Timeout downloading headers from peer=%d, disconnecting\n", pto->GetId());
pto->fDisconnect = true;
return true;
} else {
LogPrintf("Timeout downloading headers from whitelisted peer=%d, not disconnecting\n", pto->GetId());
// Reset the headers sync state so that we have a
// chance to try downloading from a different peer.
// Note: this will also result in at least one more
// getheaders message to be sent to
// this peer (eventually).
state.fSyncStarted = false;
nSyncStarted--;
state.nHeadersSyncTimeout = 0;
}
}
} else {
// After we've caught up once, reset the timeout so we can't trigger
// disconnect later.
state.nHeadersSyncTimeout = std::numeric_limits<int64_t>::max();
}
}
//
// Message: getdata (blocks)
//

View file

@ -17,7 +17,10 @@ static const int64_t ORPHAN_TX_EXPIRE_TIME = 20 * 60;
static const int64_t ORPHAN_TX_EXPIRE_INTERVAL = 5 * 60;
/** Default number of orphan+recently-replaced txn to keep around for block reconstruction */
static const unsigned int DEFAULT_BLOCK_RECONSTRUCTION_EXTRA_TXN = 100;
/** Headers download timeout expressed in microseconds
* Timeout = base + per_header * (expected number of headers) */
static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_BASE = 15 * 60 * 1000000; // 15 minutes
static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/header
/** Register with a network node to receive its signals */
void RegisterNodeSignals(CNodeSignals& nodeSignals);
/** Unregister a network node */