Merge #15402: Granular invalidateblock and RewindBlockIndex

519b0bc5dc Make last disconnected block BLOCK_FAILED_VALID, even when aborted (Pieter Wuille)
8d220417cd Optimization: don't add txn back to mempool after 10 invalidates (Pieter Wuille)
9ce9c37004 Prevent callback overruns in InvalidateBlock and RewindBlockIndex (Pieter Wuille)
9bb32eb571 Release cs_main during InvalidateBlock iterations (Pieter Wuille)
9b1ff5c742 Call InvalidateBlock without cs_main held (Pieter Wuille)
241b2c74ac Make RewindBlockIndex interruptible (Pieter Wuille)
880ce7d46b Call RewindBlockIndex without cs_main held (Pieter Wuille)
436f7d735f Release cs_main during RewindBlockIndex operation (Pieter Wuille)
1d342875c2 Merge the disconnection and erasing loops in RewindBlockIndex (Pieter Wuille)
32b2696ab4 Move erasure of non-active blocks to a separate loop in RewindBlockIndex (Pieter Wuille)
9d6dcc52c6 Abstract EraseBlockData out of RewindBlockIndex (Pieter Wuille)

Pull request description:

  This PR makes a number of improvements to the InvalidateBlock (`invalidateblock` RPC) and RewindBlockIndex functions, primarily around breaking up their long-term cs_main holding. In addition:
  * They're made safely interruptible (`bitcoind` can be shutdown, and no progress in either will be lost, though if incomplete, `invalidateblock` won't continue after restart and will need to be called again)
  * The validation queue is prevented from overflowing (meaning `invalidateblock` on a very old block will not drive bitcoind OOM) (see #14289).
  * `invalidateblock` won't bother to move transactions back into the mempool after 10 blocks (optimization).

  This is not an optimal solution, as we're relying on the scheduler call sites to make sure the scheduler doesn't overflow. Ideally, the scheduler would guarantee this directly, but that needs a few further changes (moving the signal emissions out of cs_main) to prevent deadlocks.

  I have manually tested the `invalidateblock` changes (including interrupting, and running with -checkblockindex and -checkmempool), but haven't tried the rewinding (which is probably becoming increasingly unnecessary, as very few pre-0.13.1 nodes remain that would care to upgrade).

Tree-SHA512: 692e42758bd3d3efc2eb701984a8cb5db25fbeee32e7575df0183a00d0c2c30fdf72ce64c7625c32ad8c8bdc56313da72a7471658faeb0d39eefe39c4b8b8474
This commit is contained in:
Wladimir J. van der Laan 2019-03-07 17:39:54 +01:00
commit 3db0cc3947
No known key found for this signature in database
GPG key ID: 1E4AED62986CD25D
4 changed files with 212 additions and 138 deletions

View file

@ -1468,11 +1468,11 @@ bool AppInitMain(InitInterfaces& interfaces)
uiInterface.InitMessage(_("Loading block index..."));
LOCK(cs_main);
do {
const int64_t load_block_index_start_time = GetTimeMillis();
bool is_coinsview_empty;
try {
LOCK(cs_main);
UnloadBlockIndex();
pcoinsTip.reset();
pcoinsdbview.reset();
@ -1544,7 +1544,7 @@ bool AppInitMain(InitInterfaces& interfaces)
// The on-disk coinsdb is now in a good state, create the cache
pcoinsTip.reset(new CCoinsViewCache(pcoinscatcher.get()));
bool is_coinsview_empty = fReset || fReindexChainState || pcoinsTip->GetBestBlock().IsNull();
is_coinsview_empty = fReset || fReindexChainState || pcoinsTip->GetBestBlock().IsNull();
if (!is_coinsview_empty) {
// LoadChainTip sets chainActive based on pcoinsTip's best block
if (!LoadChainTip(chainparams)) {
@ -1553,18 +1553,25 @@ bool AppInitMain(InitInterfaces& interfaces)
}
assert(chainActive.Tip() != nullptr);
}
} catch (const std::exception& e) {
LogPrintf("%s\n", e.what());
strLoadError = _("Error opening block database");
break;
}
if (!fReset) {
// Note that RewindBlockIndex MUST run even if we're about to -reindex-chainstate.
// It both disconnects blocks based on chainActive, and drops block data in
// mapBlockIndex based on lack of available witness data.
uiInterface.InitMessage(_("Rewinding blocks..."));
if (!RewindBlockIndex(chainparams)) {
strLoadError = _("Unable to rewind the database to a pre-fork state. You will need to redownload the blockchain");
break;
}
if (!fReset) {
// Note that RewindBlockIndex MUST run even if we're about to -reindex-chainstate.
// It both disconnects blocks based on chainActive, and drops block data in
// mapBlockIndex based on lack of available witness data.
uiInterface.InitMessage(_("Rewinding blocks..."));
if (!RewindBlockIndex(chainparams)) {
strLoadError = _("Unable to rewind the database to a pre-fork state. You will need to redownload the blockchain");
break;
}
}
try {
LOCK(cs_main);
if (!is_coinsview_empty) {
uiInterface.InitMessage(_("Verifying blocks..."));
if (fHavePruned && gArgs.GetArg("-checkblocks", DEFAULT_CHECKBLOCKS) > MIN_BLOCKS_TO_KEEP) {

View file

@ -1579,15 +1579,15 @@ static UniValue invalidateblock(const JSONRPCRequest& request)
uint256 hash(ParseHashV(request.params[0], "blockhash"));
CValidationState state;
CBlockIndex* pblockindex;
{
LOCK(cs_main);
CBlockIndex* pblockindex = LookupBlockIndex(hash);
pblockindex = LookupBlockIndex(hash);
if (!pblockindex) {
throw JSONRPCError(RPC_INVALID_ADDRESS_OR_KEY, "Block not found");
}
InvalidateBlock(state, Params(), pblockindex);
}
InvalidateBlock(state, Params(), pblockindex);
if (state.IsValid()) {
ActivateBestChain(state, Params());

View file

@ -178,7 +178,7 @@ public:
// Manual block validity manipulation:
bool PreciousBlock(CValidationState& state, const CChainParams& params, CBlockIndex* pindex) LOCKS_EXCLUDED(cs_main);
bool InvalidateBlock(CValidationState& state, const CChainParams& chainparams, CBlockIndex* pindex) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
bool InvalidateBlock(CValidationState& state, const CChainParams& chainparams, CBlockIndex* pindex);
void ResetBlockFailureFlags(CBlockIndex* pindex) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
bool ReplayBlocks(const CChainParams& params, CCoinsView* view);
@ -207,8 +207,10 @@ private:
CBlockIndex* FindMostWorkChain() EXCLUSIVE_LOCKS_REQUIRED(cs_main);
void ReceivedBlockTransactions(const CBlock& block, CBlockIndex* pindexNew, const FlatFilePos& pos, const Consensus::Params& consensusParams) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
bool RollforwardBlock(const CBlockIndex* pindex, CCoinsViewCache& inputs, const CChainParams& params) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
//! Mark a block as not having block data
void EraseBlockData(CBlockIndex* index) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
} g_chainstate;
/**
@ -2626,6 +2628,14 @@ static void NotifyHeaderTip() LOCKS_EXCLUDED(cs_main) {
}
}
static void LimitValidationInterfaceQueue() {
AssertLockNotHeld(cs_main);
if (GetMainSignals().CallbacksPending() > 10) {
SyncWithValidationInterfaceQueue();
}
}
/**
* Make the best chain active, in multiple steps. The result is either failure
* or an activated best chain. pblock is either nullptr or a pointer to a block
@ -2654,15 +2664,13 @@ bool CChainState::ActivateBestChain(CValidationState &state, const CChainParams&
do {
boost::this_thread::interruption_point();
if (GetMainSignals().CallbacksPending() > 10) {
// Block until the validation queue drains. This should largely
// never happen in normal operation, however may happen during
// reindex, causing memory blowup if we run too far ahead.
// Note that if a validationinterface callback ends up calling
// ActivateBestChain this may lead to a deadlock! We should
// probably have a DEBUG_LOCKORDER test for this in the future.
SyncWithValidationInterfaceQueue();
}
// Block until the validation queue drains. This should largely
// never happen in normal operation, however may happen during
// reindex, causing memory blowup if we run too far ahead.
// Note that if a validationinterface callback ends up calling
// ActivateBestChain this may lead to a deadlock! We should
// probably have a DEBUG_LOCKORDER test for this in the future.
LimitValidationInterfaceQueue();
{
LOCK(cs_main);
@ -2773,64 +2781,85 @@ bool PreciousBlock(CValidationState& state, const CChainParams& params, CBlockIn
bool CChainState::InvalidateBlock(CValidationState& state, const CChainParams& chainparams, CBlockIndex *pindex)
{
AssertLockHeld(cs_main);
// We first disconnect backwards and then mark the blocks as invalid.
// This prevents a case where pruned nodes may fail to invalidateblock
// and be left unable to start as they have no tip candidates (as there
// are no blocks that meet the "have data and are not invalid per
// nStatus" criteria for inclusion in setBlockIndexCandidates).
CBlockIndex* to_mark_failed = pindex;
bool pindex_was_in_chain = false;
CBlockIndex *invalid_walk_tip = chainActive.Tip();
int disconnected = 0;
DisconnectedBlockTransactions disconnectpool;
while (chainActive.Contains(pindex)) {
// Disconnect (descendants of) pindex, and mark them invalid.
while (true) {
if (ShutdownRequested()) break;
// Make sure the queue of validation callbacks doesn't grow unboundedly.
LimitValidationInterfaceQueue();
LOCK(cs_main);
if (!chainActive.Contains(pindex)) break;
pindex_was_in_chain = true;
CBlockIndex *invalid_walk_tip = chainActive.Tip();
// ActivateBestChain considers blocks already in chainActive
// unconditionally valid already, so force disconnect away from it.
if (!DisconnectTip(state, chainparams, &disconnectpool)) {
// It's probably hopeless to try to make the mempool consistent
// here if DisconnectTip failed, but we can try.
UpdateMempoolForReorg(disconnectpool, false);
return false;
}
}
DisconnectedBlockTransactions disconnectpool;
bool ret = DisconnectTip(state, chainparams, &disconnectpool);
// DisconnectTip will add transactions to disconnectpool.
// Adjust the mempool to be consistent with the new tip, adding
// transactions back to the mempool if disconnecting was succesful,
// and we're not doing a very deep invalidation (in which case
// keeping the mempool up to date is probably futile anyway).
UpdateMempoolForReorg(disconnectpool, /* fAddToMempool = */ (++disconnected <= 10) && ret);
if (!ret) return false;
assert(invalid_walk_tip->pprev == chainActive.Tip());
// Now mark the blocks we just disconnected as descendants invalid
// (note this may not be all descendants).
while (pindex_was_in_chain && invalid_walk_tip != pindex) {
invalid_walk_tip->nStatus |= BLOCK_FAILED_CHILD;
// We immediately mark the disconnected blocks as invalid.
// This prevents a case where pruned nodes may fail to invalidateblock
// and be left unable to start as they have no tip candidates (as there
// are no blocks that meet the "have data and are not invalid per
// nStatus" criteria for inclusion in setBlockIndexCandidates).
invalid_walk_tip->nStatus |= BLOCK_FAILED_VALID;
setDirtyBlockIndex.insert(invalid_walk_tip);
setBlockIndexCandidates.erase(invalid_walk_tip);
invalid_walk_tip = invalid_walk_tip->pprev;
}
// Mark the block itself as invalid.
pindex->nStatus |= BLOCK_FAILED_VALID;
setDirtyBlockIndex.insert(pindex);
setBlockIndexCandidates.erase(pindex);
m_failed_blocks.insert(pindex);
// DisconnectTip will add transactions to disconnectpool; try to add these
// back to the mempool.
UpdateMempoolForReorg(disconnectpool, true);
// The resulting new best tip may not be in setBlockIndexCandidates anymore, so
// add it again.
BlockMap::iterator it = mapBlockIndex.begin();
while (it != mapBlockIndex.end()) {
if (it->second->IsValid(BLOCK_VALID_TRANSACTIONS) && it->second->HaveTxsDownloaded() && !setBlockIndexCandidates.value_comp()(it->second, chainActive.Tip())) {
setBlockIndexCandidates.insert(it->second);
setBlockIndexCandidates.insert(invalid_walk_tip->pprev);
if (invalid_walk_tip->pprev == to_mark_failed && (to_mark_failed->nStatus & BLOCK_FAILED_VALID)) {
// We only want to mark the last disconnected block as BLOCK_FAILED_VALID; its children
// need to be BLOCK_FAILED_CHILD instead.
to_mark_failed->nStatus = (to_mark_failed->nStatus ^ BLOCK_FAILED_VALID) | BLOCK_FAILED_CHILD;
setDirtyBlockIndex.insert(to_mark_failed);
}
it++;
// Track the last disconnected block, so we can correct its BLOCK_FAILED_CHILD status in future
// iterations, or, if it's the last one, call InvalidChainFound on it.
to_mark_failed = invalid_walk_tip;
}
InvalidChainFound(pindex);
{
LOCK(cs_main);
if (chainActive.Contains(to_mark_failed)) {
// If the to-be-marked invalid block is in the active chain, something is interfering and we can't proceed.
return false;
}
// Mark pindex (or the last disconnected block) as invalid, even when it never was in the main chain
to_mark_failed->nStatus |= BLOCK_FAILED_VALID;
setDirtyBlockIndex.insert(to_mark_failed);
setBlockIndexCandidates.erase(to_mark_failed);
m_failed_blocks.insert(to_mark_failed);
// The resulting new best tip may not be in setBlockIndexCandidates anymore, so
// add it again.
BlockMap::iterator it = mapBlockIndex.begin();
while (it != mapBlockIndex.end()) {
if (it->second->IsValid(BLOCK_VALID_TRANSACTIONS) && it->second->HaveTxsDownloaded() && !setBlockIndexCandidates.value_comp()(it->second, chainActive.Tip())) {
setBlockIndexCandidates.insert(it->second);
}
it++;
}
InvalidChainFound(to_mark_failed);
}
// Only notify about a new block tip if the active chain was modified.
if (pindex_was_in_chain) {
uiInterface.NotifyBlockTip(IsInitialBlockDownload(), pindex->pprev);
uiInterface.NotifyBlockTip(IsInitialBlockDownload(), to_mark_failed->pprev);
}
return true;
}
@ -4113,38 +4142,114 @@ bool ReplayBlocks(const CChainParams& params, CCoinsView* view) {
return g_chainstate.ReplayBlocks(params, view);
}
//! Helper for CChainState::RewindBlockIndex
void CChainState::EraseBlockData(CBlockIndex* index)
{
AssertLockHeld(cs_main);
assert(!chainActive.Contains(index)); // Make sure this block isn't active
// Reduce validity
index->nStatus = std::min<unsigned int>(index->nStatus & BLOCK_VALID_MASK, BLOCK_VALID_TREE) | (index->nStatus & ~BLOCK_VALID_MASK);
// Remove have-data flags.
index->nStatus &= ~(BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO);
// Remove storage location.
index->nFile = 0;
index->nDataPos = 0;
index->nUndoPos = 0;
// Remove various other things
index->nTx = 0;
index->nChainTx = 0;
index->nSequenceId = 0;
// Make sure it gets written.
setDirtyBlockIndex.insert(index);
// Update indexes
setBlockIndexCandidates.erase(index);
std::pair<std::multimap<CBlockIndex*, CBlockIndex*>::iterator, std::multimap<CBlockIndex*, CBlockIndex*>::iterator> ret = mapBlocksUnlinked.equal_range(index->pprev);
while (ret.first != ret.second) {
if (ret.first->second == index) {
mapBlocksUnlinked.erase(ret.first++);
} else {
++ret.first;
}
}
// Mark parent as eligible for main chain again
if (index->pprev && index->pprev->IsValid(BLOCK_VALID_TRANSACTIONS) && index->pprev->HaveTxsDownloaded()) {
setBlockIndexCandidates.insert(index->pprev);
}
}
bool CChainState::RewindBlockIndex(const CChainParams& params)
{
LOCK(cs_main);
// Note that during -reindex-chainstate we are called with an empty chainActive!
int nHeight = 1;
while (nHeight <= chainActive.Height()) {
// Although SCRIPT_VERIFY_WITNESS is now generally enforced on all
// blocks in ConnectBlock, we don't need to go back and
// re-download/re-verify blocks from before segwit actually activated.
if (IsWitnessEnabled(chainActive[nHeight - 1], params.GetConsensus()) && !(chainActive[nHeight]->nStatus & BLOCK_OPT_WITNESS)) {
break;
// First erase all post-segwit blocks without witness not in the main chain,
// as this can we done without costly DisconnectTip calls. Active
// blocks will be dealt with below (releasing cs_main in between).
{
LOCK(cs_main);
for (const auto& entry : mapBlockIndex) {
if (IsWitnessEnabled(entry.second->pprev, params.GetConsensus()) && !(entry.second->nStatus & BLOCK_OPT_WITNESS) && !chainActive.Contains(entry.second)) {
EraseBlockData(entry.second);
}
}
nHeight++;
}
// Find what height we need to reorganize to.
CBlockIndex *tip;
int nHeight = 1;
{
LOCK(cs_main);
while (nHeight <= chainActive.Height()) {
// Although SCRIPT_VERIFY_WITNESS is now generally enforced on all
// blocks in ConnectBlock, we don't need to go back and
// re-download/re-verify blocks from before segwit actually activated.
if (IsWitnessEnabled(chainActive[nHeight - 1], params.GetConsensus()) && !(chainActive[nHeight]->nStatus & BLOCK_OPT_WITNESS)) {
break;
}
nHeight++;
}
tip = chainActive.Tip();
}
// nHeight is now the height of the first insufficiently-validated block, or tipheight + 1
CValidationState state;
CBlockIndex* pindex = chainActive.Tip();
while (chainActive.Height() >= nHeight) {
if (fPruneMode && !(chainActive.Tip()->nStatus & BLOCK_HAVE_DATA)) {
// If pruning, don't try rewinding past the HAVE_DATA point;
// since older blocks can't be served anyway, there's
// no need to walk further, and trying to DisconnectTip()
// will fail (and require a needless reindex/redownload
// of the blockchain).
break;
}
if (!DisconnectTip(state, params, nullptr)) {
return error("RewindBlockIndex: unable to disconnect block at height %i (%s)", pindex->nHeight, FormatStateMessage(state));
// Loop until the tip is below nHeight, or we reach a pruned block.
while (!ShutdownRequested()) {
{
LOCK(cs_main);
// Make sure nothing changed from under us (this won't happen because RewindBlockIndex runs before importing/network are active)
assert(tip == chainActive.Tip());
if (tip == nullptr || tip->nHeight < nHeight) break;
if (fPruneMode && !(tip->nStatus & BLOCK_HAVE_DATA)) {
// If pruning, don't try rewinding past the HAVE_DATA point;
// since older blocks can't be served anyway, there's
// no need to walk further, and trying to DisconnectTip()
// will fail (and require a needless reindex/redownload
// of the blockchain).
break;
}
// Disconnect block
if (!DisconnectTip(state, params, nullptr)) {
return error("RewindBlockIndex: unable to disconnect block at height %i (%s)", tip->nHeight, FormatStateMessage(state));
}
// Reduce validity flag and have-data flags.
// We do this after actual disconnecting, otherwise we'll end up writing the lack of data
// to disk before writing the chainstate, resulting in a failure to continue if interrupted.
// Note: If we encounter an insufficiently validated block that
// is on chainActive, it must be because we are a pruning node, and
// this block or some successor doesn't HAVE_DATA, so we were unable to
// rewind all the way. Blocks remaining on chainActive at this point
// must not have their validity reduced.
EraseBlockData(tip);
tip = tip->pprev;
}
// Make sure the queue of validation callbacks doesn't grow unboundedly.
LimitValidationInterfaceQueue();
// Occasionally flush state to disk.
if (!FlushStateToDisk(params, state, FlushStateMode::PERIODIC)) {
LogPrintf("RewindBlockIndex: unable to flush state to disk (%s)\n", FormatStateMessage(state));
@ -4152,55 +4257,17 @@ bool CChainState::RewindBlockIndex(const CChainParams& params)
}
}
// Reduce validity flag and have-data flags.
// We do this after actual disconnecting, otherwise we'll end up writing the lack of data
// to disk before writing the chainstate, resulting in a failure to continue if interrupted.
for (const auto& entry : mapBlockIndex) {
CBlockIndex* pindexIter = entry.second;
{
LOCK(cs_main);
if (chainActive.Tip() != nullptr) {
// We can't prune block index candidates based on our tip if we have
// no tip due to chainActive being empty!
PruneBlockIndexCandidates();
// Note: If we encounter an insufficiently validated block that
// is on chainActive, it must be because we are a pruning node, and
// this block or some successor doesn't HAVE_DATA, so we were unable to
// rewind all the way. Blocks remaining on chainActive at this point
// must not have their validity reduced.
if (IsWitnessEnabled(pindexIter->pprev, params.GetConsensus()) && !(pindexIter->nStatus & BLOCK_OPT_WITNESS) && !chainActive.Contains(pindexIter)) {
// Reduce validity
pindexIter->nStatus = std::min<unsigned int>(pindexIter->nStatus & BLOCK_VALID_MASK, BLOCK_VALID_TREE) | (pindexIter->nStatus & ~BLOCK_VALID_MASK);
// Remove have-data flags.
pindexIter->nStatus &= ~(BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO);
// Remove storage location.
pindexIter->nFile = 0;
pindexIter->nDataPos = 0;
pindexIter->nUndoPos = 0;
// Remove various other things
pindexIter->nTx = 0;
pindexIter->nChainTx = 0;
pindexIter->nSequenceId = 0;
// Make sure it gets written.
setDirtyBlockIndex.insert(pindexIter);
// Update indexes
setBlockIndexCandidates.erase(pindexIter);
std::pair<std::multimap<CBlockIndex*, CBlockIndex*>::iterator, std::multimap<CBlockIndex*, CBlockIndex*>::iterator> ret = mapBlocksUnlinked.equal_range(pindexIter->pprev);
while (ret.first != ret.second) {
if (ret.first->second == pindexIter) {
mapBlocksUnlinked.erase(ret.first++);
} else {
++ret.first;
}
}
} else if (pindexIter->IsValid(BLOCK_VALID_TRANSACTIONS) && pindexIter->HaveTxsDownloaded()) {
setBlockIndexCandidates.insert(pindexIter);
CheckBlockIndex(params.GetConsensus());
}
}
if (chainActive.Tip() != nullptr) {
// We can't prune block index candidates based on our tip if we have
// no tip due to chainActive being empty!
PruneBlockIndexCandidates();
CheckBlockIndex(params.GetConsensus());
}
return true;
}

View file

@ -443,7 +443,7 @@ CBlockIndex* FindForkInGlobalIndex(const CChain& chain, const CBlockLocator& loc
bool PreciousBlock(CValidationState& state, const CChainParams& params, CBlockIndex *pindex) LOCKS_EXCLUDED(cs_main);
/** Mark a block as invalid. */
bool InvalidateBlock(CValidationState& state, const CChainParams& chainparams, CBlockIndex* pindex) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
bool InvalidateBlock(CValidationState& state, const CChainParams& chainparams, CBlockIndex* pindex);
/** Remove invalidity status from a block and its descendants. */
void ResetBlockFailureFlags(CBlockIndex* pindex) EXCLUSIVE_LOCKS_REQUIRED(cs_main);