From ac94141af0c16161afa68de1c3720f254ae4e12c Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Fri, 24 Jan 2020 13:02:13 +0900 Subject: [PATCH] validation: delay flushing undo files in syncing node case Data files are pre-allocated, and upon flush/finalization, they are trimmed down to their resulting size. Block (blk) files are written to disk as blocks come in, which is often out of order, whereas undo (rev) files are written sequentially, as each block is added to the top of the chain. When a block file hits the size limit, the system flushes and trims the file down to its final size, and moves on to the next block file. Case 1: blocks are added to the chain as they come in (synced up node case) -- in this case, we will flush and finalize the undo file together with the block file. Case 2: blocks are added to the chain after they have been downloaded (syncing node case) -- in this case, we postpone finalizing the undo file until we know the undo data for the last block in the file has been written to disk. --- src/validation.cpp | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/validation.cpp b/src/validation.cpp index 9854740e6..faf92b3b0 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -1731,19 +1731,24 @@ DisconnectResult CChainState::DisconnectBlock(const CBlock& block, const CBlockI return fClean ? DISCONNECT_OK : DISCONNECT_UNCLEAN; } -void static FlushBlockFile(bool fFinalize = false) +static void FlushUndoFile(int block_file, bool finalize = false) +{ + FlatFilePos undo_pos_old(block_file, vinfoBlockFile[block_file].nUndoSize); + if (!UndoFileSeq().Flush(undo_pos_old, finalize)) { + AbortNode("Flushing undo file to disk failed. This is likely the result of an I/O error."); + } +} + +static void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false) { LOCK(cs_LastBlockFile); - FlatFilePos block_pos_old(nLastBlockFile, vinfoBlockFile[nLastBlockFile].nSize); - FlatFilePos undo_pos_old(nLastBlockFile, vinfoBlockFile[nLastBlockFile].nUndoSize); - - bool status = true; - status &= BlockFileSeq().Flush(block_pos_old, fFinalize); - status &= UndoFileSeq().Flush(undo_pos_old, fFinalize); - if (!status) { + if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) { AbortNode("Flushing block file to disk failed. This is likely the result of an I/O error."); } + // we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks, + // e.g. during IBD or a sync after a node going offline + if (!fFinalize || finalize_undo) FlushUndoFile(nLastBlockFile, finalize_undo); } static bool FindUndoPos(BlockValidationState &state, int nFile, FlatFilePos &pos, unsigned int nAddSize); @@ -1757,6 +1762,14 @@ static bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationSt return error("ConnectBlock(): FindUndoPos failed"); if (!UndoWriteToDisk(blockundo, _pos, pindex->pprev->GetBlockHash(), chainparams.MessageStart())) return AbortNode(state, "Failed to write undo data"); + // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order) + // we want to flush the rev (undo) file once we've written the last block, which is indicated by the last height + // in the block file info as below; note that this does not catch the case where the undo writes are keeping up + // with the block writes (usually when a synced up node is getting newly mined blocks) -- this case is caught in + // the FindBlockPos function + if (_pos.nFile < nLastBlockFile && static_cast(pindex->nHeight) == vinfoBlockFile[_pos.nFile].nHeightLast) { + FlushUndoFile(_pos.nFile, true); + } // update nUndoPos in block index pindex->nUndoPos = _pos.nPos; @@ -3220,8 +3233,13 @@ static bool FindBlockPos(FlatFilePos &pos, unsigned int nAddSize, unsigned int n vinfoBlockFile.resize(nFile + 1); } + bool finalize_undo = false; if (!fKnown) { while (vinfoBlockFile[nFile].nSize + nAddSize >= MAX_BLOCKFILE_SIZE) { + // when the undo file is keeping up with the block file, we want to flush it explicitly + // when it is lagging behind (more blocks arrive than are being connected), we let the + // undo block write case handle it + finalize_undo = (vinfoBlockFile[nFile].nHeightLast == (unsigned int)ChainActive().Tip()->nHeight); nFile++; if (vinfoBlockFile.size() <= nFile) { vinfoBlockFile.resize(nFile + 1); @@ -3235,7 +3253,7 @@ static bool FindBlockPos(FlatFilePos &pos, unsigned int nAddSize, unsigned int n if (!fKnown) { LogPrintf("Leaving block file %i: %s\n", nLastBlockFile, vinfoBlockFile[nLastBlockFile].ToString()); } - FlushBlockFile(!fKnown); + FlushBlockFile(!fKnown, finalize_undo); nLastBlockFile = nFile; }