From 53c65ae7619ac9e80c89a321c70de64f3745e2aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sat, 13 Jan 2018 13:50:59 +0100 Subject: [PATCH] zstd: Update to pristine 1.3.3 --- thirdparty/README.md | 4 +- thirdparty/zstd/common/bitstream.h | 2 +- thirdparty/zstd/common/mem.h | 28 +- thirdparty/zstd/common/pool.c | 3 +- thirdparty/zstd/common/zstd_common.c | 14 +- thirdparty/zstd/common/zstd_internal.h | 220 +-- thirdparty/zstd/compress/zstd_compress.c | 769 ++++++----- ...td_compress.h => zstd_compress_internal.h} | 225 +++- thirdparty/zstd/compress/zstd_double_fast.c | 1 + thirdparty/zstd/compress/zstd_double_fast.h | 5 +- thirdparty/zstd/compress/zstd_fast.c | 1 + thirdparty/zstd/compress/zstd_fast.h | 5 +- thirdparty/zstd/compress/zstd_lazy.c | 116 +- thirdparty/zstd/compress/zstd_lazy.h | 5 +- thirdparty/zstd/compress/zstd_ldm.h | 5 +- thirdparty/zstd/compress/zstd_opt.c | 1188 +++++++---------- thirdparty/zstd/compress/zstd_opt.h | 4 +- thirdparty/zstd/compress/zstdmt_compress.c | 210 +-- thirdparty/zstd/compress/zstdmt_compress.h | 22 +- thirdparty/zstd/decompress/zstd_decompress.c | 72 +- thirdparty/zstd/zstd.h | 73 +- 21 files changed, 1522 insertions(+), 1450 deletions(-) rename thirdparty/zstd/compress/{zstd_compress.h => zstd_compress_internal.h} (52%) diff --git a/thirdparty/README.md b/thirdparty/README.md index 792e3b1650..049ea6e58e 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -453,10 +453,10 @@ Files extracted from upstream source: ## zstd - Upstream: https://github.com/facebook/zstd -- Version: 1.3.2 +- Version: 1.3.3 - License: BSD-3-Clause Files extracted from upstream source: - lib/{common/,compress/,decompress/,zstd.h} -- README.md, LICENSE +- LICENSE diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h index 2094823fe2..fcf3843079 100644 --- a/thirdparty/zstd/common/bitstream.h +++ b/thirdparty/zstd/common/bitstream.h @@ -167,7 +167,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); /*-************************************************************** * Internal functions ****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (register U32 val) +MEM_STATIC unsigned BIT_highbit32 (U32 val) { assert(val != 0); { diff --git a/thirdparty/zstd/common/mem.h b/thirdparty/zstd/common/mem.h index 23335c3146..47d2300177 100644 --- a/thirdparty/zstd/common/mem.h +++ b/thirdparty/zstd/common/mem.h @@ -56,8 +56,6 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size typedef int32_t S32; typedef uint64_t U64; typedef int64_t S64; - typedef intptr_t iPtrDiff; - typedef uintptr_t uPtrDiff; #else typedef unsigned char BYTE; typedef unsigned short U16; @@ -66,8 +64,6 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size typedef signed int S32; typedef unsigned long long U64; typedef signed long long S64; - typedef ptrdiff_t iPtrDiff; - typedef size_t uPtrDiff; #endif @@ -123,20 +119,26 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } /* currently only defined for gcc and icc */ #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) __pragma( pack(push, 1) ) - typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign; + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; __pragma( pack(pop) ) #else - typedef union { U16 u16; 
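/* review note (not upstream code): 1.3.3 replaces this catch-all union with
 * one packed, single-member struct per access width (unalign16/32/64/Arch
 * below). A packed one-member struct has exactly the member's size and
 * alignment 1, so each MEM_readXX/MEM_writeXX accessor describes only the
 * bytes it actually touches — presumably avoiding the old union object,
 * which is always 8 bytes wide because of its U64/size_t members. */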
U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; #endif -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } #else diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c index 1b0fe1035d..98b109e72a 100644 --- a/thirdparty/zstd/common/pool.c +++ b/thirdparty/zstd/common/pool.c @@ -11,7 +11,6 @@ /* ====== Dependencies ======= */ #include /* size_t */ -#include /* malloc, calloc, free */ #include "pool.h" /* ====== Compiler specifics ====== */ @@ -115,7 +114,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM * and full queues. */ ctx->queueSize = queueSize + 1; - ctx->queue = (POOL_job*) malloc(ctx->queueSize * sizeof(POOL_job)); + ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); ctx->queueHead = 0; ctx->queueTail = 0; ctx->numThreadsBusy = 0; diff --git a/thirdparty/zstd/common/zstd_common.c b/thirdparty/zstd/common/zstd_common.c index c2041053be..bccc948892 100644 --- a/thirdparty/zstd/common/zstd_common.c +++ b/thirdparty/zstd/common/zstd_common.c @@ -31,21 +31,27 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } * ZSTD Error Management ******************************************/ /*! ZSTD_isError() : -* tells if a return value is an error code */ + * tells if a return value is an error code */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } /*! ZSTD_getErrorName() : -* provides error code string from function result (useful for debugging) */ + * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /*! ZSTD_getError() : -* convert a `size_t` function result into a proper ZSTD_errorCode enum */ + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } /*! ZSTD_getErrorString() : -* provides error code string from enum */ + * provides error code string from enum */ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } +/*! 
g_debuglog_enable : + * turn on/off debug traces (global switch) */ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) +int g_debuglog_enable = 1; +#endif + /*=************************************************************** * Custom allocator diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h index e91cd20baa..5d2900eb76 100644 --- a/thirdparty/zstd/common/zstd_internal.h +++ b/thirdparty/zstd/common/zstd_internal.h @@ -11,6 +11,10 @@ #ifndef ZSTD_CCOMMON_H_MODULE #define ZSTD_CCOMMON_H_MODULE +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ /*-************************************* * Dependencies @@ -50,21 +54,26 @@ extern "C" { #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) # include +extern int g_debuglog_enable; /* recommended values for ZSTD_DEBUG display levels : * 1 : no display, enables assert() only - * 2 : reserved for currently active debugging path - * 3 : events once per object lifetime (CCtx, CDict) + * 2 : reserved for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) * 4 : events once per frame * 5 : events once per block * 6 : events once per sequence (*very* verbose) */ -# define DEBUGLOG(l, ...) { \ - if (l<=ZSTD_DEBUG) { \ - fprintf(stderr, __FILE__ ": "); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, " \n"); \ +# define RAWLOG(l, ...) { \ + if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \ + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ + fprintf(stderr, " \n"); \ } } #else -# define DEBUGLOG(l, ...) {} /* disabled */ +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
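/* review note: when ZSTD_DEBUG >= 2, the enabled variants above gate every
 * message on the new run-time switch g_debuglog_enable as well as on the
 * compile-time level — e.g. a hypothetical DEBUGLOG(4, "wlog=%u", w) prints
 * only if 4 <= ZSTD_DEBUG and g_debuglog_enable != 0; below that threshold,
 * both RAWLOG and DEBUGLOG compile to nothing. */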
{} /* disabled */ #endif @@ -85,9 +94,7 @@ extern "C" { #define ZSTD_OPT_NUM (1<<12) #define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) -#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) @@ -134,28 +141,40 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define LLFSELog 9 #define OffFSELog 8 -static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12, +static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 6, 7, 8, 9,10,11,12, 13,14,15,16 }; -static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, +static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 2, 1, 1, 1, 1, 1, -1,-1,-1,-1 }; #define LL_DEFAULTNORMLOG 6 /* for static allocation */ static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; -static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11, +static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 7, 8, 9,10,11, 12,13,14,15,16 }; -static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, +static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1,-1,-1, -1,-1,-1,-1,-1 }; #define ML_DEFAULTNORMLOG 6 /* for static allocation */ static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; -static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; +static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + -1,-1,-1,-1,-1 }; #define OF_DEFAULTNORMLOG 5 /* for static allocation */ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; @@ -167,7 +186,7 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } /*! 
ZSTD_wildcopy() : -* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ + * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ #define WILDCOPY_OVERLENGTH 8 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) { @@ -191,17 +210,14 @@ MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* s /*-******************************************* -* Private interfaces +* Private declarations *********************************************/ -typedef struct ZSTD_stats_s ZSTD_stats_t; - typedef struct seqDef_s { U32 offset; U16 litLength; U16 matchLength; } seqDef; - typedef struct { seqDef* sequencesStart; seqDef* sequences; @@ -216,100 +232,8 @@ typedef struct { U32 repToConfirm[ZSTD_REP_NUM]; } seqStore_t; -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - U32 price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_optimal_t; - -typedef struct { - U32* litFreq; - U32* litLengthFreq; - U32* matchLengthFreq; - U32* offCodeFreq; - ZSTD_match_t* matchTable; - ZSTD_optimal_t* priceTable; - - U32 matchLengthSum; - U32 matchSum; - U32 litLengthSum; - U32 litSum; - U32 offCodeSum; - U32 log2matchLengthSum; - U32 log2matchSum; - U32 log2litLengthSum; - U32 log2litSum; - U32 log2offCodeSum; - U32 factor; - U32 staticPrices; - U32 cachedPrice; - U32 cachedLitLength; - const BYTE* cachedLiterals; -} optState_t; - -typedef struct { - U32 offset; - U32 checksum; -} ldmEntry_t; - -typedef struct { - ldmEntry_t* hashTable; - BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U64 hashPower; /* Used to compute the rolling hash. - * Depends on ldmParams.minMatchLength */ -} ldmState_t; - -typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ - U32 hashLog; /* Log size of hashTable */ - U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ - U32 minMatchLength; /* Minimum match length */ - U32 hashEveryLog; /* Log number of entries to skip */ -} ldmParams_t; - -typedef struct { - U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; - FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; - FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; - FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; - U32 workspace[HUF_WORKSPACE_SIZE_U32]; - HUF_repeat hufCTable_repeatMode; - FSE_repeat offcode_repeatMode; - FSE_repeat matchlength_repeatMode; - FSE_repeat litlength_repeatMode; -} ZSTD_entropyCTables_t; - -struct ZSTD_CCtx_params_s { - ZSTD_format_e format; - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; - - int compressionLevel; - U32 forceWindow; /* force back-references to respect limit of - * 1<= 3) /* GCC Intrinsic */ return 31 - __builtin_clz(val); # else /* Software version */ - static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; - int r; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; - r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; - return r; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; # endif } } -/* hidden functions */ - /* ZSTD_invalidateRepCodes() : * ensures next compression will not use repcodes from previous block. 
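* review note: "repcodes" are the three most-recently-used match offsets
* (seeded from repStartValue = {1,4,8} above); a sequence may cite one of
* them by index instead of carrying a full offset, which is why stale
* history must be invalidated before compressing non-contiguous input.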
* Note : only works with regular variant; * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ -/*! ZSTD_initCStream_internal() : - * Private use only. Init streaming operation. - * expects params to be valid. - * must receive dict, or cdict, or none, but not both. - * @return : 0, or an error code */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); - -/*! ZSTD_compressStream_generic() : - * Private use only. To be called from zstdmt_compress.c in single-thread mode. */ -size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode); - -/*! ZSTD_getCParamsFromCDict() : - * as the name implies */ -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); - -/* ZSTD_compressBegin_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictMode_e dictMode, - ZSTD_CCtx_params params, - unsigned long long pledgedSrcSize); - -/* ZSTD_compress_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_CCtx_params params); - typedef struct { blockType_e blockType; U32 lastBlock; @@ -398,7 +279,8 @@ typedef struct { } blockProperties_t; /*! ZSTD_getcBlockSize() : -* Provides the size of compressed block from block header `src` */ + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr); diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c index 2c46c79f1c..8d1629246d 100644 --- a/thirdparty/zstd/compress/zstd_compress.c +++ b/thirdparty/zstd/compress/zstd_compress.c @@ -26,7 +26,7 @@ #include "fse.h" #define HUF_STATIC_LINKING_ONLY #include "huf.h" -#include "zstd_compress.h" +#include "zstd_compress_internal.h" #include "zstd_fast.h" #include "zstd_double_fast.h" #include "zstd_lazy.h" @@ -42,17 +42,6 @@ size_t ZSTD_compressBound(size_t srcSize) { } -/*-************************************* -* Sequence storage -***************************************/ -static void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} - - /*-************************************* * Context memory management ***************************************/ @@ -78,6 +67,7 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) if (!cctx) return NULL; cctx->customMem = customMem; cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; + cctx->requestedParams.fParams.contentSizeFlag = 1; ZSTD_STATIC_ASSERT(zcss_init==0); ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); return cctx; @@ -152,28 +142,34 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStor #define ZSTD_CLEVEL_CUSTOM 999 static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - ZSTD_CCtx_params 
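/* review note: this hunk renames the parameter `params` to `CCtxParams` and
 * adds tracing; the behavior is unchanged — cParams are used verbatim when
 * compressionLevel == ZSTD_CLEVEL_CUSTOM, and are otherwise derived from
 * the level via ZSTD_getCParams(level, srcSizeHint, dictSize). */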
params, U64 srcSizeHint, size_t dictSize) + ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize) { - return (params.compressionLevel == ZSTD_CLEVEL_CUSTOM ? - params.cParams : - ZSTD_getCParams(params.compressionLevel, srcSizeHint, dictSize)); + DEBUGLOG(4, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u", + (U32)srcSizeHint, (U32)dictSize); + return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ? + CCtxParams.cParams : + ZSTD_getCParams(CCtxParams.compressionLevel, srcSizeHint, dictSize); } -static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* params, U64 srcSize) +static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* CCtxParams, U64 srcSize) { - params->cParams = ZSTD_getCParamsFromCCtxParams(*params, srcSize, 0); - params->compressionLevel = ZSTD_CLEVEL_CUSTOM; + DEBUGLOG(4, "ZSTD_cLevelToCCtxParams_srcSize: srcSize = %u", + (U32)srcSize); + CCtxParams->cParams = ZSTD_getCParamsFromCCtxParams(*CCtxParams, srcSize, 0); + CCtxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM; } static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx) { + DEBUGLOG(4, "ZSTD_cLevelToCParams: level=%i", cctx->requestedParams.compressionLevel); ZSTD_cLevelToCCtxParams_srcSize( &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1); } -static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* params) +static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* CCtxParams) { - ZSTD_cLevelToCCtxParams_srcSize(params, 0); + DEBUGLOG(4, "ZSTD_cLevelToCCtxParams"); + ZSTD_cLevelToCCtxParams_srcSize(CCtxParams, ZSTD_CONTENTSIZE_UNKNOWN); } static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( @@ -251,6 +247,7 @@ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value) { + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value); if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); switch(param) @@ -259,7 +256,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_compressionLevel: - if (value == 0) return 0; /* special value : 0 means "don't change anything" */ if (cctx->cdict) return ERROR(stage_wrong); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); @@ -270,9 +266,8 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v case ZSTD_p_minMatch: case ZSTD_p_targetLength: case ZSTD_p_compressionStrategy: - if (value == 0) return 0; /* special value : 0 means "don't change anything" */ if (cctx->cdict) return ERROR(stage_wrong); - ZSTD_cLevelToCParams(cctx); /* Can optimize if srcSize is known */ + if (value>0) ZSTD_cLevelToCParams(cctx); /* Can optimize if srcSize is known */ return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_contentSizeFlag: @@ -281,15 +276,12 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize, - * even when referencing into Dictionary content + * even when referencing into Dictionary content. 
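* review note: the deleted `cctx->loadedDictEnd = 0;` below is a behavior
* change — setting this flag no longer mutates loaded-dictionary state as
* a side effect; the setter now only records the value in requestedParams.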
* default : 0 when using a CDict, 1 when using a Prefix */ - cctx->loadedDictEnd = 0; return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_nbThreads: - if (value==0) return 0; - DEBUGLOG(5, " setting nbThreads : %u", value); - if (value > 1 && cctx->staticSize) { + if ((value > 1) && cctx->staticSize) { return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ } return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); @@ -298,22 +290,15 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_overlapSizeLog: - DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->requestedParams.nbThreads); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_enableLongDistanceMatching: if (cctx->cdict) return ERROR(stage_wrong); - if (value != 0) { - ZSTD_cLevelToCParams(cctx); - } + if (value>0) ZSTD_cLevelToCParams(cctx); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_ldmHashLog: case ZSTD_p_ldmMinMatch: - if (value == 0) return 0; /* special value : 0 means "don't change anything" */ - if (cctx->cdict) return ERROR(stage_wrong); - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - case ZSTD_p_ldmBucketSizeLog: case ZSTD_p_ldmHashEveryLog: if (cctx->cdict) return ERROR(stage_wrong); @@ -324,160 +309,167 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v } size_t ZSTD_CCtxParam_setParameter( - ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) + ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned value) { + DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%u, %u)", (U32)param, value); switch(param) { case ZSTD_p_format : if (value > (unsigned)ZSTD_f_zstd1_magicless) return ERROR(parameter_unsupported); - params->format = (ZSTD_format_e)value; - return 0; + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; case ZSTD_p_compressionLevel : if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel(); - if (value == 0) return 0; - params->compressionLevel = value; - return 0; + if (value) /* 0 : does not change current level */ + CCtxParams->compressionLevel = value; + return CCtxParams->compressionLevel; case ZSTD_p_windowLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.windowLog = value; - return 0; + DEBUGLOG(4, "ZSTD_CCtxParam_setParameter: set windowLog=%u", value); + if (value) { /* 0 : does not change current windowLog */ + CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.windowLog = value; + } + return CCtxParams->cParams.windowLog; case ZSTD_p_hashLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.hashLog = value; - return 0; + if (value) { /* 0 : does not change current hashLog */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.hashLog = value; + } + return CCtxParams->cParams.hashLog; case ZSTD_p_chainLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.chainLog = value; - return 0; + if (value) { /* 0 : does not change 
current chainLog */ + CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.chainLog = value; + } + return CCtxParams->cParams.chainLog; case ZSTD_p_searchLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.searchLog = value; - return 0; + if (value) { /* 0 : does not change current searchLog */ + CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.searchLog = value; + } + return value; case ZSTD_p_minMatch : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.searchLength = value; - return 0; + if (value) { /* 0 : does not change current minMatch length */ + CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.searchLength = value; + } + return CCtxParams->cParams.searchLength; case ZSTD_p_targetLength : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.targetLength = value; - return 0; + if (value) { /* 0 : does not change current sufficient_len */ + CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.targetLength = value; + } + return CCtxParams->cParams.targetLength; case ZSTD_p_compressionStrategy : - if (value == 0) return 0; - CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); - ZSTD_cLevelToCCtxParams(params); - params->cParams.strategy = (ZSTD_strategy)value; - return 0; + if (value) { /* 0 : does not change currentstrategy */ + CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + } + return (size_t)CCtxParams->cParams.strategy; case ZSTD_p_contentSizeFlag : /* Content size written in frame header _when known_ (default:1) */ - DEBUGLOG(5, "set content size flag = %u", (value>0)); - params->fParams.contentSizeFlag = value > 0; - return 0; + DEBUGLOG(4, "set content size flag = %u", (value>0)); + CCtxParams->fParams.contentSizeFlag = value > 0; + return CCtxParams->fParams.contentSizeFlag; case ZSTD_p_checksumFlag : /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ - params->fParams.checksumFlag = value > 0; - return 0; + CCtxParams->fParams.checksumFlag = value > 0; + return CCtxParams->fParams.checksumFlag; case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ - DEBUGLOG(5, "set dictIDFlag = %u", (value>0)); - params->fParams.noDictIDFlag = (value == 0); - return 0; + DEBUGLOG(4, "set dictIDFlag = %u", (value>0)); + CCtxParams->fParams.noDictIDFlag = (value == 0); + return !CCtxParams->fParams.noDictIDFlag; case ZSTD_p_forceMaxWindow : - params->forceWindow = value > 0; - return 0; + CCtxParams->forceWindow = (value > 0); + return CCtxParams->forceWindow; case ZSTD_p_nbThreads : - if (value == 0) return 0; + if (value == 0) return CCtxParams->nbThreads; #ifndef ZSTD_MULTITHREAD if (value > 1) return ERROR(parameter_unsupported); - return 0; + return 1; #else - return ZSTDMT_initializeCCtxParameters(params, value); + return ZSTDMT_CCtxParam_setNbThreads(CCtxParams, value); #endif case ZSTD_p_jobSize : #ifndef 
ZSTD_MULTITHREAD return ERROR(parameter_unsupported); #else - if (params->nbThreads <= 1) return ERROR(parameter_unsupported); - return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_sectionSize, value); + if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported); + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); #endif case ZSTD_p_overlapSizeLog : #ifndef ZSTD_MULTITHREAD return ERROR(parameter_unsupported); #else - if (params->nbThreads <= 1) return ERROR(parameter_unsupported); - return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value); + if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported); + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value); #endif case ZSTD_p_enableLongDistanceMatching : - if (value != 0) { - ZSTD_cLevelToCCtxParams(params); - params->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (value) { + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; } - return ZSTD_ldm_initializeParameters(¶ms->ldmParams, value); + return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value); case ZSTD_p_ldmHashLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - params->ldmParams.hashLog = value; - return 0; + if (value) { /* 0 : does not change current ldmHashLog */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CCtxParams->ldmParams.hashLog = value; + } + return CCtxParams->ldmParams.hashLog; case ZSTD_p_ldmMinMatch : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); - params->ldmParams.minMatchLength = value; - return 0; + if (value) { /* 0 : does not change current ldmMinMatch */ + CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); + CCtxParams->ldmParams.minMatchLength = value; + } + return CCtxParams->ldmParams.minMatchLength; case ZSTD_p_ldmBucketSizeLog : if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) { return ERROR(parameter_outOfBound); } - params->ldmParams.bucketSizeLog = value; - return 0; + CCtxParams->ldmParams.bucketSizeLog = value; + return value; case ZSTD_p_ldmHashEveryLog : if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) { return ERROR(parameter_outOfBound); } - params->ldmParams.hashEveryLog = value; - return 0; + CCtxParams->ldmParams.hashEveryLog = value; + return value; default: return ERROR(parameter_unsupported); } } -/** - * This function should be updated whenever ZSTD_CCtx_params is updated. - * Parameters are copied manually before the dictionary is loaded. - * The multithreading parameters jobSize and overlapSizeLog are set only if - * nbThreads > 1. - * - * Pledged srcSize is treated as unknown. +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. 
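* review note: two conventions change in the hunks above — each
* ZSTD_CCtxParam_setParameter() case now returns the value in effect
* (passing 0 leaves a field unchanged and reads it back) instead of always
* returning 0, and this function now copies the whole ZSTD_CCtx_params
* struct rather than re-applying selected fields one by one.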
*/ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) @@ -485,33 +477,14 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); if (cctx->cdict) return ERROR(stage_wrong); - /* Assume the compression and frame parameters are validated */ - cctx->requestedParams.cParams = params->cParams; - cctx->requestedParams.fParams = params->fParams; - cctx->requestedParams.compressionLevel = params->compressionLevel; + cctx->requestedParams = *params; - /* Set force window explicitly since it sets cctx->loadedDictEnd */ - CHECK_F( ZSTD_CCtx_setParameter( - cctx, ZSTD_p_forceMaxWindow, params->forceWindow) ); - - /* Set multithreading parameters explicitly */ - CHECK_F( ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbThreads, params->nbThreads) ); - if (params->nbThreads > 1) { - CHECK_F( ZSTD_CCtx_setParameter(cctx, ZSTD_p_jobSize, params->jobSize) ); - CHECK_F( ZSTD_CCtx_setParameter( - cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) ); - } - - /* Copy long distance matching parameters */ - cctx->requestedParams.ldmParams = params->ldmParams; - - /* customMem is used only for create/free params and can be ignored */ return 0; } ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize); + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; return 0; @@ -523,14 +496,14 @@ size_t ZSTD_CCtx_loadDictionary_advanced( { if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ - DEBUGLOG(4, "load dictionary of size %u", (U32)dictSize); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */ if (dict==NULL || dictSize==0) { /* no dictionary mode */ cctx->cdictLocal = NULL; cctx->cdict = NULL; } else { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, 0, dictSize); + ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); cctx->cdictLocal = ZSTD_createCDict_advanced( dict, dictSize, dictLoadMethod, dictMode, @@ -756,10 +729,7 @@ size_t ZSTD_estimateCStreamSize(int compressionLevel) { static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, ZSTD_compressionParameters cParams2) { - U32 bslog1 = MIN(cParams1.windowLog, ZSTD_BLOCKSIZELOG_MAX); - U32 bslog2 = MIN(cParams2.windowLog, ZSTD_BLOCKSIZELOG_MAX); - return (bslog1 == bslog2) /* same block size */ - & (cParams1.hashLog == cParams2.hashLog) + return (cParams1.hashLog == cParams2.hashLog) & (cParams1.chainLog == cParams2.chainLog) & (cParams1.strategy == cParams2.strategy) /* opt parser space */ & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */ @@ -778,12 +748,38 @@ static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); } +typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; + +/* ZSTD_sufficientBuff() : + * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . 
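* review note: this check is new — since windowSize is now capped by
* pledgedSrcSize, identical cParams no longer imply identically sized
* buffers, so "continue" mode must also confirm that the existing buffers
* and blockSize can host the next frame before reusing the context.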
+ * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ +static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1, + ZSTD_buffered_policy_e buffPol2, + ZSTD_compressionParameters cParams2, + U64 pledgedSrcSize) +{ + size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); + size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); + size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; + DEBUGLOG(4, "ZSTD_sufficientBuff: windowSize2=%u from wlog=%u", + (U32)windowSize2, cParams2.windowLog); + DEBUGLOG(4, "ZSTD_sufficientBuff: blockSize2 %u <=? blockSize1 %u", + (U32)blockSize2, (U32)blockSize1); + return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */ + & (neededBufferSize2 <= bufferSize1); +} + /** Equivalence for resetCCtx purposes */ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, - ZSTD_CCtx_params params2) + ZSTD_CCtx_params params2, + size_t buffSize1, size_t blockSize1, + ZSTD_buffered_policy_e buffPol2, + U64 pledgedSrcSize) { + DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && - ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams); + ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) && + ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize); } /*! ZSTD_continueCCtx() : @@ -791,7 +787,11 @@ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) { U32 const end = (U32)(cctx->nextSrc - cctx->base); - DEBUGLOG(4, "continue mode"); + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + DEBUGLOG(4, "ZSTD_continueCCtx"); + + cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ cctx->appliedParams = params; cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; cctx->consumedSrcSize = 0; @@ -812,7 +812,6 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl } typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; -typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; /*! 
ZSTD_resetCCtx_internal() : note : `params` are assumed fully validated at this stage */ @@ -821,13 +820,16 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { - DEBUGLOG(4, "ZSTD_resetCCtx_internal"); + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - DEBUGLOG(4, "pledgedSrcSize: %u", (U32)pledgedSrcSize); if (crp == ZSTDcrp_continue) { - if (ZSTD_equivalentParams(params, zc->appliedParams)) { - DEBUGLOG(4, "ZSTD_equivalentParams()==1"); + if (ZSTD_equivalentParams(zc->appliedParams, params, + zc->inBuffSize, zc->blockSize, + zbuff, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)", + zc->appliedParams.cParams.windowLog, (U32)zc->blockSize); assert(!(params.ldmParams.enableLdm && params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET)); zc->entropy->hufCTable_repeatMode = HUF_repeat_none; @@ -836,6 +838,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->entropy->litlength_repeatMode = FSE_repeat_none; return ZSTD_continueCCtx(zc, params, pledgedSrcSize); } } + DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); if (params.ldmParams.enableLdm) { /* Adjust long distance matching parameters */ @@ -846,7 +849,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); } - { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog); + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = blockSize + 11*maxNbSeq; @@ -858,7 +862,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const h3Size = ((size_t)1) << hashLog3; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; - size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? 
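/* review note: windowSize above is MAX(1, MIN((U64)1 << windowLog,
 * pledgedSrcSize)). Worked example (hypothetical values): with
 * windowLog = 22 and a pledged source of 100 KB, the buffered input
 * allocation drops from 4 MB + blockSize to about 100 KB + blockSize. */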
windowSize + blockSize : 0; void* ptr; /* Check if workSpace is large enough, alloc a new one if needed */ @@ -874,11 +878,15 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, : 0; size_t const neededSpace = entropySpace + optSpace + ldmSpace + tableSpace + tokenSpace + bufferSpace; + DEBUGLOG(4, "Need %uKB workspace, including %uKB for tables, and %uKB for buffers", + (U32)(neededSpace>>10), (U32)(tableSpace>>10), (U32)(bufferSpace>>10)); + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u - windowSize: %u - blockSize: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size, (U32)windowSize, (U32)blockSize); if (zc->workSpaceSize < neededSpace) { /* too small : resize */ - DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n", - (unsigned)zc->workSpaceSize>>10, - (unsigned)neededSpace>>10); + DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK", + (unsigned)(zc->workSpaceSize>>10), + (unsigned)(neededSpace>>10)); /* static cctx : no resize, error out */ if (zc->staticSize) return ERROR(memory_allocation); @@ -901,7 +909,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->consumedSrcSize = 0; if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) zc->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(5, "pledged content size : %u ; flag : %u", + DEBUGLOG(4, "pledged content size : %u ; flag : %u", (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); zc->blockSize = blockSize; @@ -927,7 +935,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, /* opt parser space */ if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) { - DEBUGLOG(5, "reserving optimal parser space"); + DEBUGLOG(4, "reserving optimal parser space"); assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ zc->optState.litFreq = (U32*)ptr; zc->optState.litLengthFreq = zc->optState.litFreq + (1<hashTable = (U32*)(ptr); @@ -999,15 +1008,16 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { /*! ZSTD_copyCCtx_internal() : * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * The "context", in this case, refers to the hash and chain tables, entropy - * tables, and dictionary offsets. * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). - * pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1 - * @return : 0, or an error code */ + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, + unsigned windowLog, ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize, + U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { DEBUGLOG(5, "ZSTD_copyCCtx_internal"); @@ -1017,6 +1027,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, { ZSTD_CCtx_params params = dstCCtx->requestedParams; /* Copy only compression parameters related to tables. 
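* review note: 1.3.3 adds an optional windowLog override here (a non-zero
* argument takes precedence over srcCCtx's value) and, further down,
* copies the repcode history alongside the entropy tables.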
*/ params.cParams = srcCCtx->appliedParams.cParams; + if (windowLog) params.cParams.windowLog = windowLog; params.fParams = fParams; ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset, zbuff); @@ -1045,6 +1056,12 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, /* copy entropy tables */ memcpy(dstCCtx->entropy, srcCCtx->entropy, sizeof(ZSTD_entropyCTables_t)); + /* copy repcodes */ + { + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + dstCCtx->seqStore.rep[i] = srcCCtx->seqStore.rep[i]; + } return 0; } @@ -1059,9 +1076,12 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); - fParams.contentSizeFlag = pledgedSrcSize>0; + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); - return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize, zbuff); + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + 0 /*windowLog from srcCCtx*/, fParams, pledgedSrcSize, + zbuff); } @@ -1238,7 +1258,7 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy, ostart[4] = (BYTE)(cLitSize >> 10); break; } - default: /* not possible : lhSize is {3,4,5} */ + default: /* not possible : lhSize is {3,4,5} */ assert(0); } return lhSize+cLitSize; @@ -1247,8 +1267,6 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy, void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { - BYTE const LL_deltaCode = 19; - BYTE const ML_deltaCode = 36; const seqDef* const sequences = seqStorePtr->sequencesStart; BYTE* const llCodeTable = seqStorePtr->llCode; BYTE* const ofCodeTable = seqStorePtr->ofCode; @@ -1258,9 +1276,9 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) for (u=0; u 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv]; + llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); - mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv]; + mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); } if (seqStorePtr->longLengthID==1) llCodeTable[seqStorePtr->longLengthPos] = MaxLL; @@ -1273,7 +1291,8 @@ typedef enum { ZSTD_defaultAllowed = 1 } ZSTD_defaultPolicy_e; -MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType( +MEM_STATIC +symbolEncodingType_e ZSTD_selectEncodingType( FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) { @@ -1281,6 +1300,7 @@ MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType( #define MAX_SEQ_FOR_STATIC_FSE 1000 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) { + DEBUGLOG(5, "Selected set_rle"); /* Prefer set_basic over set_rle when there are 2 or less symbols, * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. * If basic encoding isn't possible, always choose RLE. 
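* review note: the selector's overall policy after this change — set_rle
* when one symbol accounts for every sequence, set_repeat to reuse a
* still-valid table while nbSeq < MAX_SEQ_FOR_STATIC_FSE, set_basic (the
* predefined table) when the section is small or the distribution is
* near-flat, and set_compressed (a freshly built FSE table) otherwise.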
@@ -1288,18 +1308,30 @@ MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType( *repeatMode = FSE_repeat_check; return set_rle; } - if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + if ( isDefaultAllowed + && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + DEBUGLOG(5, "Selected set_repeat"); return set_repeat; } - if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) { - *repeatMode = FSE_repeat_valid; + if ( isDefaultAllowed + && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; return set_basic; } + DEBUGLOG(5, "Selected set_compressed"); *repeatMode = FSE_repeat_check; return set_compressed; } -MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, +MEM_STATIC +size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, FSE_CTable* CTable, U32 FSELog, symbolEncodingType_e type, U32* count, U32 max, BYTE const* codeTable, size_t nbSeq, @@ -1317,7 +1349,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, case set_repeat: return 0; case set_basic: - CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); + CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ return 0; case set_compressed: { S16 norm[MaxSeq + 1]; @@ -1339,11 +1371,13 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, } } -MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) +MEM_STATIC +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) { BIT_CStream_t blockStream; FSE_CState_t stateMatchLength; @@ -1380,8 +1414,12 @@ MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity, BYTE const ofCode = ofCodeTable[n]; BYTE const mlCode = mlCodeTable[n]; U32 const llBits = LL_bits[llCode]; - U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ + U32 const ofBits = ofCode; U32 const mlBits = ML_bits[mlCode]; + DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", + sequences[n].litLength, + sequences[n].matchLength + MINMATCH, + sequences[n].offset); /* 32b*/ /* 64b*/ /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ @@ -1447,14 +1485,18 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, entropy, cParams->strategy, op, dstCapacity, literals, litSize); if (ZSTD_isError(cSize)) return cSize; + assert(cSize <= dstCapacity); 
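/* review note: the sequences header below uses a three-tier length
 * encoding — nbSeq < 0x7F takes 1 byte; nbSeq < LONGNBSEQ takes 2 bytes
 * with the first byte's high bit set (worked example: nbSeq = 300 encodes
 * as 0x81 0x2C, since (300>>8)+0x80 = 0x81 and 300 & 0xFF = 0x2C); larger
 * counts take 0xFF plus (nbSeq - LONGNBSEQ) as a little-endian 16-bit value. */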
op += cSize; } /* Sequences Header */ - if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); - if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; if (nbSeq==0) return op - ostart; /* seqHead : flags for FSE encoding type */ @@ -1462,9 +1504,10 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* convert length/distances into codes */ ZSTD_seqToCodes(seqStorePtr); - /* CTable for Literal Lengths */ + /* build CTable for Literal Lengths */ { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace); + DEBUGLOG(5, "Building LL table"); LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, @@ -1472,11 +1515,12 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (ZSTD_isError(countSize)) return countSize; op += countSize; } } - /* CTable for Offsets */ + /* build CTable for Offsets */ { U32 max = MaxOff; size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace); /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, @@ -1484,9 +1528,10 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (ZSTD_isError(countSize)) return countSize; op += countSize; } } - /* CTable for MatchLengths */ + /* build CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace); + DEBUGLOG(5, "Building ML table"); MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, @@ -1497,13 +1542,15 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - { size_t const streamSize = ZSTD_encodeSequences(op, oend - op, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); - if (ZSTD_isError(streamSize)) return streamSize; - op += streamSize; + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets); + if (ZSTD_isError(bitstreamSize)) return bitstreamSize; + op += bitstreamSize; } return op - ostart; @@ -1517,27 +1564,33 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, { size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams, dst, dstCapacity); - size_t const minGain = ZSTD_minGain(srcSize); - size_t const maxCSize = srcSize - minGain; /* If the srcSize <= dstCapacity, then there is enough space to write a * raw uncompressed block. Since we ran out of space, the block must not * be compressible, so fall back to a raw uncompressed block. */ - int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity; - + int const uncompressibleError = (cSize == ERROR(dstSize_tooSmall)) && (srcSize <= dstCapacity); if (ZSTD_isError(cSize) && !uncompressibleError) return cSize; + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. 
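* review note: concretely, a table inherited from a dictionary enters as
* FSE_repeat_valid and is trusted for the first block only; the demotion
* to FSE_repeat_check below stops later blocks from picking set_repeat
* unconditionally (set_repeat requires FSE_repeat_valid).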
+ */ + if (entropy->offcode_repeatMode == FSE_repeat_valid) + entropy->offcode_repeatMode = FSE_repeat_check; + /* Check compressibility */ - if (cSize >= maxCSize || uncompressibleError) { - entropy->hufCTable_repeatMode = HUF_repeat_none; - entropy->offcode_repeatMode = FSE_repeat_none; - entropy->matchlength_repeatMode = FSE_repeat_none; - entropy->litlength_repeatMode = FSE_repeat_none; - return 0; - } + { size_t const minGain = ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */ + size_t const maxCSize = srcSize - minGain; + if (cSize >= maxCSize || uncompressibleError) { + entropy->hufCTable_repeatMode = HUF_repeat_none; + entropy->offcode_repeatMode = FSE_repeat_none; + entropy->matchlength_repeatMode = FSE_repeat_none; + entropy->litlength_repeatMode = FSE_repeat_none; + return 0; /* block not compressed */ + } } assert(!ZSTD_isError(cSize)); - /* confirm repcodes */ + /* block is compressed => confirm repcodes in history */ { int i; for (i=0; irep[i] = seqStorePtr->repToConfirm[i]; } return cSize; } @@ -1559,9 +1612,9 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict } }; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + assert((U32)strat >= (U32)ZSTD_fast); assert((U32)strat <= (U32)ZSTD_btultra); - return blockCompressor[extDict!=0][(U32)strat]; } @@ -1572,30 +1625,38 @@ static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, seqStorePtr->lit += lastLLSize; } +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - const BYTE* const base = zc->base; - const BYTE* const istart = (const BYTE*)src; - const U32 current = (U32)(istart-base); - size_t lastLLSize; - const BYTE* anchor; - U32 const extDict = zc->lowLimit < zc->dictLimit; - const ZSTD_blockCompressor blockCompressor = - zc->appliedParams.ldmParams.enableLdm - ? (extDict ? ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm) - : ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); - - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + DEBUGLOG(5, "ZSTD_compressBlock_internal : dstCapacity = %u", (U32)dstCapacity); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) + return 0; /* don't even attempt compression below a certain srcSize */ ZSTD_resetSeqStore(&(zc->seqStore)); - if (current > zc->nextToUpdate + 384) - zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */ - - lastLLSize = blockCompressor(zc, src, srcSize); - - /* Last literals */ - anchor = (const BYTE*)src + srcSize - lastLLSize; - ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize); + /* limited update after a very long match */ + { const BYTE* const base = zc->base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (current > zc->nextToUpdate + 384) + zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); + } + /* find and store sequences */ + { U32 const extDict = zc->lowLimit < zc->dictLimit; + const ZSTD_blockCompressor blockCompressor = + zc->appliedParams.ldmParams.enableLdm + ? (extDict ? 
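/* review note: dispatch logic — long-distance matching gets dedicated
 * entry points, while every other strategy resolves through the
 * blockCompressor[extDict][strategy] table in ZSTD_selectBlockCompressor()
 * earlier in this file's diff. */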
ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm) + : ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); + size_t const lastLLSize = blockCompressor(zc, src, srcSize); + const BYTE* const anchor = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize); + } + /* encode */ return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize); } @@ -1618,13 +1679,14 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + assert(cctx->appliedParams.cParams.windowLog <= 31); + DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (U32)blockSize); if (cctx->appliedParams.fParams.checksumFlag && srcSize) XXH64_update(&cctx->xxhState, src, srcSize); while (remaining) { U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - size_t cSize; if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ @@ -1666,34 +1728,39 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, else cctx->nextToUpdate -= correction; DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x\n", correction, cctx->lowLimit); } - + /* enforce maxDist */ if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { - /* enforce maxDist */ U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit; } - cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); - if (ZSTD_isError(cSize)) return cSize; + { size_t cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; - if (cSize == 0) { /* block is not compressible */ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); - if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); - MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ - memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); - cSize = ZSTD_blockHeaderSize+blockSize; - } else { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader24); - cSize += ZSTD_blockHeaderSize; - } + if (cSize == 0) { /* block is not compressible */ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); + if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); /* 4th byte will be overwritten */ + memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_blockHeaderSize + blockSize; + } else { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } - remaining -= blockSize; - dstCapacity -= cSize; - ip += blockSize; - op += cSize; - } + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (U32)cSize); + } } if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; return op-ostart; @@ 
-1719,7 +1786,6 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, !params.fParams.noDictIDFlag, dictID, dictIDSizeCode); if (params.format == ZSTD_f_zstd1) { - DEBUGLOG(4, "writing zstd magic number"); MEM_writeLE32(dst, ZSTD_MAGICNUMBER); pos = 4; } @@ -1753,8 +1819,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, const BYTE* const ip = (const BYTE*) src; size_t fhSize = 0; - DEBUGLOG(5, "ZSTD_compressContinue_internal"); - DEBUGLOG(5, "stage: %u", cctx->stage); + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u", cctx->stage); if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ if (frame && (cctx->stage==ZSTDcs_init)) { @@ -1766,17 +1831,21 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, cctx->stage = ZSTDcs_ongoing; } + if (!srcSize) return fhSize; /* do not generate an empty block if no input */ + /* Check if blocks follow each other */ if (src != cctx->nextSrc) { /* not contiguous */ - ptrdiff_t const delta = cctx->nextSrc - ip; + size_t const distanceFromBase = (size_t)(cctx->nextSrc - cctx->base); cctx->lowLimit = cctx->dictLimit; - cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base); + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + cctx->dictLimit = (U32)distanceFromBase; cctx->dictBase = cctx->base; - cctx->base -= delta; + cctx->base = ip - distanceFromBase; cctx->nextToUpdate = cctx->dictLimit; if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */ } + cctx->nextSrc = ip + srcSize; /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) { @@ -1785,17 +1854,14 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, cctx->lowLimit = lowLimitMax; } - cctx->nextSrc = ip + srcSize; - - if (srcSize) { - size_t const cSize = frame ? + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize); + { size_t const cSize = frame ? ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); if (ZSTD_isError(cSize)) return cSize; cctx->consumedSrcSize += srcSize; return cSize + fhSize; - } else - return fhSize; + } } size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, @@ -1832,7 +1898,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t zc->lowLimit = zc->dictLimit; zc->dictLimit = (U32)(zc->nextSrc - zc->base); zc->dictBase = zc->base; - zc->base += ip - zc->nextSrc; + zc->base = ip - zc->dictLimit; zc->nextToUpdate = zc->dictLimit; zc->loadedDictEnd = zc->appliedParams.forceWindow ? 
0 : (U32)(iend - zc->base); @@ -1983,7 +2049,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode) { - DEBUGLOG(5, "ZSTD_compress_insertDictionary"); + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); if ((dict==NULL) || (dictSize<=8)) return 0; /* dict restricted modes */ @@ -1992,7 +2058,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictMode == ZSTD_dm_auto) { - DEBUGLOG(5, "raw content dictionary detected"); + DEBUGLOG(4, "raw content dictionary detected"); return ZSTD_loadDictionaryContent(cctx, dict, dictSize); } if (dictMode == ZSTD_dm_fullDict) @@ -2006,21 +2072,22 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, +size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) + ZSTD_CCtx_params params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) { - DEBUGLOG(4, "ZSTD_compressBegin_internal"); + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (cdict && cdict->dictContentSize>0) { + cctx->requestedParams = params; return ZSTD_copyCCtx_internal(cctx, cdict->refContext, - params.fParams, pledgedSrcSize, + params.cParams.windowLog, params.fParams, pledgedSrcSize, zbuff); } @@ -2029,16 +2096,19 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode); } -size_t ZSTD_compressBegin_advanced_internal( - ZSTD_CCtx* cctx, +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode, + const ZSTD_CDict* cdict, ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) { + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); /* compression parameters verification and optimization */ CHECK_F( ZSTD_checkCParams(params.cParams) ); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictMode, NULL, + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictMode, + cdict, params, pledgedSrcSize, ZSTDb_not_buffered); } @@ -2051,9 +2121,10 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); - return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dm_auto, - cctxParams, - pledgedSrcSize); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dm_auto, + NULL /*cdict*/, + cctxParams, pledgedSrcSize); } size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) @@ -2061,8 +2132,9 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compressBegin_usingDict"); return ZSTD_compressBegin_internal(cctx, dict, dictSize, 
ZSTD_dm_auto, NULL, - cctxParams, 0, ZSTDb_not_buffered); + cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); } size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) @@ -2143,6 +2215,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compress_internal"); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, @@ -2156,6 +2229,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params) { + DEBUGLOG(4, "ZSTD_compress_advanced"); CHECK_F(ZSTD_checkCParams(params.cParams)); return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } @@ -2168,6 +2242,7 @@ size_t ZSTD_compress_advanced_internal( const void* dict,size_t dictSize, ZSTD_CCtx_params params) { + DEBUGLOG(4, "ZSTD_compress_advanced_internal"); CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL, params, srcSize, ZSTDb_not_buffered) ); return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); @@ -2176,8 +2251,10 @@ size_t ZSTD_compress_advanced_internal( size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0); + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0); params.fParams.contentSizeFlag = 1; + DEBUGLOG(4, "ZSTD_compress_usingDict (level=%i, srcSize=%u, dictSize=%u)", + compressionLevel, (U32)srcSize, (U32)dictSize); return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } @@ -2234,7 +2311,7 @@ static size_t ZSTD_initCDict_internal( ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams) { - DEBUGLOG(5, "ZSTD_initCDict_internal, mode %u", (U32)dictMode); + DEBUGLOG(3, "ZSTD_initCDict_internal, mode %u", (U32)dictMode); if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictBuffer = NULL; cdict->dictContent = dictBuffer; @@ -2264,7 +2341,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams, ZSTD_customMem customMem) { - DEBUGLOG(5, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode); + DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode); if (!customMem.customAlloc ^ !customMem.customFree) return NULL; { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); @@ -2339,9 +2416,9 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize, + cctxSize; ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; void* ptr; - DEBUGLOG(5, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7); + DEBUGLOG(4, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7); if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - DEBUGLOG(5, "(workspaceSize < neededSize) : (%u < %u) => %u", + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize)); if (workspaceSize < neededSize) return NULL; @@ -2373,11 +2450,11 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { + DEBUGLOG(4, 
"ZSTD_compressBegin_usingCDict_advanced"); if (cdict==NULL) return ERROR(dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ZSTD_getCParamsFromCDict(cdict); params.fParams = fParams; - DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced"); return ZSTD_compressBegin_internal(cctx, NULL, 0, ZSTD_dm_auto, cdict, @@ -2392,7 +2469,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0); } @@ -2427,6 +2504,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, ZSTD_CStream* ZSTD_createCStream(void) { + DEBUGLOG(3, "ZSTD_createCStream"); return ZSTD_createCStream_advanced(ZSTD_defaultCMem); } @@ -2457,9 +2535,9 @@ size_t ZSTD_CStreamOutSize(void) } static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) + const void* const dict, size_t const dictSize, ZSTD_dictMode_e const dictMode, + const ZSTD_CDict* const cdict, + ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize) { DEBUGLOG(4, "ZSTD_resetCStream_internal"); /* params are supposed to be fully validated at this point */ @@ -2467,31 +2545,35 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ CHECK_F( ZSTD_compressBegin_internal(zcs, - dict, dictSize, dictMode, - cdict, - params, pledgedSrcSize, - ZSTDb_buffered) ); + dict, dictSize, dictMode, + cdict, + params, pledgedSrcSize, + ZSTDb_buffered) ); zcs->inToCompress = 0; zcs->inBuffPos = 0; - zcs->inBuffTarget = zcs->blockSize; + zcs->inBuffTarget = zcs->blockSize + + (zcs->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; zcs->streamStage = zcss_load; zcs->frameEnded = 0; return 0; /* ready to go */ } +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params params = zcs->requestedParams; - params.fParams.contentSizeFlag = (pledgedSrcSize > 0); + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + params.fParams.contentSizeFlag = 1; params.cParams = ZSTD_getCParamsFromCCtxParams(params, pledgedSrcSize, 0); - DEBUGLOG(4, "ZSTD_resetCStream"); return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize); } /*! ZSTD_initCStream_internal() : - * Note : not static, but hidden (not exposed). Used by zstdmt_compress.c + * Note : for lib/compress only. Used by zstdmt_compress.c. 
* Assumption 1 : params are valid * Assumption 2 : either dict, or cdict, is defined, not both */ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, @@ -2503,7 +2585,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict && dictSize >= 8) { - DEBUGLOG(5, "loading dictionary of size %u", (U32)dictSize); + DEBUGLOG(4, "loading dictionary of size %u", (U32)dictSize); if (zcs->staticSize) { /* static CCtx : never uses malloc */ /* incompatible with internal cdict creation */ return ERROR(memory_allocation); @@ -2516,14 +2598,14 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); } else { if (cdict) { - params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict */ + params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ } ZSTD_freeCDict(zcs->cdictLocal); zcs->cdictLocal = NULL; zcs->cdict = cdict; } - params.compressionLevel = ZSTD_CLEVEL_CUSTOM; + params.compressionLevel = ZSTD_CLEVEL_CUSTOM; /* enforce usage of cParams, instead of a dynamic derivation from cLevel (but does that happen ?) */ zcs->requestedParams = params; return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize); @@ -2535,8 +2617,9 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize) -{ /* cannot handle NULL cdict (does not know what to do) */ - if (!cdict) return ERROR(dictionary_wrong); +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ { ZSTD_CCtx_params params = zcs->requestedParams; params.cParams = ZSTD_getCParamsFromCDict(cdict); params.fParams = fParams; @@ -2549,18 +2632,25 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, /* note : cdict must outlive compression session */ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) { - ZSTD_frameParameters const fParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ }; - return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, 0); /* note : will check that cdict != NULL */ + ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ } +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be correct. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. 
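/* Illustrative usage sketch (not from the zstd sources): a minimal streaming
 * session through the ZSTD_initCStream_usingCDict path above. Error handling
 * is trimmed; as the code notes, the cdict must outlive the session. */
#include <zstd.h>
static size_t stream_once_with_cdict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict,
                                     ZSTD_outBuffer* out, ZSTD_inBuffer* in)
{
    size_t r = ZSTD_initCStream_usingCDict(zcs, cdict);
    if (ZSTD_isError(r)) return r;
    r = ZSTD_compressStream(zcs, out, in);   /* consume the input buffer */
    if (ZSTD_isError(r)) return r;
    return ZSTD_endStream(zcs, out);         /* 0 once the frame is fully flushed */
}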
*/ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) { - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", + (U32)pledgedSrcSize, params.fParams.contentSizeFlag); CHECK_F( ZSTD_checkCParams(params.cParams) ); - return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, pledgedSrcSize); + if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize); } size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) @@ -2568,21 +2658,21 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t di ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); - return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, 0); + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); } -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize) +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) { - ZSTD_CCtx_params cctxParams; + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 
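/* Illustrative sketch (not from the zstd sources): the transition rule the
 * init functions above now share. During 1.3.x, a pledged size of 0 is still
 * read as "unknown"; new code should pass ZSTD_CONTENTSIZE_UNKNOWN itself,
 * since 0 will eventually mean "empty". */
#include <zstd.h>   /* ZSTD_CONTENTSIZE_UNKNOWN */
static unsigned long long normalize_pledged_size(unsigned long long pss)
{
    return (pss == 0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;   /* temporary compatibility mapping */
}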
`0` will be interpreted as "empty" in the future */ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); - cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); - cctxParams.fParams.contentSizeFlag = (pledgedSrcSize>0); + ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize); } size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) { - return ZSTD_initCStream_srcSize(zcs, compressionLevel, 0); + DEBUGLOG(4, "ZSTD_initCStream"); + return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); } /*====== Compression ======*/ @@ -2615,9 +2705,9 @@ size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, /* check expectations */ DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode); assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize>0); - assert(zcs->outBuff!= NULL); - assert(zcs->outBuffSize>0); + assert(zcs->inBuffSize > 0); + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); assert(output->pos <= output->size); assert(input->pos <= input->size); @@ -2757,7 +2847,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, ZSTD_inBuffer* input, ZSTD_EndDirective endOp) { - DEBUGLOG(5, "ZSTD_compress_generic"); + DEBUGLOG(5, "ZSTD_compress_generic, endOp=%u ", (U32)endOp); /* check conditions */ if (output->pos > output->size) return ERROR(GENERIC); if (input->pos > input->size) return ERROR(GENERIC); @@ -2765,42 +2855,47 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, /* transparent initialization stage */ if (cctx->streamStage == zcss_init) { - ZSTD_prefixDict const prefixDict = cctx->prefixDict; ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ZSTD_getCParamsFromCCtxParams( - cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + ZSTD_prefixDict const prefixDict = cctx->prefixDict; memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ + params.cParams = ZSTD_getCParamsFromCCtxParams( + cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); #ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) + params.nbThreads = 1; /* do not invoke multi-threading when src size is too small */ if (params.nbThreads > 1) { - if (cctx->mtctx == NULL || cctx->appliedParams.nbThreads != params.nbThreads) { + if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) { + DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)", + params.nbThreads, ZSTDMT_getNbThreads(cctx->mtctx)); ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem); if (cctx->mtctx == NULL) return ERROR(memory_allocation); } DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads); CHECK_F( ZSTDMT_initCStream_internal( - cctx->mtctx, - prefixDict.dict, prefixDict.dictSize, ZSTD_dm_rawContent, - cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, ZSTD_dm_rawContent, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); cctx->streamStage = zcss_load; cctx->appliedParams.nbThreads = 
params.nbThreads; } else #endif - { - CHECK_F( ZSTD_resetCStream_internal( + { CHECK_F( ZSTD_resetCStream_internal( cctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictMode, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); + assert(cctx->streamStage == zcss_load); + assert(cctx->appliedParams.nbThreads <= 1); } } /* compression stage */ #ifdef ZSTD_MULTITHREAD if (cctx->appliedParams.nbThreads > 1) { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); - DEBUGLOG(5, "ZSTDMT_compressStream_generic result : %u", (U32)flushMin); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ ZSTD_startNewCompression(cctx); @@ -2850,8 +2945,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize; - DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u", - (unsigned)toFlush); + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (U32)toFlush); return toFlush; } } @@ -2880,12 +2974,12 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */ { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */ - { 22, 21, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */ - { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */ - { 23, 22, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */ - { 23, 22, 22, 5, 4, 32, ZSTD_btopt }, /* level 18 */ - { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */ - { 25, 25, 23, 7, 3, 64, ZSTD_btultra }, /* level 20 */ + { 22, 21, 22, 4, 5, 16, ZSTD_btlazy2 }, /* level 15 */ + { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */ + { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */ + { 23, 23, 22, 7, 3,128, ZSTD_btopt }, /* level 19 */ + { 25, 25, 23, 7, 3,128, ZSTD_btultra }, /* level 20 */ { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */ { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */ }, @@ -3004,6 +3098,8 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l } #endif + DEBUGLOG(4, "ZSTD_getCParams: cLevel=%i, srcSize=%u, dictSize=%u => table %u", + compressionLevel, (U32)srcSizeHint, (U32)dictSize, tableID); if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */ if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel]; @@ -3019,5 +3115,6 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); memset(&params, 0, sizeof(params)); params.cParams = cParams; + params.fParams.contentSizeFlag = 1; return params; } diff --git a/thirdparty/zstd/compress/zstd_compress.h b/thirdparty/zstd/compress/zstd_compress_internal.h similarity index 52% rename from thirdparty/zstd/compress/zstd_compress.h rename to thirdparty/zstd/compress/zstd_compress_internal.h index 94606edc93..f104fe981e 100644 --- a/thirdparty/zstd/compress/zstd_compress.h +++ b/thirdparty/zstd/compress/zstd_compress_internal.h @@ -8,6 +9
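/* Illustrative sketch (not from the zstd sources; names invented): the level
 * handling inside ZSTD_getCParams above. Level 0 maps to the default level,
 * oversized levels are clamped to the table maximum, then the (tableID,
 * level) cell of ZSTD_defaultCParameters is read. */
static int clamp_compression_level(int level, int maxLevel, int defaultLevel)
{
    if (level <= 0) level = defaultLevel;   /* 0 selects the default; no negative levels in 1.3.3 */
    if (level > maxLevel) level = maxLevel;
    return level;
}
/* e.g. clamp_compression_level(0, 22, 3) == 3, clamp_compression_level(99, 22, 3) == 22 */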
* You may select, at your option, one of the above-listed licenses. */ +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ #ifndef ZSTD_COMPRESS_H #define ZSTD_COMPRESS_H @@ -43,6 +46,95 @@ typedef struct ZSTD_prefixDict_s { ZSTD_dictMode_e dictMode; } ZSTD_prefixDict; +typedef struct { + U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + U32 workspace[HUF_WORKSPACE_SIZE_U32]; + HUF_repeat hufCTable_repeatMode; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + U32* litFreq; /* table of literals statistics, of size 256 */ + U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + /* begin updated by ZSTD_setLog2Prices */ + U32 log2litSum; /* pow2 to compare log2(litfreq) to */ + U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */ + U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */ + U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */ + /* end : updated by ZSTD_setLog2Prices */ + U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */ +} optState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + ldmEntry_t* hashTable; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + U64 hashPower; /* Used to compute the rolling hash. + * Depends on ldmParams.minMatchLength */ +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashEveryLog; /* Log number of entries to skip */ +} ldmParams_t; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + U32 forceWindow; /* force back-references to respect limit of + * 1< 63) ? 
ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} -static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} /*! ZSTD_storeSeq() : - Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - `offsetCode` : distance to match, or 0 == repCode. - `matchCode` : matchLength - MINMATCH + * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). 
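/* Illustrative worked example (not from the zstd sources): the small/large
 * split used by ZSTD_LLcode and ZSTD_MLcode above. Short lengths index a
 * lookup table (elided here); long ones map to highbit32(length) plus a
 * delta (19 for literal lengths, 36 for match lengths). */
#include <stdint.h>
static unsigned highbit32_portable(uint32_t v)   /* index of highest set bit; v != 0 */
{
    unsigned n = 0;
    while (v >>= 1) n++;
    return n;
}
static uint32_t ll_code_large(uint32_t litLength)   /* valid only for litLength > 63 */
{
    return highbit32_portable(litLength) + 19;      /* LL_deltaCode */
}
/* ll_code_large(100) == 6 + 19 == 25 : code 25 has baseline 64 and 6 extra bits */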
+ * `mlBase` : matchLength - MINMATCH */ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) { #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6) static const BYTE* g_start = NULL; - U32 const pos = (U32)((const BYTE*)literals - g_start); - if (g_start==NULL) g_start = (const BYTE*)literals; - if ((pos > 0) && (pos < 1000000000)) - DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + } #endif /* copy Literals */ assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); @@ -139,6 +244,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v /* literal Length */ if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } @@ -148,11 +254,12 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v seqStorePtr->sequences[0].offset = offsetCode + 1; /* match Length */ - if (matchCode>0xFFFF) { + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } - seqStorePtr->sequences[0].matchLength = (U16)matchCode; + seqStorePtr->sequences[0].matchLength = (U16)mlBase; seqStorePtr->sequences++; } @@ -161,7 +268,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v /*-************************************* * Match length counter ***************************************/ -static unsigned ZSTD_NbCommonBytes (register size_t val) +static unsigned ZSTD_NbCommonBytes (size_t val) { if (MEM_isLittleEndian()) { if (MEM_64bits()) { @@ -235,13 +342,17 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co const BYTE* const pStart = pIn; const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); - while (pIn < pInLoopLimit) { - size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } - if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } if 
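/* Illustrative sketch (not from the zstd sources): the word-at-a-time idea
 * behind the reworked ZSTD_count above, which now tests the first word
 * before entering the loop. Portable restatement assuming a little-endian
 * host; the real code uses ZSTD_NbCommonBytes with intrinsics. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>
static size_t common_prefix_le(const uint8_t* a, const uint8_t* b, size_t len)
{
    size_t n = 0;
    while (len - n >= sizeof(uint64_t)) {
        uint64_t x, y;
        memcpy(&x, a + n, sizeof(x));
        memcpy(&y, b + n, sizeof(y));
        if (x != y) {
            uint64_t diff = x ^ y;                        /* lowest set byte marks the mismatch */
            while ((diff & 0xFF) == 0) { diff >>= 8; n++; }
            return n;
        }
        n += sizeof(uint64_t);
    }
    while ((n < len) && (a[n] == b[n])) n++;              /* byte-by-byte tail */
    return n;
}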
((pInhashTable; U32 const hashLog = zc->appliedParams.cParams.hashLog; @@ -40,7 +42,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ U32 const windowLow = zc->lowLimit; - U32 matchEndIdx = current+8; + U32 matchEndIdx = current+8+1; size_t bestLength = 8; #ifdef ZSTD_C_PREDICT U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); @@ -49,12 +51,15 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co predictedLarge += (predictedLarge>0); #endif /* ZSTD_C_PREDICT */ + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ @@ -76,10 +81,11 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co continue; } #endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */ match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); @@ -93,16 +99,17 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co matchEndIdx = matchIndex + (U32)matchLength; } - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match+1 is smaller than current */ + /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ } else { /* match is larger than current */ *largerPtr = matchIndex; @@ -114,8 +121,38 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co *smallerPtr = *largerPtr = 0; if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - if (matchEndIdx > current + 8) return matchEndIdx - (current + 8); - return 1; + assert(matchEndIdx > current + 8); + return matchEndIdx - (current + 8); +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal(ZSTD_CCtx* zc, + const BYTE* const ip, const 
BYTE* const iend, + const U32 nbCompares, const U32 mls, const U32 extDict) +{ + const BYTE* const base = zc->base; + U32 const target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)", + idx, target, extDict); + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict); + zc->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + const U32 nbCompares, const U32 mls) +{ + ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/); +} + +void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + const U32 nbCompares, const U32 mls) +{ + ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/); } @@ -144,7 +181,7 @@ static size_t ZSTD_insertBtAndFindBestMatch ( const U32 windowLow = zc->lowLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; + U32 matchEndIdx = current+8+1; U32 dummy32; /* to be nullified at the end */ size_t bestLength = 0; @@ -158,8 +195,7 @@ static size_t ZSTD_insertBtAndFindBestMatch ( if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); @@ -172,8 +208,9 @@ static size_t ZSTD_insertBtAndFindBestMatch ( matchEndIdx = matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } } if (match[matchLength] < ip[matchLength]) { @@ -194,21 +231,12 @@ static size_t ZSTD_insertBtAndFindBestMatch ( *smallerPtr = *largerPtr = 0; - zc->nextToUpdate = (matchEndIdx > current + 8) ? 
matchEndIdx - 8 : current+1; + assert(matchEndIdx > current+8); + zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ return bestLength; } -void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) - idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); -} - /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch ( ZSTD_CCtx* zc, @@ -239,16 +267,6 @@ static size_t ZSTD_BtFindBestMatch_selectMLS ( } -void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); -} - - /** Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch_extDict ( ZSTD_CCtx* zc, @@ -335,14 +353,14 @@ size_t ZSTD_HcFindBestMatch_generic ( U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { - const BYTE* match; size_t currentMl=0; if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; + const BYTE* const match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially better */ currentMl = ZSTD_count(ip, match, iLimit); } else { - match = dictBase + matchIndex; + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; } @@ -380,10 +398,10 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_CCtx* zc, + ZSTD_CCtx* const zc, const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) + size_t* const offsetPtr, + U32 const maxNbAttempts, U32 const matchLengthSearch) { switch(matchLengthSearch) { @@ -502,9 +520,8 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, */ /* catch up */ if (offset) { - while ( (start > anchor) - && (start > base+offset-ZSTD_REP_MOVE) - && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */ + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); } @@ -516,9 +533,8 @@ _storeSequence: } /* check immediate repcode */ - while ( (ip <= ilimit) - && ((offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h index a9c4daed25..74e1fd6970 100644 --- a/thirdparty/zstd/compress/zstd_lazy.h +++ b/thirdparty/zstd/compress/zstd_lazy.h @@ -11,12 +11,13 @@ #ifndef ZSTD_LAZY_H #define ZSTD_LAZY_H -#include "zstd_compress.h" - #if 
defined (__cplusplus) extern "C" { #endif +#include "mem.h" /* U32 */ +#include "zstd.h" /* ZSTD_CCtx, size_t */ + U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls); void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); diff --git a/thirdparty/zstd/compress/zstd_ldm.h b/thirdparty/zstd/compress/zstd_ldm.h index d6d3d42c33..8f12c677aa 100644 --- a/thirdparty/zstd/compress/zstd_ldm.h +++ b/thirdparty/zstd/compress/zstd_ldm.h @@ -10,12 +10,13 @@ #ifndef ZSTD_LDM_H #define ZSTD_LDM_H -#include "zstd_compress.h" - #if defined (__cplusplus) extern "C" { #endif +#include "zstd_compress_internal.h" /* ldmParams_t, U32 */ +#include "zstd.h" /* ZSTD_CCtx, size_t */ + /*-************************************* * Long distance matching ***************************************/ diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c index c47ce23ad5..7171ff5373 100644 --- a/thirdparty/zstd/compress/zstd_opt.c +++ b/thirdparty/zstd/compress/zstd_opt.c @@ -8,36 +8,35 @@ * You may select, at your option, one of the above-listed licenses. */ +#include "zstd_compress_internal.h" #include "zstd_opt.h" -#include "zstd_lazy.h" +#include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */ -#define ZSTD_LITFREQ_ADD 2 -#define ZSTD_FREQ_DIV 4 -#define ZSTD_MAX_PRICE (1<<30) +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) + /*-************************************* * Price functions for optimal parser ***************************************/ static void ZSTD_setLog2Prices(optState_t* optPtr) { - optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1); - optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1); optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1); + optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1); + optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1); optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1); - optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum)); } -static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize) +static void ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize) { - unsigned u; - - optPtr->cachedLiterals = NULL; - optPtr->cachedPrice = optPtr->cachedLitLength = 0; optPtr->staticPrices = 0; - if (optPtr->litLengthSum == 0) { + if (optPtr->litLengthSum == 0) { /* first init */ + unsigned u; if (srcSize <= 1024) optPtr->staticPrices = 1; assert(optPtr->litFreq!=NULL); @@ -45,44 +44,41 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz optPtr->litFreq[u] = 0; for (u=0; ulitFreq[src[u]]++; - optPtr->litSum = 0; - optPtr->litLengthSum = MaxLL+1; - optPtr->matchLengthSum = MaxML+1; - optPtr->offCodeSum = (MaxOff+1); - optPtr->matchSum = (ZSTD_LITFREQ_ADD<litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV); + optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV); optPtr->litSum += optPtr->litFreq[u]; } + for (u=0; u<=MaxLL; u++) optPtr->litLengthFreq[u] = 1; + optPtr->litLengthSum = 
MaxLL+1; for (u=0; u<=MaxML; u++) optPtr->matchLengthFreq[u] = 1; + optPtr->matchLengthSum = MaxML+1; for (u=0; u<=MaxOff; u++) optPtr->offCodeFreq[u] = 1; - } else { - optPtr->matchLengthSum = 0; - optPtr->litLengthSum = 0; - optPtr->offCodeSum = 0; - optPtr->matchSum = 0; - optPtr->litSum = 0; + optPtr->offCodeSum = (MaxOff+1); + } else { + unsigned u; + + optPtr->litSum = 0; for (u=0; u<=MaxLit; u++) { - optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); + optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1)); optPtr->litSum += optPtr->litFreq[u]; } + optPtr->litLengthSum = 0; for (u=0; u<=MaxLL; u++) { optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); optPtr->litLengthSum += optPtr->litLengthFreq[u]; } + optPtr->matchLengthSum = 0; for (u=0; u<=MaxML; u++) { optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); optPtr->matchLengthSum += optPtr->matchLengthFreq[u]; - optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3); } - optPtr->matchSum *= ZSTD_LITFREQ_ADD; + optPtr->offCodeSum = 0; for (u=0; u<=MaxOff; u++) { optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); optPtr->offCodeSum += optPtr->offCodeFreq[u]; @@ -93,114 +89,146 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz } -static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals) +/* ZSTD_rawLiteralsCost() : + * cost of literals (only) in given segment (which length can be null) + * does not include cost of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr) { - U32 price, u; - - if (optPtr->staticPrices) - return ZSTD_highbit32((U32)litLength+1) + (litLength*6); - - if (litLength == 0) - return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1); + if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */ + if (litLength == 0) return 0; /* literals */ - if (optPtr->cachedLiterals == literals) { - U32 const additional = litLength - optPtr->cachedLitLength; - const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength; - price = optPtr->cachedPrice + additional * optPtr->log2litSum; - for (u=0; u < additional; u++) - price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1); - optPtr->cachedPrice = price; - optPtr->cachedLitLength = litLength; - } else { - price = litLength * optPtr->log2litSum; + { U32 u; + U32 cost = litLength * optPtr->log2litSum; for (u=0; u < litLength; u++) - price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); - - if (litLength >= 12) { - optPtr->cachedLiterals = literals; - optPtr->cachedPrice = price; - optPtr->cachedLitLength = litLength; - } + cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); + return cost; } - - /* literal Length */ - { const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength>63) ? 
(BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); - } - - return price; } - -FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr) { - /* offset */ - U32 price; - BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1); - if (optPtr->staticPrices) - return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of the literal part of a sequence, + * including literals themselves, and literalLength symbol */ +static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr) +{ + return ZSTD_rawLiteralsCost(literals, litLength, optPtr) + + ZSTD_litLengthPrice(litLength, optPtr); +} + +/* ZSTD_litLengthContribution() : + * @return ( cost(litlength) - cost(0) ) + * this value can then be added to rawLiteralsCost() + * to provide a cost which is directly comparable to a match ending at same position */ +static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr) +{ + if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1); + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + int const contribution = LL_bits[llCode] + + ZSTD_highbit32(optPtr->litLengthFreq[0]+1) + - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); +#if 1 + return contribution; +#else + return MAX(0, contribution); /* sometimes better, sometimes not ... */ +#endif + } +} + +/* ZSTD_literalsContribution() : + * creates a fake cost for the literals part of a sequence + * which can be compared to the ending cost of a match + * should a new match start at this position */ +static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr) +{ + int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr) + + ZSTD_litLengthContribution(litLength, optPtr); + return contribution; +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
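/* Illustrative sketch (not from the zstd sources): the shared cost model of
 * the price functions above. With log2 sums precomputed by
 * ZSTD_setLog2Prices as highbit32(sum+1), the price of one symbol is
 * log2sum - highbit32(freq+1), i.e. roughly log2(sum/freq): its Shannon
 * cost rounded to whole bits. */
#include <stdint.h>
static unsigned hb32(uint32_t v) { unsigned n = 0; while (v >>= 1) n++; return n; }
static uint32_t symbol_price_bits(uint32_t freq, uint32_t sum)
{
    return hb32(sum + 1) - hb32(freq + 1);   /* approximately -log2(freq/sum) */
}
/* e.g. symbol_price_bits(31, 1023) == 10 - 5 == 5 bits */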
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice( + U32 const offset, U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->staticPrices) /* fixed scheme, do not use statistics */ + return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode; price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); - if (!ultra && offCode >= 20) price += (offCode-19)*2; + if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */ /* match Length */ - { const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + { U32 const mlCode = ZSTD_MLcode(mlBase); price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1); } - return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor; + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; } - -static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) { - U32 u; - /* literals */ - optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; - for (u=0; u < litLength; u++) - optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + { U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } /* literal Length */ - { const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + { U32 const llCode = ZSTD_LLcode(litLength); optPtr->litLengthFreq[llCode]++; optPtr->litLengthSum++; } - /* match offset */ - { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); - optPtr->offCodeSum++; + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; } /* match Length */ - { const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); optPtr->matchLengthFreq[mlCode]++; optPtr->matchLengthSum++; } - - ZSTD_setLog2Prices(optPtr); } -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - } - - -/* function safe only for comparisons */ -static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) { switch (length) { @@ -216,15 +244,14 @@ static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) /* Update hashTable3 up to ip (excluded) Assumption : always within prefix (i.e. 
not within extDict) */ -static -U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip) { - U32* const hashTable3 = zc->hashTable3; - U32 const hashLog3 = zc->hashLog3; - const BYTE* const base = zc->base; - U32 idx = zc->nextToUpdate3; - const U32 target = zc->nextToUpdate3 = (U32)(ip - base); - const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); + U32* const hashTable3 = cctx->hashTable3; + U32 const hashLog3 = cctx->hashLog3; + const BYTE* const base = cctx->base; + U32 idx = cctx->nextToUpdate3; + U32 const target = cctx->nextToUpdate3 = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); while(idx < target) { hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; @@ -238,102 +265,147 @@ U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) /*-************************************* * Binary Tree search ***************************************/ -static U32 ZSTD_insertBtAndGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - U32 nbCompares, const U32 mls, - U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, int const extDict, + U32 nbCompares, U32 const mls, U32 const sufficient_len, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, const U32 lengthToBeat) { const BYTE* const base = zc->base; - const U32 current = (U32)(ip-base); - const U32 hashLog = zc->appliedParams.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32 const current = (U32)(ip-base); + U32 const hashLog = zc->appliedParams.cParams.hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; U32* const hashTable = zc->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); U32 matchIndex = hashTable[h]; U32* const bt = zc->chainTable; - const U32 btLog = zc->appliedParams.cParams.chainLog - 1; - const U32 btMask= (1U << btLog) - 1; + U32 const btLog = zc->appliedParams.cParams.chainLog - 1; + U32 const btMask= (1U << btLog) - 1; size_t commonLengthSmaller=0, commonLengthLarger=0; const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; + U32 const dictLimit = zc->dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; - const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowLow = zc->lowLimit; + U32 const btLow = btMask >= current ? 0 : current - btMask; + U32 const windowLow = zc->lowLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; + U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ U32 dummy32; /* to be nullified at the end */ U32 mnum = 0; - const U32 minMatch = (mls == 3) ? 3 : 4; - size_t bestLength = minMatchLen-1; + size_t bestLength = lengthToBeat-1; + DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches"); - if (minMatch == 3) { /* HC3 match finder */ + /* check repCode */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? 
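
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Convention used by the repcode check above : candidate codes run from
 * ll0 to ZSTD_REP_NUM+ll0-1 ; when the previous sequence had no literals
 * (ll0==1), slot 0 is skipped, and the "virtual" code ZSTD_REP_NUM means
 * rep[0]-1. Standalone sketch (REP_NUM redefined locally) : */
#include <stdint.h>
#define REP_NUM 3   /* zstd keeps the 3 most recent offsets */

static uint32_t resolveRepOffset(const uint32_t rep[REP_NUM], uint32_t repCode)
{
    /* repCode in [ll0, REP_NUM+ll0-1] */
    return (repCode == REP_NUM) ? (rep[0] - 1) : rep[repCode];
}
/* ---- end aside ---- */
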
(rep[0] - 1) : rep[repCode]; + U32 const repIndex = current - repOffset; + U32 repLen = 0; + assert(current >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ + if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= current */ + const BYTE* const repMatch = dictBase + repIndex; + assert(current >= windowLow); + if ( extDict /* this case only valid in extDict mode */ + && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found rep-match %u of length %u", + repCode - ll0, (U32)repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + if ((matchIndex3 > windowLow) + & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); } else { - match = dictBase + matchIndex3; - if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); } /* save best solution */ - if (currentMl > bestLength) { - bestLength = currentMl; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; - matches[mnum].len = (U32)currentMl; - mnum++; - if (currentMl > ZSTD_OPT_NUM) goto update; - if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ - } - } - } + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(current > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + zc->nextToUpdate = current+1; /* skip insertion */ + return 1; + } } } } hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { - U32* nextPtr = bt + 2*(matchIndex & btMask); + U32* 
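
/* ---- [illustrative aside, not part of the upstream patch] ----
 * The "intentional overflow" test above is the classic unsigned idiom :
 * (x - 1) < limit rejects x == 0 (which wraps to UINT32_MAX) as well as
 * anything above limit, in a single compare. Isolated : */
#include <stdint.h>

static int repOffsetInPrefix(uint32_t repOffset, uint32_t current, uint32_t dictLimit)
{
    /* equivalent to : current > current - repOffset >= dictLimit */
    return (repOffset - 1) < (current - dictLimit);  /* intentional underflow at 0 */
}
/* ---- end aside ---- */
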
const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match; + assert(current > matchIndex); if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) { - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; - } + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); } else { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + match = base + matchIndex; /* prepare for match[matchLength] */ } if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; + DEBUGLOG(8, "found match of length %u at distance %u", + (U32)matchLength, current - matchIndex); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; matches[mnum].len = (U32)matchLength; mnum++; if (matchLength > ZSTD_OPT_NUM) break; - if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ + if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } } if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ + /* match smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ } else { - /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ @@ -343,65 +415,31 @@ static U32 ZSTD_insertBtAndGetAllMatches ( *smallerPtr = *largerPtr = 0; -update: - zc->nextToUpdate = (matchEndIdx > current + 8) ? 
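
/* ---- [illustrative aside, not part of the upstream patch] ----
 * The tree walk above relies on a sorted-suffix invariant : every node
 * reached through the "smaller" pointer shares at least
 * commonLengthSmaller leading bytes with ip (likewise on the larger
 * side), so each comparison resumes at MIN(commonLengthSmaller,
 * commonLengthLarger). The primitive it builds on, in its simplest
 * (non word-at-a-time) form : */
#include <stddef.h>
#include <stdint.h>

static size_t countCommonBytes(const uint8_t* a, const uint8_t* b, const uint8_t* aEnd)
{
    const uint8_t* const aStart = a;
    while ((a < aEnd) && (*a == *b)) { a++; b++; }  /* extend shared prefix */
    return (size_t)(a - aStart);
}
/* ---- end aside ---- */
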
matchEndIdx - 8 : current+1; + assert(matchEndIdx > current+8); + zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ return mnum; } -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS ( +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) + const BYTE* ip, const BYTE* const iHighLimit, int const extDict, + U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, U32 const lengthToBeat) { + DEBUGLOG(7, "ZSTD_BtGetAllMatches"); + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch); + else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch); switch(matchLengthSearch) { - case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat); default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat); + case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat); case 7 : - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7 : - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + 
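
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Shape of the dispatcher above : one FORCE_INLINE_TEMPLATE body, called
 * from a switch with literal constants, so the compiler emits one
 * specialized copy per match-length. Reduced sketch : */
#include <stdint.h>

static inline uint32_t searchGeneric(const uint8_t* p, uint32_t mls)
{
    return (uint32_t)p[mls - 1];   /* `mls` folds to a constant per call site */
}

static uint32_t searchDispatch(const uint8_t* p, uint32_t mls)
{
    switch (mls) {
    case 3 : return searchGeneric(p, 3);
    default:
    case 4 : return searchGeneric(p, 4);
    case 5 : return searchGeneric(p, 5);
    case 7 :
    case 6 : return searchGeneric(p, 6);
    }
}
/* ---- end aside ---- */
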
case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat); } } @@ -409,12 +447,92 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( /*-******************************* * Optimal parser *********************************/ +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + + +typedef struct { + const BYTE* anchor; + U32 litlen; + U32 rawLitCost; +} cachedLiteralPrice_t; + +static U32 ZSTD_rawLiteralsCost_cached( + cachedLiteralPrice_t* const cachedLitPrice, + const BYTE* const anchor, U32 const litlen, + const optState_t* const optStatePtr) +{ + U32 startCost; + U32 remainingLength; + const BYTE* startPosition; + + if (anchor == cachedLitPrice->anchor) { + startCost = cachedLitPrice->rawLitCost; + startPosition = anchor + cachedLitPrice->litlen; + assert(litlen >= cachedLitPrice->litlen); + remainingLength = litlen - cachedLitPrice->litlen; + } else { + startCost = 0; + startPosition = anchor; + remainingLength = litlen; + } + + { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr); + cachedLitPrice->anchor = anchor; + cachedLitPrice->litlen = litlen; + cachedLitPrice->rawLitCost = rawLitCost; + return rawLitCost; + } +} + +static U32 ZSTD_fullLiteralsCost_cached( + cachedLiteralPrice_t* const cachedLitPrice, + const BYTE* const anchor, U32 const litlen, + const optState_t* const optStatePtr) +{ + return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr) + + ZSTD_litLengthPrice(litlen, optStatePtr); +} + +static int ZSTD_literalsContribution_cached( + cachedLiteralPrice_t* const cachedLitPrice, + const BYTE* const anchor, U32 const litlen, + const optState_t* const optStatePtr) +{ + int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr) + + ZSTD_litLengthContribution(litlen, optStatePtr); + return contribution; +} + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) + const void* src, size_t srcSize, + const int optLevel, const int extDict) { - seqStore_t* seqStorePtr = &(ctx->seqStore); - optState_t* optStatePtr = &(ctx->optState); + seqStore_t* const seqStorePtr = &(ctx->seqStore); + optState_t* const optStatePtr = &(ctx->optState); const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; @@ -423,237 +541,219 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const BYTE* const base = ctx->base; const BYTE* const prefixStart = base + ctx->dictLimit; - const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; - const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; - const U32 mls = ctx->appliedParams.cParams.searchLength; - const U32 minMatch = 
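
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Idea behind cachedLiteralPrice_t above : within one parse, litlen only
 * grows while anchor stays fixed, so each call pays only for the newly
 * appended literals. Reduced sketch (costFn stands in for
 * ZSTD_rawLiteralsCost, whose per-byte costs are additive) : */
#include <stdint.h>

typedef struct { const uint8_t* anchor; uint32_t litlen; uint32_t cost; } litCache;

static uint32_t cachedRawCost(litCache* c, const uint8_t* anchor, uint32_t litlen,
                              uint32_t (*costFn)(const uint8_t*, uint32_t))
{
    uint32_t base = 0, done = 0;
    if ((c->anchor == anchor) && (litlen >= c->litlen)) {
        base = c->cost;      /* cost of the already-measured prefix */
        done = c->litlen;
    }
    c->anchor = anchor;
    c->litlen = litlen;
    c->cost   = base + costFn(anchor + done, litlen - done);  /* pay only the tail */
    return c->cost;
}
/* ---- end aside ---- */
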
(ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; + U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog; + U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1); + U32 const mls = ctx->appliedParams.cParams.searchLength; + U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; - ZSTD_optimal_t* opt = optStatePtr->priceTable; - ZSTD_match_t* matches = optStatePtr->matchTable; - const BYTE* inr; - U32 offset, rep[ZSTD_REP_NUM]; + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + cachedLiteralPrice_t cachedLitPrice; + U32 rep[ZSTD_REP_NUM]; /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); ip += (ip==prefixStart); - { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } + { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } + memset(&cachedLitPrice, 0, sizeof(cachedLitPrice)); /* Match Loop */ while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - litlen = (U32)(ip - anchor); + U32 cur, last_pos = 0; + U32 best_mlen, best_off; - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i=(ip == anchor); i<last_i; i++) { - const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; - if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) { - mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - best_off = i - (ip == anchor); - do { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch); + if (!nbMatches) { ip++; continue; } - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + /* initialize opt[0] */ + { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } + opt[0].mlen = 1; + opt[0].litlen = litlen; + + /* large match -> immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new series", + nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart)); - if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; + if (maxML > sufficient_len) { + best_mlen = maxML; + best_off = matches[nbMatches-1].off; + DEBUGLOG(7, "large match (%u>%u), immediate encoding", + best_mlen, sufficient_len); + cur = 0; + last_pos = 1; + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr); + U32 pos; + U32 matchNb; + for (pos = 0; pos < minMatch; pos++) { + opt[pos].mlen = 1; + opt[pos].price = ZSTD_MAX_PRICE; + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end =
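
/* ---- [illustrative aside, not part of the upstream patch] ----
 * The parser above is a shortest-path search over byte positions :
 * opt[pos].price is the cheapest known cost to reach ip+pos, improved
 * either by one literal step or by a match edge. Core relaxation step,
 * reduced (unvisited slots are assumed pre-filled with a large price,
 * as with ZSTD_MAX_PRICE above) : */
#include <stdint.h>

typedef struct { uint32_t price; uint32_t mlen; } parseNode;

static void relaxEdge(parseNode* opt, uint32_t pos, uint32_t len, uint32_t edgePrice)
{
    uint32_t const end = pos + len;
    if (opt[pos].price + edgePrice < opt[end].price) {   /* better path to `end` */
        opt[end].price = opt[pos].price + edgePrice;
        opt[end].mlen  = len;                            /* remember incoming edge */
    }
}
/* ---- end aside ---- */
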
matches[matchNb].len; + repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + DEBUGLOG(7, "rPos:%u => set initial price : %u", + pos, matchPrice); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = matchPrice; + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } } + last_pos = pos-1; + } } - /* set prices using matches at position = 0 */ - best_mlen = (last_pos) ? last_pos : minMatch; - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ - mlen++; - } } - if (last_pos < minMatch) { ip++; continue; } - - /* initialize opt[0] */ - { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i]=rep[i]; } - opt[0].mlen = 1; - opt[0].litlen = litlen; - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1; + int price; /* note : contribution can be negative */ if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); - } + price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr); + } else { + price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr); + } + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal", + cur, price, opt[cur].price); + opt[cur].mlen = 1; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); + } } - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; - if (cur == last_pos) break; + if (cur == last_pos) break; - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; + if ( (optLevel==0) /*static*/ + && (opt[cur+1].price <= opt[cur].price) ) + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1. - * offset ZSTD_REP_MOVE_OPT is used for the special case - * litLength == 0, where offset 0 means something special. - * mlen == 1 means the previous byte was stored as a literal, - * so they are mutually exclusive. - */ - assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1)); - opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } + { U32 const ll0 = (opt[cur].mlen != 1); + U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0; + U32 const previousPrice = (cur > litlen) ?
opt[cur-litlen].price : 0; + U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr); + U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch); + U32 matchNb; + if (!nbMatches) continue; - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i=(opt[cur].mlen != 1); i<last_i; i++) { - const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; - if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { - mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u", + cur, nbMatches, maxML); - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + if ( (maxML > sufficient_len) + | (cur + maxML >= ZSTD_OPT_NUM) ) { + best_mlen = maxML; + best_off = matches[nbMatches-1].off; + last_pos = cur + 1; + goto _shortestPath; + } + } - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ?
matches[matchNb-1].len+1 : minMatch; + U32 mlen; - mlen++; - } } } + DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u => new better price (%u<%u)", + pos, price, opt[pos].price); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } else { + if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; cur = last_pos - best_mlen; - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 1); - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } + /* reverse traversal */ + DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); + { U32 selectedMatchLength = best_mlen; + U32 selectedOffset = best_off; + U32 pos = cur; + while (1) { + U32 const mlen = opt[pos].mlen; + U32 const off = opt[pos].off; + opt[pos].mlen = selectedMatchLength; + opt[pos].off = selectedOffset; + selectedMatchLength = mlen; + selectedOffset = off; + if (mlen > pos) break; + pos -= mlen; + } } - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } + /* save sequences */ + { U32 pos; + for (pos=0; pos < last_pos; ) { + U32 const llen = (U32)(ip - anchor); + U32 const mlen = opt[pos].mlen; + U32 const offset = opt[pos].off; + if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */ + pos += mlen; ip += mlen; - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; + /* repcodes update : like ZSTD_updateRep(), but update in place */ + if (offset >= ZSTD_REP_NUM) { /* full offset */ + rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = best_off; + rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + (llen==0); + if (repCode) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? 
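
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Reverse traversal above : opt[pos].mlen holds the best edge *ending* at
 * pos ; walking backwards while swapping converts the chain into forward
 * order, so opt[] can then be scanned left to right to emit sequences.
 * Reduced sketch : */
#include <stdint.h>

typedef struct { uint32_t mlen; } edgeNode;

static void reverseParse(edgeNode* opt, uint32_t lastPos)
{
    uint32_t len = opt[lastPos].mlen;     /* best edge ending the parse */
    uint32_t pos = lastPos - len;
    for (;;) {
        uint32_t const prev = opt[pos].mlen;  /* edge ending at pos */
        opt[pos].mlen = len;                  /* store the outgoing edge instead */
        len = prev;
        if (prev > pos) break;                /* crossed the block start */
        pos -= prev;
    }
}
/* ---- end aside ---- */
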
(rep[0] - 1) : rep[repCode]; + if (repCode >= 2) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = currentOffset; + } } - if (litLength==0) offset--; - } - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ + ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen); + ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH); + anchor = ip; + } } + ZSTD_setLog2Prices(optStatePtr); + } /* while (ip < ilimit) */ /* Save reps for next block */ { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } @@ -665,293 +765,21 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/); } size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/); } - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - optState_t* optStatePtr = &(ctx->optState); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 lowestIndex = ctx->lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - - const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; - const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; - const U32 mls = ctx->appliedParams.cParams.searchLength; - const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = optStatePtr->priceTable; - ZSTD_match_t* matches = optStatePtr->matchTable; - const BYTE* inr; - - /* init */ - U32 offset, rep[ZSTD_REP_NUM]; - { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } - - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); - ip += (ip==prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - U32 current = (U32)(ip-base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - opt[0].litlen = (U32)(ip - anchor); - - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i = (ip==anchor); i<last_i; i++) { - const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; - const U32 repIndex = (U32)(current - repCur); - const BYTE* const repMatch = dictBase + repIndex; - if ( (repCur > 0 && repCur <= (S32)current) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ?
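
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Worked form of the in-place repcode update above, with REP_NUM=3 and
 * REP_MOVE=REP_NUM-1 redefined locally so the sketch stands alone.
 * Starting from rep={8,4,2} : a new full offset 12 (stored as
 * 12+REP_MOVE) gives {12,8,4} ; repCode 1 gives {4,8,2} ; the virtual
 * code REP_NUM (possible only when llen==0) gives {7,8,4}, i.e. rep[0]-1 : */
#include <stdint.h>
#define REP_NUM  3
#define REP_MOVE (REP_NUM - 1)

static void updateRepInPlace(uint32_t rep[REP_NUM], uint32_t offset, uint32_t ll0)
{
    if (offset >= REP_NUM) {                 /* full offset, stored offset+REP_MOVE */
        rep[2] = rep[1]; rep[1] = rep[0];
        rep[0] = offset - REP_MOVE;
    } else {                                 /* repcode */
        uint32_t const repCode = offset + ll0;
        if (repCode) {                       /* repCode==0 : history unchanged */
            uint32_t const cur = (repCode == REP_NUM) ? (rep[0] - 1) : rep[repCode];
            if (repCode >= 2) rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = cur;
        }
    }
}
/* ---- end aside ---- */
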
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - - best_off = i - (ip==anchor); - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ - - if (!last_pos && !match_num) { ip++; continue; } - - { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i]=rep[i]; } - opt[0].mlen = 1; - - if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_mlen = (last_pos) ? last_pos : minMatch; - - /* set prices using matches at position = 0 */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } } - - if (last_pos < minMatch) { - ip++; continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1)); - opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (mlen != 1); i<last_i; i++) { - const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; - const U32 repIndex = (U32)(current+cur - repCur); - const BYTE* const repMatch = dictBase + repIndex; - if ( (repCur > 0 && repCur <= (S32)(current+cur)) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ?
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } /* for (cur = 1; cur <= last_pos; cur++) */ - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos; ) { - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - - if (litLength==0) offset--; - } - - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } - - /* Return the last literals size */ - return iend - anchor; -} - - size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/); } size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/); } diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h index 816a1fabbf..82e810c293 100644 --- a/thirdparty/zstd/compress/zstd_opt.h +++ b/thirdparty/zstd/compress/zstd_opt.h @@ -11,12 +11,12 @@ #ifndef ZSTD_OPT_H #define ZSTD_OPT_H -#include "zstd_compress.h" - #if defined (__cplusplus) extern "C" { #endif +#include "zstd.h" /* ZSTD_CCtx, size_t */ + size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize); size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize); diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c index 7831cd3bd8..e51edf124f 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.c +++ b/thirdparty/zstd/compress/zstdmt_compress.c @@ -24,7 +24,7 @@ #include <string.h> /* memcpy, memset */ #include "pool.h" /* threadpool */ #include "threading.h" /* mutex */ -#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ +#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ #include "zstdmt_compress.h" @@ -140,9 +140,12 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) return poolSize + totalBufferSize; } -static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize) +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) { + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); bufPool->bufferSize = bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); } /** ZSTDMT_getBuffer() : @@ -150,28 +153,31 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize) static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) { size_t const bSize = bufPool->bufferSize; - DEBUGLOG(5, "ZSTDMT_getBuffer"); + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); ZSTD_pthread_mutex_lock(&bufPool->poolMutex); if (bufPool->nbBuffers) { /* try to use an existing buffer */ buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; size_t const availBufferSize = buf.size; bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; - if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) { + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.size); ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); return buf; } /* size conditions
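
/* ---- [illustrative aside, not part of the upstream patch] ----
 * New recycling rule above : a pooled buffer is reused only when it is
 * large enough yet at most 8x the request ((avail>>3) <= bSize),
 * replacing the previous `avail <= 10*bSize` bound. Isolated predicate : */
#include <stddef.h>

static int bufferFits(size_t avail, size_t request)
{
    return (avail >= request) && ((avail >> 3) <= request);
}
/* ---- end aside ---- */
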
not respected : scratch this buffer, create new one */ - DEBUGLOG(5, "existing buffer does not meet size conditions => freeing"); + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); ZSTD_free(buf.start, bufPool->cMem); } ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); /* create new buffer */ - DEBUGLOG(5, "create a new buffer"); + DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); { buffer_t buffer; void* const start = ZSTD_malloc(bSize, bufPool->cMem); buffer.start = start; /* note : start can be NULL if malloc fails ! */ buffer.size = (start==NULL) ? 0 : bSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); return buffer; } } @@ -184,12 +190,14 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) ZSTD_pthread_mutex_lock(&bufPool->poolMutex); if (bufPool->nbBuffers < bufPool->totalBuffers) { bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", + (U32)buf.size, (U32)(bufPool->nbBuffers-1)); ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); return; } ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); /* Reached bufferPool capacity (should not happen) */ - DEBUGLOG(5, "buffer pool capacity reached => freeing "); + DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); ZSTD_free(buf.start, bufPool->cMem); } @@ -302,7 +310,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) typedef struct { buffer_t src; const void* srcStart; - size_t dictSize; + size_t prefixSize; size_t srcSize; buffer_t dstBuff; size_t cSize; @@ -324,11 +332,11 @@ typedef struct { void ZSTDMT_compressChunk(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - ZSTD_CCtx* cctx = ZSTDMT_getCCtx(job->cctxPool); - const void* const src = (const char*)job->srcStart + job->dictSize; + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + const void* const src = (const char*)job->srcStart + job->prefixSize; buffer_t dstBuff = job->dstBuff; - DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", - job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); + DEBUGLOG(5, "ZSTDMT_compressChunk: job (first:%u) (last:%u) : prefixSize %u, srcSize %u ", + job->firstChunk, job->lastChunk, (U32)job->prefixSize, (U32)job->srcSize); if (cctx==NULL) { job->cSize = ERROR(memory_allocation); @@ -342,38 +350,48 @@ void ZSTDMT_compressChunk(void* jobDescription) goto _endJob; } job->dstBuff = dstBuff; + DEBUGLOG(5, "ZSTDMT_compressChunk: received dstBuff of size %u", (U32)dstBuff.size); } - if (job->cdict) { /* should only happen for first segment */ - size_t const initError = ZSTD_compressBegin_usingCDict_advanced(cctx, job->cdict, job->params.fParams, job->fullFrameSize); - DEBUGLOG(5, "using CDict"); + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dm_auto, job->cdict, job->params, job->fullFrameSize); + DEBUGLOG(4, "ZSTDMT_compressChunk: init using CDict (windowLog=%u)", job->params.cParams.windowLog); + assert(job->firstChunk); /* only allowed for first job */ if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } } else { /* srcStart points at reloaded section */ - if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */ - { ZSTD_CCtx_params jobParams = job->params; - size_t const forceWindowError = - 
ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk); - /* Force loading dictionary in "content-only" mode (no header analysis) */ - size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, job->srcStart, job->dictSize, ZSTD_dm_rawContent, jobParams, job->fullFrameSize); - if (ZSTD_isError(initError) || ZSTD_isError(forceWindowError)) { + U64 const pledgedSrcSize = job->firstChunk ? job->fullFrameSize : ZSTD_CONTENTSIZE_UNKNOWN; + ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */ + size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk); + if (ZSTD_isError(forceWindowError)) { + DEBUGLOG(5, "ZSTD_CCtxParam_setParameter error : %s ", ZSTD_getErrorName(forceWindowError)); + job->cSize = forceWindowError; + goto _endJob; + } + DEBUGLOG(5, "ZSTDMT_compressChunk: invoking ZSTD_compressBegin_advanced_internal with windowLog = %u ", jobParams.cParams.windowLog); + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->srcStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + NULL, + jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) { + DEBUGLOG(5, "ZSTD_compressBegin_advanced_internal error : %s ", ZSTD_getErrorName(initError)); job->cSize = initError; goto _endJob; - } - } } - if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */ + } } + } + if (!job->firstChunk) { /* flush and overwrite frame header when it's not first job */ size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0); - if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; } ZSTD_invalidateRepCodes(cctx); } - DEBUGLOG(5, "Compressing : "); - DEBUG_PRINTHEX(4, job->srcStart, 12); + DEBUGLOG(5, "Compressing into dstBuff of size %u", (U32)dstBuff.size); + DEBUG_PRINTHEX(6, job->srcStart, 12); job->cSize = (job->lastChunk) ? 
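
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Non-first jobs above discard their frame header : compressing zero
 * bytes emits only the header, which the real output later overwrites,
 * and repcodes are invalidated so blocks never reference pre-header
 * state. Same call shape, isolated (these are the library's own
 * experimental entry points, as used in the patch) : */
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t dropFrameHeader(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src)
{
    size_t const hSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, 0);
    if (ZSTD_isError(hSize)) return hSize;   /* propagate error code */
    ZSTD_invalidateRepCodes(cctx);           /* repcodes must not cross the overwrite */
    return 0;                                /* header bytes will be overwritten */
}
/* ---- end aside ---- */
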
ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize); - DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)", + DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u) ", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); - DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize)); + DEBUGLOG(5, "dstBuff.size : %u ; => %s ", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize)); _endJob: ZSTDMT_releaseCCtx(job->cctxPool, cctx); @@ -403,13 +421,14 @@ struct ZSTDMT_CCtx_s { ZSTDMT_CCtxPool* cctxPool; ZSTD_pthread_mutex_t jobCompleted_mutex; ZSTD_pthread_cond_t jobCompleted_cond; + ZSTD_CCtx_params params; size_t targetSectionSize; size_t inBuffSize; size_t dictSize; size_t targetDictSize; inBuff_t inBuff; - ZSTD_CCtx_params params; XXH64_state_t xxhState; + unsigned singleThreaded; unsigned jobIDMask; unsigned doneJobID; unsigned nextJobID; @@ -430,20 +449,32 @@ static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customM nbJobs * sizeof(ZSTDMT_jobDescription), cMem); } -/* Internal only */ -size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads) +/* ZSTDMT_CCtxParam_setNbThreads(): + * Internal use only */ +size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads) { + if (nbThreads > ZSTDMT_NBTHREADS_MAX) nbThreads = ZSTDMT_NBTHREADS_MAX; + if (nbThreads < 1) nbThreads = 1; params->nbThreads = nbThreads; params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT; params->jobSize = 0; - return 0; + return nbThreads; +} + +/* ZSTDMT_getNbThreads(): + * @return nb threads currently active in mtctx. + * mtctx must be valid */ +size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx) +{ + assert(mtctx != NULL); + return mtctx->params.nbThreads; } ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem) { ZSTDMT_CCtx* mtctx; U32 nbJobs = nbThreads + 2; - DEBUGLOG(3, "ZSTDMT_createCCtx_advanced"); + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbThreads = %u)", nbThreads); if (nbThreads < 1) return NULL; nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX); @@ -453,7 +484,7 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem) mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem); if (!mtctx) return NULL; - ZSTDMT_initializeCCtxParameters(&mtctx->params, nbThreads); + ZSTDMT_CCtxParam_setNbThreads(&mtctx->params, nbThreads); mtctx->cMem = cMem; mtctx->allJobsCompleted = 1; mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem); @@ -545,17 +576,23 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) } /* Internal only */ -size_t ZSTDMT_CCtxParam_setMTCtxParameter( - ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value) { +size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, + ZSTDMT_parameter parameter, unsigned value) { + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter"); switch(parameter) { - case ZSTDMT_p_sectionSize : + case ZSTDMT_p_jobSize : + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value); + if ( (value > 0) /* value==0 => automatic job size */ + & (value < ZSTDMT_JOBSIZE_MIN) ) + value = ZSTDMT_JOBSIZE_MIN; params->jobSize = value; - return 0; + return value; case ZSTDMT_p_overlapSectionLog : + if (value > 9) value = 9; DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value); params->overlapSizeLog = (value >= 9) ? 
9 : value; - return 0; + return value; default : return ERROR(parameter_unsupported); } @@ -563,9 +600,10 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter( size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value) { + DEBUGLOG(4, "ZSTDMT_setMTCtxParameter"); switch(parameter) { - case ZSTDMT_p_sectionSize : + case ZSTDMT_p_jobSize : return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); case ZSTDMT_p_overlapSectionLog : return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); @@ -601,7 +639,7 @@ static size_t ZSTDMT_compress_advanced_internal( size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog); unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads); size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks; - size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */ + size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */ const char* const srcStart = (const char*)src; size_t remainingSrcSize = srcSize; unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */ @@ -610,7 +648,8 @@ static size_t ZSTDMT_compress_advanced_internal( assert(jobParams.nbThreads == 0); assert(mtctx->cctxPool->totalCCtx == params.nbThreads); - DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize); + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ", + nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize); if (nbChunks==1) { /* fallback to single-thread mode */ ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); @@ -639,9 +678,9 @@ static size_t ZSTDMT_compress_advanced_internal( mtctx->jobs[u].src = g_nullBuffer; mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize; - mtctx->jobs[u].dictSize = dictSize; + mtctx->jobs[u].prefixSize = dictSize; mtctx->jobs[u].srcSize = chunkSize; - mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL; + mtctx->jobs[u].cdict = (u==0) ? 
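
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Chunk sizing above : when an even split would leave the last chunk much
 * smaller than the others (low 17 bits of proposedChunkSize-1 under
 * 0x7FFF), every chunk is enlarged by 0xFFFF so the tail gets absorbed ;
 * the patch also shifts the test to proposedChunkSize-1. Isolated : */
#include <stddef.h>

static size_t avgChunkSize(size_t proposedChunkSize)
{
    return (((proposedChunkSize - 1) & 0x1FFFF) < 0x7FFF)
         ? proposedChunkSize + 0xFFFF      /* pad to avoid a tiny last chunk */
         : proposedChunkSize;
}
/* ---- end aside ---- */
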
cdict : NULL; mtctx->jobs[u].fullFrameSize = srcSize; mtctx->jobs[u].params = jobParams; /* do not calculate checksum within sections, but write it in header for first section */ @@ -659,7 +698,7 @@ static size_t ZSTDMT_compress_advanced_internal( XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize); } - DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize); + DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)chunkSize); DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12); POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]); @@ -753,13 +792,14 @@ size_t ZSTDMT_initCStream_internal( const ZSTD_CDict* cdict, ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, "ZSTDMT_initCStream_internal"); + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u)", (U32)pledgedSrcSize); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ assert(zcs->cctxPool->totalCCtx == params.nbThreads); + zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ - if (params.nbThreads==1) { + if (zcs->singleThreaded) { ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params); DEBUGLOG(4, "single thread mode"); assert(singleThreadParams.nbThreads == 0); @@ -767,6 +807,7 @@ size_t ZSTDMT_initCStream_internal( dict, dictSize, cdict, singleThreadParams, pledgedSrcSize); } + DEBUGLOG(4, "multi-threading mode (%u threads)", params.nbThreads); if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */ ZSTDMT_waitForAllJobsCompleted(zcs); @@ -777,7 +818,6 @@ size_t ZSTDMT_initCStream_internal( zcs->params = params; zcs->frameContentSize = pledgedSrcSize; if (dict) { - DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal); ZSTD_freeCDict(zcs->cdictLocal); zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, dictMode, /* note : a loadPrefix becomes an internal CDict */ @@ -785,20 +825,20 @@ size_t ZSTDMT_initCStream_internal( zcs->cdict = zcs->cdictLocal; if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); } else { - DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal); ZSTD_freeCDict(zcs->cdictLocal); zcs->cdictLocal = NULL; zcs->cdict = cdict; } + assert(params.overlapSizeLog <= 9); zcs->targetDictSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog)); - DEBUGLOG(4, "overlapLog : %u ", params.overlapSizeLog); - DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10)); + DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetDictSize>>10)); zcs->targetSectionSize = params.jobSize ? 
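
/* ---- [illustrative aside, not part of the upstream patch] ----
 * Overlap sizing above : overlapSizeLog==0 disables overlap ; otherwise
 * the overlap is window >> (9 - overlapSizeLog), so the default log of 6
 * keeps 1/8th of the window. E.g. windowLog=24, overlapSizeLog=6 :
 * 1 << (24 - 3) = 2 MB. Standalone form : */
#include <stddef.h>

static size_t overlapSize(unsigned windowLog, unsigned overlapSizeLog) /* <= 9 */
{
    return (overlapSizeLog == 0) ? 0
         : (size_t)1 << (windowLog - (9 - overlapSizeLog));
}
/* ---- end aside ---- */
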
params.jobSize : (size_t)1 << (params.cParams.windowLog + 2); - zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); - zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize); - DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10)); + if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN; + if (zcs->targetSectionSize < zcs->targetDictSize) zcs->targetSectionSize = zcs->targetDictSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(zcs->targetSectionSize>>10), params.jobSize); zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize; + DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10)); ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) ); zcs->inBuff.buffer = g_nullBuffer; zcs->dictSize = 0; @@ -816,7 +856,7 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params cctxParams = mtctx->params; - DEBUGLOG(5, "ZSTDMT_initCStream_advanced"); + DEBUGLOG(5, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize); cctxParams.cParams = params.cParams; cctxParams.fParams = params.fParams; return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dm_auto, NULL, @@ -838,9 +878,12 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, /* ZSTDMT_resetCStream() : - * pledgedSrcSize is optional and can be zero == unknown */ + * pledgedSrcSize can be zero == unknown (for the time being) + * prefer using ZSTD_CONTENTSIZE_UNKNOWN, + * as `0` might mean "empty" in the future */ size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize) { + if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; if (zcs->params.nbThreads==1) return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize); return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, 0, zcs->params, @@ -852,7 +895,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { ZSTD_CCtx_params cctxParams = zcs->params; cctxParams.cParams = params.cParams; cctxParams.fParams = params.fParams; - return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, 0); + return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); } @@ -860,12 +903,12 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi { unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; - DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize); zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = srcSize; - zcs->jobs[jobID].dictSize = zcs->dictSize; + zcs->jobs[jobID].prefixSize = zcs->dictSize; assert(zcs->inBuff.filled >= srcSize + zcs->dictSize); zcs->jobs[jobID].params = zcs->params; /* do not calculate checksum within sections, but write it in header for first section */ @@ -911,7 +954,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi zcs->params.fParams.checksumFlag = 0; } } - DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, 
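/* Editorial sketch of the job-size clamping above: the default is 1 << (windowLog + 2),
 *  then raised to the two floors introduced by this change:
 *      windowLog = 17  ->  1 << 19 = 512 KB  ->  raised to ZSTDMT_JOBSIZE_MIN (1 MB)
 *      windowLog = 23  ->  1 << 25 = 32 MB   ->  already >= 1 MB and >= overlap, kept
 */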
(U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, @@ -930,6 +973,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushNextJob"); if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); while (zcs->jobs[wJobID].jobCompleted==0) { @@ -942,7 +986,8 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; if (!job.jobScanned) { if (ZSTD_isError(job.cSize)) { - DEBUGLOG(5, "compression error detected "); + DEBUGLOG(5, "job %u : compression error detected : %s", + zcs->doneJobID, ZSTD_getErrorName(job.cSize)); ZSTDMT_waitForAllJobsCompleted(zcs); ZSTDMT_releaseAllJobResources(zcs); return job.cSize; @@ -991,15 +1036,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, { size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize; unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic "); assert(output->pos <= output->size); assert(input->pos <= input->size); + + if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */ + return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); + } + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { /* current frame being ended. Only flush/end are allowed */ return ERROR(stage_wrong); } - if (mtctx->params.nbThreads==1) { /* delegate to single-thread (synchronous) */ - return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); - } /* single-pass shortcut (note : synchronous-mode) */ if ( (mtctx->nextJobID == 0) /* just started */ @@ -1068,32 +1116,34 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu } -static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame) { - size_t const srcSize = zcs->inBuff.filled - zcs->dictSize; + size_t const srcSize = mtctx->inBuff.filled - mtctx->dictSize; + DEBUGLOG(5, "ZSTDMT_flushStream_internal"); - if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) - && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { - CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); + if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded)) + && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { + DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job"); + CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); } /* check if there is any data available to flush */ - return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */); + return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */); } -size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) { DEBUGLOG(5, "ZSTDMT_flushStream"); - if (zcs->params.nbThreads==1) - return ZSTD_flushStream(zcs->cctxPool->cctx[0], output); - return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */); + if (mtctx->singleThreaded) + return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */); } -size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* 
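/* Editorial usage sketch for the flush/end pair above (`mtctx`, `dst`, `dstCapacity`
 *  and `sink` are placeholders; context setup omitted):
 *      size_t remaining;
 *      do {
 *          ZSTD_outBuffer out = { dst, dstCapacity, 0 };
 *          remaining = ZSTDMT_endStream(mtctx, &out);
 *          if (ZSTD_isError(remaining)) break;
 *          fwrite(dst, 1, out.pos, sink);
 *      } while (remaining != 0);    <- 0 means internal buffers fully flushed
 */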
output) +size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) { DEBUGLOG(4, "ZSTDMT_endStream"); - if (zcs->params.nbThreads==1) - return ZSTD_endStream(zcs->cctxPool->cctx[0], output); - return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */); + if (mtctx->singleThreaded) + return ZSTD_endStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */); } diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h index 8c59c684f1..d12f0adb8d 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.h +++ b/thirdparty/zstd/compress/zstdmt_compress.h @@ -50,7 +50,7 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, /* === Streaming functions === */ ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); -ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it may change in the future, to mean "empty" */ ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); @@ -60,8 +60,8 @@ ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /* === Advanced functions and parameters === */ -#ifndef ZSTDMT_SECTION_SIZE_MIN -# define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ #endif ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, @@ -84,13 +84,13 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, /* ZSTDMT_parameter : * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ typedef enum { - ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ - ZSTDMT_p_overlapSectionLog /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */ + ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ + ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */ } ZSTDMT_parameter; /* ZSTDMT_setMTCtxParameter() : * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. - * The function must be called typically after ZSTD_createCCtx(). + * The function must typically be called after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
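 * Illustrative call order (editorial sketch; level 3 and the 4 MB job size are arbitrary) :
 *     ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);
 *     ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_jobSize, 4 << 20);
 *     ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_overlapSectionLog, 6);
 *     ZSTDMT_initCStream(mtctx, 3);    <- parameters must be set before this init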
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value); @@ -112,7 +112,15 @@ ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value); -size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads); +/* ZSTDMT_CCtxParam_setNbThreads() + * Set nbThreads, and clamp it correctly, + * also reset jobSize and overlapLog */ +size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads); + +/* ZSTDMT_getNbThreads(): + * @return nb threads currently active in mtctx. + * mtctx must be valid */ +size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx); /*! ZSTDMT_initCStream_internal() : * Private use only. Init streaming operation. diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c index 96fc609089..a59d944112 100644 --- a/thirdparty/zstd/decompress/zstd_decompress.c +++ b/thirdparty/zstd/decompress/zstd_decompress.c @@ -827,9 +827,9 @@ typedef struct { FSE_DState_t stateOffb; FSE_DState_t stateML; size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* base; + const BYTE* prefixStart; + const BYTE* dictEnd; size_t pos; - uPtrDiff gotoDict; } seqState_t; @@ -1224,8 +1224,9 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long BIT_reloadDStream(&seqState->DStream); { size_t const pos = seqState->pos + seq.litLength; - seq.match = seqState->base + pos - seq.offset; /* single memory segment */ - if (seq.offset > pos) seq.match += seqState->gotoDict; /* separate memory segment */ + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
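 * Editorial worked example of the two-segment addressing above : with pos = 100,
 * an offset of 130 satisfies offset > pos, so matchBase = dictEnd and
 * match = dictEnd + 100 - 130, i.e. 30 bytes before the end of the dictionary segment;
 * an offset of 30 stays within the prefix : match = prefixStart + 70.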
+ * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ seqState->pos = pos + seq.matchLength; } @@ -1243,7 +1244,7 @@ HINT_INLINE size_t ZSTD_execSequenceLong(BYTE* op, BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) + const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -1253,21 +1254,21 @@ size_t ZSTD_execSequenceLong(BYTE* op, const BYTE* match = sequence.match; /* check */ - if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); + if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); /* copy Literals */ - ZSTD_copy8(op, *litPtr); + ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ if (sequence.litLength > 8) ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; @@ -1277,7 +1278,7 @@ size_t ZSTD_execSequenceLong(BYTE* op, memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; - match = base; + match = prefixStart; if (op > oend_w || sequence.matchLength < MINMATCH) { U32 i; for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; @@ -1331,8 +1332,8 @@ static size_t ZSTD_decompressSequencesLong( BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const base = (const BYTE*) (dctx->base); - const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const prefixStart = (const BYTE*) (dctx->base); + const BYTE* const dictStart = (const BYTE*) (dctx->vBase); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); int nbSeq; @@ -1353,9 +1354,9 @@ static size_t ZSTD_decompressSequencesLong( int seqNb; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } - seqState.base = base; - seqState.pos = (size_t)(op-base); - seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */ + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); @@ 
-1370,9 +1371,9 @@ static size_t ZSTD_decompressSequencesLong( /* decode and decompress */ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNblegacyContext, legacyVersion, output, input); } #endif - return hSize; /* error */ + return hSize; /* error */ } if (hSize != 0) { /* need more input */ size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ - if (iend-ip > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip); - zds->lhSize += iend-ip; + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; } input->pos = input->size; return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ @@ -2472,8 +2475,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict); if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") ip = istart + cSize; op += decompressedSize; zds->expected = 0; @@ -2496,8 +2501,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } /* control buffer memory usage */ - DEBUGLOG(4, "Control max buffer memory usage (max %u KB)", - (U32)(zds->maxWindowSize >> 10)); + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); @@ -2555,17 +2561,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB /* fall-through */ case zdss_load: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); - size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); size_t loadedSize; - if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + } ip += loadedSize; zds->inPos += loadedSize; if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ - { const int isSkipFrame = ZSTD_isSkipFrame(zds); - size_t const decodedSize = ZSTD_decompressContinue(zds, + { size_t const decodedSize = ZSTD_decompressContinue(zds, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, zds->inBuff, neededInSize); if (ZSTD_isError(decodedSize)) return 
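/* Editorial note on the skippable-frame branch above : a skippable frame's payload is
 *  never copied into inBuff, it is simply consumed from the input :
 *      loadedSize = MIN(toLoad, (size_t)(iend-ip));
 *  which bypasses the `toLoad > inBuffSize - inPos` bound check that would otherwise
 *  reject a skippable frame larger than the input buffer as corruption_detected.
 */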
decodedSize; diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h index 2194a3b23d..9ac0a73dce 100644 --- a/thirdparty/zstd/zstd.h +++ b/thirdparty/zstd/zstd.h @@ -59,7 +59,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 3 -#define ZSTD_VERSION_RELEASE 2 +#define ZSTD_VERSION_RELEASE 3 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */ @@ -131,7 +131,7 @@ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t /*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < 128 KB) ? ((128 KB - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ @@ -432,12 +432,12 @@ typedef struct { typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; -/*= Custom memory allocation functions */ +/*--- Custom memory allocation functions ---*/ typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; /* use this constant to defer to stdlib's functions */ -static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL }; +static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*************************************** @@ -446,7 +446,7 @@ static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL }; /*! ZSTD_findFrameCompressedSize() : * `src` should point to the start of a ZSTD encoded frame or skippable frame - * `srcSize` must be at least as large as the frame + * `srcSize` must be >= first frame size * @return : the compressed size of the first frame starting at `src`, * suitable to pass to `ZSTD_decompress` or similar, * or an error code if input is invalid */ @@ -557,7 +557,8 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); * It must outlive context usage. * workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize() * to determine how large workspace must be to support scenario. - * @return : pointer to ZSTD_CCtx*, or NULL if error (size too small) + * @return : pointer to ZSTD_CCtx* (same address as workspace, but different type), + * or NULL if error (typically size too small) * Note : zstd will never resize nor malloc() when using a static cctx. * If it needs more memory than available, it will simply error out. * Note 2 : there is no corresponding "free" function. @@ -587,7 +588,7 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS ZSTD_compressionParameters cParams, ZSTD_customMem customMem); -/*! ZSTD_initStaticCDict_advanced() : +/*! 
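 * (Editorial worked example for the ZSTD_COMPRESSBOUND macro above, with (128<<10) == 131072 :
 *      srcSize = 100     ->  100 + (100>>8 == 0) + ((131072-100)>>11 == 63)  = 163
 *      srcSize = 131072  ->  131072 + 512 + 0                               = 131584
 *      srcSize = 1<<20   ->  1048576 + 4096 + 0                             = 1052672
 *  the safety margin only applies below 128 KB.)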
ZSTD_initStaticCDict() : * Generate a digested dictionary in provided memory area. * workspace: The memory area to emplace the dictionary into. * Provided pointer must 8-bytes aligned. @@ -596,7 +597,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS * to determine how large workspace must be. * cParams : use ZSTD_getCParams() to transform a compression level * into its relevants cParams. - * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * @return : pointer to ZSTD_CDict* (same address as workspace, but different type), + * or NULL if error (typically, size too small). * Note : there is no corresponding "free" function. * Since workspace was allocated externally, it must be freed externally. */ @@ -613,7 +615,7 @@ ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, uns /*! ZSTD_getParams() : * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. -* All fields of `ZSTD_frameParameters` are set to default (0) */ +* All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_checkCParams() : @@ -660,7 +662,8 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); * It must outlive context usage. * workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize() * to determine how large workspace must be to support scenario. - * @return : pointer to ZSTD_DCtx*, or NULL if error (size too small) + * @return : pointer to ZSTD_DCtx* (same address as workspace, but different type), + * or NULL if error (typically size too small) * Note : zstd will never resize nor malloc() when using a static dctx. * If it needs more memory than available, it will simply error out. * Note 2 : static dctx is incompatible with legacy support @@ -731,20 +734,22 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); /*===== Advanced Streaming compression functions =====*/ ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ -ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ +ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. 
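 * Editorial usage sketch for ZSTD_initCStream_srcSize() above (level 3 is arbitrary) :
 *     ZSTD_CStream* const zcs = ZSTD_createCStream();
 *     ZSTD_initCStream_srcSize(zcs, 3, ZSTD_CONTENTSIZE_UNKNOWN);   <- size not known up front
 *     ZSTD_initCStream_srcSize(zcs, 3, 123456ULL);                  <- exact size known and pledged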
Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/ ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */ /*! ZSTD_resetCStream() : * start a new compression job, using same parameters from previous job. * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.. * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * pledgedSrcSize==0 means "srcSize unknown". + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it may change to mean "empty" in some future version, so prefer using macro ZSTD_CONTENTSIZE_UNKNOWN. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); @@ -800,10 +805,10 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompress /*===== Buffer-less streaming compression functions =====*/ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). 
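 * Editorial reuse sketch for ZSTD_resetCStream() above (`zcs`, `dict`, `dictSize` are placeholders) :
 *     ZSTD_initCStream_usingDict(zcs, dict, dictSize, 3);    <- frame 1 : loads the dictionary
 *     ZSTD_resetCStream(zcs, ZSTD_CONTENTSIZE_UNKNOWN);      <- frame 2 : same params, dictionary reused in place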
note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); @@ -1000,18 +1005,19 @@ typedef enum { * Special: value 0 means "do not change strategy". */ /* frame parameters */ - ZSTD_p_contentSizeFlag=200, /* Content size is written into frame header _whenever known_ (default:1) - * note that content size must be known at the beginning, - * it is sent using ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression, + * it is provided using ZSTD_CCtx_setPledgedSrcSize() */ ZSTD_p_checksumFlag, /* A 32-bits checksum of content is written at end of frame (default:0) */ - ZSTD_p_dictIDFlag, /* When applicable, dictID of dictionary is provided in frame header (default:1) */ + ZSTD_p_dictIDFlag, /* When applicable, dictionary's ID is written into frame header (default:1) */ /* multi-threading parameters */ ZSTD_p_nbThreads=400, /* Select how many threads a compression job can spawn (default:1) * More threads improve speed, but also increase memory usage. * Can only receive a value > 1 if ZSTD_MULTITHREAD is enabled. * Special: value 0 means "do not change nbThreads" */ - ZSTD_p_jobSize, /* Size of a compression job. Each compression job is completed in parallel. + ZSTD_p_jobSize, /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode. + * Each compression job is completed in parallel, so indirectly controls the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest * The minimum size is automatically and transparently enforced */ @@ -1057,7 +1063,8 @@ typedef enum { /*! 
ZSTD_CCtx_setParameter() : * Set one compression parameter, selected by enum ZSTD_cParameter. * Note : when `value` is an enum, cast it to unsigned for proper type checking. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ + * @result : informational value (typically, the one being set, possibly corrected), + * or an error code (which can be tested with ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value); /*! ZSTD_CCtx_setPledgedSrcSize() : @@ -1066,7 +1073,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : 0 means zero, empty. * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - * Note that ZSTD_CONTENTSIZE_UNKNOWN is default value for new compression jobs. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new compression job. * Note 2 : If all data is provided and consumed in a single round, * this value is overridden by srcSize instead. */ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); @@ -1138,13 +1145,19 @@ typedef enum { * - Compression parameters cannot be changed once compression is started. * - output->pos must be <= dstCapacity, input->pos must be <= srcSize * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - * - @return provides the minimum amount of data still to flush from internal buffers + * - In single-thread mode (default), function is blocking : it completes its job before returning to caller. + * - In multi-thread mode, function is non-blocking : it just acquires a copy of input, and distributes the job to internal worker threads, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it has read or written at least one byte. + * - Exception : in multi-threading mode, if the first call requests a ZSTD_e_end directive, it is blocking : it will complete compression before giving back control to caller. + * - @return provides the minimum amount of data remaining to be flushed from internal buffers or an error code, which can be tested using ZSTD_isError(). - if @return != 0, flush is not fully completed, there is some data left within internal buffers. - - after a ZSTD_e_end directive, if internal buffer is not fully flushed, + if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + This is useful to determine if a ZSTD_e_flush or ZSTD_e_end directive is completed. + - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - It is necessary to fully flush internal buffers - before starting a new compression job, or changing compression parameters. + Before starting a new compression job, or changing compression parameters, + it is required to fully flush internal buffers. */ ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, ZSTD_outBuffer* output,
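/* Editorial sketch of the ZSTD_compress_generic() contract described above
 *  (`src`, `srcSize`, `dst`, `dstCapacity` are placeholders; level 5 is arbitrary) :
 *      ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *      ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 5);
 *      ZSTD_CCtx_setParameter(cctx, ZSTD_p_checksumFlag, 1);
 *      {   ZSTD_inBuffer  in  = { src, srcSize, 0 };
 *          ZSTD_outBuffer out = { dst, dstCapacity, 0 };
 *          size_t remaining;
 *          do {
 *              remaining = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
 *              if (ZSTD_isError(remaining)) break;
 *          } while (remaining != 0);    <- 0 : frame fully flushed, cctx reusable
 *      }
 */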