Update zstd to 1.4.4

(cherry picked from commit 55afd6e784)
Author:    Jonathan Mannancheril, 2019-11-09 22:31:00 -06:00
Committer: Rémi Verschelde
Parent:    c563839355
Commit:    bc31b11fdd

27 changed files with 1852 additions and 831 deletions


@@ -537,7 +537,7 @@ Files extracted from upstream source:
 ## zstd
 - Upstream: https://github.com/facebook/zstd
-- Version: 1.4.3
+- Version: 1.4.4
 - License: BSD-3-Clause
 Files extracted from upstream source:


@@ -164,7 +164,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
     _BitScanReverse ( &r, val );
     return (unsigned) r;
 # elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 # elif defined(__ICCARM__)    /* IAR Intrinsic */
     return 31 - __CLZ(val);
 # else   /* Software version */
@@ -244,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
-    assert(bitC->ptr <= bitC->endPtr);
     bitC->bitPos &= 7;
     bitC->bitContainer >>= nbBytes*8;
 }
@@ -260,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
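Editor's aside on the BIT_highbit32() change above (not part of the patch): for a non-zero 32-bit value, __builtin_clz() is always in the range 0..31, and on that range `x ^ 31` equals `31 - x`, so the two forms return the same result; the XOR form simply tends to compile to one fewer instruction. A minimal standalone check of that equivalence, purely illustrative:

    #include <assert.h>

    /* Illustrative only: verify 31 - clz == clz ^ 31 for every possible clz value (0..31). */
    int main(void)
    {
        unsigned clz;
        for (clz = 0; clz < 32; clz++) {
            assert((31 - clz) == (clz ^ 31));
        }
        return 0;
    }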


@@ -61,6 +61,13 @@
 #  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
 #endif

+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+#  define UNUSED_ATTR __attribute__((unused))
+#else
+#  define UNUSED_ATTR
+#endif
+
 /* force no inlining */
 #ifdef _MSC_VER
 #  define FORCE_NOINLINE static __declspec(noinline)
@@ -127,9 +134,14 @@
   }                               \
 }

-/* vectorization */
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
 #if !defined(__clang__) && defined(__GNUC__)
-# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#   define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# else
+#   define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+# endif
 #else
 #  define DONT_VECTORIZE
 #endif


@@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be tested using FSE_isError().
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
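To make the enlarged bound concrete (illustrative arithmetic, not part of the patch): the new FSE_BLOCKBOUND adds a few bytes of headroom for flushing the final FSE states and the bit container, on top of the previous size + size/128 slack.

    #include <stdio.h>
    #include <stddef.h>

    /* Illustrative only: evaluate the new bound for one input size. */
    #define FSE_NCOUNTBOUND 512
    #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
    #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))

    int main(void)
    {
        size_t const srcSize = 4096;
        /* 4096 + 32 + 4 + 8 = 4140 on a 64-bit target, plus 512 for the ncount header */
        printf("FSE_COMPRESSBOUND(%zu) = %zu\n", srcSize, (size_t)FSE_COMPRESSBOUND(srcSize));
        return 0;
    }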


@@ -52,7 +52,9 @@
 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */

 /* check and forward error code */
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
+#endif
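A short usage sketch of the CHECK_F helper guarded above (hypothetical caller, not from the patch): the macro evaluates the expression once and early-returns the FSE error code unchanged if one is reported.

    /* Hypothetical caller: forward any FSE error to our own caller. */
    static size_t example_normalize(short* norm, unsigned tableLog,
                                    const unsigned* count, size_t total, unsigned maxSymbol)
    {
        CHECK_F( FSE_normalizeCount(norm, tableLog, count, total, maxSymbol) );
        return 0;   /* only reached when normalization succeeded */
    }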


@@ -47,6 +47,79 @@ extern "C" {
 #define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
/* detects whether we are being compiled under msan */
#if defined (__has_feature)
# if __has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
#endif
#if defined (MEMORY_SANITIZER)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stdint.h> /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#if defined (__has_feature)
# if __has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1
# endif
#elif defined(__SANITIZE_ADDRESS__)
# define ADDRESS_SANITIZER 1
#endif
#if defined (ADDRESS_SANITIZER)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
 /*-**************************************************************
 *  Basic Types
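As a brief illustration of how the sanitizer hooks declared above are used elsewhere in this update (a sketch under the same MEMORY_SANITIZER / ADDRESS_SANITIZER guards, not code from the patch): poisoning a region makes later accesses report, and unpoisoning re-enables legitimate use.

    /* Illustrative only: guard a scratch region so sanitized builds catch stray accesses. */
    static void example_guard_scratch(void* scratch, size_t size)
    {
    #if defined (MEMORY_SANITIZER)
        __msan_poison(scratch, size);               /* reads of these bytes now report as uninitialized */
    #endif
    #if defined (ADDRESS_SANITIZER)
        __asan_poison_memory_region(scratch, size);     /* any access now reports */
        /* ... later, right before legitimate use: */
        __asan_unpoison_memory_region(scratch, size);
    #endif
        (void)scratch; (void)size;
    }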


@@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
     ctx->queueTail = 0;
     ctx->numThreadsBusy = 0;
     ctx->queueEmpty = 1;
-    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+    {
+        int error = 0;
+        error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+        if (error) { POOL_free(ctx); return NULL; }
+    }
     ctx->shutdown = 0;
     /* Allocate space for the thread handles */
     ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);


@@ -14,6 +14,8 @@
  * This file will hold wrapper for systems, which do not support pthreads
  */

+#include "threading.h"
+
 /* create fake symbol to avoid empty translation unit warning */
 int g_ZSTD_threading_useless_symbol;

@@ -28,7 +30,6 @@ int g_ZSTD_threading_useless_symbol;
 /* === Dependencies === */
 #include <process.h>
 #include <errno.h>
-#include "threading.h"

 /* === Implementation === */

@@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 }

 #endif   /* ZSTD_MULTITHREAD */
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
#include <stdlib.h>
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
return pthread_mutex_init(*mutex, attr);
}
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
{
if (!*mutex)
return 0;
{
int const ret = pthread_mutex_destroy(*mutex);
free(*mutex);
return ret;
}
}
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
return pthread_cond_init(*cond, attr);
}
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
{
if (!*cond)
return 0;
{
int const ret = pthread_cond_destroy(*cond);
free(*cond);
return ret;
}
}
#endif
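A note on what the debug wrappers above buy (illustrative sketch, not part of the patch): with DEBUGLEVEL >= 1 a ZSTD_pthread_mutex_t becomes a pointer, so a mutex that never went through ZSTD_pthread_mutex_init() is an uninitialized/NULL pointer and the first lock crashes loudly, while a mutex that is initialized but never destroyed leaks its heap allocation and gets reported by ASAN/LeakSanitizer.

    /* Illustrative only: the intended lifecycle; skipping either call is now detectable. */
    static void example_mutex_lifecycle(void)
    {
        ZSTD_pthread_mutex_t m;                 /* with DEBUGLEVEL >= 1 this is a pthread_mutex_t* */
        ZSTD_pthread_mutex_init(&m, NULL);      /* forgetting this => crash on first lock in debug builds */
        ZSTD_pthread_mutex_lock(&m);
        ZSTD_pthread_mutex_unlock(&m);
        ZSTD_pthread_mutex_destroy(&m);         /* forgetting this => leak reported by ASAN */
    }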


@@ -13,6 +13,8 @@
 #ifndef THREADING_H_938743
 #define THREADING_H_938743

+#include "debug.h"
+
 #if defined (__cplusplus)
 extern "C" {
 #endif

@@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
  */

 #elif defined(ZSTD_MULTITHREAD)    /* posix assumed ; need a better detection method */
 /* ===   POSIX Systems   === */
 #  include <pthread.h>

+#if DEBUGLEVEL < 1
+
 #define ZSTD_pthread_mutex_t            pthread_mutex_t
 #define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
 #define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))

@@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
 #define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
#else /* DEBUGLEVEL >= 1 */
/* Debug implementation of threading.
* In this implementation we use pointers for mutexes and condition variables.
* This way, if we forget to init/destroy them the program will crash or ASAN
* will report leaks.
*/
#define ZSTD_pthread_mutex_t pthread_mutex_t*
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
#define ZSTD_pthread_cond_t pthread_cond_t*
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#endif
 #else  /* ZSTD_MULTITHREAD not defined */
 /* No multithreading support */


@@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

-#define WILDCOPY_OVERLENGTH 8
-#define VECLEN 16
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16

 typedef enum {
     ZSTD_no_overlap,
-    ZSTD_overlap_src_before_dst,
+    ZSTD_overlap_src_before_dst
     /*  ZSTD_overlap_dst_before_src, */
 } ZSTD_overlap_e;

 /*! ZSTD_wildcopy() :
- *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
+ *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;

-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-          COPY8(op, ip);
-      do {
-        COPY16(op, ip);
-      }
-      while (op < oend);
-    }
-}
-
-/*! ZSTD_wildcopy_16min() :
- *  same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
-{
-    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + length;
-
-    assert(length >= 8);
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
-    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-          COPY8(op, ip);
-      do {
-        COPY16(op, ip);
-      }
-      while (op < oend);
-    }
-}
-
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = (BYTE*)dstEnd;
-    do
-        COPY8(op, ip)
-    while (op < oend);
-}
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first two COPY16() calls because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities.
+         * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+         * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+         */
+        COPY16(op, ip);
+        COPY16(op, ip);
+        if (op >= oend) return;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+    }
+}

 /*-*******************************************
 *  Private declarations
@@ -323,7 +300,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
     _BitScanReverse(&r, val);
     return (unsigned)r;
 # elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-    return 31 - __builtin_clz(val);
+    return __builtin_clz (val) ^ 31;
 # elif defined(__ICCARM__)    /* IAR Intrinsic */
     return 31 - __CLZ(val);
 # else   /* Software version */
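A small caller-side sketch of the contract spelled out in the new ZSTD_wildcopy() comment (illustrative, not from the patch): the destination must keep WILDCOPY_OVERLENGTH (now 32) writable bytes of slack past the copied length, and the ovtype flag encodes what the caller knows about the src/dst distance.

    /* Illustrative only: copy `len` literals into a buffer that keeps
     * WILDCOPY_OVERLENGTH spare bytes at its end, as the call sites in this patch do. */
    static void example_emit_literals(BYTE* dst, const BYTE* src, size_t len)
    {
        /* caller guarantees: dst and src are at least WILDCOPY_VECLEN bytes apart,
         * and dst has at least len + WILDCOPY_OVERLENGTH writable bytes */
        ZSTD_wildcopy(dst, src, (ptrdiff_t)len, ZSTD_no_overlap);
    }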

(One file's diff suppressed because it is too large.)


@@ -19,6 +19,7 @@
 *  Dependencies
 ***************************************/
 #include "zstd_internal.h"
+#include "zstd_cwksp.h"
 #ifdef ZSTD_MULTITHREAD
 #  include "zstdmt_compress.h"
 #endif
@@ -192,6 +193,13 @@ typedef struct {
   size_t capacity; /* The capacity starting from `seq` pointer */
 } rawSeqStore_t;

+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
 struct ZSTD_CCtx_params_s {
     ZSTD_format_e format;
     ZSTD_compressionParameters cParams;
@@ -203,6 +211,9 @@ struct ZSTD_CCtx_params_s {
     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                 * No target when targetCBlockSize == 0.
                                 * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */

     ZSTD_dictAttachPref_e attachDictPref;
     ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -228,9 +239,7 @@ struct ZSTD_CCtx_s {
     ZSTD_CCtx_params appliedParams;
     U32   dictID;
-    int workSpaceOversizedDuration;
-    void* workSpace;
-    size_t workSpaceSize;
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
     size_t blockSize;
     unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
     unsigned long long consumedSrcSize;
@@ -238,6 +247,8 @@ struct ZSTD_CCtx_s {
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
     size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;

     seqStore_t seqStore;      /* sequences storage ptrs */
     ldmState_t ldmState;      /* long distance matching state */
@@ -337,26 +348,57 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }

+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
 /*! ZSTD_storeSeq() :
- *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- *  `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
  *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
  */
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
 {
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
     static const BYTE* g_start = NULL;
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
     /* copy Literals */
     assert(seqStorePtr->maxNbLit <= 128 KB);
     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+         * First copy 16 bytes, because literals are likely short.
+         */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
     seqStorePtr->lit += litLength;

     /* literal Length */
@@ -368,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
     seqStorePtr->sequences[0].litLength = (U16)litLength;

     /* match offset */
-    seqStorePtr->sequences[0].offset = offsetCode + 1;
+    seqStorePtr->sequences[0].offset = offCode + 1;

     /* match Length */
     if (mlBase>0xFFFF) {
@@ -910,7 +952,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
-                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+                    const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

 void ZSTD_resetSeqStore(seqStore_t* ssPtr);

@@ -925,7 +967,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                     ZSTD_dictContentType_e dictContentType,
                                     ZSTD_dictTableLoadMethod_e dtlm,
                                     const ZSTD_CDict* cdict,
-                                    ZSTD_CCtx_params params,
+                                    const ZSTD_CCtx_params* params,
                                     unsigned long long pledgedSrcSize);

 /* ZSTD_compress_advanced_internal() :
@@ -934,7 +976,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize,
                            const void* dict,size_t dictSize,
-                                 ZSTD_CCtx_params params);
+                                 const ZSTD_CCtx_params* params);

 /* ZSTD_writeLastEmptyBlock() :
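To make the new litLimit parameter of ZSTD_storeSeq() concrete (an illustrative caller, not from the patch): the match finders now pass the end of the input block, so the function knows how far it may over-read literals with wildcopy before falling back to ZSTD_safecopyLiterals().

    /* Illustrative only: storing one match found at `ip` against a normal (non-rep) offset. */
    static void example_store_match(seqStore_t* seqStore,
                                    const BYTE* anchor, const BYTE* ip, const BYTE* iend,
                                    U32 offset, size_t matchLength)
    {
        /* litLimit = iend : literals between anchor and ip may be over-read up to iend */
        ZSTD_storeSeq(seqStore, (size_t)(ip - anchor), anchor, iend,
                      offset + ZSTD_REP_MOVE, matchLength - MINMATCH);
    }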


@@ -70,7 +70,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
-                              void* workspace, size_t wkspSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                         const int bmi2)
 {
     size_t const minGain = ZSTD_minGain(srcSize, strategy);
@@ -99,10 +99,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     {   HUF_repeat repeat = prevHuf->repeatMode;
         int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
-        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
-                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
         if (repeat != HUF_repeat_none) {
             /* reused the existing table */
             hType = set_repeat;


@@ -23,7 +23,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
-                              void* workspace, size_t wkspSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                         const int bmi2);

 #endif /* ZSTD_COMPRESS_LITERALS_H */


@@ -222,7 +222,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
                 const BYTE* codeTable, size_t nbSeq,
                 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                 const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* workspace, size_t workspaceSize)
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
 {
     BYTE* op = (BYTE*)dst;
     const BYTE* const oend = op + dstCapacity;
@@ -238,7 +238,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
         memcpy(nextCTable, prevCTable, prevCTableSize);
         return 0;
     case set_basic:
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));  /* note : could be pre-calculated */
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize));  /* note : could be pre-calculated */
         return 0;
     case set_compressed: {
         S16 norm[MaxSeq + 1];
@@ -252,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
         FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
         {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
             FORWARD_IF_ERROR(NCountSize);
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
             return NCountSize;
         }
     }


@@ -35,7 +35,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
                 const BYTE* codeTable, size_t nbSeq,
                 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                 const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* workspace, size_t workspaceSize);
+                void* entropyWorkspace, size_t entropyWorkspaceSize);

 size_t ZSTD_encodeSequences(
             void* dst, size_t dstCapacity,

thirdparty/zstd/compress/zstd_cwksp.h (new vendored file, 535 lines added)

@@ -0,0 +1,535 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_CWKSP_H
#define ZSTD_CWKSP_H
/*-*************************************
* Dependencies
***************************************/
#include "zstd_internal.h"
#if defined (__cplusplus)
extern "C" {
#endif
/*-*************************************
* Constants
***************************************/
/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
/* when workspace is continuously too large
* during at least this number of times,
* context's memory usage is considered wasteful,
* because it's sized to handle a worst case scenario which rarely happens.
* In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Since the workspace is effectively its own little malloc implementation /
* arena, when we run under ASAN, we should similarly insert redzones between
* each internal element of the workspace, so ASAN will catch overruns that
* reach outside an object but that stay inside the workspace.
*
* This defines the size of that redzone.
*/
#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
/*-*************************************
* Structures
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers,
ZSTD_cwksp_alloc_aligned
} ZSTD_cwksp_alloc_phase_e;
/**
* Zstd fits all its internal datastructures into a single continuous buffer,
* so that it only needs to perform a single OS allocation (or so that a buffer
* can be provided to it and it can perform no allocations at all). This buffer
* is called the workspace.
*
* Several optimizations complicate that process of allocating memory ranges
* from this workspace for each internal datastructure:
*
* - These different internal datastructures have different setup requirements:
*
* - The static objects need to be cleared once and can then be trivially
* reused for each compression.
*
* - Various buffers don't need to be initialized at all--they are always
* written into before they're read.
*
* - The matchstate tables have a unique requirement that they don't need
* their memory to be totally cleared, but they do need the memory to have
* some bound, i.e., a guarantee that all values in the memory they've been
* allocated is less than some maximum value (which is the starting value
* for the indices that they will then use for compression). When this
* guarantee is provided to them, they can use the memory without any setup
* work. When it can't, they have to clear the area.
*
* - These buffers also have different alignment requirements.
*
* - We would like to reuse the objects in the workspace for multiple
* compressions without having to perform any expensive reallocation or
* reinitialization work.
*
* - We would like to be able to efficiently reuse the workspace across
* multiple compressions **even when the compression parameters change** and
* we need to resize some of the objects (where possible).
*
* To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
* abstraction was created. It works as follows:
*
* Workspace Layout:
*
* [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
*
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
* so that literally everything fits in a single buffer. Note: if present,
* this must be the first object in the workspace, since ZSTD_free{CCtx,
* CDict}() rely on a pointer comparison to see whether one or two frees are
* required.
*
* - Fixed size objects: these are fixed-size, fixed-count objects that are
* nonetheless "dynamically" allocated in the workspace so that we can
* control how they're initialized separately from the broader ZSTD_CCtx.
* Examples:
* - Entropy Workspace
* - 2 x ZSTD_compressedBlockState_t
* - CDict dictionary contents
*
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
* be moved around at no cost for a new compression.
*
* Allocating Memory:
*
* The various types of objects must be allocated in order, so they can be
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
typedef struct {
void* workspace;
void* workspaceEnd;
void* objectEnd;
void* tableEnd;
void* tableValidEnd;
void* allocStart;
int allocFailed;
int workspaceOversizedDuration;
ZSTD_cwksp_alloc_phase_e phase;
} ZSTD_cwksp;
/*-*************************************
* Functions
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
assert(ws->workspace <= ws->objectEnd);
assert(ws->objectEnd <= ws->tableEnd);
assert(ws->objectEnd <= ws->tableValidEnd);
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
}
/**
* Align must be a power of 2.
*/
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
size_t const mask = align - 1;
assert((align & mask) == 0);
return (size + mask) & ~mask;
}
/**
* Use this to determine how much space in the workspace we will consume to
* allocate this object. (Normally it should be exactly the size of the object,
* but under special conditions, like ASAN, where we pad each object, it might
* be larger.)
*
* Since tables aren't currently redzoned, you don't need to call through this
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
return size;
#endif
}
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
assert(phase >= ws->phase);
if (phase > ws->phase) {
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
ws->tableValidEnd = ws->objectEnd;
}
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
/* If unaligned allocations down from a too-large top have left us
* unaligned, we need to realign our alloc ptr. Technically, this
* can consume space that is unaccounted for in the neededSpace
* calculation. However, I believe this can only happen when the
* workspace is too large, and specifically when it is too large
* by a larger margin than the space that will be consumed. */
/* TODO: cleaner, compiler warning friendly way to do this??? */
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
if (ws->allocStart < ws->tableValidEnd) {
ws->tableValidEnd = ws->allocStart;
}
}
ws->phase = phase;
}
}
/**
* Returns whether this object/buffer/etc was allocated in this workspace.
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
}
/**
* Internal function. Do not use directly.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
void* alloc;
void* bottom = ws->tableEnd;
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
/**
* Reserves and returns unaligned memory.
*/
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
}
/**
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
assert((bytes & (sizeof(U32)-1)) == 0);
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
}
/**
* Aligned on sizeof(unsigned). These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
void* alloc = ws->tableEnd;
void* end = (BYTE *)alloc + bytes;
void* top = ws->allocStart;
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
assert((bytes & (sizeof(U32)-1)) == 0);
ZSTD_cwksp_internal_advance_phase(ws, phase);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(end <= top);
if (end > top) {
DEBUGLOG(4, "cwksp: table alloc failed!");
ws->allocFailed = 1;
return NULL;
}
ws->tableEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
/**
* Aligned on sizeof(void*).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5,
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
assert((bytes & (sizeof(void*)-1)) == 0);
ZSTD_cwksp_assert_internal_consistency(ws);
/* we must be in the first phase, no advance is possible */
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
DEBUGLOG(4, "cwksp: object alloc failed!");
ws->allocFailed = 1;
return NULL;
}
ws->objectEnd = end;
ws->tableEnd = end;
ws->tableValidEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
__msan_poison(ws->objectEnd, size);
}
#endif
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
ws->tableValidEnd = ws->objectEnd;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* Zero the part of the allocated tables not already marked clean.
*/
MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
}
ZSTD_cwksp_mark_tables_clean(ws);
}
/**
* Invalidates table allocations.
* All other allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing tables!");
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* Invalidates all buffer, aligned, and table allocations.
* Object allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
* every time we start a new compression. */
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
}
#endif
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd;
ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
ws->phase = ZSTD_cwksp_alloc_buffers;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
* buffer, if present, must be separately freed).
*/
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
ws->workspace = start;
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->phase = ZSTD_cwksp_alloc_objects;
ZSTD_cwksp_clear(ws);
ws->workspaceOversizedDuration = 0;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
void* workspace = ZSTD_malloc(size, customMem);
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation);
ZSTD_cwksp_init(ws, workspace, size);
return 0;
}
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_free(ptr, customMem);
}
/**
* Moves the management of a workspace from one cwksp to another. The src cwksp
* is left in an invalid state (src must be re-init()'ed before its used again).
*/
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
*dst = *src;
memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
/*-*************************************
* Functions Checking Free Space
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}
MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
}
MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_check_available(
ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
}
MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
&& ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
}
MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
ZSTD_cwksp* ws, size_t additionalNeededSpace) {
if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
ws->workspaceOversizedDuration++;
} else {
ws->workspaceOversizedDuration = 0;
}
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CWKSP_H */
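Since zstd_cwksp.h is new in this update, here is a minimal usage sketch of the API declared above (illustrative only; the real callers live elsewhere in the compressor). It follows the allocation-order rule from the file's header comment: objects first, then buffers, then aligned data and tables.

    /* Illustrative only: carve a few regions out of one externally allocated buffer. */
    #include <stdlib.h>

    static int example_cwksp_usage(size_t bufferSize)
    {
        ZSTD_cwksp ws;
        void* const buffer = malloc(bufferSize);
        if (buffer == NULL) return 1;
        ZSTD_cwksp_init(&ws, buffer, bufferSize);

        {   /* allocation order matters: objects, then buffers, then aligned/tables */
            void* const obj     = ZSTD_cwksp_reserve_object(&ws, 64);            /* multiple of sizeof(void*) */
            BYTE* const scratch = ZSTD_cwksp_reserve_buffer(&ws, 1024);          /* unaligned bytes */
            U32*  const table   = (U32*)ZSTD_cwksp_reserve_table(&ws, 256 * sizeof(U32));
            (void)obj; (void)scratch; (void)table;
        }
        if (ZSTD_cwksp_reserve_failed(&ws)) { free(buffer); return 1; }

        ZSTD_cwksp_clear(&ws);   /* invalidates buffers/aligned/tables, keeps objects */
        free(buffer);            /* buffer was allocated outside the cwksp in this sketch */
        return 0;
    }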


@@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
             goto _match_stored;
         }
@@ -157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
             goto _match_stored;
         }
@@ -247,7 +247,7 @@ _match_found:
         offset_2 = offset_1;
         offset_1 = offset;
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

 _match_stored:
         /* match found */
@@ -278,7 +278,7 @@ _match_stored:
                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
@@ -297,7 +297,7 @@ _match_stored:
                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
                 ip += rLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -422,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -447,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -479,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;


@@ -8,7 +8,7 @@
  * You may select, at your option, one of the above-listed licenses.
  */

-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
 #include "zstd_fast.h"

@@ -43,8 +43,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 }

-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
         U32 const mls)
@@ -74,8 +74,7 @@ size_t ZSTD_compressBlock_fast_generic(
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
     ip1 = ip0 + 1;
-    {
-        U32 const maxRep = (U32)(ip0 - prefixStart);
+    {   U32 const maxRep = (U32)(ip0 - prefixStart);
         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
     }
@@ -118,8 +117,7 @@ size_t ZSTD_compressBlock_fast_generic(
             match0 = match1;
             goto _offset;
         }
-        {
-            size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
             assert(step >= 2);
             ip0 += step;
             ip1 += step;
@@ -138,7 +136,7 @@ _offset: /* Requires: ip0, match0 */
 _match: /* Requires: ip0, match0, offcode */
         /* Count the forward length */
         mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
-        ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
         /* match found */
         ip0 += mLength;
         anchor = ip0;
@@ -150,16 +148,15 @@ _match: /* Requires: ip0, match0, offcode */
             hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;   /* here because current+2 could be > iend-8 */
             hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

-            while ( (ip0 <= ilimit)
-                 && ( (offset_2>0)
-                    & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
+            while ( ((ip0 <= ilimit) & (offset_2>0))  /* offset_2==0 means offset_2 is invalidated */
+                 && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
                 /* store sequence */
                 size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
-                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
+                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; }   /* swap offset_2 <=> offset_1 */
                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                 ip0 += rLength;
                 ip1 = ip0 + 1;
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
                 anchor = ip0;
                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
             }
@@ -179,8 +176,7 @@ size_t ZSTD_compressBlock_fast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
+    U32 const mls = ms->cParams.minMatch;
     assert(ms->dictMatchState == NULL);
     switch(mls)
     {
@@ -265,7 +261,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
         mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
         ip++;
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
     } else if ( (matchIndex <= prefixStartIndex) ) {
         size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
         U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -285,7 +281,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */ } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
} else if (MEM_read32(match) != MEM_read32(ip)) { } else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */ /* it's not a match, and we're not going to check the dictionary */
@ -300,7 +296,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* match found */ /* match found */
@ -325,7 +321,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2; ip += repLength2;
anchor = ip; anchor = ip;
@ -348,8 +344,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
ZSTD_compressionParameters const* cParams = &ms->cParams; U32 const mls = ms->cParams.minMatch;
U32 const mls = cParams->minMatch;
assert(ms->dictMatchState != NULL); assert(ms->dictMatchState != NULL);
switch(mls) switch(mls)
{ {
@ -408,16 +403,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const U32 repIndex = current + 1 - offset_1; const U32 repIndex = current + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex; const BYTE* const repMatch = repBase + repIndex;
size_t mLength;
hashTable[h] = current; /* update hash table */ hashTable[h] = current; /* update hash table */
assert(offset_1 <= current +1); /* check repIndex */ assert(offset_1 <= current +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
} else { } else {
if ( (matchIndex < dictStartIndex) || if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) { (MEM_read32(match) != MEM_read32(ip)) ) {
@ -427,19 +423,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
} }
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
U32 offset; U32 const offset = current - matchIndex;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset = current - matchIndex; offset_2 = offset_1; offset_1 = offset; /* update offset history */
offset_2 = offset_1; ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
offset_1 = offset; ip += mLength;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); anchor = ip;
} } } }
/* found a match : store it */
ip += mLength;
anchor = ip;
if (ip <= ilimit) { if (ip <= ilimit) {
/* Fill Table */ /* Fill Table */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@ -448,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
while (ip <= ilimit) { while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base); U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2; U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2; ip += repLength2;
anchor = ip; anchor = ip;
@ -476,8 +468,7 @@ size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
ZSTD_compressionParameters const* cParams = &ms->cParams; U32 const mls = ms->cParams.minMatch;
U32 const mls = cParams->minMatch;
switch(mls) switch(mls)
{ {
default: /* includes case 3 */ default: /* includes case 3 */
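
The ZSTD_compressBlock_fast* wrappers above all follow the same pattern: one FORCE_INLINE_TEMPLATE body takes the match length `mls` as a parameter, and a thin wrapper switches on the runtime value so the compiler emits a specialized copy per case. A self-contained sketch of that pattern (illustrative names and hash, not zstd's):

    #include <stdio.h>
    #include <stddef.h>

    #if defined(__GNUC__)
    #  define FORCE_INLINE_TEMPLATE static inline __attribute__((always_inline))
    #else
    #  define FORCE_INLINE_TEMPLATE static inline
    #endif

    /* mls is a compile-time constant in each instantiation below, so the inner
     * loop can be unrolled per specialization. */
    FORCE_INLINE_TEMPLATE size_t hashBlock_generic(const unsigned char* src, size_t srcSize, unsigned const mls)
    {
        size_t acc = 0, i, j;
        for (i = 0; i + mls <= srcSize; i++)
            for (j = 0; j < mls; j++)
                acc = acc * 31 + src[i + j];
        return acc;
    }

    static size_t hashBlock(const unsigned char* src, size_t srcSize, unsigned mls)
    {
        switch (mls) {
        default:
        case 4: return hashBlock_generic(src, srcSize, 4);
        case 5: return hashBlock_generic(src, srcSize, 5);
        case 6: return hashBlock_generic(src, srcSize, 6);
        case 7: return hashBlock_generic(src, srcSize, 7);
        }
    }

    int main(void)
    {
        const unsigned char data[] = "specialize me";
        printf("%zu\n", hashBlock(data, sizeof(data) - 1, 5));
        return 0;
    }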

View file

@ -810,7 +810,7 @@ ZSTD_compressBlock_lazy_generic(
/* store sequence */ /* store sequence */
_storeSequence: _storeSequence:
{ size_t const litLength = start - anchor; { size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength; anchor = ip = start + matchLength;
} }
@ -828,7 +828,7 @@ _storeSequence:
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength; ip += matchLength;
anchor = ip; anchor = ip;
continue; continue;
@ -843,7 +843,7 @@ _storeSequence:
/* store sequence */ /* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength; ip += matchLength;
anchor = ip; anchor = ip;
continue; /* faster when present ... (?) */ continue; /* faster when present ... (?) */
@ -1051,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
/* store sequence */ /* store sequence */
_storeSequence: _storeSequence:
{ size_t const litLength = start - anchor; { size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength; anchor = ip = start + matchLength;
} }
@ -1066,7 +1066,7 @@ _storeSequence:
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength; ip += matchLength;
anchor = ip; anchor = ip;
continue; /* faster when present ... (?) */ continue; /* faster when present ... (?) */
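
The `offset_2 = offset_1; offset_1 = ...` dance that recurs in these hunks maintains zstd's repeat-offset history: when a match reuses the older repcode, the two entries swap so the just-used offset becomes the most recent one. A tiny worked example:

    #include <stdio.h>

    int main(void)
    {
        unsigned offset_1 = 8, offset_2 = 3;    /* history: most recent offset first */
        /* a new match was found at distance offset_2, i.e. the older repcode */
        unsigned const used = offset_2;
        offset_2 = offset_1;                    /* previous favourite becomes second */
        offset_1 = used;                        /* the offset just used moves to front */
        printf("offset_1=%u offset_2=%u\n", offset_1, offset_2);   /* -> 3 and 8 */
        return 0;
    }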

View file

@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{ {
size_t const ldmHSize = ((size_t)1) << params.hashLog; size_t const ldmHSize = ((size_t)1) << params.hashLog;
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
size_t const ldmBucketSize = size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
((size_t)1) << (params.hashLog - ldmBucketSizeLog); size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t); + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0; return params.enableLdm ? totalSize : 0;
} }
@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[i] = rep[i-1]; rep[i] = rep[i-1];
rep[0] = sequence.offset; rep[0] = sequence.offset;
/* Store the sequence */ /* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE, sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH); sequence.matchLength - MINMATCH);
ip += sequence.matchLength; ip += sequence.matchLength;

View file

@ -1098,7 +1098,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(anchor + llen <= iend); assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH); ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
anchor += advance; anchor += advance;
ip = anchor; ip = anchor;
} } } }

View file

@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* init */ /* init */
if (job->cdict) { if (job->cdict) {
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize); size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
assert(job->firstJob); /* only allowed for first job */ assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) JOB_ERROR(initError); if (ZSTD_isError(initError)) JOB_ERROR(initError);
} else { /* srcStart points at reloaded section */ } else { /* srcStart points at reloaded section */
@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast, ZSTD_dtlm_fast,
NULL, /*cdict*/ NULL, /*cdict*/
jobParams, pledgedSrcSize); &jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) JOB_ERROR(initError); if (ZSTD_isError(initError)) JOB_ERROR(initError);
} } } }
@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
unsigned jobID; unsigned jobID;
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
/* Copy the mutex/cond out */
ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
mtctx->jobs[jobID].cSize = 0; /* Clear the job description, but keep the mutex/cond */
memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
mtctx->jobs[jobID].job_mutex = mutex;
mtctx->jobs[jobID].job_cond = cond;
} }
memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
mtctx->inBuff.buffer = g_nullBuffer; mtctx->inBuff.buffer = g_nullBuffer;
mtctx->inBuff.filled = 0; mtctx->inBuff.filled = 0;
mtctx->allJobsCompleted = 1; mtctx->allJobsCompleted = 1;
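
ZSTDMT_releaseAllJobResources() used to memset() the whole job array, which also wiped the per-job mutex and condition variable that stay initialized for the context's lifetime. The new code copies them out, clears the slot, and puts them back. A hedged sketch of that pattern with stand-in types (not the real ZSTDMT_jobDescription):

    #include <string.h>

    typedef struct { int handle; } sync_t;      /* stand-in for ZSTD_pthread_mutex_t / cond */

    typedef struct {
        sync_t job_mutex;
        sync_t job_cond;
        void*  dstStart;
        size_t cSize;
        /* ...many more fields that should go back to zero... */
    } job_t;

    static void resetJobKeepSync(job_t* job)
    {
        sync_t const mutex = job->job_mutex;    /* copy the live sync objects out */
        sync_t const cond  = job->job_cond;
        memset(job, 0, sizeof(*job));           /* clear the whole job description */
        job->job_mutex = mutex;                 /* restore them unchanged */
        job->job_cond  = cond;
    }

    int main(void)
    {
        job_t job = { {1}, {2}, (void*)0, 42 };
        resetJobKeepSync(&job);
        return (job.job_mutex.handle == 1 && job.job_cond.handle == 2 && job.cSize == 0) ? 0 : 1;
    }
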
@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
/* Sets parameters relevant to the compression job, /* Sets parameters relevant to the compression job,
* initializing others to default values. */ * initializing others to default values. */
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
{ {
ZSTD_CCtx_params jobParams = params; ZSTD_CCtx_params jobParams = *params;
/* Clear parameters related to multithreading */ /* Clear parameters related to multithreading */
jobParams.forceWindow = 0; jobParams.forceWindow = 0;
jobParams.nbWorkers = 0; jobParams.nbWorkers = 0;
@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
/* ===== Multi-threaded compression ===== */ /* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */ /* ------------------------------------------ */
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{ {
unsigned jobLog; unsigned jobLog;
if (params.ldmParams.enableLdm) { if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized. /* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize * In which case, it's preferable to determine the jobSize
* based on chainLog instead. */ * based on chainLog instead. */
jobLog = MAX(21, params.cParams.chainLog + 4); jobLog = MAX(21, params->cParams.chainLog + 4);
} else { } else {
jobLog = MAX(20, params.cParams.windowLog + 2); jobLog = MAX(20, params->cParams.windowLog + 2);
} }
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
} }
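
ZSTDMT_computeTargetJobLog() now reads its parameters through a pointer, but the formula is unchanged: outside long-range mode the target job log is max(20, windowLog + 2), capped at ZSTDMT_JOBLOG_MAX. A small sketch that reproduces the arithmetic (the cap of 29 below is an assumption; the real constant is platform-dependent and not visible in this hunk):

    #include <stdio.h>

    #define MAX_(a, b) ((a) > (b) ? (a) : (b))
    #define MIN_(a, b) ((a) < (b) ? (a) : (b))

    static unsigned targetJobLog(unsigned windowLog, unsigned chainLog, int enableLdm)
    {
        unsigned const jobLog = enableLdm ? MAX_(21u, chainLog + 4)
                                          : MAX_(20u, windowLog + 2);
        return MIN_(jobLog, 29u);           /* 29 stands in for ZSTDMT_JOBLOG_MAX */
    }

    int main(void)
    {
        /* e.g. windowLog=24 and LDM off -> jobLog=26 -> jobs target about 64 MB of source */
        printf("jobLog=%u (job size ~%u MB)\n",
               targetJobLog(24, 0, 0), 1u << (targetJobLog(24, 0, 0) - 20));
        return 0;
    }
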
@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
return ovlog; return ovlog;
} }
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
{ {
int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy); int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog); int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
assert(0 <= overlapRLog && overlapRLog <= 8); assert(0 <= overlapRLog && overlapRLog <= 8);
if (params.ldmParams.enableLdm) { if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized. /* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize * In which case, it's preferable to determine the jobSize
* based on chainLog instead. * based on chainLog instead.
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog; - overlapRLog;
} }
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog); DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog; return (ovLog==0) ? 0 : (size_t)1 << ovLog;
} }
static unsigned static unsigned
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
{ {
assert(nbWorkers>0); assert(nbWorkers>0);
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
const ZSTD_CDict* cdict, const ZSTD_CDict* cdict,
ZSTD_CCtx_params params) ZSTD_CCtx_params params)
{ {
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
size_t const overlapSize = ZSTDMT_computeOverlapSize(params); size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src; const char* const srcStart = (const char*)src;
@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
} }
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) { if (mtctx->singleBlockingThread) {
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
assert(singleThreadParams.nbWorkers == 0); assert(singleThreadParams.nbWorkers == 0);
return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
dict, dictSize, cdict, dict, dictSize, cdict,
singleThreadParams, pledgedSrcSize); &singleThreadParams, pledgedSrcSize);
} }
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
mtctx->cdict = cdict; mtctx->cdict = cdict;
} }
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params); mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10)); DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
mtctx->targetSectionSize = params.jobSize; mtctx->targetSectionSize = params.jobSize;
if (mtctx->targetSectionSize == 0) { if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
} }
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);

View file

@ -61,7 +61,9 @@
* Error Management * Error Management
****************************************************************/ ****************************************************************/
#define HUF_isError ERR_isError #define HUF_isError ERR_isError
#ifndef CHECK_F
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
#endif
/* ************************************************************** /* **************************************************************
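
The #ifndef guard above lets huf_decompress.c tolerate a prior CHECK_F definition, presumably so the file can coexist with other units that define the same macro (for instance in amalgamated single-file builds). The macro itself simply forwards a sub-step's error code to the caller; a minimal illustration with stand-in helpers:

    #include <stddef.h>
    #include <stdio.h>

    #define MYLIB_isError(code) ((code) == (size_t)-1)   /* stand-in for HUF_isError */
    #ifndef CHECK_F
    #define CHECK_F(f) { size_t const err_ = (f); if (MYLIB_isError(err_)) return err_; }
    #endif

    static size_t readHeader(int ok) { return ok ? 4 : (size_t)-1; }   /* pretend sub-step */

    static size_t decode(int ok)
    {
        CHECK_F(readHeader(ok));        /* early-returns the error code on failure */
        return 100;                     /* pretend: decoded size */
    }

    int main(void)
    {
        printf("%zu %zu\n", decode(1), decode(0));
        return 0;
    }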

View file

@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
static size_t ZSTD_startingInputLength(ZSTD_format_e format) static size_t ZSTD_startingInputLength(ZSTD_format_e format)
{ {
size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
ZSTD_FRAMEHEADERSIZE_PREFIX;
ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
/* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
return startingInputLength; return startingInputLength;
@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{ {
unsigned long long totalDstSize = 0; unsigned long long totalDstSize = 0;
while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
U32 const magicNumber = MEM_readLE32(src); U32 const magicNumber = MEM_readLE32(src);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@ -629,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
/* check */ /* check */
RETURN_ERROR_IF( RETURN_ERROR_IF(
remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
srcSize_wrong); srcSize_wrong);
/* Frame Header */ /* Frame Header */
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
srcSize_wrong); srcSize_wrong);
@ -714,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
dictSize = ZSTD_DDict_dictSize(ddict); dictSize = ZSTD_DDict_dictSize(ddict);
} }
while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) { if (ZSTD_isLegacy(src, srcSize)) {
@ -1098,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
for (i=0; i<3; i++) { for (i=0; i<3; i++) {
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
dictionary_corrupted); dictionary_corrupted);
entropy->rep[i] = rep; entropy->rep[i] = rep;
} } } }
@ -1267,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
{ {
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
ZSTD_clearDict(dctx); ZSTD_clearDict(dctx);
if (dict && dictSize >= 8) { if (dict && dictSize != 0) {
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
dctx->ddict = dctx->ddictLocal; dctx->ddict = dctx->ddictLocal;
@ -1300,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
/* ZSTD_initDStream_usingDict() : /* ZSTD_initDStream_usingDict() :
* return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX. * return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */ * this function cannot fail */
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
{ {
DEBUGLOG(4, "ZSTD_initDStream_usingDict"); DEBUGLOG(4, "ZSTD_initDStream_usingDict");
FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
return ZSTD_FRAMEHEADERSIZE_PREFIX; return ZSTD_startingInputLength(zds->format);
} }
/* note : this variant can't fail */ /* note : this variant can't fail */
@ -1324,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{ {
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
return ZSTD_FRAMEHEADERSIZE_PREFIX; return ZSTD_startingInputLength(dctx->format);
} }
/* ZSTD_resetDStream() : /* ZSTD_resetDStream() :
* return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX. * return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */ * this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx) size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{ {
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
return ZSTD_FRAMEHEADERSIZE_PREFIX; return ZSTD_startingInputLength(dctx->format);
} }
@ -1564,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput; zds->lhSize += remainingInput;
} }
input->pos = input->size; input->pos = input->size;
return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
} }
assert(ip != NULL); assert(ip != NULL);
memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
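
Several of the changes above replace the fixed ZSTD_FRAMEHEADERSIZE_PREFIX constant with ZSTD_startingInputLength(format), since the magicless format has a shorter prefix. On the public API side this is the value returned by the streaming init functions; a minimal sketch against libzstd (exact value depends on the configured format):

    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        ZSTD_DStream* const zds = ZSTD_createDStream();
        if (zds == NULL) return 1;
        {   /* the return value is the "expected size" discussed above: enough input
             * to decode the frame-header prefix and learn how much to read next */
            size_t const toRead = ZSTD_initDStream(zds);
            printf("feed at least %zu bytes first\n", toRead);
        }
        ZSTD_freeDStream(zds);
        return 0;
    }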

View file

@ -573,38 +573,118 @@ typedef struct {
size_t pos; size_t pos;
} seqState_t; } seqState_t;
/*! ZSTD_overlapCopy8() :
* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
* If the offset is < 8 then the offset is spread to at least 8 bytes.
*
* Precondition: *ip <= *op
 * Postcondition: *op - *ip >= 8
*/
static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
assert(*ip <= *op);
if (offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[offset];
(*op)[0] = (*ip)[0];
(*op)[1] = (*ip)[1];
(*op)[2] = (*ip)[2];
(*op)[3] = (*ip)[3];
*ip += dec32table[offset];
ZSTD_copy4(*op+4, *ip);
*ip -= sub2;
} else {
ZSTD_copy8(*op, *ip);
}
*ip += 8;
*op += 8;
assert(*op - *ip >= 8);
}
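
ZSTD_overlapCopy8() is the old close-range match trick factored into a helper: the dec32/dec64 tables spread an offset shorter than 8 so that every later copy can move 8 bytes at a time. A standalone demonstration of the same table trick on a throwaway buffer (memcpy stands in for ZSTD_copy4/ZSTD_copy8):

    #include <stdio.h>
    #include <string.h>

    typedef unsigned char BYTE;

    static void overlapCopy8(BYTE** op, BYTE const** ip, size_t offset)
    {
        static const unsigned dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added to ip */
        static const int      dec64table[] = { 8, 8, 8, 7, 8, 9, 10, 11 }; /* subtracted  */
        if (offset < 8) {
            int const sub2 = dec64table[offset];
            (*op)[0] = (*ip)[0];
            (*op)[1] = (*ip)[1];
            (*op)[2] = (*ip)[2];
            (*op)[3] = (*ip)[3];
            *ip += dec32table[offset];
            memcpy(*op + 4, *ip, 4);
            *ip -= sub2;
        } else {
            memcpy(*op, *ip, 8);
        }
        *ip += 8;
        *op += 8;       /* now *op - *ip >= 8, so plain 8-byte copies are safe next */
    }

    int main(void)
    {
        BYTE buf[32] = "........abc";           /* the 3-byte pattern sits at buf+8 */
        BYTE const* ip = buf + 8;               /* match source */
        BYTE*       op = buf + 11;              /* destination, offset == 3 */
        overlapCopy8(&op, &ip, 3);
        printf("%.11s\n", buf + 8);             /* -> abcabcabcab */
        return 0;
    }
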
/* ZSTD_execSequenceLast7(): /*! ZSTD_safecopy() :
* exceptional case : decompress a match starting within last 7 bytes of output buffer. * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
* requires more careful checks, to ensure there is no overflow. * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
* performance does not matter though. * This function is only called in the uncommon case where the sequence is near the end of the block. It
* note : this case is supposed to be never generated "naturally" by reference encoder, * should be fast for a single long sequence, but can be slow for several short sequences.
* since in most cases it needs at least 8 bytes to look for a match. *
* but it's allowed by the specification. */ * @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
* The src buffer must be before the dst buffer.
*/
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
ptrdiff_t const diff = op - ip;
BYTE* const oend = op + length;
assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
if (length < 8) {
/* Handle short lengths. */
while (op < oend) *op++ = *ip++;
return;
}
if (ovtype == ZSTD_overlap_src_before_dst) {
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
assert(length >= 8);
ZSTD_overlapCopy8(&op, &ip, diff);
assert(op - ip >= 8);
assert(op <= oend);
}
if (oend <= oend_w) {
/* No risk of overwrite. */
ZSTD_wildcopy(op, ip, length, ovtype);
return;
}
if (op <= oend_w) {
/* Wildcopy until we get close to the end. */
assert(oend > oend_w);
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
ip += oend_w - op;
op = oend_w;
}
/* Handle the leftovers. */
while (op < oend) *op++ = *ip++;
}
/* ZSTD_execSequenceEnd():
* This version handles cases that are near the end of the output buffer. It requires
* more careful checks to make sure there is no overflow. By separating out these hard
* and unlikely cases, we can speed up the common cases.
*
* NOTE: This function needs to be fast for a single long sequence, but doesn't need
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
*/
FORCE_NOINLINE FORCE_NOINLINE
size_t ZSTD_execSequenceLast7(BYTE* op, size_t ZSTD_execSequenceEnd(BYTE* op,
BYTE* const oend, seq_t sequence, BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit, const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{ {
BYTE* const oLitEnd = op + sequence.litLength; BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset; const BYTE* match = oLitEnd - sequence.offset;
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
/* check */ /* bounds checks */
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); assert(oLitEnd < oMatchEnd);
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
/* copy literals */ /* copy literals */
while (op < oLitEnd) *op++ = *(*litPtr)++; ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
op = oLitEnd;
*litPtr = iLitEnd;
/* copy Match */ /* copy Match */
if (sequence.offset > (size_t)(oLitEnd - base)) { if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix */ /* offset beyond prefix */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
match = dictEnd - (base-match); match = dictEnd - (prefixStart-match);
if (match + sequence.matchLength <= dictEnd) { if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength); memmove(oLitEnd, match, sequence.matchLength);
return sequenceLength; return sequenceLength;
@ -614,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
memmove(oLitEnd, match, length1); memmove(oLitEnd, match, length1);
op = oLitEnd + length1; op = oLitEnd + length1;
sequence.matchLength -= length1; sequence.matchLength -= length1;
match = base; match = prefixStart;
} } } }
while (op < oMatchEnd) *op++ = *match++; ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
return sequenceLength; return sequenceLength;
} }
HINT_INLINE HINT_INLINE
size_t ZSTD_execSequence(BYTE* op, size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence, BYTE* const oend, seq_t sequence,
@ -634,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset; const BYTE* match = oLitEnd - sequence.offset;
/* check */ /* Errors and uncommon cases handled here. */
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); assert(oLitEnd < oMatchEnd);
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); if (iLitEnd > litLimit || oMatchEnd > oend_w)
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* copy Literals */ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
if (sequence.litLength > 8) assert(iLitEnd <= litLimit /* Literal length is in bounds */);
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
else assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
ZSTD_copy8(op, *litPtr);
/* Copy Literals:
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
* We likely don't need the full 32-byte wildcopy.
*/
assert(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(op, (*litPtr));
if (sequence.litLength > 16) {
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
}
op = oLitEnd; op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */ *litPtr = iLitEnd; /* update for next sequence */
/* copy Match */ /* Copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix -> go into extDict */ /* offset beyond prefix -> go into extDict */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@ -662,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
op = oLitEnd + length1; op = oLitEnd + length1;
sequence.matchLength -= length1; sequence.matchLength -= length1;
match = prefixStart; match = prefixStart;
if (op > oend_w || sequence.matchLength < MINMATCH) {
U32 i;
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
return sequenceLength;
}
} } } }
/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ /* Match within prefix of 1 or more bytes */
assert(op <= oMatchEnd);
assert(oMatchEnd <= oend_w);
assert(match >= prefixStart);
assert(sequence.matchLength >= 1);
/* match within prefix */ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
if (sequence.offset < 8) { * without overlap checking.
/* close range match, overlap */ */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ if (sequence.offset >= WILDCOPY_VECLEN) {
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ /* We bet on a full wildcopy for matches, since we expect matches to be
int const sub2 = dec64table[sequence.offset]; * longer than literals (in general). In silesia, ~10% of matches are longer
op[0] = match[0]; * than 16 bytes.
op[1] = match[1]; */
op[2] = match[2]; ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
op[3] = match[3]; return sequenceLength;
match += dec32table[sequence.offset];
ZSTD_copy4(op+4, match);
match -= sub2;
} else {
ZSTD_copy8(op, match);
} }
op += 8; match += 8; assert(sequence.offset < WILDCOPY_VECLEN);
if (oMatchEnd > oend-(16-MINMATCH)) { /* Copy 8 bytes and spread the offset to be >= 8. */
if (op < oend_w) { ZSTD_overlapCopy8(&op, &match, sequence.offset);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
/* If the match length is > 8 bytes, then continue with the wildcopy. */
HINT_INLINE if (sequence.matchLength > 8) {
size_t ZSTD_execSequenceLong(BYTE* op, assert(op < oMatchEnd);
BYTE* const oend, seq_t sequence, ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = sequence.match;
/* check */
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
/* copy Literals */
if (sequence.litLength > 8)
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
/* copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength);
return sequenceLength;
}
/* span extDict & currentPrefixSegment */
{ size_t const length1 = dictEnd - match;
memmove(oLitEnd, match, length1);
op = oLitEnd + length1;
sequence.matchLength -= length1;
match = prefixStart;
if (op > oend_w || sequence.matchLength < MINMATCH) {
U32 i;
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
return sequenceLength;
}
} }
assert(op <= oend_w);
assert(sequence.matchLength >= MINMATCH);
/* match within prefix */
if (sequence.offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[sequence.offset];
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
match += dec32table[sequence.offset];
ZSTD_copy4(op+4, match);
match -= sub2;
} else {
ZSTD_copy8(op, match);
}
op += 8; match += 8;
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
} }
return sequenceLength; return sequenceLength;
} }
@ -1098,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
/* decode and decompress */ /* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset); seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb & STORED_SEQS_MASK] = sequence; sequences[seqNb & STORED_SEQS_MASK] = sequence;
@ -1109,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
/* finish queue */ /* finish queue */
seqNb -= seqAdvance; seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) { for ( ; seqNb<nbSeq ; seqNb++) {
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize; op += oneSeqSize;
} }

thirdparty/zstd/zstd.h
View file

@ -15,6 +15,7 @@ extern "C" {
#define ZSTD_H_235446 #define ZSTD_H_235446
/* ====== Dependency ======*/ /* ====== Dependency ======*/
#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */ #include <stddef.h> /* size_t */
@ -71,7 +72,7 @@ extern "C" {
/*------ Version ------*/ /*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4 #define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 3 #define ZSTD_VERSION_RELEASE 4
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@ -196,9 +197,13 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
/*! ZSTD_compressCCtx() : /*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
* The function will compress at requested compression level, * Important : in order to behave similarly to `ZSTD_compress()`,
* ignoring any other parameter */ * this function compresses at requested compression level,
* __ignoring any other parameter__ .
* If any advanced parameter was set using the advanced API,
* they will all be reset. Only `compressionLevel` remains.
*/
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
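
The sharpened documentation is worth illustrating: ZSTD_compressCCtx() ignores (and resets) any advanced parameters previously set on the context, so it behaves like plain ZSTD_compress() with a reusable context. A short usage sketch against the public API:

    #include <stdio.h>
    #include <string.h>
    #include <zstd.h>

    int main(void)
    {
        const char src[] = "one-shot compression with an explicit context";
        char dst[256];
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL) return 1;

        /* any sticky parameter set here is ignored and reset by ZSTD_compressCCtx() */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);

        {   size_t const cSize = ZSTD_compressCCtx(cctx, dst, sizeof(dst),
                                                    src, strlen(src), 3 /* level */);
            if (ZSTD_isError(cSize)) { ZSTD_freeCCtx(cctx); return 1; }
            printf("%zu -> %zu bytes\n", strlen(src), cSize);
        }
        ZSTD_freeCCtx(cctx);
        return 0;
    }
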
@ -233,7 +238,7 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
* using ZSTD_CCtx_set*() functions. * using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
* They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx() * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
* *
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
* *
@ -261,18 +266,26 @@ typedef enum {
/* compression parameters /* compression parameters
* Note: When compressing with a ZSTD_CDict these parameters are superseded * Note: When compressing with a ZSTD_CDict these parameters are superseded
* by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict() * by the parameters used to construct the ZSTD_CDict.
* for more info (superseded-by-cdict). */ * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
* Note that exact compression parameters are dynamically determined,
* depending on both compression level and srcSize (when known).
* Default level is ZSTD_CLEVEL_DEFAULT==3. * Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level. * Note 1 : it's possible to pass a negative compression level.
* Note 2 : setting a level sets all default values of other compression parameters */ * Note 2 : setting a level resets all other compression parameters to default */
/* Advanced compression parameters :
* It's possible to pin down compression parameters to some specific values.
* In which case, these values are no longer dynamically selected by the compressor */
ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
* This will set a memory budget for streaming decompression,
* with larger values requiring more memory
* and typically compressing more.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "use default windowLog". * Special: value 0 means "use default windowLog".
* Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
* requires explicitly allowing such window size at decompression stage if using streaming. */ * requires explicitly allowing such size at streaming decompression stage. */
ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
* Resulting memory usage is (1 << (hashLog+2)). * Resulting memory usage is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
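
The windowLog note above has a practical consequence: a stream compressed with a pinned windowLog above ZSTD_WINDOWLOG_LIMIT_DEFAULT (27) is refused by a default streaming decoder unless the decoder's window limit is raised as well. A hedged sketch of both sides, using the stable parameter API (values are illustrative):

    #include <zstd.h>

    /* compression side: pin a large window (sticky, applies to ZSTD_compress2 / streaming) */
    static void setupCompression(ZSTD_CCtx* cctx)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 28);        /* above the default limit */
    }

    /* decompression side: without this, streaming decompression of such frames errors out */
    static void setupDecompression(ZSTD_DCtx* dctx)
    {
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 28);
    }

    int main(void)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
        if (cctx && dctx) { setupCompression(cctx); setupDecompression(dctx); }
        ZSTD_freeCCtx(cctx);
        ZSTD_freeDCtx(dctx);
        return 0;
    }
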
@@ -283,13 +296,13 @@ typedef enum {
* Resulting memory usage is (1 << (chainLog+2)).
* Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
* Larger tables result in better and slower compression.
* This parameter is useless for "fast" strategy.
* It's still useful when using "dfast" strategy,
* in which case it defines a secondary probe table.
* Special: value 0 means "use default chainLog". */
ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
* More attempts result in better and slower compression.
* This parameter is useless for "fast" and "dFast" strategies.
* Special: value 0 means "use default searchLog". */
ZSTD_c_minMatch=105, /* Minimum size of searched matches.
* Note that Zstandard can still find matches of smaller size,
@@ -344,7 +357,7 @@ typedef enum {
ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
* Content size must be known at the beginning of compression.
* This is automatically the case when using ZSTD_compress2(),
* For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
@@ -363,7 +376,7 @@ typedef enum {
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
@@ -386,6 +399,7 @@ typedef enum {
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -396,6 +410,7 @@ typedef enum {
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
ZSTD_c_experimentalParam7=1004
} ZSTD_cParameter;
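As a quick illustration of the parameter API documented above, here is a minimal sketch; the helper name and the particular level/windowLog values are illustrative only, not part of zstd, and #include <zstd.h> is assumed.

    /* Minimal sketch: set a few advanced parameters, then compress in one call.
     * Returns the compressed size, or 0 on any error. */
    static size_t compress_with_params(void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t cSize;
        if (cctx == NULL) return 0;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 23);    /* 8 MB window */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
        cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return ZSTD_isError(cSize) ? 0 : cSize;
    }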
typedef struct {
@@ -793,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
typedef struct ZSTD_CDict_s ZSTD_CDict;
/*! ZSTD_createCDict() :
* When compressing multiple messages or blocks using the same dictionary,
* it's recommended to digest the dictionary only once, since it's a costly operation.
* ZSTD_createCDict() will create a state from digesting a dictionary.
* The resulting state can be used for future compression operations with very limited startup cost.
* ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
* @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
* Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
* Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
* in which case the only thing that it transports is the @compressionLevel.
* This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
* expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                                         int compressionLevel);
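The intended usage pattern, sketched under the assumption that dictBuf, dictSize and the per-message buffers come from the caller: digest the dictionary once, then reuse the CDict across many compressions.

    /* Sketch: one digested dictionary shared across many compressions.
     * setup, done once:
     *   ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3);
     *   ZSTD_CCtx*  const cctx  = ZSTD_createCCtx(); */
    static size_t compress_one(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
    {
        return ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
    }
    /* teardown, done once: ZSTD_freeCCtx(cctx); ZSTD_freeCDict(cdict); */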
@@ -925,7 +945,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
* It's a CPU consuming operation, with non-negligible impact on latency.
* If there is a need to use the same prefix multiple times, consider loadDictionary instead.
* Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
* Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
                                       const void* prefix, size_t prefixSize);
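A sketch of prefix usage, for example referencing the previously emitted chunk before compressing the next one; the helper name is illustrative, and the prefix has to be re-referenced before every compression since it is consumed by the next one only.

    /* Sketch: reference `prev` as a raw-content prefix for exactly one compression.
     * `prev` must stay valid and unmodified until that compression completes. */
    static size_t compress_with_prefix(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* prev, size_t prevSize,
                                       const void* src, size_t srcSize)
    {
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
        ZSTD_CCtx_refPrefix(cctx, prev, prevSize);
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }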
@@ -969,7 +989,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
* Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
* Prefix buffer must remain unmodified up to the end of frame,
* reached when ZSTD_decompressStream() returns 0.
* Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
* Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
* Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
* A full dictionary is more costly, as it requires building tables.
@@ -1014,8 +1034,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* Some of them might be removed in the future (especially when redundant with existing stable functions)
* ***************************************************************************************/
#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
#define ZSTD_SKIPPABLEHEADERSIZE 8
@@ -1063,6 +1083,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_SRCSIZEHINT_MIN 0
#define ZSTD_SRCSIZEHINT_MAX INT_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
@@ -1072,6 +1094,24 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct {
unsigned int matchPos; /* Match pos in dst */
/* If seqDef.offset > 3, then this is seqDef.offset - 3
* If seqDef.offset < 3, then this is the corresponding repeat offset
* But if seqDef.offset < 3 and litLength == 0, this is the
* repeat offset before the corresponding repeat offset
* And if seqDef.offset == 3 and litLength == 0, this is the
* most recent repeat offset - 1
*/
unsigned int offset;
unsigned int litLength; /* Literal length */
unsigned int matchLength; /* Match length */
/* 0 when seq not rep and seqDef.offset otherwise
* when litLength == 0 this will be <= 4, otherwise <= 3 like normal
*/
unsigned int rep;
} ZSTD_Sequence;
typedef struct {
unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
@@ -1101,21 +1141,12 @@ typedef enum {
typedef enum {
ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
} ZSTD_dictLoadMethod_e;
typedef enum {
/* Opened question : should we have a format ZSTD_f_auto ?
* Today, it would mean exactly the same as ZSTD_f_zstd1.
* But, in the future, should several formats become supported,
* on the compression side, it would mean "default format".
* On the decompression side, it would mean "automatic format detection",
* so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
* Since meaning is a little different, another option could be to define different enums for compression and decompression.
* This question could be kept for later, when there are actually multiple formats to support,
* but there is also the question of pinning enum values, and pinning value `0` is especially important */
ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
* Useful to save 4 bytes per generated frame.
* Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e;
@@ -1126,7 +1157,7 @@ typedef enum {
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
* Zstd currently supports the use of a CDict in three ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
@@ -1142,6 +1173,12 @@ typedef enum {
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
* - The CDict's tables are not used at all, and instead we use the working
* context alone to reload the dictionary and use params based on the source
* size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
* This method is effective when the dictionary sizes are very small relative
* to the input size, and the input size is fairly large to begin with.
*
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
@@ -1150,6 +1187,7 @@ typedef enum {
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
} ZSTD_dictAttachPref_e;
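If experimentation shows the heuristic choosing poorly, the preference can be pinned through the normal parameter setter; a sketch, assuming ZSTD_STATIC_LINKING_ONLY is defined (ZSTD_c_forceAttachDict is experimental) and that a cctx and cdict already exist.

    /* Sketch: force the dictionary contents to be copied rather than attached. */
    static void prefer_dict_copy(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceCopy);
        ZSTD_CCtx_refCDict(cctx, cdict);
    }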
typedef enum {
@@ -1158,7 +1196,7 @@ typedef enum {
* levels will be compressed. */
ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
* emitted if Huffman compression is not profitable. */
ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
} ZSTD_literalCompressionMode_e;
@@ -1210,20 +1248,38 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
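A sketch of incremental header parsing with these entry points; both the macro and the function are experimental, so ZSTD_STATIC_LINKING_ONLY is assumed, and the helper name is illustrative.

    /* Sketch: return the exact frame header size once enough prefix bytes are
     * buffered, or 0 if more input is needed / the input is not a valid frame. */
    static size_t header_size_when_ready(const void* buf, size_t avail)
    {
        if (avail < ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1)) return 0;
        {   size_t const h = ZSTD_frameHeaderSize(buf, avail);
            return ZSTD_isError(h) ? 0 : h;
        }
    }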
/*! ZSTD_getSequences() :
* Extract sequences from the sequence store
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
* @return : number of sequences extracted
*/
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
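A sketch of driving this new entry point; sizing outSeqs at one ZSTD_Sequence per source byte is a deliberately loose upper bound chosen here for simplicity, and <stdlib.h> plus ZSTD_STATIC_LINKING_ONLY are assumed.

    /* Sketch: run sequence extraction over `src` and return how many sequences
     * were produced (0 on allocation or extraction failure). */
    static size_t count_sequences(const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
        size_t nbSeqs = 0;
        if (cctx != NULL && seqs != NULL)
            nbSeqs = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
        free(seqs);
        ZSTD_freeCCtx(cctx);
        return nbSeqs;
    }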
/***************************************
* Memory management
***************************************/
/*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage of a future
* {D,C}Ctx, before its creation.
*
* ZSTD_estimateCCtxSize() will provide a budget large enough for any
* compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
* this estimate does not include space for a window buffer, so this estimate
* is guaranteed to be enough for single-shot compressions, but not streaming
* compressions. It will however assume the input may be arbitrarily large,
* which is the worst case. If srcSize is known to always be small,
* ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
* ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
* ZSTD_CCtxParams_setParameter().
*
* Note: only single-threaded compression is supported. This function will
* return an error code if ZSTD_c_nbWorkers is >= 1. */
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
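These estimates pair naturally with the static-allocation entry point ZSTD_initStaticCCtx() from the experimental section; a sketch, with malloc standing in for whatever memory the caller actually controls.

    /* Sketch: carve a CCtx out of caller-owned memory sized from the estimate.
     * The workspace must outlive the CCtx and is released by the caller,
     * not by ZSTD_freeCCtx(). */
    static ZSTD_CCtx* cctx_in_own_memory(int level, void** workspaceOut)
    {
        size_t const need = ZSTD_estimateCCtxSize(level);
        void* const workspace = malloc(need);
        *workspaceOut = workspace;
        return workspace ? ZSTD_initStaticCCtx(workspace, need) : NULL;
    }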
@@ -1334,7 +1390,8 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
* Create a digested dictionary for compression
* Dictionary content is just referenced, not duplicated.
* As a consequence, `dictBuffer` **must** outlive CDict,
* and its content must remain unmodified throughout the lifetime of CDict.
* note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
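A minimal sketch of the by-reference variant; apart from the buffer-lifetime requirement, usage and teardown are the same as for ZSTD_createCDict(), and the wrapper name is illustrative.

    /* Sketch: the dictionary buffer is only referenced, so it must remain
     * valid and unmodified for as long as the returned CDict is in use. */
    static ZSTD_CDict* make_cdict_byref(const void* dictBuffer, size_t dictSize, int level)
    {
        return ZSTD_createCDict_byReference(dictBuffer, dictSize, level);
    }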
/*! ZSTD_getCParams() :
@@ -1361,7 +1418,9 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
/*! ZSTD_compress_advanced() :
* Note : this function is now DEPRECATED.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
* This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* src, size_t srcSize,
@@ -1369,7 +1428,9 @@ ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                                          ZSTD_parameters params);
/*! ZSTD_compress_usingCDict_advanced() :
* Note : this function is now REDUNDANT.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
* This prototype will be marked as deprecated and generate compilation warning in some future version */
ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                                     void* dst, size_t dstCapacity,
                                                     const void* src, size_t srcSize,
@@ -1441,6 +1502,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size,
* but compression ratio may regress significantly if guess considerably underestimates */
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
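A sketch of supplying the hint; since compression parameters are int-valued, oversized guesses are simply skipped here. <limits.h> and ZSTD_STATIC_LINKING_ONLY are assumed, and the helper name is illustrative.

    /* Sketch: pass a rough size guess when the exact source size is unknown. */
    static void hint_source_size(ZSTD_CCtx* cctx, size_t expectedSrcSize)
    {
        if (expectedSrcSize <= (size_t)INT_MAX)
            ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)expectedSrcSize);
    }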
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@@ -1613,8 +1680,13 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
* pledgedSrcSize must be correct. If it is not known at init time, use
* ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
* "0" also disables frame content size field. It may be enabled in the future.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t
ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
int compressionLevel,
unsigned long long pledgedSrcSize);
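The replacement spelled out above, as a compact single-shot sketch; dstCapacity is assumed to be at least ZSTD_compressBound(srcSize) so the loop terminates without needing to grow the output, and the helper name is illustrative.

    /* Sketch: new-API equivalent of ZSTD_initCStream_srcSize() plus compression. */
    static size_t stream_compress_all(ZSTD_CCtx* cctx, int level,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize)
    {
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
        ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
        for (;;) {
            size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
            if (ZSTD_isError(remaining)) return remaining;
            if (remaining == 0) return out.pos;   /* frame fully written */
        }
    }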
/**! ZSTD_initCStream_usingDict() :
* This function is deprecated, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -1623,42 +1695,66 @@ ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLe
*
* Creates of an internal CDict (incompatible with static CCtx), except if
* dict == NULL or dictSize < 8, in which case no dict is used.
* Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
* it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t
ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
int compressionLevel);
/**! ZSTD_initCStream_advanced() :
* This function is deprecated, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* // Pseudocode: Set each zstd parameter and leave the rest as-is.
* for ((param, value) : params) {
* ZSTD_CCtx_setParameter(zcs, param, value);
* }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
*
* dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
* pledgedSrcSize must be correct.
* If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t
ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params,
unsigned long long pledgedSrcSize);
/**! ZSTD_initCStream_usingCDict() :
* This function is deprecated, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* note : cdict will just be referenced, and must outlive compression session
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
/**! ZSTD_initCStream_usingCDict_advanced() :
* This function is DEPRECATED, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
* for ((fParam, value) : fParams) {
* ZSTD_CCtx_setParameter(zcs, fParam, value);
* }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
* pledgedSrcSize must be correct. If srcSize is not known at init time, use
* value ZSTD_CONTENTSIZE_UNKNOWN.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t
ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
const ZSTD_CDict* cdict,
ZSTD_frameParameters fParams,
unsigned long long pledgedSrcSize);
/*! ZSTD_resetCStream() :
* This function is deprecated, and is equivalent to:
@@ -1673,6 +1769,7 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
* but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError())
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
@@ -1718,8 +1815,10 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
* ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
*
* note: no dictionary will be used if dict == NULL or dictSize < 8
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
/**
* This function is deprecated, and is equivalent to:
*
@@ -1727,14 +1826,17 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
* ZSTD_DCtx_refDDict(zds, ddict);
*
* note : ddict is referenced, it must outlive decompression session
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
/**
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
*
* re-use decompression parameters from previous init; saves dictionary loading
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
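For contrast with the deprecated initDStream_*/resetDStream() family, here is a sketch of the reset-based decompression loop the notes above point to; dstCapacity is assumed large enough for the regenerated content, and the helper name is illustrative.

    /* Sketch: reset, optionally (re)load a dictionary, then drive decompression. */
    static size_t stream_decompress_all(ZSTD_DCtx* dctx,
                                        const void* dict, size_t dictSize,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
    {
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
        if (dict != NULL && dictSize >= 8)
            ZSTD_DCtx_loadDictionary(dctx, dict, dictSize);
        while (in.pos < in.size) {
            size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
            if (ZSTD_isError(ret)) return ret;
            if (ret == 0) break;                  /* a frame is fully decoded */
        }
        return out.pos;
    }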
@@ -1908,7 +2010,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
/*!
Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
A few rules to respect :