16#include "../common/zstd_internal.h"
/* ZSTD_compressSubBlock_literal
 * NOTE(review): only part of this definition is visible in this view; the
 * comments below describe the visible statements only.
 * Visible parameters: hufTable, dst/dstSize (output buffer and its size),
 * bmi2, writeEntropy, and entropyWritten (an int* supplied by the caller).
 * The last visible statement returns the number of bytes between ostart
 * and op. */
42ZSTD_compressSubBlock_literal(
const HUF_CElt* hufTable,
45 void* dst,
size_t dstSize,
46 const int bmi2,
int writeEntropy,
int* entropyWritten)
/* header is 200 when writeEntropy is set, else 0; it lowers the litSize
 * thresholds used for the header-size choice just below. */
48 size_t const header = writeEntropy ? 200 : 0;
/* lhSize is 3, 4 or 5: one extra byte once litSize reaches (1 KB - header),
 * and another once it reaches (16 KB - header). */
49 size_t const lhSize = 3 + (litSize >= (1
KB -
header)) + (litSize >= (16
KB -
header));
51 BYTE*
const oend = ostart + dstSize;
/* Payload is written after the space reserved for the lhSize-byte header. */
52 BYTE* op = ostart + lhSize;
/* True only when the 3-byte header was selected. */
53 U32 const singleStream = lhSize == 3;
57 DEBUGLOG(5,
"ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
61 DEBUGLOG(5,
"ZSTD_compressSubBlock_literal using raw literal");
64 DEBUGLOG(5,
"ZSTD_compressSubBlock_literal using rle literal");
/* No entropy is being written and the compressed size is not smaller than
 * the input: per the log message, raw literals are used instead. */
88 if (!writeEntropy && cLitSize >= litSize) {
89 DEBUGLOG(5,
"ZSTD_compressSubBlock_literal using raw literal because uncompressible");
/* The header size implied by cLitSize (same 1 KB / 16 KB steps, without the
 * header allowance) exceeds the lhSize chosen up front. */
93 if (lhSize < (
size_t)(3 + (cLitSize >= 1
KB) + (cLitSize >= 16
KB))) {
94 assert(cLitSize > litSize);
95 DEBUGLOG(5,
"Literals expanded beyond allowed header size");
98 DEBUGLOG(5,
"ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
/* Three header layouts follow. Each packs hType in the low bits, a stream
 * field at bit 2, litSize at bit 4, and cLitSize at bit 14, 18 or 22.
 * NOTE(review): the selecting switch/case lines are not visible here;
 * presumably one layout per lhSize value - confirm against the full file. */
105 {
U32 const lhc = hType + ((
U32)(!singleStream) << 2) + ((
U32)litSize<<4) + ((
U32)cLitSize<<14);
110 {
U32 const lhc = hType + (2 << 2) + ((
U32)litSize<<4) + ((
U32)cLitSize<<18);
115 {
U32 const lhc = hType + (3 << 2) + ((
U32)litSize<<4) + ((
U32)cLitSize<<22);
/* With cLitSize shifted by 22, only its low 10 bits fit in the 32-bit lhc;
 * the remaining high bits are stored in the fifth header byte. */
117 ostart[4] = (
BYTE)(cLitSize >> 10);
124 DEBUGLOG(5,
"Compressed literals: %u -> %u", (
U32)litSize, (
U32)(op-ostart));
/* Total bytes produced, header included. */
125 return (
size_t)(op-ostart);
/* ZSTD_seqDecompressedSize
 * NOTE(review): partial view; the loop body and the conditions guarding the
 * two asserts are not visible here.
 * Takes a seqStore, an array of nbSeqs sequences, a literal byte count
 * litSize and a lastSubBlock flag. The visible return value is
 * matchLengthSum + litSize, i.e. it uses the caller-supplied litSize rather
 * than the locally accumulated litLengthSum. */
129ZSTD_seqDecompressedSize(
seqStore_t const* seqStore,
130 const seqDef* sequences,
size_t nbSeqs,
131 size_t litSize,
int lastSubBlock)
/* Running totals, both starting at zero. */
133 size_t matchLengthSum = 0;
134 size_t litLengthSum = 0;
/* One iteration per sequence (body not visible in this view). */
136 for (n=0;
n<nbSeqs;
n++) {
141 DEBUGLOG(5,
"ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
142 (
unsigned)nbSeqs, (
const void*)sequences,
143 (
unsigned)litLengthSum, (
unsigned)matchLengthSum);
/* Two consistency checks between the summed literal lengths and litSize:
 * one requires equality, the other allows litSize to be larger.
 * NOTE(review): presumably selected by lastSubBlock - the branch lines are
 * not visible, confirm against the full file. */
145 assert(litLengthSum == litSize);
147 assert(litLengthSum <= litSize);
149 return matchLengthSum + litSize;
/* Fragment of ZSTD_compressSubBlock_sequences (name taken from its own log
 * messages below; the first lines of the signature are not visible here).
 * Visible parameters: sequences/nbSeq, the three code arrays llCode, mlCode
 * and ofCode, cctxParams, dst/dstCapacity, bmi2, writeEntropy and
 * entropyWritten. Visible return statements yield the number of bytes
 * between ostart and op. */
165 const seqDef* sequences,
size_t nbSeq,
166 const BYTE* llCode,
const BYTE* mlCode,
const BYTE* ofCode,
167 const ZSTD_CCtx_params* cctxParams,
168 void* dst,
size_t dstCapacity,
169 const int bmi2,
int writeEntropy,
int* entropyWritten)
173 BYTE*
const oend = ostart + dstCapacity;
177 DEBUGLOG(5,
"ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
182 dstSize_tooSmall,
"");
/* Two-byte form of the sequence count: high byte of nbSeq plus 0x80 first,
 * then the low byte; op advances by 2.
 * NOTE(review): the conditions selecting this form, and any other forms,
 * are not visible here. */
186 op[0] = (
BYTE)((nbSeq>>8) + 0x80), op[1] = (
BYTE)nbSeq, op+=2;
190 return (
size_t)(op - ostart);
196 DEBUGLOG(5,
"ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (
unsigned)(op-ostart));
/* The byte at seqHead packs three type fields at bit positions 6, 4 and 2:
 * either the LL/Off/ML types, or the same repeat value in all three. */
203 *seqHead = (
BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
208 *seqHead = (
BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
212 op, (
size_t)(oend - op),
/* Decoder-compatibility workaround, compiled out when
 * FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is defined (see log text). */
228#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
232 DEBUGLOG(5,
"Avoiding bug in zstd decoder in versions <= 1.3.4 by "
233 "emitting an uncompressed block.");
237 DEBUGLOG(5,
"ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
247#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
/* Fewer than 4 bytes written since seqHead: per the log message this case
 * is avoided for the sake of decoders <= 1.4.0. */
248 if (op-seqHead < 4) {
249 DEBUGLOG(5,
"Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
250 "an uncompressed block when sequences are < 4 bytes");
256 return (
size_t)(op - ostart);
/* Fragment of ZSTD_compressSubBlock (name taken from its log message; the
 * first lines of the signature are not visible here).
 * Visible flow: reserve ZSTD_blockHeaderSize bytes at the start of dst,
 * compress the literals, then the sequences, each into [op, oend).
 * A zero size from either stage makes this function return 0.
 * The final visible return is the byte count between ostart and op. */
265 const seqDef* sequences,
size_t nbSeq,
267 const BYTE* llCode,
const BYTE* mlCode,
const BYTE* ofCode,
268 const ZSTD_CCtx_params* cctxParams,
269 void* dst,
size_t dstCapacity,
271 int writeLitEntropy,
int writeSeqEntropy,
272 int* litEntropyWritten,
int* seqEntropyWritten,
276 BYTE*
const oend = ostart + dstCapacity;
/* Skip the block header; it can only be filled in once the size is known. */
277 BYTE* op = ostart + ZSTD_blockHeaderSize;
278 DEBUGLOG(5,
"ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
279 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
/* Literals stage: uses the Huffman table stored in entropy->huf.CTable. */
280 {
size_t cLitSize = ZSTD_compressSubBlock_literal((
const HUF_CElt*)entropy->
huf.
CTable,
282 op, (
size_t)(oend-op),
283 bmi2, writeLitEntropy, litEntropyWritten);
285 if (cLitSize == 0)
return 0;
/* Sequences stage: uses the FSE tables in entropy->fse. */
288 {
size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->
fse,
291 llCode, mlCode, ofCode,
293 op, (
size_t)(oend-op),
294 bmi2, writeSeqEntropy, seqEntropyWritten);
296 if (cSeqSize == 0)
return 0;
/* cSize is the size of what was written after the reserved header. */
300 {
size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
304 return (
size_t)(op-ostart);
/* ZSTD_estimateSubBlockSize_literal
 * NOTE(review): partial view; the estimation logic between the local
 * declarations and the final two statements is not visible here.
 * Returns cLitSizeEstimate plus a fixed 3-byte literal section header, with
 * hufMetadata->hufDesSize added first when writeEntropy is set. */
307static size_t ZSTD_estimateSubBlockSize_literal(
const BYTE*
literals,
size_t litSize,
310 void* workspace,
size_t wkspSize,
/* The caller-provided workspace is viewed as an array of unsigned counters. */
313 unsigned*
const countWksp = (
unsigned*)workspace;
314 unsigned maxSymbolValue = 255;
315 size_t literalSectionHeaderSize = 3;
/* The table description size is counted only when entropy is written. */
323 if (writeEntropy) cLitSizeEstimate += hufMetadata->
hufDesSize;
324 return cLitSizeEstimate + literalSectionHeaderSize;
/* Fragment of a per-symbol-type size estimator.
 * NOTE(review): the function name and first parameters are not visible
 * here; presumably this is ZSTD_estimateSubBlockSize_symbolType, which is
 * called with matching arguments elsewhere in this file - confirm.
 * The visible code accumulates an estimate in bits and returns it divided
 * by 8, or returns nbSeq * 10 when the estimate is an error code. */
331 const BYTE* codeTable,
unsigned maxCode,
333 const U8* additionalBits,
334 short const* defaultNorm,
U32 defaultNormLog,
U32 defaultMax,
335 void* workspace,
size_t wkspSize)
/* The caller-provided workspace is viewed as an array of unsigned counters. */
337 unsigned*
const countWksp = (
unsigned*)workspace;
/* ctp walks codeTable from ctStart up to, but not including, ctEnd,
 * which is nbSeq entries past the start. */
338 const BYTE* ctp = codeTable;
339 const BYTE*
const ctStart = ctp;
340 const BYTE*
const ctEnd = ctStart + nbSeq;
341 size_t cSymbolTypeSizeEstimateInBits = 0;
342 unsigned max = maxCode;
/* Three visible assignments to the estimate follow: one guarded by
 * max <= defaultMax, a constant 0, and one from ZSTD_fseBitCost.
 * NOTE(review): the branch conditions choosing among them are not visible. */
347 assert(max <= defaultMax);
348 cSymbolTypeSizeEstimateInBits =
max <= defaultMax
352 cSymbolTypeSizeEstimateInBits = 0;
354 cSymbolTypeSizeEstimateInBits =
ZSTD_fseBitCost(fseCTable, countWksp, max);
/* If the estimate is an error code, fall back to nbSeq * 10. */
356 if (
ZSTD_isError(cSymbolTypeSizeEstimateInBits))
return nbSeq * 10;
/* Add per-code extra bits: additionalBits[code] when a table is supplied,
 * otherwise the code value itself. */
357 while (ctp < ctEnd) {
358 if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
359 else cSymbolTypeSizeEstimateInBits += *ctp;
/* Bits to bytes, using integer division. */
362 return cSymbolTypeSizeEstimateInBits / 8;
/* ZSTD_estimateSubBlockSize_sequences
 * NOTE(review): partial view; some parameters and argument lines are not
 * visible here.
 * Returns a fixed 3-byte section header when nbSeq is 0. Otherwise it sums
 * the per-type estimates for offsets, literal lengths and match lengths,
 * adds fseMetadata->fseTablesSize when writeEntropy is set, and returns the
 * sum plus the 3-byte header. */
365static size_t ZSTD_estimateSubBlockSize_sequences(
const BYTE* ofCodeTable,
366 const BYTE* llCodeTable,
367 const BYTE* mlCodeTable,
371 void* workspace,
size_t wkspSize,
374 size_t const sequencesSectionHeaderSize = 3;
375 size_t cSeqSizeEstimate = 0;
/* No sequences: only the section header is counted. */
376 if (nbSeq == 0)
return sequencesSectionHeaderSize;
/* Offsets, then literal lengths, then match lengths; all three calls share
 * the same workspace. */
377 cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->
ofType, ofCodeTable,
MaxOff,
380 workspace, wkspSize);
381 cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->
llType, llCodeTable,
MaxLL,
383 LL_defaultNorm, LL_defaultNormLog,
MaxLL,
384 workspace, wkspSize);
385 cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->
mlType, mlCodeTable,
MaxML,
387 ML_defaultNorm, ML_defaultNormLog,
MaxML,
388 workspace, wkspSize);
/* The table size is counted only when entropy is written. */
389 if (writeEntropy) cSeqSizeEstimate += fseMetadata->
fseTablesSize;
390 return cSeqSizeEstimate + sequencesSectionHeaderSize;
/* Fragment of a combined literals + sequences size estimator.
 * NOTE(review): the function name and first parameters are not visible
 * here; presumably this is ZSTD_estimateSubBlockSize, which is called with
 * matching arguments elsewhere in this file - confirm.
 * Visible parameters: the three code tables, a workspace with its size,
 * and separate writeLitEntropy / writeSeqEntropy flags. */
398 const BYTE* ofCodeTable,
399 const BYTE* llCodeTable,
400 const BYTE* mlCodeTable,
404 void* workspace,
size_t wkspSize,
405 int writeLitEntropy,
int writeSeqEntropy)
410 workspace, wkspSize, writeLitEntropy);
/* The sequences estimate is stored in ebs.estBlockSize.
 * NOTE(review): any later adjustment of this field is not visible here. */
411 ebs.
estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
413 workspace, wkspSize, writeSeqEntropy);
/* countLiterals
 * NOTE(review): partial view; the loop body and the return statement are
 * not visible here.
 * Iterates over seqCount sequences starting at sp. Per the log message it
 * reports a byte total for those sequences. */
429static size_t countLiterals(
seqStore_t const* seqStore,
const seqDef* sp,
size_t seqCount)
/* One iteration per sequence in [sp, sp + seqCount). */
433 for (n=0;
n<seqCount;
n++) {
436 DEBUGLOG(6,
"countLiterals for %zu sequences from %p => %zu bytes", seqCount, (
const void*)sp, total);
/* sizeBlockSequences
 * NOTE(review): partial view; the tail of the signature, the end of the
 * loop and the final return are not visible here.
 * Accumulates a cost budget over sequences from sp and compares it with
 * targetBudget. Each sequence costs litLength * avgLitCost + avgSeqCost.
 * Returns 1 early when the first sequence alone exceeds targetBudget. */
442static size_t sizeBlockSequences(
const seqDef* sp,
size_t nbSeqs,
443 size_t targetBudget,
size_t avgLitCost,
size_t avgSeqCost,
446 size_t n, budget = 0, inSize=0;
/* firstSubBlock is asserted to be 0 or 1, so the header cost is either 0 or
 * 120 * BYTESCALE. */
448 size_t const headerSize = (size_t)firstSubBlock * 120 *
BYTESCALE;
449 assert(firstSubBlock==0 || firstSubBlock==1);
450 budget += headerSize;
/* The first sequence is always charged. */
453 budget += sp[0].
litLength * avgLitCost + avgSeqCost;
454 if (budget > targetBudget)
return 1;
/* Remaining sequences, starting at index 1. */
458 for (n=1; n<nbSeqs; n++) {
459 size_t currentCost = sp[n].
litLength * avgLitCost + avgSeqCost;
460 budget += currentCost;
/* NOTE(review): the rest of this condition is not visible in this view. */
463 if ( (budget > targetBudget)
/* ZSTD_compressSubBlock_multi
 * NOTE(review): partial view; many lines of this definition (several
 * parameters, local declarations, call arguments and branch bodies) are not
 * visible here, so the comments below cover only the visible statements.
 * Visible flow: estimate the size of the whole block, derive a number of
 * sub-blocks from the target size, emit all but the last sub-block in a
 * loop, then handle the remaining sequences as the last sub-block.
 * The final visible return is the byte count between ostart and op. */
479static size_t ZSTD_compressSubBlock_multi(
const seqStore_t* seqStorePtr,
483 const ZSTD_CCtx_params* cctxParams,
484 void* dst,
size_t dstCapacity,
485 const void* src,
size_t srcSize,
486 const int bmi2,
U32 lastBlock,
487 void* workspace,
size_t wkspSize)
/* sp is the sequence cursor, starting at sstart. */
491 const seqDef* sp = sstart;
492 size_t const nbSeqs = (size_t)(send - sstart);
/* lp is the literal cursor; the literals end at seqStorePtr->lit. */
494 const BYTE*
const lend = seqStorePtr->
lit;
495 const BYTE* lp = lstart;
496 size_t const nbLiterals = (size_t)(lend - lstart);
498 BYTE const*
const iend = ip + srcSize;
500 BYTE*
const oend = ostart + dstCapacity;
/* The configured target size is clamped from below by
 * ZSTD_TARGETCBLOCKSIZE_MIN. */
505 size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN;
506 size_t const targetCBlockSize =
MAX(minTarget, cctxParams->targetCBlockSize);
508 int writeSeqEntropy = 1;
510 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
511 (
unsigned)srcSize, (
unsigned)(lend-lstart), (
unsigned)(send-sstart));
/* Size estimate over all literals and all sequences of the block. */
516 ZSTD_estimateSubBlockSize(lp, nbLiterals,
517 ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
518 &nextCBlock->
entropy, entropyMetadata,
520 writeLitEntropy, writeSeqEntropy);
/* Estimated size divided by the target size, rounded to nearest by adding
 * half the target before dividing, and never less than 1. */
524 const size_t nbSubBlocks =
MAX((ebs.
estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
525 size_t n, avgBlockBudget, blockBudgetSupp=0;
527 DEBUGLOG(5,
"estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
529 (
unsigned)targetCBlockSize, (
unsigned)nbSubBlocks, (
double)avgBlockBudget/
BYTESCALE);
/* All sub-blocks except the last one. */
536 for (n=0;
n < nbSubBlocks-1;
n++) {
/* Pick how many of the remaining sequences go into this sub-block; the
 * final argument is 1 only on the first iteration. */
538 size_t const seqCount = sizeBlockSequences(sp, (
size_t)(send-sp),
539 avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
541 assert(seqCount <= (
size_t)(send-sp));
/* Every remaining sequence was selected: leave the loop. */
542 if (sp + seqCount == send)
break;
545 {
int litEntropyWritten = 0;
546 int seqEntropyWritten = 0;
547 size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
/* lastSubBlock argument is 0 inside the loop. */
548 const size_t decompressedSize =
549 ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
550 size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->
entropy, entropyMetadata,
553 llCodePtr, mlCodePtr, ofCodePtr,
555 op, (
size_t)(oend-op),
556 bmi2, writeLitEntropy, writeSeqEntropy,
557 &litEntropyWritten, &seqEntropyWritten,
/* Commit only when something was produced and it is smaller than the data
 * it represents; then advance the input and code cursors. */
562 if (cSize > 0 && cSize < decompressedSize) {
563 DEBUGLOG(5,
"Committed sub-block compressing %u bytes => %u bytes",
564 (
unsigned)decompressedSize, (
unsigned)cSize);
565 assert(ip + decompressedSize <= iend);
566 ip += decompressedSize;
569 llCodePtr += seqCount;
570 mlCodePtr += seqCount;
571 ofCodePtr += seqCount;
/* NOTE(review): the bodies of these two branches are not visible here. */
573 if (litEntropyWritten) {
576 if (seqEntropyWritten) {
587 DEBUGLOG(5,
"Generate last sub-block: %u sequences remaining", (
unsigned)(send - sp));
/* Last sub-block: takes every remaining literal and sequence. */
588 {
int litEntropyWritten = 0;
589 int seqEntropyWritten = 0;
590 size_t litSize = (size_t)(lend - lp);
591 size_t seqCount = (size_t)(send - sp);
/* lastSubBlock argument is 1 here. */
592 const size_t decompressedSize =
593 ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
594 size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->
entropy, entropyMetadata,
597 llCodePtr, mlCodePtr, ofCodePtr,
599 op, (
size_t)(oend-op),
600 bmi2, writeLitEntropy, writeSeqEntropy,
601 &litEntropyWritten, &seqEntropyWritten,
/* Same commit rule as in the loop above. */
606 if (cSize > 0 && cSize < decompressedSize) {
607 DEBUGLOG(5,
"Last sub-block compressed %u bytes => %u bytes",
608 (
unsigned)decompressedSize, (
unsigned)cSize);
609 assert(ip + decompressedSize <= iend);
610 ip += decompressedSize;
613 llCodePtr += seqCount;
614 mlCodePtr += seqCount;
615 ofCodePtr += seqCount;
617 if (litEntropyWritten) {
620 if (seqEntropyWritten) {
/* writeLitEntropy still set at this point: per the log message, the literal
 * entropy tables were never written. */
628 if (writeLitEntropy) {
629 DEBUGLOG(5,
"Literal entropy tables were never written");
/* Sequence tables are needed but were never written: per the log message
 * this cancels and an uncompressed block is emitted. */
632 if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->
fseMetadata)) {
636 DEBUGLOG(5,
"Sequence entropy tables were never written => cancel, emit an uncompressed block");
/* Input bytes not yet covered by a committed sub-block. */
642 size_t const rSize = (size_t)((iend - ip));
644 DEBUGLOG(5,
"Generate last uncompressed sub-block of %u bytes", (
unsigned)(rSize));
/* Walks the sequences in [sstart, sp).
 * NOTE(review): the loop body is not visible in this view. */
653 for (seq = sstart; seq < sp; ++seq) {
660 DEBUGLOG(5,
"ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
661 (
unsigned)(op-ostart));
662 return (
size_t)(op-ostart);
/* Tail fragment of a function taking dst/dstCapacity and src/srcSize.
 * NOTE(review): the function name and remaining parameters are not visible
 * here; presumably this is ZSTD_compressSuperBlock - confirm.
 * The visible statement returns the result of ZSTD_compressSubBlock_multi,
 * called with the address of zc->seqStore as its first argument. */
666 void* dst,
size_t dstCapacity,
667 const void* src,
size_t srcSize,
679 return ZSTD_compressSubBlock_multi(&zc->
seqStore,
#define STREAM_ACCUMULATOR_MIN
#define assert(condition)
#define FORWARD_IF_ERROR(err,...)
ERR_STATIC unsigned ERR_isError(size_t code)
#define RETURN_ERROR_IF(cond, err,...)
size_t HIST_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, void *workSpace, size_t workSpaceSize)
size_t HIST_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, void *workSpace, size_t workSpaceSize)
size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable, int flags)
size_t HUF_estimateCompressedSize(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable, int flags)
MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
constexpr dcon::demographics_key total(0)
void send(sys::state &state, element_base *parent, T value)
ZSTD_CCtx_params appliedParams
ZSTD_blockState_t blockState
ZSTD_compressedBlockState_t * prevCBlock
ZSTD_compressedBlockState_t * nextCBlock
ZSTD_entropyCTables_t entropy
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]
const char * ZSTD_getErrorName(size_t code)
size_t ZSTD_buildBlockEntropyStats(const seqStore_t *seqStorePtr, const ZSTD_entropyCTables_t *prevEntropy, ZSTD_entropyCTables_t *nextEntropy, const ZSTD_CCtx_params *cctxParams, ZSTD_entropyCTablesMetadata_t *entropyMetadata, void *workspace, size_t wkspSize)
MEM_STATIC size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastBlock)
MEM_STATIC void ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
#define ENTROPY_WORKSPACE_SIZE
size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
size_t ZSTD_crossEntropyCost(short const *norm, unsigned accuracyLog, unsigned const *count, unsigned const max)
size_t ZSTD_fseBitCost(FSE_CTable const *ctable, unsigned const *count, unsigned const max)
size_t ZSTD_encodeSequences(void *dst, size_t dstCapacity, FSE_CTable const *CTable_MatchLength, BYTE const *mlCodeTable, FSE_CTable const *CTable_OffsetBits, BYTE const *ofCodeTable, FSE_CTable const *CTable_LitLength, BYTE const *llCodeTable, seqDef const *sequences, size_t nbSeq, int longOffsets, int bmi2)
size_t ZSTD_compressSuperBlock(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize, unsigned lastBlock)
#define ZSTD_memcpy(d, s, l)
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const *seqStore, seqDef const *seq)