18#include "../common/zstd_deps.h"
19#include "../common/compiler.h"
20#include "../common/bitstream.h"
21#include "../common/fse.h"
22#include "../common/huf.h"
23#include "../common/error_private.h"
24#include "../common/zstd_internal.h"
25#include "../common/bits.h"
/* Table log constant for the fast decoder, fixed at 11.
 * NOTE(review): presumably the target table log that the X1 decoding table
 * is rescaled to before the fast loops run; its use site is not visible
 * here - confirm against HUF_readDTableX1_wksp(). */
31#define HUF_DECODER_FAST_TABLELOG 11
37#ifdef HUF_DISABLE_FAST_DECODE
38# define HUF_ENABLE_FAST_DECODE 0
40# define HUF_ENABLE_FAST_DECODE 1
47#if defined(HUF_FORCE_DECOMPRESS_X1) && \
48 defined(HUF_FORCE_DECOMPRESS_X2)
49#error "Cannot force the use of the X1 and X2 decoders at the same time!"
57# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
59# define HUF_FAST_BMI2_ATTRS
63# define HUF_EXTERN_C extern "C"
67#define HUF_ASM_DECL HUF_EXTERN_C
70# define HUF_NEED_BMI2_FUNCTION 1
72# define HUF_NEED_BMI2_FUNCTION 0
/* Local alias: HUF_isError(code) expands to ERR_isError(code). */
78#define HUF_isError ERR_isError
/* HUF_ALIGN(x, a): rounds x up to the next multiple of a.
 * It forwards (a) - 1 as the mask, so the result is only a true multiple of a
 * when a is a power of two.
 *
 * HUF_ALIGN_MASK(x, mask): adds mask to x, then clears every bit that is set
 * in mask. `mask` is expanded twice, so do not pass an expression with side
 * effects. */
84#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
85#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
100 static size_t fn##_default( \
101 void* dst, size_t dstSize, \
102 const void* cSrc, size_t cSrcSize, \
103 const HUF_DTable* DTable) \
105 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
108 static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
109 void* dst, size_t dstSize, \
110 const void* cSrc, size_t cSrcSize, \
111 const HUF_DTable* DTable) \
113 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
116 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
117 size_t cSrcSize, HUF_DTable const* DTable, int flags) \
119 if (flags & HUF_flags_bmi2) { \
120 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
122 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
127#define HUF_DGEN(fn) \
128 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
129 size_t cSrcSize, HUF_DTable const* DTable, int flags) \
132 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
150static size_t HUF_initFastDStream(
BYTE const* ip) {
151 BYTE const lastByte = ip[7];
152 size_t const bitsConsumed = lastByte ? 8 -
ZSTD_highbit32(lastByte) : 0;
154 assert(bitsConsumed <= 8);
155 assert(
sizeof(
size_t) == 8);
156 return value << bitsConsumed;
191static size_t HUF_DecompressFastArgs_init(
HUF_DecompressFastArgs* args,
void* dst,
size_t dstSize,
void const* src,
size_t srcSize,
const HUF_DTable* DTable)
193 void const* dt = DTable + 1;
194 U32 const dtLog = HUF_getDTableDesc(DTable).
tableLog;
196 const BYTE*
const istart = (
const BYTE*)src;
213 return ERROR(corruption_detected);
227 size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
228 args->
iend[0] = istart + 6;
229 args->
iend[1] = args->
iend[0] + length1;
230 args->
iend[2] = args->
iend[1] + length2;
231 args->
iend[3] = args->
iend[2] + length3;
236 if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
238 if (length4 > srcSize)
return ERROR(corruption_detected);
241 args->
ip[0] = args->
iend[1] -
sizeof(
U64);
242 args->
ip[1] = args->
iend[2] -
sizeof(
U64);
243 args->
ip[2] = args->
iend[3] -
sizeof(
U64);
244 args->
ip[3] = (
BYTE const*)src + srcSize -
sizeof(
U64);
248 args->
op[1] = args->
op[0] + (dstSize+3)/4;
249 args->
op[2] = args->
op[1] + (dstSize+3)/4;
250 args->
op[3] = args->
op[2] + (dstSize+3)/4;
253 if (args->
op[3] >= oend)
263 args->
bits[0] = HUF_initFastDStream(args->
ip[0]);
264 args->
bits[1] = HUF_initFastDStream(args->
ip[1]);
265 args->
bits[2] = HUF_initFastDStream(args->
ip[2]);
266 args->
bits[3] = HUF_initFastDStream(args->
ip[3]);
284 if (args->
op[stream] > segmentEnd)
285 return ERROR(corruption_detected);
291 if (args->
ip[stream] < args->
iend[stream] - 8)
292 return ERROR(corruption_detected);
295 assert(
sizeof(
size_t) == 8);
300 bit->
ptr = (
const char*)args->
ip[stream];
306#define HUF_4X_FOR_EACH_STREAM(X) \
315#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
324#ifndef HUF_FORCE_DECOMPRESS_X2
335static U64 HUF_DEltX1_set4(
BYTE symbol,
BYTE nbBits) {
338 D4 = (
U64)((symbol << 8) + nbBits);
340 D4 = (
U64)(symbol + (nbBits << 8));
343 D4 *= 0x0001000100010001ULL;
352static U32 HUF_rescaleStats(
BYTE* huffWeight,
U32* rankVal,
U32 nbSymbols,
U32 tableLog,
U32 targetTableLog)
354 if (tableLog > targetTableLog)
356 if (tableLog < targetTableLog) {
357 U32 const scale = targetTableLog - tableLog;
360 for (s = 0; s < nbSymbols; ++s) {
361 huffWeight[s] += (
BYTE)((huffWeight[s] == 0) ? 0 : scale);
367 for (s = targetTableLog; s > scale; --s) {
368 rankVal[s] = rankVal[s - scale];
370 for (s = scale; s > 0; --s) {
374 return targetTableLog;
390 void*
const dtPtr = DTable + 1;
395 if (
sizeof(*wksp) > wkspSize)
return ERROR(tableLog_tooLarge);
408 tableLog = HUF_rescaleStats(wksp->
huffWeight, wksp->
rankVal, nbSymbols, tableLog, targetTableLog);
428 U32 nextRankStart = 0;
429 int const unroll = 4;
430 int const nLimit = (int)nbSymbols - unroll + 1;
431 for (n=0; n<(int)tableLog+1; n++) {
432 U32 const curr = nextRankStart;
433 nextRankStart += wksp->
rankVal[n];
436 for (n=0; n < nLimit; n += unroll) {
438 for (u=0; u < unroll; ++u) {
443 for (; n < (int)nbSymbols; ++n) {
458 for (w=1; w<tableLog+1; ++w) {
459 int const symbolCount = wksp->
rankVal[w];
460 int const length = (1 << w) >> 1;
461 int uStart = rankStart;
462 BYTE const nbBits = (
BYTE)(tableLog + 1 - w);
467 for (s=0; s<symbolCount; ++s) {
476 for (s=0; s<symbolCount; ++s) {
486 for (s=0; s<symbolCount; ++s) {
487 U64 const D4 = HUF_DEltX1_set4(wksp->
symbols[symbol + s], nbBits);
493 for (s=0; s<symbolCount; ++s) {
494 U64 const D4 = HUF_DEltX1_set4(wksp->
symbols[symbol + s], nbBits);
501 for (s=0; s<symbolCount; ++s) {
502 U64 const D4 = HUF_DEltX1_set4(wksp->
symbols[symbol + s], nbBits);
503 for (u=0; u < length; u += 16) {
514 symbol += symbolCount;
515 rankStart += symbolCount * length;
/* HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr):
 * Stores the value returned by HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) at
 * *ptr and then advances ptr by one.
 * `dt` and `dtLog` are not macro parameters: they are picked up from the scope
 * where the macro is expanded.
 * Wrapped in do { } while (0) so the expansion acts as a single statement. */
530#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
531 do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
533#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
535 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
536 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
539#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
542 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
548 BYTE*
const pStart = p;
551 if ((pEnd - p) > 3) {
571 return (
size_t)(pEnd-pStart);
576 void* dst,
size_t dstSize,
577 const void* cSrc,
size_t cSrcSize,
582 const void* dtPtr = DTable + 1;
585 DTableDesc const dtd = HUF_getDTableDesc(DTable);
603 void* dst,
size_t dstSize,
604 const void* cSrc,
size_t cSrcSize,
608 if (cSrcSize < 10)
return ERROR(corruption_detected);
609 if (dstSize < 6)
return ERROR(corruption_detected);
611 {
const BYTE*
const istart = (
const BYTE*) cSrc;
613 BYTE*
const oend = ostart + dstSize;
614 BYTE*
const olimit = oend - 3;
615 const void*
const dtPtr = DTable + 1;
626 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
627 const BYTE*
const istart1 = istart + 6;
628 const BYTE*
const istart2 = istart1 + length1;
629 const BYTE*
const istart3 = istart2 + length2;
630 const BYTE*
const istart4 = istart3 + length3;
631 const size_t segmentSize = (dstSize+3) / 4;
632 BYTE*
const opStart2 = ostart + segmentSize;
633 BYTE*
const opStart3 = opStart2 + segmentSize;
634 BYTE*
const opStart4 = opStart3 + segmentSize;
636 BYTE* op2 = opStart2;
637 BYTE* op3 = opStart3;
638 BYTE* op4 = opStart4;
639 DTableDesc const dtd = HUF_getDTableDesc(DTable);
643 if (length4 > cSrcSize)
return ERROR(corruption_detected);
644 if (opStart4 > oend)
return ERROR(corruption_detected);
652 if ((
size_t)(oend - op4) >=
sizeof(
size_t)) {
653 for ( ; (endSignal) & (op4 < olimit) ; ) {
680 if (op1 > opStart2)
return ERROR(corruption_detected);
681 if (op2 > opStart3)
return ERROR(corruption_detected);
682 if (op3 > opStart4)
return ERROR(corruption_detected);
693 if (!endCheck)
return ERROR(corruption_detected); }
700#if HUF_NEED_BMI2_FUNCTION
702size_t HUF_decompress4X1_usingDTable_internal_bmi2(
void* dst,
size_t dstSize,
void const* cSrc,
709size_t HUF_decompress4X1_usingDTable_internal_default(
void* dst,
size_t dstSize,
void const* cSrc,
714#if ZSTD_ENABLE_ASM_X86_64_BMI2
726 U16 const*
const dtable = (
U16 const*)args->
dt;
744 for (stream = 0; stream < 4; ++stream) {
745 assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
746 assert(ip[stream] >= ilowest);
752 size_t const oiters = (size_t)(oend - op[3]) / 5;
756 size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
758 size_t const iters =
MIN(oiters, iiters);
759 size_t const symbols = iters * 5;
765 olimit = op[3] + symbols;
775 for (stream = 1; stream < 4; ++stream) {
776 if (ip[stream] < ip[stream - 1])
782 for (stream = 1; stream < 4; ++stream) {
783 assert(ip[stream] >= ip[stream - 1]);
787#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \
789 int const index = (int)(bits[(_stream)] >> 53); \
790 int const entry = (int)dtable[index]; \
791 bits[(_stream)] <<= (entry & 0x3F); \
792 op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
795#define HUF_4X1_RELOAD_STREAM(_stream) \
797 int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
798 int const nbBits = ctz & 7; \
799 int const nbBytes = ctz >> 3; \
800 op[(_stream)] += 5; \
801 ip[(_stream)] -= nbBytes; \
802 bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
803 bits[(_stream)] <<= nbBits; \
819 }
while (op[3] < olimit);
821#undef HUF_4X1_DECODE_SYMBOL
822#undef HUF_4X1_RELOAD_STREAM
840HUF_decompress4X1_usingDTable_internal_fast(
841 void* dst,
size_t dstSize,
842 const void* cSrc,
size_t cSrcSize,
846 void const* dt = DTable + 1;
847 BYTE const*
const ilowest = (
BYTE const*)cSrc;
850 {
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
874 {
size_t const segmentSize = (dstSize+3) / 4;
877 for (i = 0; i < 4; ++i) {
879 if (segmentSize <= (
size_t)(oend - segmentEnd))
880 segmentEnd += segmentSize;
883 FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd),
"corruption");
886 if (args.
op[i] != segmentEnd)
return ERROR(corruption_detected);
/* Instantiates HUF_DGEN to define the static function
 * HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, flags).
 * The generated function forwards to
 * HUF_decompress1X1_usingDTable_internal_body(); depending on which HUF_DGEN
 * variant is compiled in, it either calls the body directly or goes through
 * the _default / _bmi2 wrappers selected by (flags & HUF_flags_bmi2). */
895HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
897static size_t HUF_decompress4X1_usingDTable_internal(
void* dst,
size_t dstSize,
void const* cSrc,
898 size_t cSrcSize,
HUF_DTable const* DTable,
int flags)
905 fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
906# if ZSTD_ENABLE_ASM_X86_64_BMI2
908 loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
912 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
916#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
918 loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
923 size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
927 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
930static size_t HUF_decompress4X1_DCtx_wksp(
HUF_DTable* dctx,
void* dst,
size_t dstSize,
931 const void* cSrc,
size_t cSrcSize,
932 void* workSpace,
size_t wkspSize,
int flags)
934 const BYTE* ip = (
const BYTE*) cSrc;
938 if (hSize >= cSrcSize)
return ERROR(srcSize_wrong);
939 ip += hSize; cSrcSize -= hSize;
941 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
947#ifndef HUF_FORCE_DECOMPRESS_X1
961static U32 HUF_buildDEltX2U32(
U32 symbol,
U32 nbBits,
U32 baseSeq,
int level)
969 seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
970 return seq + (nbBits << 16) + ((
U32)level << 24);
972 seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
973 return (seq << 16) + (nbBits << 8) + (
U32)level;
983 U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
992static U64 HUF_buildDEltX2U64(
U32 symbol,
U32 nbBits,
U16 baseSeq,
int level)
994 U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
995 return (
U64)DElt + ((
U64)DElt << 32);
1010static void HUF_fillDTableX2ForWeight(
1013 U32 nbBits,
U32 tableLog,
1014 U16 baseSeq,
int const level)
1016 U32 const length = 1U << ((tableLog - nbBits) & 0x1F );
1018 assert(level >= 1 && level <= 2);
1021 for (ptr = begin; ptr != end; ++ptr) {
1022 HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->
symbol, nbBits, baseSeq, level);
1023 *DTableRank++ = DElt;
1027 for (ptr = begin; ptr != end; ++ptr) {
1028 HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->
symbol, nbBits, baseSeq, level);
1029 DTableRank[0] = DElt;
1030 DTableRank[1] = DElt;
1035 for (ptr = begin; ptr != end; ++ptr) {
1036 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->
symbol, nbBits, baseSeq, level);
1037 ZSTD_memcpy(DTableRank + 0, &DEltX2,
sizeof(DEltX2));
1038 ZSTD_memcpy(DTableRank + 2, &DEltX2,
sizeof(DEltX2));
1043 for (ptr = begin; ptr != end; ++ptr) {
1044 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->
symbol, nbBits, baseSeq, level);
1045 ZSTD_memcpy(DTableRank + 0, &DEltX2,
sizeof(DEltX2));
1046 ZSTD_memcpy(DTableRank + 2, &DEltX2,
sizeof(DEltX2));
1047 ZSTD_memcpy(DTableRank + 4, &DEltX2,
sizeof(DEltX2));
1048 ZSTD_memcpy(DTableRank + 6, &DEltX2,
sizeof(DEltX2));
1053 for (ptr = begin; ptr != end; ++ptr) {
1054 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->
symbol, nbBits, baseSeq, level);
1055 HUF_DEltX2*
const DTableRankEnd = DTableRank + length;
1056 for (; DTableRank != DTableRankEnd; DTableRank += 8) {
1057 ZSTD_memcpy(DTableRank + 0, &DEltX2,
sizeof(DEltX2));
1058 ZSTD_memcpy(DTableRank + 2, &DEltX2,
sizeof(DEltX2));
1059 ZSTD_memcpy(DTableRank + 4, &DEltX2,
sizeof(DEltX2));
1060 ZSTD_memcpy(DTableRank + 6, &DEltX2,
sizeof(DEltX2));
1069static void HUF_fillDTableX2Level2(
HUF_DEltX2* DTable,
U32 targetLog,
const U32 consumedBits,
1070 const U32* rankVal,
const int minWeight,
const int maxWeight1,
1072 U32 nbBitsBaseline,
U16 baseSeq)
1079 U32 const length = 1U << ((targetLog - consumedBits) & 0x1F );
1080 U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, 0, 1);
1081 int const skipSize = rankVal[minWeight];
1097 for (i = 0; i < skipSize; i += 8) {
1098 ZSTD_memcpy(DTable + i + 0, &DEltX2,
sizeof(DEltX2));
1099 ZSTD_memcpy(DTable + i + 2, &DEltX2,
sizeof(DEltX2));
1100 ZSTD_memcpy(DTable + i + 4, &DEltX2,
sizeof(DEltX2));
1101 ZSTD_memcpy(DTable + i + 6, &DEltX2,
sizeof(DEltX2));
1110 for (w = minWeight; w < maxWeight1; ++w) {
1111 int const begin = rankStart[w];
1112 int const end = rankStart[w+1];
1113 U32 const nbBits = nbBitsBaseline - w;
1114 U32 const totalBits = nbBits + consumedBits;
1115 HUF_fillDTableX2ForWeight(
1116 DTable + rankVal[w],
1117 sortedSymbols + begin, sortedSymbols + end,
1118 totalBits, targetLog,
1124static void HUF_fillDTableX2(
HUF_DEltX2* DTable,
const U32 targetLog,
1127 const U32 nbBitsBaseline)
1129 U32*
const rankVal = rankValOrigin[0];
1130 const int scaleLog = nbBitsBaseline - targetLog;
1131 const U32 minBits = nbBitsBaseline - maxWeight;
1133 int const wEnd = (int)maxWeight + 1;
1136 for (w = 1; w < wEnd; ++w) {
1137 int const begin = (int)rankStart[w];
1138 int const end = (int)rankStart[w+1];
1139 U32 const nbBits = nbBitsBaseline - w;
1141 if (targetLog-nbBits >= minBits) {
1143 int start = rankVal[w];
1144 U32 const length = 1U << ((targetLog - nbBits) & 0x1F );
1145 int minWeight = nbBits + scaleLog;
1147 if (minWeight < 1) minWeight = 1;
1151 for (s = begin; s != end; ++s) {
1152 HUF_fillDTableX2Level2(
1153 DTable + start, targetLog, nbBits,
1154 rankValOrigin[nbBits], minWeight, wEnd,
1155 sortedList, rankStart,
1156 nbBitsBaseline, sortedList[s].symbol);
1161 HUF_fillDTableX2ForWeight(
1162 DTable + rankVal[w],
1163 sortedList + begin, sortedList + end,
1180 const void* src,
size_t srcSize,
1181 void* workSpace,
size_t wkspSize,
int flags)
1183 U32 tableLog, maxW, nbSymbols;
1187 void* dtPtr = DTable+1;
1193 if (
sizeof(*wksp) > wkspSize)
return ERROR(GENERIC);
1207 if (tableLog > maxTableLog)
return ERROR(tableLog_tooLarge);
1211 for (maxW = tableLog; wksp->
rankStats[maxW]==0; maxW--) {}
1214 {
U32 w, nextRankStart = 0;
1215 for (w=1; w<maxW+1; w++) {
1216 U32 curr = nextRankStart;
1218 rankStart[w] = curr;
1220 rankStart[0] = nextRankStart;
1221 rankStart[maxW+1] = nextRankStart;
1226 for (s=0; s<nbSymbols; s++) {
1228 U32 const r = rankStart[w]++;
1236 {
int const rescale = (maxTableLog-tableLog) - 1;
1237 U32 nextRankVal = 0;
1239 for (w=1; w<maxW+1; w++) {
1240 U32 curr = nextRankVal;
1241 nextRankVal += wksp->
rankStats[w] << (w+rescale);
1244 {
U32 const minBits = tableLog+1 - maxW;
1246 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
1247 U32*
const rankValPtr = wksp->
rankVal[consumed];
1249 for (w = 1; w < maxW+1; w++) {
1250 rankValPtr[w] = rankVal0[w] >> consumed;
1253 HUF_fillDTableX2(dt, maxTableLog,
1279 if (dt[val].length==1) {
/* HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr):
 * Calls HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) with ptr as the output
 * location, then advances ptr by the value that call returns.
 * `dt` and `dtLog` are not macro parameters: they are picked up from the scope
 * where the macro is expanded.
 * `ptr` is expanded twice, so it must be a plain lvalue without side effects.
 * Wrapped in do { } while (0) so the expansion acts as a single statement. */
1292#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
1293 do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
1295#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
1297 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
1298 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
1301#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
1304 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
1311 BYTE*
const pStart = p;
1314 if ((
size_t)(pEnd - p) >=
sizeof(bitDPtr->
bitContainer)) {
1338 if ((
size_t)(pEnd - p) >= 2) {
1354 void* dst,
size_t dstSize,
1355 const void* cSrc,
size_t cSrcSize,
1364 {
BYTE*
const ostart = (
BYTE*) dst;
1366 const void*
const dtPtr = DTable+1;
1368 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1385 void* dst,
size_t dstSize,
1386 const void* cSrc,
size_t cSrcSize,
1389 if (cSrcSize < 10)
return ERROR(corruption_detected);
1390 if (dstSize < 6)
return ERROR(corruption_detected);
1392 {
const BYTE*
const istart = (
const BYTE*) cSrc;
1394 BYTE*
const oend = ostart + dstSize;
1395 BYTE*
const olimit = oend - (
sizeof(size_t)-1);
1396 const void*
const dtPtr = DTable+1;
1407 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
1408 const BYTE*
const istart1 = istart + 6;
1409 const BYTE*
const istart2 = istart1 + length1;
1410 const BYTE*
const istart3 = istart2 + length2;
1411 const BYTE*
const istart4 = istart3 + length3;
1412 size_t const segmentSize = (dstSize+3) / 4;
1413 BYTE*
const opStart2 = ostart + segmentSize;
1414 BYTE*
const opStart3 = opStart2 + segmentSize;
1415 BYTE*
const opStart4 = opStart3 + segmentSize;
1417 BYTE* op2 = opStart2;
1418 BYTE* op3 = opStart3;
1419 BYTE* op4 = opStart4;
1421 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1424 if (length4 > cSrcSize)
return ERROR(corruption_detected);
1425 if (opStart4 > oend)
return ERROR(corruption_detected);
1433 if ((
size_t)(oend - op4) >=
sizeof(
size_t)) {
1434 for ( ; (endSignal) & (op4 < olimit); ) {
1435#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
1483 if (op1 > opStart2)
return ERROR(corruption_detected);
1484 if (op2 > opStart3)
return ERROR(corruption_detected);
1485 if (op3 > opStart4)
return ERROR(corruption_detected);
1496 if (!endCheck)
return ERROR(corruption_detected); }
1503#if HUF_NEED_BMI2_FUNCTION
1505size_t HUF_decompress4X2_usingDTable_internal_bmi2(
void* dst,
size_t dstSize,
void const* cSrc,
1512size_t HUF_decompress4X2_usingDTable_internal_default(
void* dst,
size_t dstSize,
void const* cSrc,
1517#if ZSTD_ENABLE_ASM_X86_64_BMI2
1541 oend[3] = args->
oend;
1552 for (stream = 0; stream < 4; ++stream) {
1553 assert(op[stream] <= oend[stream]);
1554 assert(ip[stream] >= ilowest);
1567 size_t iters = (size_t)(ip[0] - ilowest) / 7;
1572 for (stream = 0; stream < 4; ++stream) {
1573 size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
1574 iters =
MIN(iters, oiters);
1583 olimit = op[3] + (iters * 5);
1586 if (op[3] == olimit)
1593 for (stream = 1; stream < 4; ++stream) {
1594 if (ip[stream] < ip[stream - 1])
1600 for (stream = 1; stream < 4; ++stream) {
1601 assert(ip[stream] >= ip[stream - 1]);
1605#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \
1607 if ((_decode3) || (_stream) != 3) { \
1608 int const index = (int)(bits[(_stream)] >> 53); \
1609 HUF_DEltX2 const entry = dtable[index]; \
1610 MEM_write16(op[(_stream)], entry.sequence); \
1611 bits[(_stream)] <<= (entry.nbBits) & 0x3F; \
1612 op[(_stream)] += (entry.length); \
1616#define HUF_4X2_RELOAD_STREAM(_stream) \
1618 HUF_4X2_DECODE_SYMBOL(3, 1); \
1620 int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
1621 int const nbBits = ctz & 7; \
1622 int const nbBytes = ctz >> 3; \
1623 ip[(_stream)] -= nbBytes; \
1624 bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
1625 bits[(_stream)] <<= nbBits; \
1651 }
while (op[3] < olimit);
1654#undef HUF_4X2_DECODE_SYMBOL
1655#undef HUF_4X2_RELOAD_STREAM
1667HUF_decompress4X2_usingDTable_internal_fast(
1668 void* dst,
size_t dstSize,
1669 const void* cSrc,
size_t cSrcSize,
1672 void const* dt = DTable + 1;
1673 const BYTE*
const ilowest = (
const BYTE*)cSrc;
1677 size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
1699 size_t const segmentSize = (dstSize+3) / 4;
1702 for (i = 0; i < 4; ++i) {
1704 if (segmentSize <= (
size_t)(oend - segmentEnd))
1705 segmentEnd += segmentSize;
1708 FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd),
"corruption");
1710 if (args.
op[i] != segmentEnd)
1711 return ERROR(corruption_detected);
1719static size_t HUF_decompress4X2_usingDTable_internal(
void* dst,
size_t dstSize,
void const* cSrc,
1720 size_t cSrcSize,
HUF_DTable const* DTable,
int flags)
1727 fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
1728# if ZSTD_ENABLE_ASM_X86_64_BMI2
1730 loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
1734 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
1738#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
1740 loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
1745 size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
1749 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
/* Instantiates HUF_DGEN to define the static function
 * HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, flags).
 * The generated function forwards to
 * HUF_decompress1X2_usingDTable_internal_body(); depending on which HUF_DGEN
 * variant is compiled in, it either calls the body directly or goes through
 * the _default / _bmi2 wrappers selected by (flags & HUF_flags_bmi2). */
1752HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
1755 const
void* cSrc,
size_t cSrcSize,
1756 void* workSpace,
size_t wkspSize,
int flags)
1758 const BYTE* ip = (
const BYTE*) cSrc;
1761 workSpace, wkspSize, flags);
1763 if (hSize >= cSrcSize)
return ERROR(srcSize_wrong);
1764 ip += hSize; cSrcSize -= hSize;
1766 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
1769static size_t HUF_decompress4X2_DCtx_wksp(
HUF_DTable* dctx,
void* dst,
size_t dstSize,
1770 const void* cSrc,
size_t cSrcSize,
1771 void* workSpace,
size_t wkspSize,
int flags)
1773 const BYTE* ip = (
const BYTE*) cSrc;
1776 workSpace, wkspSize, flags);
1778 if (hSize >= cSrcSize)
return ERROR(srcSize_wrong);
1779 ip += hSize; cSrcSize -= hSize;
1781 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
1792#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1799 {{ 150,216}, { 381,119}},
1800 {{ 170,205}, { 514,112}},
1801 {{ 177,199}, { 539,110}},
1802 {{ 197,194}, { 644,107}},
1803 {{ 221,192}, { 735,107}},
1804 {{ 256,189}, { 881,106}},
1805 {{ 359,188}, {1167,109}},
1806 {{ 582,187}, {1570,114}},
1807 {{ 688,187}, {1712,122}},
1808 {{ 825,186}, {1965,136}},
1809 {{ 976,185}, {2131,150}},
1810 {{1180,186}, {2070,175}},
1811 {{1377,185}, {1731,202}},
1812 {{1412,185}, {1695,202}},
1824 assert(dstSize <= 128*1024);
1825#if defined(HUF_FORCE_DECOMPRESS_X1)
1829#elif defined(HUF_FORCE_DECOMPRESS_X2)
1835 {
U32 const Q = (cSrcSize >= dstSize) ? 15 : (
U32)(cSrcSize * 16 / dstSize);
1836 U32 const D256 = (
U32)(dstSize >> 8);
1839 DTime1 += DTime1 >> 5;
1840 return DTime1 < DTime0;
1846 const void* cSrc,
size_t cSrcSize,
1847 void* workSpace,
size_t wkspSize,
int flags)
1850 if (dstSize == 0)
return ERROR(dstSize_tooSmall);
1851 if (cSrcSize > dstSize)
return ERROR(corruption_detected);
1852 if (cSrcSize == dstSize) {
ZSTD_memcpy(dst, cSrc, dstSize);
return dstSize; }
1853 if (cSrcSize == 1) {
ZSTD_memset(dst, *(
const BYTE*)cSrc, dstSize);
return dstSize; }
1856#if defined(HUF_FORCE_DECOMPRESS_X1)
1860 cSrcSize, workSpace, wkspSize, flags);
1861#elif defined(HUF_FORCE_DECOMPRESS_X2)
1865 cSrcSize, workSpace, wkspSize, flags);
1868 cSrcSize, workSpace, wkspSize, flags):
1870 cSrcSize, workSpace, wkspSize, flags);
1878 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1879#if defined(HUF_FORCE_DECOMPRESS_X1)
1882 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1883#elif defined(HUF_FORCE_DECOMPRESS_X2)
1886 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1888 return dtd.
tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
1889 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1893#ifndef HUF_FORCE_DECOMPRESS_X2
1896 const BYTE* ip = (
const BYTE*) cSrc;
1900 if (hSize >= cSrcSize)
return ERROR(srcSize_wrong);
1901 ip += hSize; cSrcSize -= hSize;
1903 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
1909 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1910#if defined(HUF_FORCE_DECOMPRESS_X1)
1913 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1914#elif defined(HUF_FORCE_DECOMPRESS_X2)
1917 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1919 return dtd.
tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
1920 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1927 if (dstSize == 0)
return ERROR(dstSize_tooSmall);
1928 if (cSrcSize == 0)
return ERROR(corruption_detected);
1931#if defined(HUF_FORCE_DECOMPRESS_X1)
1934 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
1935#elif defined(HUF_FORCE_DECOMPRESS_X2)
1938 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
1940 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
1941 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
MEM_STATIC unsigned ZSTD_highbit32(U32 val)
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize)
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits)
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD)
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD)
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits)
MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t *bitD)
MEM_STATIC unsigned char * ZSTD_maybeNullPtrAdd(unsigned char *ptr, ptrdiff_t add)
#define BMI2_TARGET_ATTRIBUTE
#define FORCE_INLINE_TEMPLATE
#define DEBUG_STATIC_ASSERT(c)
#define assert(condition)
size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workSpace, size_t wkspSize, int flags)
#define FORWARD_IF_ERROR(err,...)
#define HUF_SYMBOLVALUE_MAX
#define HUF_READ_STATS_WORKSPACE_SIZE_U32
#define HUF_TABLELOG_ABSOLUTEMAX
#define HUF_DECOMPRESS_WORKSPACE_SIZE
void(* HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs *)
U32 rankValCol_t[HUF_TABLELOG_MAX+1]
HINT_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var)
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)
size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workSpace, size_t wkspSize, int flags)
HINT_INLINE size_t HUF_decodeStreamX1(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX1 *const dt, const U32 dtLog)
#define HUF_DECODER_FAST_TABLELOG
rankValCol_t rankVal_t[HUF_TABLELOG_MAX]
U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
FORCE_INLINE_TEMPLATE BYTE HUF_decodeSymbolX1(BIT_DStream_t *Dstream, const HUF_DEltX1 *dt, const U32 dtLog)
FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workSpace, size_t wkspSize, int flags)
FORCE_INLINE_TEMPLATE size_t HUF_decompress1X2_usingDTable_internal_body(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)
#define HUF_4X1_RELOAD_STREAM(_stream)
#define HUF_ENABLE_FAST_DECODE
size_t(* HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
FORCE_INLINE_TEMPLATE size_t HUF_decompress4X1_usingDTable_internal_body(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workSpace, size_t wkspSize, int flags)
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
FORCE_INLINE_TEMPLATE size_t HUF_decompress1X1_usingDTable_internal_body(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)
size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable, int flags)
FORCE_INLINE_TEMPLATE U32 HUF_decodeSymbolX2(void *op, BIT_DStream_t *DStream, const HUF_DEltX2 *dt, const U32 dtLog)
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workSpace, size_t wkspSize, int flags)
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)
size_t HUF_readDTableX1_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workSpace, size_t wkspSize, int flags)
size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable, int flags)
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workSpace, size_t wkspSize, int flags)
FORCE_INLINE_TEMPLATE U32 HUF_decodeLastSymbolX2(void *op, BIT_DStream_t *DStream, const HUF_DEltX2 *dt, const U32 dtLog)
#define HUF_4X_FOR_EACH_STREAM(X)
#define HUF_FAST_BMI2_ATTRS
#define HUF_4X2_RELOAD_STREAM(_stream)
MEM_STATIC void MEM_write64(void *memPtr, U64 value)
MEM_STATIC unsigned MEM_32bits(void)
MEM_STATIC size_t MEM_readLEST(const void *memPtr)
MEM_STATIC unsigned MEM_isLittleEndian(void)
MEM_STATIC U16 MEM_readLE16(const void *memPtr)
MEM_STATIC unsigned MEM_64bits(void)
BitContainerType bitContainer
U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX+1]
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX+1]
BYTE symbols[HUF_SYMBOLVALUE_MAX+1]
BYTE huffWeight[HUF_SYMBOLVALUE_MAX+1]
BYTE weightList[HUF_SYMBOLVALUE_MAX+1]
U32 rankStart0[HUF_TABLELOG_MAX+3]
rankValCol_t rankVal[HUF_TABLELOG_MAX]
U32 rankStats[HUF_TABLELOG_MAX+1]
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX+1]
U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]
#define ZSTD_memcpy(d, s, l)
#define ZSTD_memset(p, v, l)