Project Alice
Loading...
Searching...
No Matches
zstd_compress_literals.c
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /*-*************************************
12 * Dependencies
13 ***************************************/
15
16
17/* **************************************************************
18* Debug Traces
19****************************************************************/
20#if DEBUGLEVEL >= 2
21
22static size_t showHexa(const void* src, size_t srcSize)
23{
24 const BYTE* const ip = (const BYTE*)src;
25 size_t u;
26 for (u=0; u<srcSize; u++) {
27 RAWLOG(5, " %02X", ip[u]); (void)ip;
28 }
29 RAWLOG(5, " \n");
30 return srcSize;
31}
32
33#endif
34
35
36/* **************************************************************
37* Literals compression - special cases
38****************************************************************/
39size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
40{
41 BYTE* const ostart = (BYTE*)dst;
42 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
43
44 DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
45
46 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
47
48 switch(flSize)
49 {
50 case 1: /* 2 - 1 - 5 */
51 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
52 break;
53 case 2: /* 2 - 2 - 12 */
54 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
55 break;
56 case 3: /* 2 - 2 - 20 */
57 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
58 break;
59 default: /* not necessary : flSize is {1,2,3} */
60 assert(0);
61 }
62
63 ZSTD_memcpy(ostart + flSize, src, srcSize);
64 DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
65 return srcSize + flSize;
66}
67
68static int allBytesIdentical(const void* src, size_t srcSize)
69{
70 assert(srcSize >= 1);
71 assert(src != NULL);
72 { const BYTE b = ((const BYTE*)src)[0];
73 size_t p;
74 for (p=1; p<srcSize; p++) {
75 if (((const BYTE*)src)[p] != b) return 0;
76 }
77 return 1;
78 }
79}
80
81size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
82{
83 BYTE* const ostart = (BYTE*)dst;
84 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
85
86 assert(dstCapacity >= 4); (void)dstCapacity;
87 assert(allBytesIdentical(src, srcSize));
88
89 switch(flSize)
90 {
91 case 1: /* 2 - 1 - 5 */
92 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
93 break;
94 case 2: /* 2 - 2 - 12 */
95 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
96 break;
97 case 3: /* 2 - 2 - 20 */
98 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
99 break;
100 default: /* not necessary : flSize is {1,2,3} */
101 assert(0);
102 }
103
104 ostart[flSize] = *(const BYTE*)src;
105 DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
106 return flSize+1;
107}
108
109/* ZSTD_minLiteralsToCompress() :
110 * returns minimal amount of literals
111 * for literal compression to even be attempted.
112 * Minimum is made tighter as compression strategy increases.
113 */
114static size_t
115ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
116{
117 assert((int)strategy >= 0);
118 assert((int)strategy <= 9);
119 /* btultra2 : min 8 bytes;
120 * then 2x larger for each successive compression strategy
121 * max threshold 64 bytes */
122 { int const shift = MIN(9-(int)strategy, 3);
123 size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124 DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
125 return mintc;
126 }
127}
128
130 void* dst, size_t dstCapacity,
131 const void* src, size_t srcSize,
132 void* entropyWorkspace, size_t entropyWorkspaceSize,
133 const ZSTD_hufCTables_t* prevHuf,
134 ZSTD_hufCTables_t* nextHuf,
135 ZSTD_strategy strategy,
136 int disableLiteralCompression,
137 int suspectUncompressible,
138 int bmi2)
139{
140 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
141 BYTE* const ostart = (BYTE*)dst;
142 U32 singleStream = srcSize < 256;
144 size_t cLitSize;
145
146 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147 disableLiteralCompression, (U32)srcSize, dstCapacity);
148
149 DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
150
151 /* Prepare nextEntropy assuming reusing the existing table */
152 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
153
154 if (disableLiteralCompression)
155 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
156
157 /* if too small, don't even attempt compression (speed opt) */
158 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
160
161 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
162 { HUF_repeat repeat = prevHuf->repeatMode;
163 int const flags = 0
164 | (bmi2 ? HUF_flags_bmi2 : 0)
165 | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
167 | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
168
169 typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170 huf_compress_f huf_compress;
171 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
172 huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173 cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
174 src, srcSize,
176 entropyWorkspace, entropyWorkspaceSize,
177 (HUF_CElt*)nextHuf->CTable,
178 &repeat, flags);
179 DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
180 if (repeat != HUF_repeat_none) {
181 /* reused the existing table */
182 DEBUGLOG(5, "reusing statistics from previous huffman block");
183 hType = set_repeat;
184 }
185 }
186
187 { size_t const minGain = ZSTD_minGain(srcSize, strategy);
188 if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
191 } }
192 if (cLitSize==1) {
193 /* A return value of 1 signals that the alphabet consists of a single symbol.
194 * However, in some rare circumstances, it could be the compressed size (a single byte).
195 * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196 * (it's also necessary to not generate statistics).
197 * Therefore, in such a case, actively check that all bytes are identical. */
198 if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
201 } }
202
203 if (hType == set_compressed) {
204 /* using a newly constructed table */
205 nextHuf->repeatMode = HUF_repeat_check;
206 }
207
208 /* Build header */
209 switch(lhSize)
210 {
211 case 3: /* 2 - 2 - 10 - 10 */
212 if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213 { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
214 MEM_writeLE24(ostart, lhc);
215 break;
216 }
217 case 4: /* 2 - 2 - 14 - 14 */
219 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
220 MEM_writeLE32(ostart, lhc);
221 break;
222 }
223 case 5: /* 2 - 2 - 18 - 18 */
225 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
226 MEM_writeLE32(ostart, lhc);
227 ostart[4] = (BYTE)(cLitSize >> 10);
228 break;
229 }
230 default: /* not possible : lhSize is {3,4,5} */
231 assert(0);
232 }
233 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
234 return lhSize+cLitSize;
235}
#define DEBUGLOG(l,...)
Definition: debug.h:108
#define RAWLOG(l,...)
Definition: debug.h:107
#define assert(condition)
Definition: debug.h:74
ERR_STATIC unsigned ERR_isError(size_t code)
Definition: error_private.h:58
#define RETURN_ERROR_IF(cond, err,...)
#define HUF_SYMBOLVALUE_MAX
Definition: huf.h:44
@ HUF_flags_bmi2
Definition: huf.h:90
@ HUF_flags_suspectUncompressible
Definition: huf.h:105
@ HUF_flags_optimalDepth
Definition: huf.h:95
@ HUF_flags_preferRepeat
Definition: huf.h:100
size_t HUF_CElt
Definition: huf.h:62
size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int flags)
#define HUF_OPTIMAL_DEPTH_THRESHOLD
Definition: huf.h:122
size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int flags)
HUF_repeat
Definition: huf.h:144
@ HUF_repeat_valid
Definition: huf.h:147
@ HUF_repeat_none
Definition: huf.h:145
@ HUF_repeat_check
Definition: huf.h:146
unsigned char BYTE
Definition: mem.h:58
MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
Definition: mem.h:298
MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
Definition: mem.h:328
unsigned int U32
Definition: mem.h:69
MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
Definition: mem.h:314
unsigned short U16
Definition: mem.h:64
#define MIN(a, b)
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]
ZSTD_strategy
Definition: zstd.h:315
@ ZSTD_lazy
Definition: zstd.h:318
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
size_t ZSTD_compressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize, void *entropyWorkspace, size_t entropyWorkspaceSize, const ZSTD_hufCTables_t *prevHuf, ZSTD_hufCTables_t *nextHuf, ZSTD_strategy strategy, int disableLiteralCompression, int suspectUncompressible, int bmi2)
#define ZSTD_memcpy(d, s, l)
Definition: zstd_deps.h:36
#define KB
Definition: zstd_internal.h:71
#define MIN_LITERALS_FOR_4_STREAMS
Definition: zstd_internal.h:96
symbolEncodingType_e
Definition: zstd_internal.h:98
@ set_repeat
Definition: zstd_internal.h:98
@ set_basic
Definition: zstd_internal.h:98
@ set_compressed
Definition: zstd_internal.h:98
@ set_rle
Definition: zstd_internal.h:98
#define LitHufLog