/* Presumably the largest total sample size accepted by the trainer:
 * 2000U << 20 bytes, i.e. 2000 MiB. */
16#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
/* Presumably the smallest useful total sample size. It is the product of
 * ZDICT_CONTENTSIZE_MIN and MINRATIO, neither of which is defined in this excerpt. */
17#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
/* Large-file feature-test macros. They only have an effect when defined
 * before the first system header is included. */
24#define _FILE_OFFSET_BITS 64
/* Sun targets without __LP64__: define _LARGEFILE_SOURCE unless already defined. */
25#if (defined(__sun__) && (!defined(__LP64__)))
26# ifndef _LARGEFILE_SOURCE
27# define _LARGEFILE_SOURCE
/* Every other target without __LP64__: define _LARGEFILE64_SOURCE unless already defined.
 * NOTE(review): the matching #endif lines are not visible in this excerpt. */
29#elif ! defined(__LP64__)
30# ifndef _LARGEFILE64_SOURCE
31# define _LARGEFILE64_SOURCE
44#ifndef ZDICT_STATIC_LINKING_ONLY
45# define ZDICT_STATIC_LINKING_ONLY
48#include "../common/mem.h"
49#include "../common/fse.h"
50#include "../common/huf.h"
51#include "../common/zstd_internal.h"
52#include "../common/xxhash.h"
53#include "../compress/zstd_compress_internal.h"
56#include "../common/bits.h"
/* Presumably the default number of entries in the dictItem list -- confirm at the
 * point where dictListSize is computed (not visible in this excerpt). */
66#define DICTLISTSIZE_DEFAULT 10000
/* Selectivity used by the legacy trainer when params.selectivityLevel == 0. */
70static const U32 g_selectivity_default = 9;
/* printf-style output to stderr, flushed immediately after each message. */
77#define DISPLAY(...) do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
/* Same as DISPLAY, but only when the variable `notificationLevel` (which must be
 * in scope at the expansion site) is at least `l`. */
79#define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0)
/* Returns the difference between the current clock() value and `nPrevious`,
 * in clock ticks. Used by DISPLAYUPDATE to rate-limit progress messages. */
81static clock_t ZDICT_clockSpan(clock_t nPrevious) {
return clock() - nPrevious; }
/* Presumably prints `length` bytes starting at `ptr`; bytes outside the
 * printable ASCII range 32..126 are replaced by '.'.
 * NOTE(review): the braces, local declarations and the actual output call
 * are not visible in this excerpt. */
83static void ZDICT_printHex(
const void* ptr,
size_t length)
85 const BYTE*
const b = (
const BYTE*)ptr;
87 for (u=0; u<length; u++) {
/* Substitute '.' for anything that is not printable ASCII. */
89 if (c<32 || c>126) c =
'.';
104 if (dictSize < 8)
return 0;
117 headerSize =
ERROR(memory_allocation);
/* Returns how many bytes `pIn` has been advanced from its starting value.
 * Presumably this is the length of the common prefix of `pIn` and `pMatch`;
 * the comparison that controls the advance is not visible in this excerpt. */
137static size_t ZDICT_count(
const void* pIn,
const void* pMatch)
139 const char*
const pStart = (
const char*)pIn;
/* Both cursors move forward one size_t word at a time. */
143 pIn = (
const char*)pIn+
sizeof(
size_t);
144 pMatch = (
const char*)pMatch+
sizeof(
size_t);
/* Distance travelled by pIn, in bytes. */
148 return (
size_t)((
const char*)pIn - pStart);
/* Initialises a dictItem. The only visible assignment sets `savings` to
 * (U32)(-1), the largest U32 value; other field assignments, if any, are
 * not visible in this excerpt. */
159static void ZDICT_initDictItem(
dictItem* d)
163 d->savings = (
U32)(-1);
/* Presumably the minimum match length considered by the analysis below --
 * its use is not visible in this excerpt. */
168#define MINMATCHLENGTH 7
/* Fragment of the routine invoked as
 *   ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel)
 * by the training loop; the first line of its signature is not visible here.
 * It inspects the suffix-array neighbourhood of `start` and fills a `solution`
 * item. NOTE(review): many body lines are missing, so the comments below
 * describe only the statements that are visible. */
171 const int* suffix,
U32 start,
172 const void* buffer,
U32 minRatio,
U32 notificationLevel)
177 const BYTE* b = (
const BYTE*)buffer;
178 size_t maxLength =
LLIMIT;
/* Buffer position of the suffix being analysed. */
179 size_t pos = (size_t)suffix[start];
/* The result starts out all-zero. */
184 memset(&solution, 0,
sizeof(solution));
/* Repetitive-pattern handling: extend patternEnd two bytes at a time while the
 * 16-bit read matches pattern16, add one more byte if it repeats its
 * predecessor, then mark positions pos+1 .. pos+patternEnd-1 as done. */
193 U32 u, patternEnd = 6;
194 while (
MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
195 if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
196 for (u=1; u<patternEnd; u++)
197 doneMarks[pos+u] = 1;
/* Match lengths against the following (suffix[end]) and preceding
 * (suffix[start-1]) suffix-array entries. */
205 length = ZDICT_count(b + pos, b + suffix[end]);
212 length = ZDICT_count(b + pos, b + *(suffix+start-1));
/* Fewer than minRatio entries in [start, end): mark each of them as done. */
218 if (end-start < minRatio) {
220 for(idx=start; idx<end; idx++)
221 doneMarks[suffix[idx]] = 1;
227 U32 refinedStart = start;
228 U32 refinedEnd = end;
/* Scan [refinedStart, refinedEnd) for the longest run of entries sharing the
 * same byte at offset mml, tracked in selectedID / selectedCount. */
235 BYTE currentChar = 0;
236 U32 currentCount = 0;
237 U32 currentID = refinedStart;
239 U32 selectedCount = 0;
240 U32 selectedID = currentID;
241 for (
id =refinedStart;
id < refinedEnd;
id++) {
242 if (b[suffix[
id] + mml] != currentChar) {
243 if (currentCount > selectedCount) {
244 selectedCount = currentCount;
245 selectedID = currentID;
248 currentChar = b[ suffix[id] + mml];
/* Account for the last run after the loop. */
253 if (currentCount > selectedCount) {
254 selectedCount = currentCount;
255 selectedID = currentID;
258 if (selectedCount < minRatio)
/* Narrow the range to the selected run. */
260 refinedStart = selectedID;
261 refinedEnd = refinedStart + selectedCount;
265 start = refinedStart;
266 pos = suffix[refinedStart];
/* lengthList[] is a histogram of match lengths against neighbouring entries. */
268 memset(lengthList, 0,
sizeof(lengthList));
274 length = ZDICT_count(b + pos, b + suffix[end]);
276 lengthList[length]++;
283 length = ZDICT_count(b + pos, b + suffix[start - 1]);
285 lengthList[length]++;
/* cumulLength[i] = number of matches of length >= i (suffix sum of lengthList). */
291 memset(cumulLength, 0,
sizeof(cumulLength));
292 cumulLength[maxLength-1] = lengthList[maxLength-1];
293 for (i=(
int)(maxLength-2); i>=0; i--)
294 cumulLength[i] = cumulLength[i+1] + lengthList[i];
/* Walk `l` backwards while the byte at pos+l-2 equals the byte at pos+maxLength-1. */
301 BYTE const c = b[pos + maxLength-1];
302 while (b[pos+l-2]==c) l--;
/* Running savings: each match of length i contributes (i-3). */
310 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
312 DISPLAYLEVEL(4,
"Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
313 (
unsigned)pos, (
unsigned)maxLength, (
unsigned)savings[maxLength], (
double)savings[maxLength] / (
double)maxLength);
317 solution.
savings = savings[maxLength];
/* Visit every entry of [start, end) and walk over the bytes it shares with
 * `pos` (the loop body acting on `p` is not visible in this excerpt). */
321 for (
id=start;
id<end;
id++) {
323 U32 const testedPos = (
U32)suffix[
id];
324 if (testedPos == pos)
327 length = (
U32)ZDICT_count(b+pos, b+testedPos);
330 pEnd = (
U32)(testedPos + length);
331 for (p=testedPos; p<pEnd; p++)
/* Compares the first `length` bytes of `in` and `container`, stopping at the
 * first mismatch. The return statement is not visible in this excerpt;
 * presumably it reports whether all `length` bytes matched. */
339static int isIncluded(
const void* in,
const void* container,
size_t length)
341 const char*
const ip = (
const char*) in;
342 const char*
const into = (
const char*) container;
345 for (u=0; u<length; u++) {
/* First differing byte ends the scan. */
346 if (ip[u] != into[u])
break;
/* Fragment of the routine invoked as ZDICT_tryMerge(table, elt, eltNbToSkip, buffer)
 * by the insertion code; its signature is not visible in this excerpt.
 * It scans table[1..tableSize-1], skipping index eltNbToSkip, and lengthens
 * entries that can absorb `elt`. NOTE(review): the overlap conditions and
 * return statements are missing from this excerpt. */
360 const char*
const buf = (
const char*) buffer;
/* First pass over the table; the condition guarding the extension is not visible. */
363 U32 u;
for (u=1; u<tableSize; u++) {
364 if (u==eltNbToSkip)
continue;
368 table[u].length += addedLength;
/* Second pass: extend table[u] by the part of elt that lies beyond its end. */
381 for (u=1; u<tableSize; u++) {
382 if (u==eltNbToSkip)
continue;
/* Signed distance from the end of table[u] to eltEnd. */
386 int const addedLength = (int)eltEnd - (
int)(
table[u].pos +
table[u].length);
388 if (addedLength > 0) {
389 table[u].length += addedLength;
/* table[u] matches elt shifted by one byte: extension is at least 1 byte. */
401 if (isIncluded(buf +
table[u].pos, buf + elt.
pos + 1,
table[u].length)) {
402 size_t const addedLength =
MAX( (
int)elt.
length - (
int)
table[u].length , 1 );
421 for (u=
id; u<
max-1; u++)
430 U32 mergeId = ZDICT_tryMerge(
table, elt, 0, buffer);
434 newMerge = ZDICT_tryMerge(
table,
table[mergeId], mergeId, buffer);
435 if (newMerge) ZDICT_removeDictItem(
table, mergeId);
444 if (nextElt >= maxSize) nextElt = maxSize-1;
451 table->pos = nextElt+1;
459 for (u=1; u<dictList[0].
pos; u++)
460 dictSize += dictList[u].length;
/* Legacy dictionary trainer: builds a suffix array over `buffer`, then walks
 * the buffer and feeds every segment returned by ZDICT_analyzePos into `dictList`.
 *   dictList / dictListSize : output table of selected segments
 *   buffer / bufferSize     : concatenated samples
 *   fileSizes / nbFiles     : size of each sample
 *   minRatio                : forwarded to ZDICT_analyzePos
 *   notificationLevel       : verbosity used by DISPLAYLEVEL / DISPLAYUPDATE
 * On suffix-sort failure `result` is set to ERROR(GENERIC) and control jumps
 * to _cleanup. NOTE(review): the cleanup section, the return statement and
 * several other lines are not visible in this excerpt. */
465static size_t ZDICT_trainBuffer_legacy(
dictItem* dictList,
U32 dictListSize,
466 const void*
const buffer,
size_t bufferSize,
467 const size_t* fileSizes,
unsigned nbFiles,
468 unsigned minRatio,
U32 notificationLevel)
/* suffix0 holds bufferSize+2 ints; `suffix` points one past its start so that
 * suffix[-1] (== suffix0[0]) is a valid slot. */
470 int*
const suffix0 = (
int*)malloc((bufferSize+2)*
sizeof(*suffix0));
471 int*
const suffix = suffix0+1;
472 U32* reverseSuffix = (
U32*)malloc((bufferSize)*
sizeof(*reverseSuffix));
/* doneMarks has 16 spare bytes beyond bufferSize. */
473 BYTE* doneMarks = (
BYTE*)malloc((bufferSize+16)*
sizeof(*doneMarks));
474 U32* filePos = (
U32*)malloc(nbFiles *
sizeof(*filePos));
476 clock_t displayClock = 0;
/* Progress messages are emitted at most every 0.3 s of clock() time. */
477 clock_t
const refreshRate = CLOCKS_PER_SEC * 3 / 10;
480# define DISPLAYUPDATE(l, ...) \
482 if (notificationLevel>=l) { \
483 if (ZDICT_clockSpan(displayClock) > refreshRate) { \
484 displayClock = clock(); \
485 DISPLAY(__VA_ARGS__); \
487 if (notificationLevel>=4) fflush(stderr); \
493 if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
498 memset(doneMarks, 0, bufferSize+16);
505 DISPLAYLEVEL(2,
"sorting %u files of total size %u MB ...\n", nbFiles, (
unsigned)(bufferSize>>20));
/* Build the suffix array; a non-zero result is treated as a generic error. */
506 {
int const divSuftSortResult =
divsufsort((
const unsigned char*)buffer, suffix, (
int)bufferSize, 0);
507 if (divSuftSortResult != 0) {
result =
ERROR(GENERIC);
goto _cleanup; }
/* Sentinels on both ends of the suffix array: suffix[bufferSize] and suffix[-1]. */
509 suffix[bufferSize] = (int)bufferSize;
510 suffix0[0] = (int)bufferSize;
/* reverseSuffix maps a buffer position to its index in the suffix array. */
513 for (pos=0; pos < bufferSize; pos++)
514 reverseSuffix[suffix[pos]] = (
U32)pos;
/* filePos[i] = start offset of sample i (running sum of the previous sizes). */
518 for (pos=1; pos<nbFiles; pos++)
519 filePos[pos] = (
U32)(filePos[pos-1] + fileSizes[pos-1]);
/* Main loop: skip positions already marked done or yielding an empty solution;
 * otherwise insert the solution and jump past it. */
525 {
U32 cursor;
for (cursor=0; cursor < bufferSize; ) {
527 if (doneMarks[cursor]) { cursor++;
continue; }
528 solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
529 if (solution.
length==0) { cursor++;
continue; }
530 ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
531 cursor += solution.
length;
532 DISPLAYUPDATE(2,
"\r%4.2f %% \r", (
double)cursor / (
double)bufferSize * 100.0);
/* Fills `length` bytes of `buffer` with values derived from an accumulator
 * seeded with prime1; each byte is bits 21..28 of the accumulator.
 * NOTE(review): the accumulator update (presumably involving prime2) is not
 * visible in this excerpt. */
544static void ZDICT_fillNoise(
void* buffer,
size_t length)
546 unsigned const prime1 = 2654435761U;
547 unsigned const prime2 = 2246822519U;
548 unsigned acc = prime1;
550 for (p=0; p<length; p++) {
552 ((
unsigned char*)buffer)[p] = (
unsigned char)(acc >> 21);
/* Presumably the upper bound on offsets tracked in the repOffsets table --
 * its use is not visible in this excerpt. */
564#define MAXREPOFFSET 1024
/* Accumulates entropy statistics for one sample: literal byte counts,
 * offset / match-length / literal-length code counts, and repeat-offset
 * weights. The sample is clamped to blockSizeMax bytes; if compression
 * fails a level-3 warning is printed and nothing is counted.
 * NOTE(review): the compression call and the derivation of codePtr, nbSeq,
 * offset1 and offset2 are not visible in this excerpt. */
566static void ZDICT_countEStats(
EStats_ress_t esr,
const ZSTD_parameters* params,
567 unsigned* countLit,
unsigned* offsetcodeCount,
unsigned* matchlengthCount,
unsigned* litlengthCount,
U32* repOffsets,
568 const void* src,
size_t srcSize,
569 U32 notificationLevel)
/* Only the first blockSizeMax bytes of the sample are analysed. */
574 if (srcSize > blockSizeMax) srcSize = blockSizeMax;
580 if (
ZSTD_isError(cSize)) {
DISPLAYLEVEL(3,
"warning : could not compress sample size %u \n", (
unsigned)srcSize);
return; }
/* Histogram of literal bytes between litStart and lit in the sequence store. */
586 {
const BYTE* bytePtr;
587 for(bytePtr = seqStorePtr->
litStart; bytePtr < seqStorePtr->lit; bytePtr++)
588 countLit[*bytePtr]++;
/* Histograms of the per-sequence codes. */
597 for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
602 for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
607 for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
/* offset1 is weighted three times as much as offset2. */
616 repOffsets[offset1] += 3;
617 repOffsets[offset2] += 1;
/* Sums fileSizes[0 .. nbFiles-1] into `total`.
 * NOTE(review): the declaration of `total` and the return statement are not
 * visible in this excerpt. */
621static size_t ZDICT_totalSampleSize(
const size_t* fileSizes,
unsigned nbFiles)
625 for (u=0; u<nbFiles; u++) total += fileSizes[u];
638 if (
table[u-1].count >=
table[u].count)
break;
/* Overwrites the literal histogram with a flat distribution: entries 1..255
 * are set to 2. The treatment of countLit[0] is not visible in this excerpt. */
649static void ZDICT_flatLit(
unsigned* countLit)
652 for (u=1; u<256; u++) countLit[u] = 2;
/* Largest offset code supported by the tables below; a larger offcodeMax is rejected. */
658#define OFFCODE_MAX 30
/* Gathers statistics over all samples and writes the resulting entropy tables
 * (Huffman literals table, then offset-code / match-length / literal-length
 * normalised counts, then repeat offsets) into dstBuffer.
 *   dstBuffer / maxDstSize      : output area and its remaining capacity
 *   compressionLevel            : passed to ZSTD_getParams
 *   srcBuffer / fileSizes / nbFiles : concatenated samples and their sizes
 *   dictBuffer / dictBufferSize : dictionary content used while analysing
 *   notificationLevel           : verbosity for DISPLAYLEVEL
 * Failures are reported through `eSize` (an ERROR(...) code).
 * NOTE(review): many lines, including the final return and the _cleanup
 * section, are not visible in this excerpt. */
659static size_t ZDICT_analyzeEntropy(
void* dstBuffer,
size_t maxDstSize,
660 int compressionLevel,
661 const void* srcBuffer,
const size_t* fileSizes,
unsigned nbFiles,
662 const void* dictBuffer,
size_t dictBufferSize,
663 unsigned notificationLevel)
665 unsigned countLit[256];
670 unsigned matchLengthCount[
MaxML+1];
671 short matchLengthNCount[
MaxML+1];
672 unsigned litLengthCount[
MaxLL+1];
673 short litLengthNCount[
MaxLL+1];
677 ZSTD_parameters params;
679 size_t pos = 0, errorCode;
681 size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
/* (nbFiles + !nbFiles) is 1 when nbFiles == 0, so this never divides by zero. */
682 size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
687 DEBUGLOG(4,
"ZDICT_analyzeEntropy");
688 if (offcodeMax>
OFFCODE_MAX) { eSize =
ERROR(dictionaryCreation_failed);
goto _cleanup; }
/* Every counter starts at 1, so no symbol ends up with a zero count. */
689 for (u=0; u<256; u++) countLit[u] = 1;
690 for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
691 for (u=0; u<=
MaxML; u++) matchLengthCount[u] = 1;
692 for (u=0; u<=
MaxLL; u++) litLengthCount[u] = 1;
/* Repeat-offset weights start at zero except offsets 1, 4 and 8. */
693 memset(repOffset, 0,
sizeof(repOffset));
694 repOffset[1] = repOffset[4] = repOffset[8] = 1;
695 memset(bestRepOffset, 0,
sizeof(bestRepOffset));
697 params =
ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
703 eSize =
ERROR(memory_allocation);
/* Accumulate statistics sample by sample.
 * NOTE(review): `¶ms` below looks like a mis-encoded `&params` -- confirm against the real file. */
709 for (u=0; u<nbFiles; u++) {
710 ZDICT_countEStats(esr, ¶ms,
711 countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
712 (
const char*)srcBuffer + pos, fileSizes[u],
717 if (notificationLevel >= 4) {
720 for (u=0; u<=offcodeMax; u++) {
/* Build the Huffman table for literals (max symbol value 255). */
725 {
size_t maxNbBits =
HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp,
sizeof(wksp));
/* Pathological literal statistics: warn and fall back to a flat histogram. */
732 DISPLAYLEVEL(2,
"warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
733 ZDICT_flatLit(countLit);
737 huffLog = (
U32)maxNbBits;
743 ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
/* Normalise each histogram; on success the returned value becomes the table log. */
747 total=0;
for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
748 errorCode =
FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, 1);
751 DISPLAYLEVEL(1,
"FSE_normalizeCount error with offcodeCount \n");
754 Offlog = (
U32)errorCode;
756 total=0;
for (u=0; u<=
MaxML; u++) total+=matchLengthCount[u];
760 DISPLAYLEVEL(1,
"FSE_normalizeCount error with matchLengthCount \n");
763 mlLog = (
U32)errorCode;
765 total=0;
for (u=0; u<=
MaxLL; u++) total+=litLengthCount[u];
769 DISPLAYLEVEL(1,
"FSE_normalizeCount error with litLengthCount \n");
772 llLog = (
U32)errorCode;
/* Serialise the tables; the remaining capacity shrinks after each one. */
775 {
size_t const hhSize =
HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp,
sizeof(wksp));
782 maxDstSize -= hhSize;
789 DISPLAYLEVEL(1,
"FSE_writeNCount error with offcodeNCount \n");
793 maxDstSize -= ohSize;
800 DISPLAYLEVEL(1,
"FSE_writeNCount error with matchLengthNCount \n");
804 maxDstSize -= mhSize;
811 DISPLAYLEVEL(1,
"FSE_writeNCount error with litlengthNCount \n");
815 maxDstSize -= lhSize;
/* Not enough room left for the repeat offsets. */
820 eSize =
ERROR(dstSize_tooSmall);
821 DISPLAYLEVEL(1,
"not enough space to write RepOffsets \n");
851 U32 maxRep = reps[0];
854 maxRep =
MAX(maxRep, reps[r]);
/* Fragment of ZDICT_finalizeDictionary (name taken from the DEBUGLOG string
 * below; the first signature line is not visible in this excerpt).
 * Lays out a dictionary in dictBuffer as [header][zero padding][content],
 * where the header carries a dictionary ID and the entropy tables computed
 * from the samples. Returns ERROR(dstSize_tooSmall) when dictBufferCapacity
 * cannot hold the result.
 * NOTE(review): header construction and the final return are only partly visible. */
859 const void* customDictContent,
size_t dictContentSize,
860 const void* samplesBuffer,
const size_t* samplesSizes,
/* The content must be at least as large as the biggest start repcode value. */
869 size_t const minContentSize = (
size_t)ZDICT_maxRep(repStartValue);
873 DEBUGLOG(4,
"ZDICT_finalizeDictionary");
874 if (dictBufferCapacity < dictContentSize)
return ERROR(dstSize_tooSmall);
875 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN)
return ERROR(dstSize_tooSmall);
/* ID derived from a hash of the content, mapped into [32768, 2^31). */
879 {
U64 const randomID =
XXH64(customDictContent, dictContentSize, 0);
880 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
/* Entropy tables are written into the header right after the hSize bytes already used. */
889 {
size_t const eSize = ZDICT_analyzeEntropy(header+hSize,
HBUFFSIZE-hSize,
891 samplesBuffer, samplesSizes, nbSamples,
892 customDictContent, dictContentSize,
/* Shrink the content if header + content would overflow the output buffer. */
899 if (hSize + dictContentSize > dictBufferCapacity) {
900 dictContentSize = dictBufferCapacity - hSize;
/* Content shorter than minContentSize is padded up to it, provided
 * header + minContentSize still fits. */
904 if (dictContentSize < minContentSize) {
905 RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
906 "dictBufferCapacity too small to fit max repcode");
907 paddingSize = minContentSize - dictContentSize;
913 size_t const dictSize = hSize + paddingSize + dictContentSize;
919 BYTE*
const outDictHeader = (
BYTE*)dictBuffer;
920 BYTE*
const outDictPadding = outDictHeader + hSize;
921 BYTE*
const outDictContent = outDictPadding + paddingSize;
923 assert(dictSize <= dictBufferCapacity);
924 assert(outDictContent + dictContentSize == (
BYTE*)dictBuffer + dictSize);
/* Content is placed first, with memmove -- presumably because customDictContent
 * may overlap dictBuffer; then the header, then the zero padding. */
931 memmove(outDictContent, customDictContent, dictContentSize);
932 memcpy(outDictHeader, header, hSize);
933 memset(outDictPadding, 0, paddingSize);
/* Completes a dictionary whose raw content sits in the LAST dictContentSize
 * bytes of dictBuffer: entropy tables are written at dictBuffer+hSize, a
 * dictionary ID is derived from a hash of the content, and the content is
 * then moved down to follow the header.
 * Returns MIN(dictBufferCapacity, hSize + dictContentSize).
 * NOTE(review): the last signature parameter, the header writes and the
 * error checks are not visible in this excerpt. */
940static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
941 void* dictBuffer,
size_t dictContentSize,
size_t dictBufferCapacity,
942 const void* samplesBuffer,
const size_t* samplesSizes,
unsigned nbSamples,
/* Tables go after the first hSize bytes; the content is read from the buffer's tail. */
952 {
size_t const eSize = ZDICT_analyzeEntropy((
char*)dictBuffer+hSize, dictBufferCapacity-hSize,
954 samplesBuffer, samplesSizes, nbSamples,
955 (
char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
/* ID derived from a hash of the content, mapped into [32768, 2^31). */
963 {
U64 const randomID =
XXH64((
char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
964 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
/* Close the gap between header and content when there is one. */
969 if (hSize + dictContentSize < dictBufferCapacity)
970 memmove((
char*)dictBuffer + hSize, (
char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
971 return MIN(dictBufferCapacity, hSize+dictContentSize);
/* Legacy training entry point operating directly on samplesBuffer.
 * Steps visible here: allocate the dictItem list, run
 * ZDICT_trainBuffer_legacy, optionally print the first segments, warn when
 * the selected content is much smaller or larger than requested, trim the
 * selection to targetDictSize, copy the segments to the end of dictBuffer,
 * and finish with ZDICT_addEntropyTablesFromBuffer_advanced.
 * Error returns visible here: memory_allocation, dstSize_tooSmall,
 * dictionaryCreation_failed and GENERIC.
 * NOTE(review): many lines, including the final return and free, are not
 * visible in this excerpt. */
978static size_t ZDICT_trainFromBuffer_unsafe_legacy(
979 void* dictBuffer,
size_t maxDictSize,
980 const void* samplesBuffer,
const size_t* samplesSizes,
unsigned nbSamples,
981 ZDICT_legacy_params_t params)
984 dictItem*
const dictList = (
dictItem*)malloc(dictListSize *
sizeof(*dictList));
/* selectivityLevel == 0 selects the default. */
985 unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
/* The shift is only performed for selectivity <= 30; above that MINRATIO is used. */
986 unsigned const minRep = (selectivity > 30) ?
MINRATIO : nbSamples >> selectivity;
987 size_t const targetDictSize = maxDictSize;
988 size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
990 U32 const notificationLevel = params.zParams.notificationLevel;
993 if (!dictList)
return ERROR(memory_allocation);
994 if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList);
return ERROR(dstSize_tooSmall); }
998 ZDICT_initDictItem(dictList);
/* NOTE(review): the value returned by ZDICT_trainBuffer_legacy is not used
 * on the visible lines -- confirm whether that is intentional. */
1001 ZDICT_trainBuffer_legacy(dictList, dictListSize,
1002 samplesBuffer, samplesBuffSize,
1003 samplesSizes, nbSamples,
1004 minRep, notificationLevel);
/* Verbose listing of up to the first 24 segments (indices 1 .. nb-1). */
1007 if (params.zParams.notificationLevel>= 3) {
1008 unsigned const nb =
MIN(25, dictList[0].pos);
1009 unsigned const dictContentSize = ZDICT_dictSize(dictList);
1011 DISPLAYLEVEL(3,
"\n %u segments found, of total size %u \n", (
unsigned)dictList[0].pos-1, dictContentSize);
1013 for (u=1; u<nb; u++) {
1014 unsigned const pos = dictList[u].
pos;
1015 unsigned const length = dictList[u].
length;
1016 U32 const printedLength =
MIN(40, length);
/* A segment reaching outside the sample buffer is treated as a generic error.
 * NOTE(review): the line before this return is not visible -- confirm that
 * dictList is freed on this path, as it is on the other error returns. */
1017 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
1019 return ERROR(GENERIC);
1021 DISPLAYLEVEL(3,
"%3u:%3u bytes at pos %8u, savings %7u bytes |",
1022 u, length, pos, (
unsigned)dictList[u].savings);
1023 ZDICT_printHex((
const char*)samplesBuffer+pos, printedLength);
/* Too little selected content is a hard failure. */
1029 {
unsigned dictContentSize = ZDICT_dictSize(dictList);
1030 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList);
return ERROR(dictionaryCreation_failed); }
/* Content below a quarter of the requested size: warnings only. */
1031 if (dictContentSize < targetDictSize/4) {
1032 DISPLAYLEVEL(2,
"! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (
unsigned)maxDictSize);
1033 if (samplesBuffSize < 10 * targetDictSize)
1034 DISPLAYLEVEL(2,
"! consider increasing the number of samples (total size : %u MB)\n", (
unsigned)(samplesBuffSize>>20));
1036 DISPLAYLEVEL(2,
"! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
1037 DISPLAYLEVEL(2,
"! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
/* Content above three times the requested size: suggest a lower selectivity.
 * The nbSamples > 2*MINRATIO guard ensures the while loop stops by
 * proposedSelectivity == 0 at the latest. */
1041 if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*
MINRATIO) && (selectivity>1)) {
1042 unsigned proposedSelectivity = selectivity-1;
1043 while ((nbSamples >> proposedSelectivity) <=
MINRATIO) { proposedSelectivity--; }
1044 DISPLAYLEVEL(2,
"! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (
unsigned)maxDictSize);
1045 DISPLAYLEVEL(2,
"! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
1046 DISPLAYLEVEL(2,
"! always test dictionary efficiency on real samples \n");
/* Keep segments while the running total stays within targetDictSize. */
1051 U32 currentSize = 0;
1053 currentSize += dictList[
n].
length;
1054 if (currentSize > targetDictSize) { currentSize -= dictList[
n].
length;
break; }
1057 dictContentSize = currentSize;
/* Segments are copied into dictBuffer starting from its end; `ptr` dropping
 * below dictBuffer is treated as a generic error. */
1062 BYTE* ptr = (
BYTE*)dictBuffer + maxDictSize;
1063 for (u=1; u<dictList->
pos; u++) {
1066 if (ptr<(
BYTE*)dictBuffer) { free(dictList);
return ERROR(GENERIC); }
1067 memcpy(ptr, (
const char*)samplesBuffer+dictList[u].pos, l);
/* Add header and entropy tables in front of the content. */
1070 dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
1071 samplesBuffer, samplesSizes, nbSamples,
/* Fragment of what appears to be ZDICT_trainFromBuffer_legacy (first signature
 * line not visible). It copies the samples into a private buffer, appends
 * NOISELENGTH bytes of generated noise after them, and runs the unsafe
 * legacy trainer on that copy.
 * NOTE(review): the allocation of newBuff, its release and the return are
 * not visible in this excerpt. */
1085 const void* samplesBuffer,
const size_t* samplesSizes,
unsigned nbSamples,
1086 ZDICT_legacy_params_t params)
1090 size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
1094 if (!newBuff)
return ERROR(memory_allocation);
1096 memcpy(newBuff, samplesBuffer, sBuffSize);
/* Noise goes right after the last sample byte. */
1097 ZDICT_fillNoise((
char*)newBuff + sBuffSize,
NOISELENGTH);
1100 ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
1101 samplesSizes, nbSamples, params);
/* Fragment of ZDICT_trainFromBuffer (name taken from the DEBUGLOG string;
 * first signature line not visible). It zero-initialises a fastCover
 * parameter block and, in debug builds with DEBUGLEVEL >= 1, sets the
 * notification level to DEBUGLEVEL.
 * NOTE(review): the remaining parameter setup and the training call are not
 * visible; `¶ms` looks like a mis-encoded `&params` -- confirm against the real file. */
1108 const void* samplesBuffer,
const size_t* samplesSizes,
unsigned nbSamples)
1110 ZDICT_fastCover_params_t params;
1111 DEBUGLOG(3,
"ZDICT_trainFromBuffer");
1112 memset(¶ms, 0,
sizeof(params));
1117#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
1118 params.zParams.notificationLevel =
DEBUGLEVEL;
1121 samplesBuffer, samplesSizes, nbSamples,
/* Fragment of what appears to be ZDICT_addEntropyTablesFromBuffer (first
 * signature line not visible): zero-initialises `params` and forwards to
 * ZDICT_addEntropyTablesFromBuffer_advanced, returning its result.
 * NOTE(review): `¶ms` looks like a mis-encoded `&params` -- confirm against the real file. */
1126 const void* samplesBuffer,
const size_t* samplesSizes,
unsigned nbSamples)
1129 memset(¶ms, 0,
sizeof(params));
1130 return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity,
1131 samplesBuffer, samplesSizes, nbSamples,
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
MEM_STATIC unsigned ZSTD_highbit32(U32 val)
#define assert(condition)
int divsufsort(const unsigned char *T, int *SA, int n, int openMP)
ERR_STATIC unsigned ERR_isError(size_t code)
#define RETURN_ERROR_IF(cond, err,...)
ERR_STATIC const char * ERR_getErrorName(size_t code)
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t *parameters)
FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount)
#define HUF_CTABLE_WORKSPACE_SIZE_U32
size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize)
#define HUF_WORKSPACE_SIZE
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue)
size_t HUF_buildCTable_wksp(HUF_CElt *tree, const unsigned *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
MEM_STATIC U16 MEM_read16(const void *memPtr)
MEM_STATIC size_t MEM_readST(const void *memPtr)
MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
MEM_STATIC U32 MEM_readLE32(const void *memPtr)
MEM_STATIC U64 MEM_read64(const void *memPtr)
constexpr dcon::demographics_key total(0)
unsigned notificationLevel
size_t ZDICT_trainFromBuffer_legacy(void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t params)
#define ZDICT_MAX_SAMPLES_SIZE
#define DISPLAYUPDATE(l,...)
unsigned ZDICT_getDictID(const void *dictBuffer, size_t dictSize)
size_t ZDICT_trainFromBuffer(void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples)
#define DISPLAYLEVEL(l,...)
size_t ZDICT_finalizeDictionary(void *dictBuffer, size_t dictBufferCapacity, const void *customDictContent, size_t dictContentSize, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_params_t params)
const char * ZDICT_getErrorName(size_t errorCode)
size_t ZDICT_addEntropyTablesFromBuffer(void *dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples)
unsigned ZDICT_isError(size_t errorCode)
size_t ZDICT_getDictHeaderSize(const void *dictBuffer, size_t dictSize)
#define ZDICT_MIN_SAMPLES_SIZE
#define DICTLISTSIZE_DEFAULT
#define ZSTD_MAGIC_DICTIONARY
#define ZSTD_CLEVEL_DEFAULT
#define ZSTD_BLOCKSIZE_MAX
ZSTD_CCtx * ZSTD_createCCtx(void)
size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx)
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t *bs, void *workspace, const void *const dict, size_t dictSize)
size_t ZSTD_freeCDict(ZSTD_CDict *cdict)
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict)
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t *bs)
ZSTD_CDict * ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
int ZSTD_seqToCodes(const seqStore_t *seqStorePtr)
const seqStore_t * ZSTD_getSeqStore(const ZSTD_CCtx *ctx)