Go to the source code of this file.
|
COVER_epoch_info_t | COVER_computeEpochs (U32 maxDictSize, U32 nbDmers, U32 k, U32 passes) |
|
void | COVER_warnOnSmallCorpus (size_t maxDictSize, size_t nbDmers, int displayLevel) |
|
size_t | COVER_checkTotalCompressedSize (const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity) |
|
size_t | COVER_sum (const size_t *samplesSizes, unsigned nbSamples) |
|
void | COVER_best_init (COVER_best_t *best) |
|
void | COVER_best_wait (COVER_best_t *best) |
|
void | COVER_best_destroy (COVER_best_t *best) |
|
void | COVER_best_start (COVER_best_t *best) |
|
void | COVER_best_finish (COVER_best_t *best, ZDICT_cover_params_t parameters, COVER_dictSelection_t selection) |
|
unsigned | COVER_dictSelectionIsError (COVER_dictSelection_t selection) |
|
COVER_dictSelection_t | COVER_dictSelectionError (size_t error) |
|
void | COVER_dictSelectionFree (COVER_dictSelection_t selection) |
|
COVER_dictSelection_t | COVER_selectDict (BYTE *customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE *samplesBuffer, const size_t *samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t *offsets, size_t totalCompressedSize) |
|
◆ COVER_best_t
COVER_best_t is used for two purposes:
- Synchronizing threads.
- Saving the best parameters and dictionary.
All of the methods except COVER_best_init() are thread safe if zstd is compiled with multithreaded support.
◆ COVER_dictSelection_t
Struct used for the dictionary selection function.
◆ COVER_best_destroy()
◆ COVER_best_finish()
Called when a thread finishes executing, whether on error or success. Decrements liveJobs and signals any waiting threads if liveJobs == 0. If this dictionary is the best so far, saves it and its parameters.
Definition at line 912 of file cover.c.
◆ COVER_best_init()
Initialize the COVER_best_t.
Definition at line 854 of file cover.c.
◆ COVER_best_start()
Called when a thread is about to be launched. Increments liveJobs.
Definition at line 898 of file cover.c.
◆ COVER_best_wait()
Wait until liveJobs == 0.
Definition at line 868 of file cover.c.
◆ COVER_checkTotalCompressedSize()
size_t COVER_checkTotalCompressedSize |
( |
const ZDICT_cover_params_t |
parameters, |
|
|
const size_t * |
samplesSizes, |
|
|
const BYTE * |
samples, |
|
|
size_t * |
offsets, |
|
|
size_t |
nbTrainSamples, |
|
|
size_t |
nbSamples, |
|
|
BYTE *const |
dict, |
|
|
size_t |
dictBufferCapacity |
|
) |
| |
Checks the total compressed size of a dictionary.
Definition at line 798 of file cover.c.
◆ COVER_computeEpochs()
Computes the number of epochs and the size of each epoch. We will make sure that each epoch gets at least 10 * k bytes.
The COVER algorithms divide the data up into epochs of equal size and select one segment from each epoch.
Parameters:
maxDictSize | The maximum allowed dictionary size. |
nbDmers | The number of dmers we are training on. |
k | The parameter k (segment size). |
passes | The target number of passes over the dmer corpus. More passes means a better dictionary. |
Definition at line 666 of file cover.c.
◆ COVER_dictSelectionError()
Error function for COVER_selectDict function. Returns a struct where return.totalCompressedSize is a ZSTD error.
Definition at line 967 of file cover.c.
◆ COVER_dictSelectionFree()
Always call this after COVER_selectDict() to free the memory used by the newly created dictionary.
Definition at line 975 of file cover.c.
◆ COVER_dictSelectionIsError()
Error function for COVER_selectDict function. Checks if the return value is an error.
Definition at line 971 of file cover.c.
◆ COVER_selectDict()
COVER_dictSelection_t COVER_selectDict |
( |
BYTE * |
customDictContent, |
|
|
size_t |
dictBufferCapacity, |
|
|
size_t |
dictContentSize, |
|
|
const BYTE * |
samplesBuffer, |
|
|
const size_t * |
samplesSizes, |
|
|
unsigned |
nbFinalizeSamples, |
|
|
size_t |
nbCheckSamples, |
|
|
size_t |
nbSamples, |
|
|
ZDICT_cover_params_t |
params, |
|
|
size_t * |
offsets, |
|
|
size_t |
totalCompressedSize |
|
) |
| |
Called to finalize the dictionary and select one based on whether or not the shrink-dict flag was enabled. If enabled, the dictionary used is the smallest dictionary within a specified regression of the compressed size from the largest dictionary.
Definition at line 979 of file cover.c.
◆ COVER_sum()
size_t COVER_sum |
( |
const size_t * |
samplesSizes, |
|
|
unsigned |
nbSamples |
|
) |
| |
Returns the sum of the sample sizes.
Definition at line 245 of file cover.c.
◆ COVER_warnOnSmallCorpus()
void COVER_warnOnSmallCorpus |
( |
size_t |
maxDictSize, |
|
|
size_t |
nbDmers, |
|
|
int |
displayLevel |
|
) |
| |
Warns the user when their corpus is too small.
Definition at line 650 of file cover.c.