Project Alice
Loading...
Searching...
No Matches
cover.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "../common/mem.h"
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h"
#include "../common/bits.h"
#include "../zdict.h"
#include "cover.h"
Include dependency graph for cover.c:

Go to the source code of this file.

Classes

struct  COVER_map_pair_t_s
 
struct  COVER_map_s
 
struct  COVER_ctx_t
 
struct  COVER_tryParameters_data_s
 

Macros

#define ZDICT_STATIC_LINKING_ONLY
 
#define COVER_MAX_SAMPLES_SIZE   (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
 
#define COVER_DEFAULT_SPLITPOINT   1.0
 
#define DISPLAY(...)
 
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)
 
#define DISPLAYLEVEL(l, ...)   LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
 
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)
 
#define DISPLAYUPDATE(l, ...)   LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
 
#define MAP_EMPTY_VALUE   ((U32)-1)
 

Typedefs

typedef struct COVER_map_pair_t_s COVER_map_pair_t
 
typedef struct COVER_map_s COVER_map_t
 
typedef struct COVER_tryParameters_data_s COVER_tryParameters_data_t
 

Functions

size_t COVER_sum (const size_t *samplesSizes, unsigned nbSamples)
 
void COVER_warnOnSmallCorpus (size_t maxDictSize, size_t nbDmers, int displayLevel)
 
COVER_epoch_info_t COVER_computeEpochs (U32 maxDictSize, U32 nbDmers, U32 k, U32 passes)
 
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters)
 
size_t COVER_checkTotalCompressedSize (const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity)
 
void COVER_best_init (COVER_best_t *best)
 
void COVER_best_wait (COVER_best_t *best)
 
void COVER_best_destroy (COVER_best_t *best)
 
void COVER_best_start (COVER_best_t *best)
 
void COVER_best_finish (COVER_best_t *best, ZDICT_cover_params_t parameters, COVER_dictSelection_t selection)
 
COVER_dictSelection_t COVER_dictSelectionError (size_t error)
 
unsigned COVER_dictSelectionIsError (COVER_dictSelection_t selection)
 
void COVER_dictSelectionFree (COVER_dictSelection_t selection)
 
COVER_dictSelection_t COVER_selectDict (BYTE *customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE *samplesBuffer, const size_t *samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t *offsets, size_t totalCompressedSize)
 
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t *parameters)
 

Macro Definition Documentation

◆ COVER_DEFAULT_SPLITPOINT

#define COVER_DEFAULT_SPLITPOINT   1.0

Definition at line 52 of file cover.c.

◆ COVER_MAX_SAMPLES_SIZE

#define COVER_MAX_SAMPLES_SIZE   (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))

Samples are referenced with 32-bit indexes, so the total samples size is limited to 4 GB on 64-bit builds. For 32-bit builds the limit is 1 GB: most 32-bit platforms have only 2 GB of user-mode addressable space and we allocate one large contiguous buffer, so 1 GB is already a high limit.

Definition at line 51 of file cover.c.

◆ DISPLAY

#define DISPLAY (   ...)
Value:
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}

Definition at line 61 of file cover.c.

◆ DISPLAYLEVEL

#define DISPLAYLEVEL (   l,
  ... 
)    LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

Definition at line 72 of file cover.c.

◆ DISPLAYUPDATE

#define DISPLAYUPDATE (   l,
  ... 
)    LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)

Definition at line 87 of file cover.c.

◆ LOCALDISPLAYLEVEL

#define LOCALDISPLAYLEVEL (   displayLevel,
  l,
  ... 
)
Value:
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */

Definition at line 67 of file cover.c.

◆ LOCALDISPLAYUPDATE

#define LOCALDISPLAYUPDATE (   displayLevel,
  l,
  ... 
)
Value:
if (displayLevel >= l) { \
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}

Definition at line 79 of file cover.c.

◆ MAP_EMPTY_VALUE

#define MAP_EMPTY_VALUE   ((U32)-1)

Definition at line 98 of file cover.c.

◆ ZDICT_STATIC_LINKING_ONLY

#define ZDICT_STATIC_LINKING_ONLY

Definition at line 30 of file cover.c.

Typedef Documentation

◆ COVER_map_pair_t

typedef struct COVER_map_pair_t_s COVER_map_pair_t

◆ COVER_map_t

typedef struct COVER_map_s COVER_map_t

◆ COVER_tryParameters_data_t

Parameters for COVER_tryParameters().

Function Documentation

◆ COVER_best_destroy()

void COVER_best_destroy ( COVER_best_t * best)

Call COVER_best_wait() and then destroy the COVER_best_t.

Definition at line 882 of file cover.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ COVER_best_finish()

void COVER_best_finish ( COVER_best_t *  best,
ZDICT_cover_params_t  parameters,
COVER_dictSelection_t  selection 
)

Called when a thread finishes executing, either on error or on success. Decrements liveJobs and signals any waiting threads if liveJobs == 0. If this dictionary is the best so far, saves it and its parameters.

Definition at line 912 of file cover.c.

◆ COVER_best_init()

void COVER_best_init ( COVER_best_t * best)

Initialize the COVER_best_t.

Definition at line 854 of file cover.c.

Here is the caller graph for this function:

◆ COVER_best_start()

void COVER_best_start ( COVER_best_t * best)

Called when a thread is about to be launched. Increments liveJobs.

Definition at line 898 of file cover.c.

Here is the caller graph for this function:

◆ COVER_best_wait()

void COVER_best_wait ( COVER_best_t * best)

Wait until liveJobs == 0.

Definition at line 868 of file cover.c.

Here is the caller graph for this function:

◆ COVER_checkTotalCompressedSize()

size_t COVER_checkTotalCompressedSize ( const ZDICT_cover_params_t  parameters,
const size_t *  samplesSizes,
const BYTE *  samples,
size_t *  offsets,
size_t  nbTrainSamples,
size_t  nbSamples,
BYTE *const  dict,
size_t  dictBufferCapacity 
)

Checks the total compressed size of a dictionary.

Definition at line 798 of file cover.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ COVER_computeEpochs()

COVER_epoch_info_t COVER_computeEpochs ( U32  maxDictSize,
U32  nbDmers,
U32  k,
U32  passes 
)

Computes the number of epochs and the size of each epoch. We will make sure that each epoch gets at least 10 * k bytes.

The COVER algorithms divide the data up into epochs of equal size and select one segment from each epoch.

Parameters
maxDictSize: The maximum allowed dictionary size.
nbDmers: The number of dmers we are training on.
k: The parameter k (segment size).
passes: The target number of passes over the dmer corpus. More passes means a better dictionary.

Definition at line 666 of file cover.c.

◆ COVER_dictSelectionError()

COVER_dictSelection_t COVER_dictSelectionError ( size_t  error)

Error function for COVER_selectDict function. Returns a struct where return.totalCompressedSize is a ZSTD error.

Definition at line 967 of file cover.c.

Here is the caller graph for this function:

◆ COVER_dictSelectionFree()

void COVER_dictSelectionFree ( COVER_dictSelection_t  selection)

Always call this after COVER_selectDict() to free the memory used by the newly created dictionary.

Definition at line 975 of file cover.c.

◆ COVER_dictSelectionIsError()

unsigned COVER_dictSelectionIsError ( COVER_dictSelection_t  selection)

Error function for COVER_selectDict function. Checks if the return value is an error.

Definition at line 971 of file cover.c.

◆ COVER_selectDict()

COVER_dictSelection_t COVER_selectDict ( BYTE *  customDictContent,
size_t  dictBufferCapacity,
size_t  dictContentSize,
const BYTE *  samplesBuffer,
const size_t *  samplesSizes,
unsigned  nbFinalizeSamples,
size_t  nbCheckSamples,
size_t  nbSamples,
ZDICT_cover_params_t  params,
size_t *  offsets,
size_t  totalCompressedSize 
)

Called to finalize the dictionary and select one based on whether or not the shrink-dict flag was enabled. If it was enabled, the dictionary used is the smallest dictionary within a specified regression of the compressed size from the largest dictionary.

Definition at line 979 of file cover.c.

Here is the call graph for this function:

◆ COVER_sum()

size_t COVER_sum ( const size_t *  samplesSizes,
unsigned  nbSamples 
)

Returns the sum of the sample sizes.

Definition at line 245 of file cover.c.

◆ COVER_warnOnSmallCorpus()

void COVER_warnOnSmallCorpus ( size_t  maxDictSize,
size_t  nbDmers,
int  displayLevel 
)

Warns the user when their corpus is too small.

Definition at line 650 of file cover.c.

Here is the caller graph for this function:

◆ ZDICT_optimizeTrainFromBuffer_cover()

ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover ( void *  dictBuffer,
size_t  dictBufferCapacity,
const void *  samplesBuffer,
const size_t *  samplesSizes,
unsigned  nbSamples,
ZDICT_cover_params_t *  parameters 
)

Definition at line 1126 of file cover.c.

Here is the call graph for this function:

◆ ZDICT_trainFromBuffer_cover()

ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover ( void *  dictBuffer,
size_t  dictBufferCapacity,
const void *  samplesBuffer,
const size_t *  samplesSizes,
unsigned  nbSamples,
ZDICT_cover_params_t  parameters 
)

Definition at line 738 of file cover.c.

Here is the call graph for this function: