32 #include "io_lib_config.h"
41 #include <sys/types.h>
58 if (val < MAX_STAT_VAL && val >= 0) {
68 k =
kh_put(m_i2i, st->h, val, &r);
83 if (val < MAX_STAT_VAL && val >= 0) {
85 assert(st->
freqs[val] >= 0);
90 if (--
kh_val(st->h, k) == 0)
93 fprintf(stderr,
"Failed to remove val %d from cram_stats\n", val);
97 fprintf(stderr,
"Failed to remove val %d from cram_stats\n", val);
104 fprintf(stderr,
"cram_stats:\n");
108 fprintf(stderr,
"\t%d\t%d\n", i, st->
freqs[i]);
116 fprintf(stderr,
"\t%d\t%d\n",
kh_key(st->h, k),
kh_val(st->h, k));
123 static int nbits(
int v) {
124 static const int MultiplyDeBruijnBitPosition[32] = {
125 1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31,
126 9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32
136 return MultiplyDeBruijnBitPosition[(
uint32_t)(v * 0x07C4ACDDU) >> 27];
151 int best_size = INT_MAX, bits;
152 int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k;
153 int *vals = NULL, *freqs = NULL, vals_alloc = 0, *codes;
161 if (nvals >= vals_alloc) {
162 vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
163 vals = realloc(vals, vals_alloc *
sizeof(
int));
164 freqs = realloc(freqs, vals_alloc *
sizeof(
int));
165 if (!vals || !freqs) {
166 if (vals) free(vals);
167 if (freqs) free(freqs);
172 freqs[nvals] = st->
freqs[i];
173 ntot += freqs[nvals];
174 if (max_val < i) max_val = i;
175 if (min_val > i) min_val = i;
186 if (nvals >= vals_alloc) {
187 vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
188 vals = realloc(vals, vals_alloc *
sizeof(
int));
189 freqs = realloc(freqs, vals_alloc *
sizeof(
int));
195 freqs[nvals] =
kh_val(st->h, k);
196 ntot += freqs[nvals];
197 if (max_val < i) max_val = i;
198 if (min_val > i) min_val = i;
204 assert(ntot == st->
nsamp);
216 free(vals); free(freqs);
223 fprintf(stderr,
"Range = %d..%d, nvals=%d, ntot=%d\n",
224 min_val, max_val, nvals, ntot);
229 for (i = 0; i < nvals; i++) {
230 dbits += freqs[i] * log((
double)freqs[i]/ntot);
234 fprintf(stderr,
"Entropy = %f\n", dbits);
238 bits = nbits(max_val - min_val) * ntot;
240 fprintf(stderr,
"BETA = %d\n", bits);
241 if (best_size > bits)
242 best_size = bits, best_encoding =
E_BETA;
247 for (bits = i = 0; i < nvals; i++)
248 bits += freqs[i]*(vals[i]+1);
250 fprintf(stderr,
"UNARY = %d\n", bits);
251 if (best_size > bits)
252 best_size = bits, best_encoding =
E_NULL;
256 for (bits = i = 0; i < nvals; i++)
257 bits += ((nbits(vals[i]-min_val+1)-1) + nbits(vals[i]-min_val+1)) * freqs[i];
259 fprintf(stderr,
"GAMMA = %d\n", bits);
260 if (best_size > bits)
261 best_size = bits, best_encoding =
E_GAMMA;
264 for (k = 0; k < 10; k++) {
265 for (bits = i = 0; i < nvals; i++) {
266 if (vals[i]-min_val < (1<<k))
267 bits += (1 + k)*freqs[i];
269 bits += (nbits(vals[i]-min_val)*2-k)*freqs[i];
273 fprintf(stderr,
"SUBEXP%d = %d\n", k, bits);
274 if (best_size > bits)
275 best_size = bits, best_encoding =
E_SUBEXP;
294 freqs = realloc(freqs, 2*nvals*
sizeof(*freqs));
295 codes = calloc(2*nvals,
sizeof(*codes));
296 if (!freqs || !codes)
303 int low1 = INT_MAX, low2 = INT_MAX;
304 int ind1 = 0, ind2 = 0;
305 for (i = 0; i < nvals; i++) {
309 low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i;
310 else if (low2 > freqs[i])
311 low2 = freqs[i], ind2 = i;
320 freqs[nvals] = low1 + low2;
329 for (i = 0; i < nvals; i++) {
331 for (k = codes[i]; k; k = codes[k])
338 for (bits = i = 0; i < nvals; i++) {
339 bits += freqs[i] * codes[i];
342 fprintf(stderr,
"HUFFMAN = %d\n", bits);
343 if (best_size >= bits)
344 best_size = bits, best_encoding =
E_HUFFMAN;
350 return best_encoding;