ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trees.c
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file trees.c
1 /* trees.c -- output deflated data using Huffman coding
2  * Copyright (C) 1995-2017 Jean-loup Gailly
3  * detect_data_type() function provided freely by Cosmin Truta, 2006
4  * For conditions of distribution and use, see copyright notice in zlib.h
5  */
6 
7 /*
8  * ALGORITHM
9  *
10  * The "deflation" process uses several Huffman trees. The more
11  * common source values are represented by shorter bit sequences.
12  *
13  * Each code tree is stored in a compressed form which is itself
14  * a Huffman encoding of the lengths of all the code strings (in
15  * ascending order by source values). The actual code strings are
16  * reconstructed from the lengths in the inflate process, as described
17  * in the deflate specification.
18  *
19  * REFERENCES
20  *
21  * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
22  * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
23  *
24  * Storer, James A.
25  * Data Compression: Methods and Theory, pp. 49-50.
26  * Computer Science Press, 1988. ISBN 0-7167-8156-5.
27  *
28  * Sedgewick, R.
29  * Algorithms, p290.
30  * Addison-Wesley, 1983. ISBN 0-201-06672-6.
31  */
32 
33 
34 /* #define GEN_TREES_H */
35 
36 #include "deflate.h"
37 
38 #ifdef ZLIB_DEBUG
39 # include <ctype.h>
40 #endif
41 
42 /* ===========================================================================
43  * Constants
44  */
45 
46 #define MAX_BL_BITS 7
47 /* Bit length codes must not exceed MAX_BL_BITS bits */
48 
49 #define END_BLOCK 256
50 /* end of block literal code */
51 
52 #define REP_3_6 16
53 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
54 
55 #define REPZ_3_10 17
56 /* repeat a zero length 3-10 times (3 bits of repeat count) */
57 
58 #define REPZ_11_138 18
59 /* repeat a zero length 11-138 times (7 bits of repeat count) */
60 
61 local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
62  = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
63 
64 local const int extra_dbits[D_CODES] /* extra bits for each distance code */
65  = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
66 
67 local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
68  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
69 
71  = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
72 /* The lengths of the bit length codes are sent in order of decreasing
73  * probability, to avoid transmitting the lengths for unused bit length codes.
74  */
75 
76 /* ===========================================================================
77  * Local data. These are initialized only once.
78  */
79 
80 #define DIST_CODE_LEN 512 /* see definition of array dist_code below */
81 
82 #if defined(GEN_TREES_H) || !defined(STDC)
83 /* non ANSI compilers may not accept trees.h */
84 
86 /* The static literal tree. Since the bit lengths are imposed, there is no
87  * need for the L_CODES extra codes used during heap construction. However
88  * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
89  * below).
90  */
91 
93 /* The static distance tree. (Actually a trivial tree since all codes use
94  * 5 bits.)
95  */
96 
98 /* Distance codes. The first 256 values correspond to the distances
99  * 3 .. 258, the last 256 values correspond to the top 8 bits of
100  * the 15 bit distances.
101  */
102 
104 /* length code for each normalized match length (0 == MIN_MATCH) */
105 
107 /* First normalized length for each code (0 = MIN_MATCH) */
108 
110 /* First normalized distance for each code (0 = distance of 1) */
111 
112 #else
113 # include "trees.h"
114 #endif /* GEN_TREES_H */
115 
117  const ct_data *static_tree; /* static tree or NULL */
118  const intf *extra_bits; /* extra bits for each code or NULL */
119  int extra_base; /* base index for extra_bits */
120  int elems; /* max number of elements in the tree */
121  int max_length; /* max bit length for the codes */
122 };
123 
126 
129 
131 {(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
132 
133 /* ===========================================================================
134  * Local (static) routines in this file.
135  */
136 
137 local void tr_static_init OF((void));
139 local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
141 local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
142 local void build_tree OF((deflate_state *s, tree_desc *desc));
143 local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
144 local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
146 local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
147  int blcodes));
148 local void compress_block OF((deflate_state *s, const ct_data *ltree,
149  const ct_data *dtree));
151 local unsigned bi_reverse OF((unsigned value, int length));
152 local void bi_windup OF((deflate_state *s));
153 local void bi_flush OF((deflate_state *s));
154 
155 #ifdef GEN_TREES_H
156 local void gen_trees_header OF((void));
157 #endif
158 
159 #ifndef ZLIB_DEBUG
160 # define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
161  /* Send a code of the given tree. c and tree must not have side effects */
162 
163 #else /* !ZLIB_DEBUG */
164 # define send_code(s, c, tree) \
165  { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
166  send_bits(s, tree[c].Code, tree[c].Len); }
167 #endif
168 
169 /* ===========================================================================
170  * Output a short LSB first on the stream.
171  * IN assertion: there is enough room in pendingBuf.
172  */
173 #define put_short(s, w) { \
174  put_byte(s, (uch)((w) & 0xff)); \
175  put_byte(s, (uch)((ush)(w) >> 8)); \
176 }
177 
178 /* ===========================================================================
179  * Send a value on a given number of bits.
180  * IN assertion: length <= 16 and value fits in length bits.
181  */
182 #ifdef ZLIB_DEBUG
183 local void send_bits OF((deflate_state *s, int value, int length));
184 
185 local void send_bits(s, value, length)
186  deflate_state *s;
187  int value; /* value to send */
188  int length; /* number of bits */
189 {
190  Tracevv((stderr," l %2d v %4x ", length, value));
191  Assert(length > 0 && length <= 15, "invalid length");
192  s->bits_sent += (ulg)length;
193 
194  /* If not enough room in bi_buf, use (valid) bits from bi_buf and
195  * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
196  * unused bits in value.
197  */
198  if (s->bi_valid > (int)Buf_size - length) {
199  s->bi_buf |= (ush)value << s->bi_valid;
200  put_short(s, s->bi_buf);
201  s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
202  s->bi_valid += length - Buf_size;
203  } else {
204  s->bi_buf |= (ush)value << s->bi_valid;
205  s->bi_valid += length;
206  }
207 }
208 #else /* !ZLIB_DEBUG */
209 
210 #define send_bits(s, value, length) \
211 { int len = length;\
212  if (s->bi_valid > (int)Buf_size - len) {\
213  int val = (int)value;\
214  s->bi_buf |= (ush)val << s->bi_valid;\
215  put_short(s, s->bi_buf);\
216  s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
217  s->bi_valid += len - Buf_size;\
218  } else {\
219  s->bi_buf |= (ush)(value) << s->bi_valid;\
220  s->bi_valid += len;\
221  }\
222 }
223 #endif /* ZLIB_DEBUG */
224 
225 
226 /* the arguments must not have side effects */
227 
228 /* ===========================================================================
229  * Initialize the various 'constant' tables.
230  */
232 {
233 #if defined(GEN_TREES_H) || !defined(STDC)
234  static int static_init_done = 0;
235  int n; /* iterates over tree elements */
236  int bits; /* bit counter */
237  int length; /* length value */
238  int code; /* code value */
239  int dist; /* distance index */
240  ush bl_count[MAX_BITS+1];
241  /* number of codes at each bit length for an optimal tree */
242 
243  if (static_init_done) return;
244 
245  /* For some embedded targets, global variables are not initialized: */
246 #ifdef NO_INIT_GLOBAL_POINTERS
252 #endif
253 
254  /* Initialize the mapping length (0..255) -> length code (0..28) */
255  length = 0;
256  for (code = 0; code < LENGTH_CODES-1; code++) {
257  base_length[code] = length;
258  for (n = 0; n < (1<<extra_lbits[code]); n++) {
259  _length_code[length++] = (uch)code;
260  }
261  }
262  Assert (length == 256, "tr_static_init: length != 256");
263  /* Note that the length 255 (match length 258) can be represented
264  * in two different ways: code 284 + 5 bits or code 285, so we
265  * overwrite length_code[255] to use the best encoding:
266  */
267  _length_code[length-1] = (uch)code;
268 
269  /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
270  dist = 0;
271  for (code = 0 ; code < 16; code++) {
272  base_dist[code] = dist;
273  for (n = 0; n < (1<<extra_dbits[code]); n++) {
274  _dist_code[dist++] = (uch)code;
275  }
276  }
277  Assert (dist == 256, "tr_static_init: dist != 256");
278  dist >>= 7; /* from now on, all distances are divided by 128 */
279  for ( ; code < D_CODES; code++) {
280  base_dist[code] = dist << 7;
281  for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
282  _dist_code[256 + dist++] = (uch)code;
283  }
284  }
285  Assert (dist == 256, "tr_static_init: 256+dist != 512");
286 
287  /* Construct the codes of the static literal tree */
288  for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
289  n = 0;
290  while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
291  while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
292  while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
293  while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
294  /* Codes 286 and 287 do not exist, but we must include them in the
295  * tree construction to get a canonical Huffman tree (longest code
296  * all ones)
297  */
298  gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
299 
300  /* The static distance tree is trivial: */
301  for (n = 0; n < D_CODES; n++) {
302  static_dtree[n].Len = 5;
303  static_dtree[n].Code = bi_reverse((unsigned)n, 5);
304  }
305  static_init_done = 1;
306 
307 # ifdef GEN_TREES_H
308  gen_trees_header();
309 # endif
310 #endif /* defined(GEN_TREES_H) || !defined(STDC) */
311 }
312 
313 /* ===========================================================================
314  * Genererate the file trees.h describing the static trees.
315  */
316 #ifdef GEN_TREES_H
317 # ifndef ZLIB_DEBUG
318 # include <stdio.h>
319 # endif
320 
321 # define SEPARATOR(i, last, width) \
322  ((i) == (last)? "\n};\n\n" : \
323  ((i) % (width) == (width)-1 ? ",\n" : ", "))
324 
325 void gen_trees_header()
326 {
327  FILE *header = fopen("trees.h", "w");
328  int i;
329 
330  Assert (header != NULL, "Can't open trees.h");
331  fprintf(header,
332  "/* header created automatically with -DGEN_TREES_H */\n\n");
333 
334  fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
335  for (i = 0; i < L_CODES+2; i++) {
336  fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
337  static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
338  }
339 
340  fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
341  for (i = 0; i < D_CODES; i++) {
342  fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
343  static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
344  }
345 
346  fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
347  for (i = 0; i < DIST_CODE_LEN; i++) {
348  fprintf(header, "%2u%s", _dist_code[i],
349  SEPARATOR(i, DIST_CODE_LEN-1, 20));
350  }
351 
352  fprintf(header,
353  "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
354  for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
355  fprintf(header, "%2u%s", _length_code[i],
356  SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
357  }
358 
359  fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
360  for (i = 0; i < LENGTH_CODES; i++) {
361  fprintf(header, "%1u%s", base_length[i],
362  SEPARATOR(i, LENGTH_CODES-1, 20));
363  }
364 
365  fprintf(header, "local const int base_dist[D_CODES] = {\n");
366  for (i = 0; i < D_CODES; i++) {
367  fprintf(header, "%5u%s", base_dist[i],
368  SEPARATOR(i, D_CODES-1, 10));
369  }
370 
371  fclose(header);
372 }
373 #endif /* GEN_TREES_H */
374 
375 /* ===========================================================================
376  * Initialize the tree data structures for a new zlib stream.
377  */
379  deflate_state *s;
380 {
381  tr_static_init();
382 
383  s->l_desc.dyn_tree = s->dyn_ltree;
384  s->l_desc.stat_desc = &static_l_desc;
385 
386  s->d_desc.dyn_tree = s->dyn_dtree;
387  s->d_desc.stat_desc = &static_d_desc;
388 
389  s->bl_desc.dyn_tree = s->bl_tree;
390  s->bl_desc.stat_desc = &static_bl_desc;
391 
392  s->bi_buf = 0;
393  s->bi_valid = 0;
394 #ifdef ZLIB_DEBUG
395  s->compressed_len = 0L;
396  s->bits_sent = 0L;
397 #endif
398 
399  /* Initialize the first block of the first file: */
400  init_block(s);
401 }
402 
403 /* ===========================================================================
404  * Initialize a new block.
405  */
407  deflate_state *s;
408 {
409  int n; /* iterates over tree elements */
410 
411  /* Initialize the trees. */
412  for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
413  for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
414  for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
415 
416  s->dyn_ltree[END_BLOCK].Freq = 1;
417  s->opt_len = s->static_len = 0L;
418  s->last_lit = s->matches = 0;
419 }
420 
421 #define SMALLEST 1
422 /* Index within the heap array of least frequent node in the Huffman tree */
423 
424 
425 /* ===========================================================================
426  * Remove the smallest element from the heap and recreate the heap with
427  * one less element. Updates heap and heap_len.
428  */
429 #define pqremove(s, tree, top) \
430 {\
431  top = s->heap[SMALLEST]; \
432  s->heap[SMALLEST] = s->heap[s->heap_len--]; \
433  pqdownheap(s, tree, SMALLEST); \
434 }
435 
436 /* ===========================================================================
437  * Compares to subtrees, using the tree depth as tie breaker when
438  * the subtrees have equal frequency. This minimizes the worst case length.
439  */
440 #define smaller(tree, n, m, depth) \
441  (tree[n].Freq < tree[m].Freq || \
442  (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
443 
444 /* ===========================================================================
445  * Restore the heap property by moving down the tree starting at node k,
446  * exchanging a node with the smallest of its two sons if necessary, stopping
447  * when the heap property is re-established (each father smaller than its
448  * two sons).
449  */
450 local void pqdownheap(s, tree, k)
451  deflate_state *s;
452  ct_data *tree; /* the tree to restore */
453  int k; /* node to move down */
454 {
455  int v = s->heap[k];
456  int j = k << 1; /* left son of k */
457  while (j <= s->heap_len) {
458  /* Set j to the smallest of the two sons: */
459  if (j < s->heap_len &&
460  smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
461  j++;
462  }
463  /* Exit if v is smaller than both sons */
464  if (smaller(tree, v, s->heap[j], s->depth)) break;
465 
466  /* Exchange v with the smallest son */
467  s->heap[k] = s->heap[j]; k = j;
468 
469  /* And continue down the tree, setting j to the left son of k */
470  j <<= 1;
471  }
472  s->heap[k] = v;
473 }
474 
475 /* ===========================================================================
476  * Compute the optimal bit lengths for a tree and update the total bit length
477  * for the current block.
478  * IN assertion: the fields freq and dad are set, heap[heap_max] and
479  * above are the tree nodes sorted by increasing frequency.
480  * OUT assertions: the field len is set to the optimal bit length, the
481  * array bl_count contains the frequencies for each bit length.
482  * The length opt_len is updated; static_len is also updated if stree is
483  * not null.
484  */
485 local void gen_bitlen(s, desc)
486  deflate_state *s;
487  tree_desc *desc; /* the tree descriptor */
488 {
489  ct_data *tree = desc->dyn_tree;
490  int max_code = desc->max_code;
491  const ct_data *stree = desc->stat_desc->static_tree;
492  const intf *extra = desc->stat_desc->extra_bits;
493  int base = desc->stat_desc->extra_base;
494  int max_length = desc->stat_desc->max_length;
495  int h; /* heap index */
496  int n, m; /* iterate over the tree elements */
497  int bits; /* bit length */
498  int xbits; /* extra bits */
499  ush f; /* frequency */
500  int overflow = 0; /* number of elements with bit length too large */
501 
502  for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
503 
504  /* In a first pass, compute the optimal bit lengths (which may
505  * overflow in the case of the bit length tree).
506  */
507  tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
508 
509  for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
510  n = s->heap[h];
511  bits = tree[tree[n].Dad].Len + 1;
512  if (bits > max_length) bits = max_length, overflow++;
513  tree[n].Len = (ush)bits;
514  /* We overwrite tree[n].Dad which is no longer needed */
515 
516  if (n > max_code) continue; /* not a leaf node */
517 
518  s->bl_count[bits]++;
519  xbits = 0;
520  if (n >= base) xbits = extra[n-base];
521  f = tree[n].Freq;
522  s->opt_len += (ulg)f * (unsigned)(bits + xbits);
523  if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
524  }
525  if (overflow == 0) return;
526 
527  Tracev((stderr,"\nbit length overflow\n"));
528  /* This happens for example on obj2 and pic of the Calgary corpus */
529 
530  /* Find the first bit length which could increase: */
531  do {
532  bits = max_length-1;
533  while (s->bl_count[bits] == 0) bits--;
534  s->bl_count[bits]--; /* move one leaf down the tree */
535  s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
536  s->bl_count[max_length]--;
537  /* The brother of the overflow item also moves one step up,
538  * but this does not affect bl_count[max_length]
539  */
540  overflow -= 2;
541  } while (overflow > 0);
542 
543  /* Now recompute all bit lengths, scanning in increasing frequency.
544  * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
545  * lengths instead of fixing only the wrong ones. This idea is taken
546  * from 'ar' written by Haruhiko Okumura.)
547  */
548  for (bits = max_length; bits != 0; bits--) {
549  n = s->bl_count[bits];
550  while (n != 0) {
551  m = s->heap[--h];
552  if (m > max_code) continue;
553  if ((unsigned) tree[m].Len != (unsigned) bits) {
554  Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
555  s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
556  tree[m].Len = (ush)bits;
557  }
558  n--;
559  }
560  }
561 }
562 
563 /* ===========================================================================
564  * Generate the codes for a given tree and bit counts (which need not be
565  * optimal).
566  * IN assertion: the array bl_count contains the bit length statistics for
567  * the given tree and the field len is set for all tree elements.
568  * OUT assertion: the field code is set for all tree elements of non
569  * zero code length.
570  */
571 local void gen_codes (tree, max_code, bl_count)
572  ct_data *tree; /* the tree to decorate */
573  int max_code; /* largest code with non zero frequency */
574  ushf *bl_count; /* number of codes at each bit length */
575 {
576  ush next_code[MAX_BITS+1]; /* next code value for each bit length */
577  unsigned code = 0; /* running code value */
578  int bits; /* bit index */
579  int n; /* code index */
580 
581  /* The distribution counts are first used to generate the code values
582  * without bit reversal.
583  */
584  for (bits = 1; bits <= MAX_BITS; bits++) {
585  code = (code + bl_count[bits-1]) << 1;
586  next_code[bits] = (ush)code;
587  }
588  /* Check that the bit counts in bl_count are consistent. The last code
589  * must be all ones.
590  */
591  Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
592  "inconsistent bit counts");
593  Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
594 
595  for (n = 0; n <= max_code; n++) {
596  int len = tree[n].Len;
597  if (len == 0) continue;
598  /* Now reverse the bits */
599  tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
600 
601  Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
602  n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
603  }
604 }
605 
606 /* ===========================================================================
607  * Construct one Huffman tree and assigns the code bit strings and lengths.
608  * Update the total bit length for the current block.
609  * IN assertion: the field freq is set for all tree elements.
610  * OUT assertions: the fields len and code are set to the optimal bit length
611  * and corresponding code. The length opt_len is updated; static_len is
612  * also updated if stree is not null. The field max_code is set.
613  */
614 local void build_tree(s, desc)
615  deflate_state *s;
616  tree_desc *desc; /* the tree descriptor */
617 {
618  ct_data *tree = desc->dyn_tree;
619  const ct_data *stree = desc->stat_desc->static_tree;
620  int elems = desc->stat_desc->elems;
621  int n, m; /* iterate over heap elements */
622  int max_code = -1; /* largest code with non zero frequency */
623  int node; /* new node being created */
624 
625  /* Construct the initial heap, with least frequent element in
626  * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
627  * heap[0] is not used.
628  */
629  s->heap_len = 0, s->heap_max = HEAP_SIZE;
630 
631  for (n = 0; n < elems; n++) {
632  if (tree[n].Freq != 0) {
633  s->heap[++(s->heap_len)] = max_code = n;
634  s->depth[n] = 0;
635  } else {
636  tree[n].Len = 0;
637  }
638  }
639 
640  /* The pkzip format requires that at least one distance code exists,
641  * and that at least one bit should be sent even if there is only one
642  * possible code. So to avoid special checks later on we force at least
643  * two codes of non zero frequency.
644  */
645  while (s->heap_len < 2) {
646  node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
647  tree[node].Freq = 1;
648  s->depth[node] = 0;
649  s->opt_len--; if (stree) s->static_len -= stree[node].Len;
650  /* node is 0 or 1 so it does not have extra bits */
651  }
652  desc->max_code = max_code;
653 
654  /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
655  * establish sub-heaps of increasing lengths:
656  */
657  for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
658 
659  /* Construct the Huffman tree by repeatedly combining the least two
660  * frequent nodes.
661  */
662  node = elems; /* next internal node of the tree */
663  do {
664  pqremove(s, tree, n); /* n = node of least frequency */
665  m = s->heap[SMALLEST]; /* m = node of next least frequency */
666 
667  s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
668  s->heap[--(s->heap_max)] = m;
669 
670  /* Create a new node father of n and m */
671  tree[node].Freq = tree[n].Freq + tree[m].Freq;
672  s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
673  s->depth[n] : s->depth[m]) + 1);
674  tree[n].Dad = tree[m].Dad = (ush)node;
675 #ifdef DUMP_BL_TREE
676  if (tree == s->bl_tree) {
677  fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
678  node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
679  }
680 #endif
681  /* and insert the new node in the heap */
682  s->heap[SMALLEST] = node++;
683  pqdownheap(s, tree, SMALLEST);
684 
685  } while (s->heap_len >= 2);
686 
687  s->heap[--(s->heap_max)] = s->heap[SMALLEST];
688 
689  /* At this point, the fields freq and dad are set. We can now
690  * generate the bit lengths.
691  */
692  gen_bitlen(s, (tree_desc *)desc);
693 
694  /* The field len is now set, we can generate the bit codes */
695  gen_codes ((ct_data *)tree, max_code, s->bl_count);
696 }
697 
698 /* ===========================================================================
699  * Scan a literal or distance tree to determine the frequencies of the codes
700  * in the bit length tree.
701  */
702 local void scan_tree (s, tree, max_code)
703  deflate_state *s;
704  ct_data *tree; /* the tree to be scanned */
705  int max_code; /* and its largest code of non zero frequency */
706 {
707  int n; /* iterates over all tree elements */
708  int prevlen = -1; /* last emitted length */
709  int curlen; /* length of current code */
710  int nextlen = tree[0].Len; /* length of next code */
711  int count = 0; /* repeat count of the current code */
712  int max_count = 7; /* max repeat count */
713  int min_count = 4; /* min repeat count */
714 
715  if (nextlen == 0) max_count = 138, min_count = 3;
716  tree[max_code+1].Len = (ush)0xffff; /* guard */
717 
718  for (n = 0; n <= max_code; n++) {
719  curlen = nextlen; nextlen = tree[n+1].Len;
720  if (++count < max_count && curlen == nextlen) {
721  continue;
722  } else if (count < min_count) {
723  s->bl_tree[curlen].Freq += count;
724  } else if (curlen != 0) {
725  if (curlen != prevlen) s->bl_tree[curlen].Freq++;
726  s->bl_tree[REP_3_6].Freq++;
727  } else if (count <= 10) {
728  s->bl_tree[REPZ_3_10].Freq++;
729  } else {
730  s->bl_tree[REPZ_11_138].Freq++;
731  }
732  count = 0; prevlen = curlen;
733  if (nextlen == 0) {
734  max_count = 138, min_count = 3;
735  } else if (curlen == nextlen) {
736  max_count = 6, min_count = 3;
737  } else {
738  max_count = 7, min_count = 4;
739  }
740  }
741 }
742 
743 /* ===========================================================================
744  * Send a literal or distance tree in compressed form, using the codes in
745  * bl_tree.
746  */
747 local void send_tree (s, tree, max_code)
748  deflate_state *s;
749  ct_data *tree; /* the tree to be scanned */
750  int max_code; /* and its largest code of non zero frequency */
751 {
752  int n; /* iterates over all tree elements */
753  int prevlen = -1; /* last emitted length */
754  int curlen; /* length of current code */
755  int nextlen = tree[0].Len; /* length of next code */
756  int count = 0; /* repeat count of the current code */
757  int max_count = 7; /* max repeat count */
758  int min_count = 4; /* min repeat count */
759 
760  /* tree[max_code+1].Len = -1; */ /* guard already set */
761  if (nextlen == 0) max_count = 138, min_count = 3;
762 
763  for (n = 0; n <= max_code; n++) {
764  curlen = nextlen; nextlen = tree[n+1].Len;
765  if (++count < max_count && curlen == nextlen) {
766  continue;
767  } else if (count < min_count) {
768  do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
769 
770  } else if (curlen != 0) {
771  if (curlen != prevlen) {
772  send_code(s, curlen, s->bl_tree); count--;
773  }
774  Assert(count >= 3 && count <= 6, " 3_6?");
775  send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
776 
777  } else if (count <= 10) {
778  send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
779 
780  } else {
781  send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
782  }
783  count = 0; prevlen = curlen;
784  if (nextlen == 0) {
785  max_count = 138, min_count = 3;
786  } else if (curlen == nextlen) {
787  max_count = 6, min_count = 3;
788  } else {
789  max_count = 7, min_count = 4;
790  }
791  }
792 }
793 
794 /* ===========================================================================
795  * Construct the Huffman tree for the bit lengths and return the index in
796  * bl_order of the last bit length code to send.
797  */
799  deflate_state *s;
800 {
801  int max_blindex; /* index of last bit length code of non zero freq */
802 
803  /* Determine the bit length frequencies for literal and distance trees */
804  scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
805  scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
806 
807  /* Build the bit length tree: */
808  build_tree(s, (tree_desc *)(&(s->bl_desc)));
809  /* opt_len now includes the length of the tree representations, except
810  * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
811  */
812 
813  /* Determine the number of bit length codes to send. The pkzip format
814  * requires that at least 4 bit length codes be sent. (appnote.txt says
815  * 3 but the actual value used is 4.)
816  */
817  for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
818  if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
819  }
820  /* Update opt_len to include the bit length tree and counts */
821  s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4;
822  Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
823  s->opt_len, s->static_len));
824 
825  return max_blindex;
826 }
827 
828 /* ===========================================================================
829  * Send the header for a block using dynamic Huffman trees: the counts, the
830  * lengths of the bit length codes, the literal tree and the distance tree.
831  * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
832  */
833 local void send_all_trees(s, lcodes, dcodes, blcodes)
834  deflate_state *s;
835  int lcodes, dcodes, blcodes; /* number of codes for each tree */
836 {
837  int rank; /* index in bl_order */
838 
839  Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
840  Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
841  "too many codes");
842  Tracev((stderr, "\nbl counts: "));
843  send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
844  send_bits(s, dcodes-1, 5);
845  send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
846  for (rank = 0; rank < blcodes; rank++) {
847  Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
848  send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
849  }
850  Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
851 
852  send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
853  Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
854 
855  send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
856  Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
857 }
858 
859 /* ===========================================================================
860  * Send a stored block
861  */
862 void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
863  deflate_state *s;
864  charf *buf; /* input block */
865  ulg stored_len; /* length of input block */
866  int last; /* one if this is the last block for a file */
867 {
868  send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
869  bi_windup(s); /* align on byte boundary */
870  put_short(s, (ush)stored_len);
871  put_short(s, (ush)~stored_len);
872  zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
873  s->pending += stored_len;
874 #ifdef ZLIB_DEBUG
875  s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
876  s->compressed_len += (stored_len + 4) << 3;
877  s->bits_sent += 2*16;
878  s->bits_sent += stored_len<<3;
879 #endif
880 }
881 
882 /* ===========================================================================
883  * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
884  */
886  deflate_state *s;
887 {
888  bi_flush(s);
889 }
890 
891 /* ===========================================================================
892  * Send one empty static block to give enough lookahead for inflate.
893  * This takes 10 bits, of which 7 may remain in the bit buffer.
894  */
896  deflate_state *s;
897 {
898  send_bits(s, STATIC_TREES<<1, 3);
900 #ifdef ZLIB_DEBUG
901  s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
902 #endif
903  bi_flush(s);
904 }
905 
906 /* ===========================================================================
907  * Determine the best encoding for the current block: dynamic trees, static
908  * trees or store, and write out the encoded block.
909  */
910 void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
911  deflate_state *s;
912  charf *buf; /* input block, or NULL if too old */
913  ulg stored_len; /* length of input block */
914  int last; /* one if this is the last block for a file */
915 {
916  ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
917  int max_blindex = 0; /* index of last bit length code of non zero freq */
918 
919  /* Build the Huffman trees unless a stored block is forced */
920  if (s->level > 0) {
921 
922  /* Check if the file is binary or text */
923  if (s->strm->data_type == Z_UNKNOWN)
924  s->strm->data_type = detect_data_type(s);
925 
926  /* Construct the literal and distance trees */
927  build_tree(s, (tree_desc *)(&(s->l_desc)));
928  Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
929  s->static_len));
930 
931  build_tree(s, (tree_desc *)(&(s->d_desc)));
932  Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
933  s->static_len));
934  /* At this point, opt_len and static_len are the total bit lengths of
935  * the compressed block data, excluding the tree representations.
936  */
937 
938  /* Build the bit length tree for the above two trees, and get the index
939  * in bl_order of the last bit length code to send.
940  */
941  max_blindex = build_bl_tree(s);
942 
943  /* Determine the best encoding. Compute the block lengths in bytes. */
944  opt_lenb = (s->opt_len+3+7)>>3;
945  static_lenb = (s->static_len+3+7)>>3;
946 
947  Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
948  opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
949  s->last_lit));
950 
951  if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
952 
953  } else {
954  Assert(buf != (char*)0, "lost buf");
955  opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
956  }
957 
958 #ifdef FORCE_STORED
959  if (buf != (char*)0) { /* force stored block */
960 #else
961  if (stored_len+4 <= opt_lenb && buf != (char*)0) {
962  /* 4: two words for the lengths */
963 #endif
964  /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
965  * Otherwise we can't have processed more than WSIZE input bytes since
966  * the last block flush, because compression would have been
967  * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
968  * transform a block into a stored block.
969  */
970  _tr_stored_block(s, buf, stored_len, last);
971 
972 #ifdef FORCE_STATIC
973  } else if (static_lenb >= 0) { /* force static trees */
974 #else
975  } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
976 #endif
977  send_bits(s, (STATIC_TREES<<1)+last, 3);
978  compress_block(s, (const ct_data *)static_ltree,
979  (const ct_data *)static_dtree);
980 #ifdef ZLIB_DEBUG
981  s->compressed_len += 3 + s->static_len;
982 #endif
983  } else {
984  send_bits(s, (DYN_TREES<<1)+last, 3);
985  send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
986  max_blindex+1);
987  compress_block(s, (const ct_data *)s->dyn_ltree,
988  (const ct_data *)s->dyn_dtree);
989 #ifdef ZLIB_DEBUG
990  s->compressed_len += 3 + s->opt_len;
991 #endif
992  }
993  Assert (s->compressed_len == s->bits_sent, "bad compressed size");
994  /* The above check is made mod 2^32, for files larger than 512 MB
995  * and uLong implemented on 32 bits.
996  */
997  init_block(s);
998 
999  if (last) {
1000  bi_windup(s);
1001 #ifdef ZLIB_DEBUG
1002  s->compressed_len += 7; /* align on byte boundary */
1003 #endif
1004  }
1005  Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1006  s->compressed_len-7*last));
1007 }
1008 
1009 /* ===========================================================================
1010  * Save the match info and tally the frequency counts. Return true if
1011  * the current block must be flushed.
1012  */
1014  deflate_state *s;
1015  unsigned dist; /* distance of matched string */
1016  unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
1017 {
1018  s->d_buf[s->last_lit] = (ush)dist;
1019  s->l_buf[s->last_lit++] = (uch)lc;
1020  if (dist == 0) {
1021  /* lc is the unmatched char */
1022  s->dyn_ltree[lc].Freq++;
1023  } else {
1024  s->matches++;
1025  /* Here, lc is the match length - MIN_MATCH */
1026  dist--; /* dist = match distance - 1 */
1027  Assert((ush)dist < (ush)MAX_DIST(s) &&
1028  (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1029  (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
1030 
1031  s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
1032  s->dyn_dtree[d_code(dist)].Freq++;
1033  }
1034 
1035 #ifdef TRUNCATE_BLOCK
1036  /* Try to guess if it is profitable to stop the current block here */
1037  if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
1038  /* Compute an upper bound for the compressed length */
1039  ulg out_length = (ulg)s->last_lit*8L;
1040  ulg in_length = (ulg)((long)s->strstart - s->block_start);
1041  int dcode;
1042  for (dcode = 0; dcode < D_CODES; dcode++) {
1043  out_length += (ulg)s->dyn_dtree[dcode].Freq *
1044  (5L+extra_dbits[dcode]);
1045  }
1046  out_length >>= 3;
1047  Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
1048  s->last_lit, in_length, out_length,
1049  100L - out_length*100L/in_length));
1050  if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
1051  }
1052 #endif
1053  return (s->last_lit == s->lit_bufsize-1);
1054  /* We avoid equality with lit_bufsize because of wraparound at 64K
1055  * on 16 bit machines and because stored blocks are restricted to
1056  * 64K-1 bytes.
1057  */
1058 }
1059 
1060 /* ===========================================================================
1061  * Send the block data compressed using the given Huffman trees
1062  */
1063 local void compress_block(s, ltree, dtree)
1064  deflate_state *s;
1065  const ct_data *ltree; /* literal tree */
1066  const ct_data *dtree; /* distance tree */
1067 {
1068  unsigned dist; /* distance of matched string */
1069  int lc; /* match length or unmatched char (if dist == 0) */
1070  unsigned lx = 0; /* running index in l_buf */
1071  unsigned code; /* the code to send */
1072  int extra; /* number of extra bits to send */
1073 
1074  if (s->last_lit != 0) do {
1075  dist = s->d_buf[lx];
1076  lc = s->l_buf[lx++];
1077  if (dist == 0) {
1078  send_code(s, lc, ltree); /* send a literal byte */
1079  Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1080  } else {
1081  /* Here, lc is the match length - MIN_MATCH */
1082  code = _length_code[lc];
1083  send_code(s, code+LITERALS+1, ltree); /* send the length code */
1084  extra = extra_lbits[code];
1085  if (extra != 0) {
1086  lc -= base_length[code];
1087  send_bits(s, lc, extra); /* send the extra length bits */
1088  }
1089  dist--; /* dist is now the match distance - 1 */
1090  code = d_code(dist);
1091  Assert (code < D_CODES, "bad d_code");
1092 
1093  send_code(s, code, dtree); /* send the distance code */
1094  extra = extra_dbits[code];
1095  if (extra != 0) {
1096  dist -= (unsigned)base_dist[code];
1097  send_bits(s, dist, extra); /* send the extra distance bits */
1098  }
1099  } /* literal or match pair ? */
1100 
1101  /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1102  Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
1103  "pendingBuf overflow");
1104 
1105  } while (lx < s->last_lit);
1106 
1107  send_code(s, END_BLOCK, ltree);
1108 }
1109 
1110 /* ===========================================================================
1111  * Check if the data type is TEXT or BINARY, using the following algorithm:
1112  * - TEXT if the two conditions below are satisfied:
1113  * a) There are no non-portable control characters belonging to the
1114  * "black list" (0..6, 14..25, 28..31).
1115  * b) There is at least one printable character belonging to the
1116  * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1117  * - BINARY otherwise.
1118  * - The following partially-portable control characters form a
1119  * "gray list" that is ignored in this detection algorithm:
1120  * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1121  * IN assertion: the fields Freq of dyn_ltree are set.
1122  */
1124  deflate_state *s;
1125 {
1126  /* black_mask is the bit mask of black-listed bytes
1127  * set bits 0..6, 14..25, and 28..31
1128  * 0xf3ffc07f = binary 11110011111111111100000001111111
1129  */
1130  unsigned long black_mask = 0xf3ffc07fUL;
1131  int n;
1132 
1133  /* Check for non-textual ("black-listed") bytes. */
1134  for (n = 0; n <= 31; n++, black_mask >>= 1)
1135  if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
1136  return Z_BINARY;
1137 
1138  /* Check for textual ("white-listed") bytes. */
1139  if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
1140  || s->dyn_ltree[13].Freq != 0)
1141  return Z_TEXT;
1142  for (n = 32; n < LITERALS; n++)
1143  if (s->dyn_ltree[n].Freq != 0)
1144  return Z_TEXT;
1145 
1146  /* There are no "black-listed" or "white-listed" bytes:
1147  * this stream either is empty or has tolerated ("gray-listed") bytes only.
1148  */
1149  return Z_BINARY;
1150 }
1151 
1152 /* ===========================================================================
1153  * Reverse the first len bits of a code, using straightforward code (a faster
1154  * method would use a table)
1155  * IN assertion: 1 <= len <= 15
1156  */
1158  unsigned code; /* the value to invert */
1159  int len; /* its bit length */
1160 {
1161  register unsigned res = 0;
1162  do {
1163  res |= code & 1;
1164  code >>= 1, res <<= 1;
1165  } while (--len > 0);
1166  return res >> 1;
1167 }
1168 
1169 /* ===========================================================================
1170  * Flush the bit buffer, keeping at most 7 bits in it.
1171  */
1173  deflate_state *s;
1174 {
1175  if (s->bi_valid == 16) {
1176  put_short(s, s->bi_buf);
1177  s->bi_buf = 0;
1178  s->bi_valid = 0;
1179  } else if (s->bi_valid >= 8) {
1180  put_byte(s, (Byte)s->bi_buf);
1181  s->bi_buf >>= 8;
1182  s->bi_valid -= 8;
1183  }
1184 }
1185 
1186 /* ===========================================================================
1187  * Flush the bit buffer and align the output on a byte boundary
1188  */
1190  deflate_state *s;
1191 {
1192  if (s->bi_valid > 8) {
1193  put_short(s, s->bi_buf);
1194  } else if (s->bi_valid > 0) {
1195  put_byte(s, (Byte)s->bi_buf);
1196  }
1197  s->bi_buf = 0;
1198  s->bi_valid = 0;
1199 #ifdef ZLIB_DEBUG
1200  s->bits_sent = (s->bits_sent+7) & ~7;
1201 #endif
1202 }