NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
gzread.c
Go to the documentation of this file.
1 /* gzread.c -- zlib functions for reading gzip files
2  * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #include "gzguts.h"
7 
8 /* Local functions */
9 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10 local int gz_avail OF((gz_statep));
11 local int gz_look OF((gz_statep));
13 local int gz_fetch OF((gz_statep));
15 
16 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
17  state->fd, and update state->eof, state->err, and state->msg as appropriate.
18  This function needs to loop on read(), since read() is not guaranteed to
19  read the number of bytes requested, depending on the type of descriptor. */
20 local int gz_load(state, buf, len, have)
21  gz_statep state;
22  unsigned char *buf;
23  unsigned len;
24  unsigned *have;
25 {
26  int ret;
27 
28  *have = 0;
29  do {
30  ret = read(state->fd, buf + *have, len - *have);
31  if (ret <= 0)
32  break;
33  *have += ret;
34  } while (*have < len);
35  if (ret < 0) {
36  gz_error(state, Z_ERRNO, zstrerror());
37  return -1;
38  }
39  if (ret == 0)
40  state->eof = 1;
41  return 0;
42 }
43 
44 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
45  error, 0 otherwise. Note that the eof flag is set when the end of the input
46  file is reached, even though there may be unused data in the buffer. Once
47  that data has been used, no more attempts will be made to read the file.
48  If strm->avail_in != 0, then the current data is moved to the beginning of
49  the input buffer, and then the remainder of the buffer is loaded with the
50  available data from the input file. */
51 local int gz_avail(state)
52  gz_statep state;
53 {
54  unsigned got;
55  z_streamp strm = &(state->strm);
56 
57  if (state->err != Z_OK && state->err != Z_BUF_ERROR)
58  return -1;
59  if (state->eof == 0) {
60  if (strm->avail_in) { /* copy what's there to the start */
61  unsigned char *p = state->in, *q = strm->next_in;
62  unsigned n = strm->avail_in;
63  do {
64  *p++ = *q++;
65  } while (--n);
66  }
67  if (gz_load(state, state->in + strm->avail_in,
68  state->size - strm->avail_in, &got) == -1)
69  return -1;
70  strm->avail_in += got;
71  strm->next_in = state->in;
72  }
73  return 0;
74 }
75 
76 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
77  If this is the first time in, allocate required memory. state->how will be
78  left unchanged if there is no more input data available, will be set to COPY
79  if there is no gzip header and direct copying will be performed, or it will
80  be set to GZIP for decompression. If direct copying, then leftover input
81  data from the input buffer will be copied to the output buffer. In that
82  case, all further file reads will be directly to either the output buffer or
83  a user buffer. If decompressing, the inflate state will be initialized.
84  gz_look() will return 0 on success or -1 on failure. */
85 local int gz_look(state)
86  gz_statep state;
87 {
88  z_streamp strm = &(state->strm);
89 
90  /* allocate read buffers and inflate memory */
91  if (state->size == 0) {
92  /* allocate buffers */
93  state->in = malloc(state->want);
94  state->out = malloc(state->want << 1);
95  if (state->in == NULL || state->out == NULL) {
96  if (state->out != NULL)
97  free(state->out);
98  if (state->in != NULL)
99  free(state->in);
100  gz_error(state, Z_MEM_ERROR, "out of memory");
101  return -1;
102  }
103  state->size = state->want;
104 
105  /* allocate inflate memory */
106  state->strm.zalloc = Z_NULL;
107  state->strm.zfree = Z_NULL;
108  state->strm.opaque = Z_NULL;
109  state->strm.avail_in = 0;
110  state->strm.next_in = Z_NULL;
111  if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
112  free(state->out);
113  free(state->in);
114  state->size = 0;
115  gz_error(state, Z_MEM_ERROR, "out of memory");
116  return -1;
117  }
118  }
119 
120  /* get at least the magic bytes in the input buffer */
121  if (strm->avail_in < 2) {
122  if (gz_avail(state) == -1)
123  return -1;
124  if (strm->avail_in == 0)
125  return 0;
126  }
127 
128  /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
129  a logical dilemma here when considering the case of a partially written
130  gzip file, to wit, if a single 31 byte is written, then we cannot tell
131  whether this is a single-byte file, or just a partially written gzip
132  file -- for here we assume that if a gzip file is being written, then
133  the header will be written in a single operation, so that reading a
134  single byte is sufficient indication that it is not a gzip file) */
135  if (strm->avail_in > 1 &&
136  strm->next_in[0] == 31 && strm->next_in[1] == 139) {
137  inflateReset(strm);
138  state->how = GZIP;
139  state->direct = 0;
140  return 0;
141  }
142 
143  /* no gzip header -- if we were decoding gzip before, then this is trailing
144  garbage. Ignore the trailing garbage and finish. */
145  if (state->direct == 0) {
146  strm->avail_in = 0;
147  state->eof = 1;
148  state->x.have = 0;
149  return 0;
150  }
151 
152  /* doing raw i/o, copy any leftover input to output -- this assumes that
153  the output buffer is larger than the input buffer, which also assures
154  space for gzungetc() */
155  state->x.next = state->out;
156  if (strm->avail_in) {
157  memcpy(state->x.next, strm->next_in, strm->avail_in);
158  state->x.have = strm->avail_in;
159  strm->avail_in = 0;
160  }
161  state->how = COPY;
162  state->direct = 1;
163  return 0;
164 }
165 
166 /* Decompress from input to the provided next_out and avail_out in the state.
167  On return, state->x.have and state->x.next point to the just decompressed
168  data. If the gzip stream completes, state->how is reset to LOOK to look for
169  the next gzip stream or raw data, once state->x.have is depleted. Returns 0
170  on success, -1 on failure. */
171 local int gz_decomp(state)
172  gz_statep state;
173 {
174  int ret = Z_OK;
175  unsigned had;
176  z_streamp strm = &(state->strm);
177 
178  /* fill output buffer up to end of deflate stream */
179  had = strm->avail_out;
180  do {
181  /* get more input for inflate() */
182  if (strm->avail_in == 0 && gz_avail(state) == -1)
183  return -1;
184  if (strm->avail_in == 0) {
185  gz_error(state, Z_BUF_ERROR, "unexpected end of file");
186  break;
187  }
188 
189  /* decompress and handle errors */
190  ret = inflate(strm, Z_NO_FLUSH);
191  if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
192  gz_error(state, Z_STREAM_ERROR,
193  "internal error: inflate stream corrupt");
194  return -1;
195  }
196  if (ret == Z_MEM_ERROR) {
197  gz_error(state, Z_MEM_ERROR, "out of memory");
198  return -1;
199  }
200  if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
201  gz_error(state, Z_DATA_ERROR,
202  strm->msg == NULL ? "compressed data error" : strm->msg);
203  return -1;
204  }
205  } while (strm->avail_out && ret != Z_STREAM_END);
206 
207  /* update available output */
208  state->x.have = had - strm->avail_out;
209  state->x.next = strm->next_out - state->x.have;
210 
211  /* if the gzip stream completed successfully, look for another */
212  if (ret == Z_STREAM_END)
213  state->how = LOOK;
214 
215  /* good decompression */
216  return 0;
217 }
218 
219 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
220  Data is either copied from the input file or decompressed from the input
221  file depending on state->how. If state->how is LOOK, then a gzip header is
222  looked for to determine whether to copy or decompress. Returns -1 on error,
223  otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
224  end of the input file has been reached and all data has been processed. */
225 local int gz_fetch(state)
226  gz_statep state;
227 {
228  z_streamp strm = &(state->strm);
229 
230  do {
231  switch(state->how) {
232  case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
233  if (gz_look(state) == -1)
234  return -1;
235  if (state->how == LOOK)
236  return 0;
237  break;
238  case COPY: /* -> COPY */
239  if (gz_load(state, state->out, state->size << 1, &(state->x.have))
240  == -1)
241  return -1;
242  state->x.next = state->out;
243  return 0;
244  case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
245  strm->avail_out = state->size << 1;
246  strm->next_out = state->out;
247  if (gz_decomp(state) == -1)
248  return -1;
249  }
250  } while (state->x.have == 0 && (!state->eof || strm->avail_in));
251  return 0;
252 }
253 
254 /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
255 local int gz_skip(state, len)
256  gz_statep state;
257  z_off64_t len;
258 {
259  unsigned n;
260 
261  /* skip over len bytes or reach end-of-file, whichever comes first */
262  while (len)
263  /* skip over whatever is in output buffer */
264  if (state->x.have) {
265  n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
266  (unsigned)len : state->x.have;
267  state->x.have -= n;
268  state->x.next += n;
269  state->x.pos += n;
270  len -= n;
271  }
272 
273  /* output buffer empty -- return if we're at the end of the input */
274  else if (state->eof && state->strm.avail_in == 0)
275  break;
276 
277  /* need more data to skip -- load up output buffer */
278  else {
279  /* get more output, looking for header if required */
280  if (gz_fetch(state) == -1)
281  return -1;
282  }
283  return 0;
284 }
285 
286 /* -- see zlib.h -- */
287 int ZEXPORT gzread(file, buf, len)
288  gzFile file;
289  voidp buf;
290  unsigned len;
291 {
292  unsigned got, n;
293  gz_statep state;
294  z_streamp strm;
295 
296  /* get internal structure */
297  if (file == NULL)
298  return -1;
299  state = (gz_statep)file;
300  strm = &(state->strm);
301 
302  /* check that we're reading and that there's no (serious) error */
303  if (state->mode != GZ_READ ||
304  (state->err != Z_OK && state->err != Z_BUF_ERROR))
305  return -1;
306 
307  /* since an int is returned, make sure len fits in one, otherwise return
308  with an error (this avoids the flaw in the interface) */
309  if ((int)len < 0) {
310  gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
311  return -1;
312  }
313 
314  /* if len is zero, avoid unnecessary operations */
315  if (len == 0)
316  return 0;
317 
318  /* process a skip request */
319  if (state->seek) {
320  state->seek = 0;
321  if (gz_skip(state, state->skip) == -1)
322  return -1;
323  }
324 
325  /* get len bytes to buf, or less than len if at the end */
326  got = 0;
327  do {
328  /* first just try copying data from the output buffer */
329  if (state->x.have) {
330  n = state->x.have > len ? len : state->x.have;
331  memcpy(buf, state->x.next, n);
332  state->x.next += n;
333  state->x.have -= n;
334  }
335 
336  /* output buffer empty -- return if we're at the end of the input */
337  else if (state->eof && strm->avail_in == 0) {
338  state->past = 1; /* tried to read past end */
339  break;
340  }
341 
342  /* need output data -- for small len or new stream load up our output
343  buffer */
344  else if (state->how == LOOK || len < (state->size << 1)) {
345  /* get more output, looking for header if required */
346  if (gz_fetch(state) == -1)
347  return -1;
348  continue; /* no progress yet -- go back to copy above */
349  /* the copy above assures that we will leave with space in the
350  output buffer, allowing at least one gzungetc() to succeed */
351  }
352 
353  /* large len -- read directly into user buffer */
354  else if (state->how == COPY) { /* read directly */
355  if (gz_load(state, buf, len, &n) == -1)
356  return -1;
357  }
358 
359  /* large len -- decompress directly into user buffer */
360  else { /* state->how == GZIP */
361  strm->avail_out = len;
362  strm->next_out = buf;
363  if (gz_decomp(state) == -1)
364  return -1;
365  n = state->x.have;
366  state->x.have = 0;
367  }
368 
369  /* update progress */
370  len -= n;
371  buf = (char *)buf + n;
372  got += n;
373  state->x.pos += n;
374  } while (len);
375 
376  /* return number of bytes read into user buffer (will fit in int) */
377  return (int)got;
378 }
379 
380 /* -- see zlib.h -- */
381 #undef gzgetc
382 int ZEXPORT gzgetc(file)
383  gzFile file;
384 {
385  int ret;
386  unsigned char buf[1];
387  gz_statep state;
388 
389  /* get internal structure */
390  if (file == NULL)
391  return -1;
392  state = (gz_statep)file;
393 
394  /* check that we're reading and that there's no (serious) error */
395  if (state->mode != GZ_READ ||
396  (state->err != Z_OK && state->err != Z_BUF_ERROR))
397  return -1;
398 
399  /* try output buffer (no need to check for skip request) */
400  if (state->x.have) {
401  state->x.have--;
402  state->x.pos++;
403  return *(state->x.next)++;
404  }
405 
406  /* nothing there -- try gzread() */
407  ret = gzread(file, buf, 1);
408  return ret < 1 ? -1 : buf[0];
409 }
410 
411 int ZEXPORT gzgetc_(file)
412 gzFile file;
413 {
414  return gzgetc(file);
415 }
416 
417 /* -- see zlib.h -- */
418 int ZEXPORT gzungetc(c, file)
419  int c;
420  gzFile file;
421 {
422  gz_statep state;
423 
424  /* get internal structure */
425  if (file == NULL)
426  return -1;
427  state = (gz_statep)file;
428 
429  /* check that we're reading and that there's no (serious) error */
430  if (state->mode != GZ_READ ||
431  (state->err != Z_OK && state->err != Z_BUF_ERROR))
432  return -1;
433 
434  /* process a skip request */
435  if (state->seek) {
436  state->seek = 0;
437  if (gz_skip(state, state->skip) == -1)
438  return -1;
439  }
440 
441  /* can't push EOF */
442  if (c < 0)
443  return -1;
444 
445  /* if output buffer empty, put byte at end (allows more pushing) */
446  if (state->x.have == 0) {
447  state->x.have = 1;
448  state->x.next = state->out + (state->size << 1) - 1;
449  state->x.next[0] = c;
450  state->x.pos--;
451  state->past = 0;
452  return c;
453  }
454 
455  /* if no room, give up (must have already done a gzungetc()) */
456  if (state->x.have == (state->size << 1)) {
457  gz_error(state, Z_DATA_ERROR, "out of room to push characters");
458  return -1;
459  }
460 
461  /* slide output data if needed and insert byte before existing data */
462  if (state->x.next == state->out) {
463  unsigned char *src = state->out + state->x.have;
464  unsigned char *dest = state->out + (state->size << 1);
465  while (src > state->out)
466  *--dest = *--src;
467  state->x.next = dest;
468  }
469  state->x.have++;
470  state->x.next--;
471  state->x.next[0] = c;
472  state->x.pos--;
473  state->past = 0;
474  return c;
475 }
476 
477 /* -- see zlib.h -- */
478 char * ZEXPORT gzgets(file, buf, len)
479  gzFile file;
480  char *buf;
481  int len;
482 {
483  unsigned left, n;
484  char *str;
485  unsigned char *eol;
486  gz_statep state;
487 
488  /* check parameters and get internal structure */
489  if (file == NULL || buf == NULL || len < 1)
490  return NULL;
491  state = (gz_statep)file;
492 
493  /* check that we're reading and that there's no (serious) error */
494  if (state->mode != GZ_READ ||
495  (state->err != Z_OK && state->err != Z_BUF_ERROR))
496  return NULL;
497 
498  /* process a skip request */
499  if (state->seek) {
500  state->seek = 0;
501  if (gz_skip(state, state->skip) == -1)
502  return NULL;
503  }
504 
505  /* copy output bytes up to new line or len - 1, whichever comes first --
506  append a terminating zero to the string (we don't check for a zero in
507  the contents, let the user worry about that) */
508  str = buf;
509  left = (unsigned)len - 1;
510  if (left) do {
511  /* assure that something is in the output buffer */
512  if (state->x.have == 0 && gz_fetch(state) == -1)
513  return NULL; /* error */
514  if (state->x.have == 0) { /* end of file */
515  state->past = 1; /* read past end */
516  break; /* return what we have */
517  }
518 
519  /* look for end-of-line in current output buffer */
520  n = state->x.have > left ? left : state->x.have;
521  eol = memchr(state->x.next, '\n', n);
522  if (eol != NULL)
523  n = (unsigned)(eol - state->x.next) + 1;
524 
525  /* copy through end-of-line, or remainder if not found */
526  memcpy(buf, state->x.next, n);
527  state->x.have -= n;
528  state->x.next += n;
529  state->x.pos += n;
530  left -= n;
531  buf += n;
532  } while (left && eol == NULL);
533 
534  /* return terminated string, or if nothing, end of file */
535  if (buf == str)
536  return NULL;
537  buf[0] = 0;
538  return str;
539 }
540 
541 /* -- see zlib.h -- */
542 int ZEXPORT gzdirect(file)
543  gzFile file;
544 {
545  gz_statep state;
546 
547  /* get internal structure */
548  if (file == NULL)
549  return 0;
550  state = (gz_statep)file;
551 
552  /* if the state is not known, but we can find out, then do so (this is
553  mainly for right after a gzopen() or gzdopen()) */
554  if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
555  (void)gz_look(state);
556 
557  /* return 1 if transparent, 0 if processing a gzip stream */
558  return state->direct;
559 }
560 
561 /* -- see zlib.h -- */
563  gzFile file;
564 {
565  int ret, err;
566  gz_statep state;
567 
568  /* get internal structure */
569  if (file == NULL)
570  return Z_STREAM_ERROR;
571  state = (gz_statep)file;
572 
573  /* check that we're reading */
574  if (state->mode != GZ_READ)
575  return Z_STREAM_ERROR;
576 
577  /* free memory and close file */
578  if (state->size) {
579  inflateEnd(&(state->strm));
580  free(state->out);
581  free(state->in);
582  }
583  err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
584  gz_error(state, Z_OK, NULL);
585  free(state->path);
586  ret = close(state->fd);
587  free(state);
588  return ret ? Z_ERRNO : err;
589 }