NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
zfio.c
Go to the documentation of this file.
1 /*
2 Copyright (c) 2009-2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "io_lib_config.h"
33 #endif
34 
35 #include <stdlib.h>
36 #include <unistd.h>
37 
38 #include "cram/os.h"
39 #include "cram/zfio.h"
40 
41 /* ------------------------------------------------------------------------ */
42 /* Some wrappers around FILE * vs gzFile *, allowing for either */
43 
44 /*
45  * gzopen() works on both compressed and uncompressed data, but it has
46  * a significant performance hit even for uncompressed data (tested as
47  * 25s using FILE* to 46s via gzopen and 66s via gzopen when gzipped).
48  *
49  * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed
50  * and gzFile* when compressed. This also means we could hide bzopen in
51  * there too if desired.
52  */
53 
54 off_t zftello(zfp *zf) {
55  return zf->fp ? ftello(zf->fp) : -1;
56 }
57 
58 int zfseeko(zfp *zf, off_t offset, int whence) {
59  return zf->fp ? fseeko(zf->fp, offset, whence) : -1;
60 }
61 
62 
63 /*
64  * A wrapper for either fgets or gzgets depending on what has been
65  * opened.
66  */
67 char *zfgets(char *line, int size, zfp *zf) {
68  if (zf->fp)
69  return fgets(line, size, zf->fp);
70  else
71  return gzgets(zf->gz, line, size);
72 }
73 
74 /*
75  * A wrapper for either fputs or gzputs depending on what has been
76  * opened.
77  */
78 int zfputs(char *line, zfp *zf) {
79  if (zf->fp)
80  return fputs(line, zf->fp);
81  else
82  return gzputs(zf->gz, line) ? 0 : EOF;
83 }
84 
85 /*
86  * Peeks at and returns the next character without consuming it from the
87  * input. (Ie a combination of getc and ungetc).
88  */
89 int zfpeek(zfp *zf) {
90  int c;
91 
92  if (zf->fp) {
93  c = getc(zf->fp);
94  if (c != EOF)
95  ungetc(c, zf->fp);
96  } else {
97  c = gzgetc(zf->gz);
98  if (c != EOF)
99  gzungetc(c, zf->gz);
100  }
101 
102  return c;
103 }
104 
105 /* A replacement for either feof of gzeof */
106 int zfeof(zfp *zf) {
107  return zf->fp ? feof(zf->fp) : gzeof(zf->gz);
108 }
109 
110 /* A replacement for either fopen or gzopen */
111 zfp *zfopen(const char *path, const char *mode) {
112  char path2[1024];
113  zfp *zf;
114 
115  if (!(zf = (zfp *)malloc(sizeof(*zf))))
116  return NULL;
117  zf->fp = NULL;
118  zf->gz = NULL;
119 
120  /* Try normal fopen */
121  if (mode[0] != 'z' && mode[1] != 'z' &&
122  NULL != (zf->fp = fopen(path, mode))) {
123  unsigned char magic[2];
124  if (2 != fread(magic, 1, 2, zf->fp)) {
125  free(zf);
126  return NULL;
127  }
128  if (!(magic[0] == 0x1f &&
129  magic[1] == 0x8b)) {
130  fseeko(zf->fp, 0, SEEK_SET);
131  return zf;
132  }
133 
134  fclose(zf->fp);
135  zf->fp = NULL;
136  }
137 
138 #ifdef HAVE_POPEN
139  /*
140  * I've no idea why, by gzgets is VERY slow, maybe because it handles
141  * arbitrary seeks.
142  * popen to gzip -cd is 3 times faster though.
143  */
144  if (*mode == 'w') {
145  } else {
146  if (access(path, R_OK) == 0) {
147  sprintf(path2, "gzip -cd < %.*s", 1000, path);
148  if (NULL != (zf->fp = popen(path2, "r")))
149  return zf;
150  }
151 
152  sprintf(path2, "gzip -cd < %.*s.gz", 1000, path);
153  if (NULL != (zf->fp = popen(path2, "r")))
154  return zf;
155 
156  printf("Failed on %s\n", path);
157  } else {
158  sprintf(path2, "gzip > %.*s", 1000, path);
159  if (NULL != (zf->fp = popen(path2, "w")))
160  return zf;
161  }
162 
163  printf("Failed on %s\n", path);
164  }
165 #else
166  /* Gzopen instead */
167  if ((zf->gz = gzopen(path, mode)))
168  return zf;
169 
170  sprintf(path2, "%.*s.gz", 1020, path);
171  if ((zf->gz = gzopen(path2, mode)))
172  return zf;
173 #endif
174 
175  perror(path);
176 
177  free(zf);
178  return NULL;
179 }
180 
181 int zfclose(zfp *zf) {
182  int r = (zf->fp) ? fclose(zf->fp) : gzclose(zf->gz);
183  free(zf);
184  return r;
185 }