NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hfile.c
Go to the documentation of this file.
1 /* hfile.c -- buffered low-level input/output streams.
2 
3  Copyright (C) 2013-2014 Genome Research Ltd.
4 
5  Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notices and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <errno.h>
29 
30 #include "htslib/hfile.h"
31 #include "hfile_internal.h"
32 
33 /* hFILE fields are used as follows:
34 
35  char *buffer; // Pointer to the start of the I/O buffer
36  char *begin; // First not-yet-read character / unused position
37  char *end; // First unfilled/unfillable position
38  char *limit; // Pointer to the first position past the buffer
39 
40  const hFILE_backend *backend; // Methods to refill/flush I/O buffer
41 
42  off_t offset; // Offset within the stream of buffer position 0
43  int at_eof:1; // For reading, whether EOF has been seen
44  int has_errno; // Error number from the last failure on this stream
45 
46 For reading, begin is the first unread character in the buffer and end is the
47 first unfilled position:
48 
49  -----------ABCDEFGHIJKLMNO---------------
50  ^buffer    ^begin         ^end           ^limit
51 
52 For writing, begin is the first unused position and end is unused so remains
53 equal to buffer:
54 
55  ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
56  ^buffer                   ^begin         ^limit
57  ^end
58 
59 Thus if begin > end then there is a non-empty write buffer, if begin < end
60 then there is a non-empty read buffer, and if begin == end then both buffers
61 are empty. In all cases, the stream's file position indicator corresponds
62 to the position pointed to by begin. */
63 
64 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
65 {
66  hFILE *fp = (hFILE *) malloc(struct_size);
67  if (fp == NULL) goto error;
68 
69  if (capacity == 0) capacity = 32768;
70  // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
71  if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
72 
73  fp->buffer = (char *) malloc(capacity);
74  if (fp->buffer == NULL) goto error;
75 
76  fp->begin = fp->end = fp->buffer;
77  fp->limit = &fp->buffer[capacity];
78 
79  fp->offset = 0;
80  fp->at_eof = 0;
81  fp->has_errno = 0;
82  return fp;
83 
84 error:
85  hfile_destroy(fp);
86  return NULL;
87 }
88 
90 {
91  int save = errno;
92  if (fp) free(fp->buffer);
93  free(fp);
94  errno = save;
95 }
96 
97 static inline int writebuffer_is_nonempty(hFILE *fp)
98 {
99  return fp->begin > fp->end;
100 }
101 
102 /* Refills the read buffer from the backend (once, so may only partially
103  fill the buffer), returning the number of additional characters read
104  (which might be 0), or negative when an error occurred. */
105 static ssize_t refill_buffer(hFILE *fp)
106 {
107  ssize_t n;
108 
109  // Move any unread characters to the start of the buffer
110  if (fp->begin > fp->buffer) {
111  fp->offset += fp->begin - fp->buffer;
112  memmove(fp->buffer, fp->begin, fp->end - fp->begin);
113  fp->end = &fp->buffer[fp->end - fp->begin];
114  fp->begin = fp->buffer;
115  }
116 
117  // Read into the available buffer space at fp->[end,limit)
118  if (fp->at_eof || fp->end == fp->limit) n = 0;
119  else {
120  n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
121  if (n < 0) { fp->has_errno = errno; return n; }
122  else if (n == 0) fp->at_eof = 1;
123  }
124 
125  fp->end += n;
126  return n;
127 }
128 
129 /* Called only from hgetc(), when our buffer is empty. */
130 int hgetc2(hFILE *fp)
131 {
132  return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
133 }
134 
135 ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
136 {
137  size_t n = fp->end - fp->begin;
138  while (n < nbytes) {
139  ssize_t ret = refill_buffer(fp);
140  if (ret < 0) return ret;
141  else if (ret == 0) break;
142  else n += ret;
143  }
144 
145  if (n > nbytes) n = nbytes;
146  memcpy(buffer, fp->begin, n);
147  return n;
148 }
149 
150 /* Called only from hread(); when called, our buffer is empty and nread bytes
151  have already been placed in the destination buffer. */
152 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
153 {
154  const size_t capacity = fp->limit - fp->buffer;
155  char *dest = (char *) destv;
156  dest += nread, nbytes -= nread;
157 
158  // Read large requests directly into the destination buffer
159  while (nbytes * 2 >= capacity && !fp->at_eof) {
160  ssize_t n = fp->backend->read(fp, dest, nbytes);
161  if (n < 0) { fp->has_errno = errno; return n; }
162  else if (n == 0) fp->at_eof = 1;
163  fp->offset += n;
164  dest += n, nbytes -= n;
165  nread += n;
166  }
167 
168  while (nbytes > 0 && !fp->at_eof) {
169  size_t n;
170  ssize_t ret = refill_buffer(fp);
171  if (ret < 0) return ret;
172 
173  n = fp->end - fp->begin;
174  if (n > nbytes) n = nbytes;
175  memcpy(dest, fp->begin, n);
176  fp->begin += n;
177  dest += n, nbytes -= n;
178  nread += n;
179  }
180 
181  return nread;
182 }
183 
184 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
185  returning 0 on success or negative if an error occurred. */
186 static ssize_t flush_buffer(hFILE *fp)
187 {
188  const char *buffer = fp->buffer;
189  while (buffer < fp->begin) {
190  ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
191  if (n < 0) { fp->has_errno = errno; return n; }
192  buffer += n;
193  fp->offset += n;
194  }
195 
196  fp->begin = fp->buffer; // Leave the buffer empty
197  return 0;
198 }
199 
200 int hflush(hFILE *fp)
201 {
202  if (flush_buffer(fp) < 0) return EOF;
203  if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; }
204  return 0;
205 }
206 
207 /* Called only from hputc(), when our buffer is already full. */
208 int hputc2(int c, hFILE *fp)
209 {
210  if (flush_buffer(fp) < 0) return EOF;
211  *(fp->begin++) = c;
212  return c;
213 }
214 
215 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
216  ncopied bytes from the source have already been copied to our buffer. */
217 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
218 {
219  const char *src = (const char *) srcv;
220  ssize_t ret;
221  const size_t capacity = fp->limit - fp->buffer;
222  size_t remaining = totalbytes - ncopied;
223  src += ncopied;
224 
225  ret = flush_buffer(fp);
226  if (ret < 0) return ret;
227 
228  // Write large blocks out directly from the source buffer
229  while (remaining * 2 >= capacity) {
230  ssize_t n = fp->backend->write(fp, src, remaining);
231  if (n < 0) { fp->has_errno = errno; return n; }
232  fp->offset += n;
233  src += n, remaining -= n;
234  }
235 
236  // Just buffer any remaining characters
237  memcpy(fp->begin, src, remaining);
238  fp->begin += remaining;
239 
240  return totalbytes;
241 }
242 
243 /* Called only from hputs(), when our buffer is already full. */
244 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
245 {
246  return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
247 }
248 
249 off_t hseek(hFILE *fp, off_t offset, int whence)
250 {
251  off_t pos;
252 
253  if (writebuffer_is_nonempty(fp)) {
254  int ret = flush_buffer(fp);
255  if (ret < 0) return ret;
256  }
257 
258  pos = fp->backend->seek(fp, offset, whence);
259  if (pos < 0) { fp->has_errno = errno; return pos; }
260 
261  // Seeking succeeded, so discard any non-empty read buffer
262  fp->begin = fp->end = fp->buffer;
263  fp->at_eof = 0;
264 
265  fp->offset = pos;
266  return pos;
267 }
268 
269 int hclose(hFILE *fp)
270 {
271  int err = fp->has_errno;
272 
273  if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno;
274  if (fp->backend->close(fp) < 0) err = errno;
275  hfile_destroy(fp);
276 
277  if (err) {
278  errno = err;
279  return EOF;
280  }
281  else return 0;
282 }
283 
/* NOTE(review): the function header for this body (listing line 284) is
   missing from this extract.  It is presumably
   `void hclose_abruptly(hFILE *fp)` -- confirm against htslib/hfile.h.

   Closes the backend and frees the stream without flushing any buffered
   output.  A failure from the backend's close is ignored, and errno is
   restored to the value it had on entry. */
285 {
286  int save = errno;
287  if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ }
288  hfile_destroy(fp);
289  errno = save;
290 }
291 
292 
293 /***************************
294  * File descriptor backend *
295  ***************************/
296 
297 #include <sys/socket.h>
298 #include <sys/stat.h>
299 #include <fcntl.h>
300 #include <unistd.h>
301 
302 #ifdef _WIN32
303 #define HAVE_CLOSESOCKET
304 #endif
305 
306 /* For Unix, it doesn't matter whether a file descriptor is a socket.
307  However Windows insists on send()/recv() and its own closesocket()
308  being used when fd happens to be a socket. */
309 
310 typedef struct {
312  int fd;
313  int is_socket:1;
314 } hFILE_fd;
315 
316 static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
317 {
318  hFILE_fd *fp = (hFILE_fd *) fpv;
319  ssize_t n;
320  do {
321  n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0)
322  : read(fp->fd, buffer, nbytes);
323  } while (n < 0 && errno == EINTR);
324  return n;
325 }
326 
327 static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
328 {
329  hFILE_fd *fp = (hFILE_fd *) fpv;
330  ssize_t n;
331  do {
332  n = fp->is_socket? send(fp->fd, buffer, nbytes, 0)
333  : write(fp->fd, buffer, nbytes);
334  } while (n < 0 && errno == EINTR);
335  return n;
336 }
337 
338 static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
339 {
340  hFILE_fd *fp = (hFILE_fd *) fpv;
341  return lseek(fp->fd, offset, whence);
342 }
343 
344 static int fd_flush(hFILE *fpv)
345 {
346  hFILE_fd *fp = (hFILE_fd *) fpv;
347  int ret;
348  do {
349 #ifdef HAVE_FDATASYNC
350  ret = fdatasync(fp->fd);
351 #else
352  ret = fsync(fp->fd);
353 #endif
354  // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
355  // and operation-not-supported errors (Mac OS X)
356  if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0;
357  } while (ret < 0 && errno == EINTR);
358  return ret;
359 }
360 
361 static int fd_close(hFILE *fpv)
362 {
363  hFILE_fd *fp = (hFILE_fd *) fpv;
364  int ret;
365  do {
366 #ifdef HAVE_CLOSESOCKET
367  ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
368 #else
369  ret = close(fp->fd);
370 #endif
371  } while (ret < 0 && errno == EINTR);
372  return ret;
373 }
374 
375 static const struct hFILE_backend fd_backend =
376 {
377  fd_read, fd_write, fd_seek, fd_flush, fd_close
378 };
379 
/* Returns the st_blksize value that fstat() reports for fd, or 0 if fstat()
   fails.  Callers pass the result to hfile_init() as the buffer capacity,
   where 0 selects the default size. */
static size_t blksize(int fd)
{
    struct stat st;
    return (fstat(fd, &st) == 0)? st.st_blksize : 0;
}
386 
387 static hFILE *hopen_fd(const char *filename, const char *mode)
388 {
389  hFILE_fd *fp = NULL;
390  int fd = open(filename, hfile_oflags(mode), 0666);
391  if (fd < 0) goto error;
392 
393  fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
394  if (fp == NULL) goto error;
395 
396  fp->fd = fd;
397  fp->is_socket = 0;
398  fp->base.backend = &fd_backend;
399  return &fp->base;
400 
401 error:
402  if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
403  hfile_destroy((hFILE *) fp);
404  return NULL;
405 }
406 
407 hFILE *hdopen(int fd, const char *mode)
408 {
409  hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
410  if (fp == NULL) return NULL;
411 
412  fp->fd = fd;
413  fp->is_socket = (strchr(mode, 's') != NULL);
414  fp->base.backend = &fd_backend;
415  return &fp->base;
416 }
417 
418 static hFILE *hopen_fd_stdinout(const char *mode)
419 {
420  int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
421  // TODO Set binary mode (for Windows)
422  return hdopen(fd, mode);
423 }
424 
/* Translates an fopen-style mode string into open(2) flags.

   'r' selects O_RDONLY, 'w' selects O_WRONLY with O_CREAT|O_TRUNC, 'a'
   selects O_WRONLY with O_CREAT|O_APPEND, and '+' selects O_RDWR.  The
   access mode is taken from the last such letter in the string, while the
   creation flags accumulate.  Any other character is ignored.  O_BINARY is
   added on platforms that define it. */
int hfile_oflags(const char *mode)
{
    int accmode = 0;
    int extra = 0;

    while (*mode != '\0') {
        const char c = *mode++;

        if (c == 'r') {
            accmode = O_RDONLY;
        }
        else if (c == 'w') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            accmode = O_RDWR;
        }
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return accmode | extra;
}
444 
445 
446 /*********************
447  * In-memory backend *
448  *********************/
449 
450 typedef struct {
452  const char *buffer;
453  size_t length, pos;
454 } hFILE_mem;
455 
456 static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
457 {
458  hFILE_mem *fp = (hFILE_mem *) fpv;
459  size_t avail = fp->length - fp->pos;
460  if (nbytes > avail) nbytes = avail;
461  memcpy(buffer, fp->buffer + fp->pos, nbytes);
462  fp->pos += nbytes;
463  return nbytes;
464 }
465 
466 static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
467 {
468  hFILE_mem *fp = (hFILE_mem *) fpv;
469  size_t absoffset = (offset >= 0)? offset : -offset;
470  size_t origin;
471 
472  switch (whence) {
473  case SEEK_SET: origin = 0; break;
474  case SEEK_CUR: origin = fp->pos; break;
475  case SEEK_END: origin = fp->length; break;
476  default: errno = EINVAL; return -1;
477  }
478 
479  if ((offset < 0 && absoffset > origin) ||
480  (offset >= 0 && absoffset > fp->length - origin)) {
481  errno = EINVAL;
482  return -1;
483  }
484 
485  fp->pos = origin + offset;
486  return fp->pos;
487 }
488 
489 static int mem_close(hFILE *fpv)
490 {
491  return 0;
492 }
493 
494 static const struct hFILE_backend mem_backend =
495 {
496  mem_read, NULL, mem_seek, NULL, mem_close
497 };
498 
499 static hFILE *hopen_mem(const char *data, const char *mode)
500 {
501  // TODO Implement write modes, which will require memory allocation
502  if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; }
503 
504  hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);
505  if (fp == NULL) return NULL;
506 
507  fp->buffer = data;
508  fp->length = strlen(data);
509  fp->pos = 0;
510  fp->base.backend = &mem_backend;
511  return &fp->base;
512 }
513 
514 
515 /******************************
516  * hopen() backend dispatcher *
517  ******************************/
518 
519 hFILE *hopen(const char *fname, const char *mode)
520 {
521  if (strncmp(fname, "http://", 7) == 0 ||
522  strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);
523  else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);
524  else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
525  else return hopen_fd(fname, mode);
526 }