NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
mFILE.c
Go to the documentation of this file.
1 /*
2 Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
31 
32 #ifdef HAVE_CONFIG_H
33 #include "io_lib_config.h"
34 #endif
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <stdarg.h>
45 
46 #include "cram/os.h"
47 #include "cram/mFILE.h"
48 #include "cram/vlen.h"
49 
50 /*
51  * This file contains memory-based versions of the most commonly used
52  * (by io_lib) stdio functions.
53  *
54  * Actual file IO takes place either on opening or closing an mFILE.
55  *
56  * Coupled to this are a bunch of rather scary macros which can be obtained
57  * by including stdio_hack.h. It is recommended though that you use mFILE.h
58  * instead and replace fopen with mfopen (etc). This is more or less
59  * mandatory if you wish to use both FILE and mFILE structs in a single file.
60  */
61 
62 static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */
63 
/*
 * Reads the entirety of fp into memory.
 *
 * fp      Stream to read from, starting at its current position.
 * fn      Optional filename associated with fp. When stat() on it reports a
 *         positive size, that size is used to allocate the buffer up front
 *         and to read the data in a single request. Otherwise (fn is NULL,
 *         stat() fails, or the reported size is zero as it is for pipes and
 *         many special files) we read in fixed-size chunks until EOF.
 * size    Receives the number of bytes loaded; set to 0 on failure.
 * binary  Only consulted on Windows builds, where it selects binary versus
 *         text mode for the stream.
 *
 * Returns a malloced buffer on success of length *size (caller frees)
 *         NULL on failure (out of memory or a read error)
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0;
    size_t bufsize = 8192;
    size_t expected = 0;    /* 0 => length unknown, read until EOF */

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#else
    (void)binary;
#endif

    *size = 0;

    if (fn && -1 != stat(fn, &sb) && sb.st_size > 0) {
        expected = (size_t)sb.st_size;
        bufsize = expected;
    }

    do {
        size_t len;

        if (used + bufsize > allocated) {
            /* Grow via a temporary so the old block is not lost on failure */
            char *grown = realloc(data, allocated + bufsize);
            if (NULL == grown) {
                free(data);
                return NULL;
            }
            data = grown;
            allocated += bufsize;
        }

        len = fread(data + used, 1, allocated - used, fp);
        used += len;

        /*
         * A read error never sets the EOF indicator, so without this test
         * the loop below would never terminate.
         */
        if (ferror(fp)) {
            free(data);
            return NULL;
        }
    } while (!feof(fp) && (0 == expected || used < expected));

    *size = used;

    return data;
}
108 
109 /*
110  * Creates and returns m_channel[0].
111  * We initialise this on the first attempted read, which then slurps in
112  * all of stdin until EOF is met.
113  */
114 mFILE *mstdin(void) {
115  if (m_channel[0])
116  return m_channel[0];
117 
118  m_channel[0] = mfcreate(NULL, 0);
119  if (NULL == m_channel[0]) return NULL;
120  m_channel[0]->fp = stdin;
121  return m_channel[0];
122 }
123 
124 static void init_mstdin(void) {
125  static int done_stdin = 0;
126  if (done_stdin)
127  return;
128 
129  m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1);
130  m_channel[0]->mode = MF_READ;
131  done_stdin = 1;
132 }
133 
134 /*
135  * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
136  * an empty buffer which is physically written out only when mfflush or
137  * mfclose are called.
138  */
139 mFILE *mstdout(void) {
140  if (m_channel[1])
141  return m_channel[1];
142 
143  m_channel[1] = mfcreate(NULL, 0);
144  if (NULL == m_channel[1]) return NULL;
145  m_channel[1]->fp = stdout;
146  m_channel[1]->mode = MF_WRITE;
147  return m_channel[1];
148 }
149 
150 /*
151  * Stderr as an mFILE.
152  * The code handles stderr by returning m_channel[2], but also checking
153  * for stderr in fprintf (the common usage of it) to auto-flush.
154  */
155 mFILE *mstderr(void) {
156  if (m_channel[2])
157  return m_channel[2];
158 
159  m_channel[2] = mfcreate(NULL, 0);
160  if (NULL == m_channel[2]) return NULL;
161  m_channel[2]->fp = stderr;
162  m_channel[2]->mode = MF_WRITE;
163  return m_channel[2];
164 }
165 
166 
167 /*
168  * For creating existing mFILE pointers directly from memory buffers.
169  */
170 mFILE *mfcreate(char *data, int size) {
171  mFILE *mf = (mFILE *)malloc(sizeof(*mf));
172  if (NULL == mf) return NULL;
173  mf->fp = NULL;
174  mf->data = data;
175  mf->alloced = size;
176  mf->size = size;
177  mf->eof = 0;
178  mf->offset = 0;
179  mf->flush_pos = 0;
180  mf->mode = MF_READ | MF_WRITE;
181  return mf;
182 }
183 
184 /*
185  * Recreate an existing mFILE to house new data/size.
186  * It also rewinds the file.
187  */
188 void mfrecreate(mFILE *mf, char *data, int size) {
189  if (mf->data)
190  free(mf->data);
191  mf->data = data;
192  mf->size = size;
193  mf->alloced = size;
194  mf->eof = 0;
195  mf->offset = 0;
196  mf->flush_pos = 0;
197 }
198 
199 
200 /*
201  * Creates a new mFILE to contain the contents of the FILE pointer.
202  * This mFILE is purely for in-memory operations and has no links to the
203  * original FILE* it came from. It also doesn't close the FILE pointer.
204  * Consider using mfreopen() is you need different behaviour.
205  *
206  * Returns mFILE * on success
207  * NULL on failure.
208  */
209 mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
210  mFILE *mf;
211 
212  /* Open using mfreopen() */
213  if (NULL == (mf = mfreopen(path, mode_str, fp)))
214  return NULL;
215 
216  /* Disassociate from the input stream */
217  mf->fp = NULL;
218 
219  return mf;
220 }
221 
222 /*
223  * Converts a FILE * to an mFILE *.
224  * Use this for wrapper functions to turn external prototypes requring
225  * FILE * as an argument into internal code using mFILE *.
226  */
227 mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
228  mFILE *mf;
229  int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
230 
231  /* Parse mode:
232  * r = read file contents (if truncated => don't read)
233  * w = write on close
234  * a = position at end of buffer
235  * x = position at same location as the original fp, don't seek on flush
236  */
237  if (strchr(mode_str, 'r'))
238  r = 1, mode |= MF_READ;
239  if (strchr(mode_str, 'w'))
240  w = 1, mode |= MF_WRITE | MF_TRUNC;
241  if (strchr(mode_str, 'a'))
242  w = a = 1, mode |= MF_WRITE | MF_APPEND;
243  if (strchr(mode_str, 'b'))
244  b = 1, mode |= MF_BINARY;
245  if (strchr(mode_str, 'x'))
246  x = 1;
247  if (strchr(mode_str, '+')) {
248  w = 1, mode |= MF_READ | MF_WRITE;
249  if (a)
250  r = 1;
251  }
252 
253  if (r) {
254  mf = mfcreate(NULL, 0);
255  if (NULL == mf) return NULL;
256  if (!(mode & MF_TRUNC)) {
257  mf->data = mfload(fp, path, &mf->size, b);
258  mf->alloced = mf->size;
259  if (!a)
260  fseek(fp, 0, SEEK_SET);
261  }
262  } else if (w) {
263  /* Write - initialise the data structures */
264  mf = mfcreate(NULL, 0);
265  if (NULL == mf) return NULL;
266  } else {
267  fprintf(stderr, "Must specify either r, w or a for mode\n");
268  return NULL;
269  }
270  mf->fp = fp;
271  mf->mode = mode;
272 
273  if (x) {
274  mf->mode |= MF_MODEX;
275  }
276 
277  if (a) {
278  mf->flush_pos = mf->size;
279  fseek(fp, 0, SEEK_END);
280  }
281 
282  return mf;
283 }
284 
285 /*
286  * Opens a file. If we have read access (r or a+) then it loads the entire
287  * file into memory. If We have write access then the pathname is stored.
288  * We do not actually write until an mfclose, which then checks this pathname.
289  */
290 mFILE *mfopen(const char *path, const char *mode) {
291  FILE *fp;
292 
293  if (NULL == (fp = fopen(path, mode)))
294  return NULL;
295  return mfreopen(path, mode, fp);
296 }
297 
298 /*
299  * Closes an mFILE. If the filename is known (implying write access) then this
300  * also writes the data to disk.
301  *
302  * Stdout is handled by calling mfflush which writes to stdout if appropriate.
303  */
304 int mfclose(mFILE *mf) {
305  if (!mf)
306  return -1;
307 
308  mfflush(mf);
309 
310  if (mf->fp)
311  fclose(mf->fp);
312 
313  mfdestroy(mf);
314 
315  return 0;
316 }
317 
318 /*
319  * Closes the file pointer contained within the mFILE without destroying
320  * the in-memory data.
321  */
322 int mfdetach(mFILE *mf) {
323  if (!mf)
324  return -1;
325 
326  mfflush(mf);
327 
328  if (mf->fp) {
329  fclose(mf->fp);
330  mf->fp = NULL;
331  }
332 
333  return 0;
334 }
335 
336 /*
337  * Destroys an mFILE structure but does not flush or close it
338  */
339 int mfdestroy(mFILE *mf) {
340  if (!mf)
341  return -1;
342 
343  if (mf->data)
344  free(mf->data);
345  free(mf);
346 
347  return 0;
348 }
349 
350 /*
351  * Steals that data out of an mFILE. The mFILE itself will be closed.
352  * It is up to the caller to free the stolen buffer. If size_out is
353  * not NULL, mf->size will be stored in it.
354  * This is more-or-less the opposite of mfcreate().
355  */
356 
357 void *mfsteal(mFILE *mf, size_t *size_out) {
358  void *data;
359 
360  if (!mf) return NULL;
361 
362  data = mf->data;
363 
364  if (NULL != size_out) *size_out = mf->size;
365 
366  mfdetach(mf);
367  mf->data = NULL;
368  mfdestroy(mf);
369 
370  return data;
371 }
372 
373 /*
374  * Seek/tell functions. Nothing more than updating and reporting an
375  * in-memory index. NB we can seek on stdin or stdout even provided we
376  * haven't been flushing.
377  */
378 int mfseek(mFILE *mf, long offset, int whence) {
379  switch (whence) {
380  case SEEK_SET:
381  mf->offset = offset;
382  break;
383  case SEEK_CUR:
384  mf->offset += offset;
385  break;
386  case SEEK_END:
387  mf->offset = mf->size + offset;
388  break;
389  default:
390  errno = EINVAL;
391  return -1;
392  }
393 
394  mf->eof = 0;
395  return 0;
396 }
397 
398 long mftell(mFILE *mf) {
399  return mf->offset;
400 }
401 
402 void mrewind(mFILE *mf) {
403  mf->offset = 0;
404  mf->eof = 0;
405 }
406 
407 /*
408  * mftruncate is not directly a translation of ftruncate as the latter
409  * takes a file descriptor instead of a FILE *. It performs the analogous
410  * role though.
411  *
412  * If offset is -1 then the file is truncated to be the current file
413  * offset.
414  */
415 void mftruncate(mFILE *mf, long offset) {
416  mf->size = offset != -1 ? offset : mf->offset;
417  if (mf->offset > mf->size)
418  mf->offset = mf->size;
419 }
420 
421 int mfeof(mFILE *mf) {
422  return mf->eof;
423 }
424 
425 /*
426  * mFILE read/write functions. Basically these turn fread/fwrite syntax
427  * into memcpy statements, with appropriate memory handling for writing.
428  */
429 size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
430  size_t len;
431  char *cptr = (char *)ptr;
432 
433  if (mf == m_channel[0]) init_mstdin();
434 
435  if (mf->size <= mf->offset)
436  return 0;
437 
438  len = size * nmemb <= mf->size - mf->offset
439  ? size * nmemb
440  : mf->size - mf->offset;
441  if (!size)
442  return 0;
443 
444  memcpy(cptr, &mf->data[mf->offset], len);
445  mf->offset += len;
446 
447  if (len != size * nmemb) {
448  mf->eof = 1;
449  }
450 
451  return len / size;
452 }
453 
454 size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
455  if (!(mf->mode & MF_WRITE))
456  return 0;
457 
458  /* Append mode => forced all writes to end of file */
459  if (mf->mode & MF_APPEND)
460  mf->offset = mf->size;
461 
462  /* Make sure we have enough room */
463  while (size * nmemb + mf->offset > mf->alloced) {
464  size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
465  void * new_data = realloc(mf->data, new_alloced);
466  if (NULL == new_data) return 0;
467  mf->alloced = new_alloced;
468  mf->data = new_data;
469  }
470 
471  /* Record where we need to reflush from */
472  if (mf->offset < mf->flush_pos)
473  mf->flush_pos = mf->offset;
474 
475  /* Copy the data over */
476  memcpy(&mf->data[mf->offset], ptr, size * nmemb);
477  mf->offset += size * nmemb;
478  if (mf->size < mf->offset)
479  mf->size = mf->offset;
480 
481  return nmemb;
482 }
483 
484 int mfgetc(mFILE *mf) {
485  if (mf == m_channel[0]) init_mstdin();
486  if (mf->offset < mf->size) {
487  return (unsigned char)mf->data[mf->offset++];
488  }
489 
490  mf->eof = 1;
491  return -1;
492 }
493 
494 int mungetc(int c, mFILE *mf) {
495  if (mf->offset > 0) {
496  mf->data[--mf->offset] = c;
497  return c;
498  }
499 
500  mf->eof = 1;
501  return -1;
502 }
503 
504 char *mfgets(char *s, int size, mFILE *mf) {
505  int i;
506 
507  if (mf == m_channel[0]) init_mstdin();
508  *s = 0;
509  for (i = 0; i < size-1;) {
510  if (mf->offset < mf->size) {
511  s[i] = mf->data[mf->offset++];
512  if (s[i++] == '\n')
513  break;
514  } else {
515  mf->eof = 1;
516  break;
517  }
518  }
519 
520  s[i] = 0;
521  return i ? s : NULL;
522 }
523 
524 /*
525  * Flushes an mFILE. If this is a real open of a file in write mode then
526  * mFILE->fp will be set. We then write out any new data in mFILE since the
527  * last flush. We cannot tell what may have been modified as we don't keep
528  * track of that, so we typically rewrite out the entire file contents between
529  * the last flush_pos and the end of file.
530  *
531  * For stderr/stdout we also reset the offsets so we cannot modify things
532  * we've already output.
533  */
534 int mfflush(mFILE *mf) {
535  if (!mf->fp)
536  return 0;
537 
538  /* FIXME: only do this when opened in write mode */
539  if (mf == m_channel[1] || mf == m_channel[2]) {
540  if (mf->flush_pos < mf->size) {
541  size_t bytes = mf->size - mf->flush_pos;
542  if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
543  return -1;
544  if (0 != fflush(mf->fp))
545  return -1;
546  }
547 
548  /* Stdout & stderr are non-seekable streams so throw away the data */
549  mf->offset = mf->size = mf->flush_pos = 0;
550  }
551 
552  /* only flush when opened in write mode */
553  if (mf->mode & MF_WRITE) {
554  if (mf->flush_pos < mf->size) {
555  size_t bytes = mf->size - mf->flush_pos;
556  if (!(mf->mode & MF_MODEX)) {
557  fseek(mf->fp, mf->flush_pos, SEEK_SET);
558  }
559  if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
560  return -1;
561  if (0 != fflush(mf->fp))
562  return -1;
563  }
564  if (ftell(mf->fp) != -1 &&
565  ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)
566  return -1;
567  mf->flush_pos = mf->size;
568  }
569 
570  return 0;
571 }
572 
573 /*
574  * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to
575  * estimate how many additional bytes of storage will be required for the
576  * vsprintf to work.
577  */
578 int mfprintf(mFILE *mf, char *fmt, ...) {
579  int ret;
580  size_t est_length;
581  va_list args;
582 
583  va_start(args, fmt);
584  est_length = vflen(fmt, args);
585  va_end(args);
586  while (est_length + mf->offset > mf->alloced) {
587  size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
588  void * new_data = realloc(mf->data, new_alloced);
589  if (NULL == new_data) return -1;
590  mf->alloced = new_alloced;
591  mf->data = new_data;
592  }
593 
594  va_start(args, fmt);
595  ret = vsprintf(&mf->data[mf->offset], fmt, args);
596  va_end(args);
597 
598  if (ret > 0) {
599  mf->offset += ret;
600  if (mf->size < mf->offset)
601  mf->size = mf->offset;
602  }
603 
604  if (mf->fp == stderr) {
605  /* Auto-flush for stderr */
606  if (0 != mfflush(mf)) return -1;
607  }
608 
609  return ret;
610 }
611 
612 /*
613  * Converts an mFILE from binary to ascii mode by replacing all
614  * cr-nl with nl.
615  *
616  * Primarily used on windows when we've uncompressed a binary file which
617  * happens to be a text file (eg Experiment File). Previously we would have
618  * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
619  *
620  * Side effect: resets offset and flush_pos back to the start.
621  */
622 void mfascii(mFILE *mf) {
623  size_t p1, p2;
624 
625  for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
626  if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
627  p2--; /* delete the \r */
628  }
629  mf->data[p2] = mf->data[p1];
630  }
631  mf->size = p2;
632 
633  mf->offset = mf->flush_pos = 0;
634 }