NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
suftest.c
Go to the documentation of this file.
1 /*
2  * suftest.c for divsufsort-lite
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <time.h>
31 #include "divsufsort.h"
32 
33 
34 /* Checks the suffix array SA of the string T. */
35 int
36 sufcheck(const unsigned char *T, const int *SA, int n, int verbose) {
37  int C[256];
38  int i, p, q, t;
39  int c;
40 
41  if(verbose) { fprintf(stderr, "sufcheck: "); }
42  if(n == 0) {
43  if(verbose) { fprintf(stderr, "Done.\n"); }
44  return 0;
45  }
46 
47  /* Check arguments. */
48  if((T == NULL) || (SA == NULL) || (n < 0)) {
49  if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
50  return -1;
51  }
52 
53  /* check range: [0..n-1] */
54  for(i = 0; i < n; ++i) {
55  if((SA[i] < 0) || (n <= SA[i])) {
56  if(verbose) {
57  fprintf(stderr, "Out of the range [0,%d].\n"
58  " SA[%d]=%d\n",
59  n - 1, i, SA[i]);
60  }
61  return -2;
62  }
63  }
64 
65  /* check first characters. */
66  for(i = 1; i < n; ++i) {
67  if(T[SA[i - 1]] > T[SA[i]]) {
68  if(verbose) {
69  fprintf(stderr, "Suffixes in wrong order.\n"
70  " T[SA[%d]=%d]=%d > T[SA[%d]=%d]=%d\n",
71  i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
72  }
73  return -3;
74  }
75  }
76 
77  /* check suffixes. */
78  for(i = 0; i < 256; ++i) { C[i] = 0; }
79  for(i = 0; i < n; ++i) { ++C[T[i]]; }
80  for(i = 0, p = 0; i < 256; ++i) {
81  t = C[i];
82  C[i] = p;
83  p += t;
84  }
85 
86  q = C[T[n - 1]];
87  C[T[n - 1]] += 1;
88  for(i = 0; i < n; ++i) {
89  p = SA[i];
90  if(0 < p) {
91  c = T[--p];
92  t = C[c];
93  } else {
94  c = T[p = n - 1];
95  t = q;
96  }
97  if((t < 0) || (p != SA[t])) {
98  if(verbose) {
99  fprintf(stderr, "Suffix in wrong position.\n"
100  " SA[%d]=%d or\n"
101  " SA[%d]=%d\n",
102  t, (0 <= t) ? SA[t] : -1, i, SA[i]);
103  }
104  return -4;
105  }
106  if(t != q) {
107  ++C[c];
108  if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
109  }
110  }
111 
112  if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
113  return 0;
114 }
115 
116 static
117 void
118 print_help(const char *progname, int status) {
119  fprintf(stderr, "usage: %s FILE\n\n", progname);
120  exit(status);
121 }
122 
123 int
124 main(int argc, const char *argv[]) {
125  FILE *fp;
126  const char *fname;
127  unsigned char *T;
128  int *SA;
129  long n;
130  clock_t start, finish;
131 
132  /* Check arguments. */
133  if((argc == 1) ||
134  (strcmp(argv[1], "-h") == 0) ||
135  (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
136  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }
137 
138  /* Open a file for reading. */
139  if((fp = fopen(fname = argv[1], "rb")) == NULL) {
140  fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
141  perror(NULL);
142  exit(EXIT_FAILURE);
143  }
144 
145  /* Get the file size. */
146  if(fseek(fp, 0, SEEK_END) == 0) {
147  n = ftell(fp);
148  rewind(fp);
149  if(n < 0) {
150  fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
151  perror(NULL);
152  exit(EXIT_FAILURE);
153  }
154  } else {
155  fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
156  perror(NULL);
157  exit(EXIT_FAILURE);
158  }
159 
160  /* Allocate 5n bytes of memory. */
161  T = (unsigned char *)malloc((size_t)n * sizeof(unsigned char));
162  SA = (int *)malloc((size_t)n * sizeof(int));
163  if((T == NULL) || (SA == NULL)) {
164  fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
165  exit(EXIT_FAILURE);
166  }
167 
168  /* Read n bytes of data. */
169  if(fread(T, sizeof(unsigned char), (size_t)n, fp) != (size_t)n) {
170  fprintf(stderr, "%s: %s `%s': ",
171  argv[0],
172  (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
173  argv[1]);
174  perror(NULL);
175  exit(EXIT_FAILURE);
176  }
177  fclose(fp);
178 
179  /* Construct the suffix array. */
180  fprintf(stderr, "%s: %ld bytes ... ", fname, n);
181  start = clock();
182  if(divsufsort(T, SA, (int)n) != 0) {
183  fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
184  exit(EXIT_FAILURE);
185  }
186  finish = clock();
187  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
188 
189  /* Check the suffix array. */
190  if(sufcheck(T, SA, (int)n, 1) != 0) { exit(EXIT_FAILURE); }
191 
192  /* Deallocate memory. */
193  free(SA);
194  free(T);
195 
196  return 0;
197 }