NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fmindex_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 namespace nvbio {
29 
30 // return the number of occurrences of c in the range [0,k] of the given FM-index.
31 //
32 // \param fmi FM-index
33 // \param k range search delimiter
34 // \param c query character
35 //
36 template <
37  typename TRankDictionary,
38  typename TSuffixArray,
39  typename TL2>
44  uint8 c)
45 {
47 
48  if (k == index_type(-1))
49  return 0;
50  if (k == fmi.length())
51  return fmi.count( c );
52 
53  if (k >= fmi.primary()) // because $ is not in bwt
54  --k;
55 
56  return rank( fmi.rank_dict(), k, c );
57 }
58 
59 // return the number of occurrences of c in the ranges [0,l] and [0,r] of the
60 // given FM-index.
61 //
62 // \param fmi FM-index
63 // \param range range query [l,r]
64 // \param c query character
65 //
66 template <
67  typename TRankDictionary,
68  typename TSuffixArray,
69  typename TL2>
74  uint8 c)
75 {
77 
78  if (range.x == range.y)
79  {
80  const index_type r = rank( fmi, range.x, c );
81  return make_vector( r, r );
82  }
83  else if (range.x == index_type(-1))
84  {
85  const index_type r = rank( fmi, range.y, c );
86  return make_vector( index_type(0), r );
87  }
88  if (range.y == fmi.length())
89  {
90  return make_vector(
91  rank( fmi, range.x, c ),
92  fmi.count( c ) );
93  }
94 
95  if (range.x >= fmi.primary()) --range.x; // because $ is not in bwt
96  if (range.y >= fmi.primary()) --range.y; // because $ is not in bwt
97 
98  return rank( fmi.rank_dict(), range, c );
99 }
100 
101 // return the number of occurrences of all characters in the range [0,k] of the
102 // given FM-index.
103 //
104 // \param fmi FM-index
105 // \param k range search delimiter
106 //
107 template <
108  typename TRankDictionary,
109  typename TSuffixArray,
110  typename TL2>
112 typename TRankDictionary::vec4_type rank4(
115 {
116  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
117 
118  const index_type zero = index_type(0);
119 
120  if (k == index_type(-1))
121  return make_vector( zero, zero, zero, zero );
122  else if (k == fmi.length())
123  {
124  return make_vector(
125  fmi.count(0),
126  fmi.count(1),
127  fmi.count(2),
128  fmi.count(3) );
129  }
130 
131  if (k >= fmi.primary()) // because $ is not in bwt
132  --k;
133 
134  return rank4( fmi.rank_dict(), k );
135 }
136 
137 // return the number of occurrences of all characters in the range [0,k] of the
138 // given FM-index.
139 //
140 // \param fmi FM-index
141 // \param range range query [l,r]
142 // \param outl first output
143 // \param outh second output
144 //
145 template <
146  typename TRankDictionary,
147  typename TSuffixArray,
148  typename TL2>
152  typename TRankDictionary::vec4_type* outl,
153  typename TRankDictionary::vec4_type* outh)
154 {
155  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
156 
157  const index_type zero = index_type(0);
158 
159  if (range.x == range.y)
160  {
161  *outl = rank4( fmi, range.x );
162  *outh = *outl;
163  return;
164  }
165  else if (range.x == index_type(-1))
166  {
167  *outl = make_vector( zero, zero, zero, zero );
168  *outh = rank4( fmi, range.y );
169  return;
170  }
171  else if (range.y == fmi.length())
172  {
173  *outl = rank4( fmi, range.x );
174  *outh = make_uint4(
175  fmi.count(0),
176  fmi.count(1),
177  fmi.count(2),
178  fmi.count(3) );
179  return;
180  }
181 
182  if (range.x >= fmi.primary()) --range.x; // because $ is not in bwt
183  if (range.y >= fmi.primary()) --range.y; // because $ is not in bwt
184 
185  rank4( fmi.rank_dict(), range, outl, outh );
186 }
187 
188 // return the number of occurrences of all characters in the range [0,k] of the
189 // given FM-index.
190 //
191 // \param fmi FM-index
192 // \param k range search delimiter
193 //
194 template <
195  typename TRankDictionary,
196  typename TSuffixArray,
197  typename TL2>
199 void rank_all(
203 {
204  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
205 
206  const index_type zero = index_type(0);
207 
208  if (k == index_type(-1))
209  {
210  for (uint32 i = 0; i < fmi.symbol_count(); ++i)
211  (*out)[i] = zero;
212  }
213  else if (k == fmi.length())
214  {
215  for (uint32 i = 0; i < fmi.symbol_count(); ++i)
216  (*out)[i] = fmi.count(i);
217  }
218 
219  if (k >= fmi.primary()) // because $ is not in bwt
220  --k;
221 
222  rank_all( fmi.rank_dict(), k, out );
223 }
224 
225 // return the number of occurrences of all characters in the range [0,k] of the
226 // given FM-index.
227 //
228 // \param fmi FM-index
229 // \param range range query [l,r]
230 // \param outl first output
231 // \param outh second output
232 //
233 template <
234  typename TRankDictionary,
235  typename TSuffixArray,
236  typename TL2>
242 {
243  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
244 
245  const index_type zero = index_type(0);
246 
247  if (range.x == range.y)
248  {
249  rank_all( fmi, range.x, outl );
250  *outh = *outl;
251  return;
252  }
253  else if (range.x == index_type(-1))
254  {
255  for (uint32 i = 0; i < fmi.symbol_count(); ++i)
256  (*outl)[i] = zero;
257  rank_all( fmi, range.y, outh );
258  return;
259  }
260  else if (range.y == fmi.length())
261  {
262  rank_all( fmi, range.x, outl );
263  for (uint32 i = 0; i < fmi.symbol_count(); ++i)
264  (*outh)[i] = fmi.count(i);
265  return;
266  }
267 
268  if (range.x >= fmi.primary()) --range.x; // because $ is not in bwt
269  if (range.y >= fmi.primary()) --range.y; // because $ is not in bwt
270 
271  rank_all( fmi.rank_dict(), range, outl, outh );
272 }
273 
274 // return the range of occurrences of a pattern in the given FM-index.
275 //
276 // \param fmi FM-index
277 // \param pattern query string
278 // \param pattern_len query string length
279 //
280 template <
281  typename TRankDictionary,
282  typename TSuffixArray,
283  typename TL2,
284  typename Iterator>
288  const Iterator pattern,
289  const uint32 pattern_len)
290 {
291  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
292  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
293 
294  // backward search
295  const range_type range = make_vector( index_type(0), fmi.length() );
296 
297  return match( fmi, pattern, pattern_len, range );
298 }
299 
300 // return the range of occurrences of a pattern in the given FM-index.
301 //
302 // \param fmi FM-index
303 // \param pattern query string
304 // \param pattern_len query string length
305 // \param in_range starting range
306 //
307 template <
308  typename TRankDictionary,
309  typename TSuffixArray,
310  typename TL2,
311  typename Iterator>
315  const Iterator pattern,
316  const uint32 pattern_len,
318 {
319  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
320  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
321  typedef typename string_traits<Iterator>::value_type symbol_type;
322 
323  // backward search
324  range_type range = in_range;
325 
326  for (int32 i = pattern_len-1; i >= 0 && range.x <= range.y; --i)
327  {
328  const symbol_type c = pattern[i];
329  if (c > fmi.symbol_count()) // there is an N here. no match
330  return make_vector(index_type(1),index_type(0));
331 
332  const range_type c_rank = rank(
333  fmi,
334  make_vector( range.x-1, range.y ),
335  c );
336 
337  range.x = fmi.L2(c) + c_rank.x + 1;
338  range.y = fmi.L2(c) + c_rank.y;
339  }
340  return range;
341 }
342 
343 // return the range of occurrences of a reversed pattern in the given FM-index.
344 //
345 // \param fmi FM-index
346 // \param pattern query string
347 // \param pattern_len query string length
348 //
349 template <
350  typename TRankDictionary,
351  typename TSuffixArray,
352  typename TL2,
353  typename Iterator>
357  const Iterator pattern,
358  const uint32 pattern_len)
359 {
360  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
361  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
362  typedef typename string_traits<Iterator>::value_type symbol_type;
363 
364  // forward search
365  range_type range = make_vector( index_type(0), fmi.length() );
366 
367  for (uint32 i = 0; i < pattern_len && range.x <= range.y; ++i)
368  {
369  const symbol_type c = pattern[i];
370  if (c > fmi.symbol_count()) // there is an N here. no match
371  return make_vector(index_type(1),index_type(0));
372 
373  const range_type c_rank = rank(
374  fmi,
375  make_vector( range.x-1, range.y ),
376  c );
377 
378  range.x = fmi.L2(c) + c_rank.x + 1;
379  range.y = fmi.L2(c) + c_rank.y;
380  }
381  return range;
382 }
383 
384 // computes the inverse psi function at a given index, without using the reduced SA
385 //
386 // \param fmi FM-index
387 // \param i query index
388 // \return base inverse psi function value and offset
389 //
390 template <
391  typename TRankDictionary,
392  typename TSuffixArray,
393  typename TL2>
398 {
399  typedef fm_index<TRankDictionary,TSuffixArray,TL2> FMIndexType;
400  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
401 // typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
402 
403  NVBIO_CUDA_ASSERT( i <= fmi.length() );
404  typename FMIndexType::bwt_type bwt = fmi.bwt();
405 
406  if (i == fmi.primary())
407  return 0;
408 
409  const index_type k = i < fmi.primary() ? i : i-1;
410 
411  const uint8 c = bwt[k];
412  return fmi.L2(c) +
413  rank( fmi.rank_dict(), k, c );
414 }
415 
416 // computes the inverse psi function at a given index
417 //
418 // \param fmi FM-index
419 // \param i query index
420 // \return base inverse psi function value and offset
421 //
422 template <
423  typename TRankDictionary,
424  typename TSuffixArray,
425  typename TL2>
430 {
431  typedef fm_index<TRankDictionary,TSuffixArray,TL2> FMIndexType;
432  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
433 // typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
434 
435  NVBIO_CUDA_ASSERT( i <= fmi.length() );
436  index_type j = i;
437  index_type t = 0;
438  index_type suffix;
439 
440  typename FMIndexType::suffix_array_type sa = fmi.sa();
441  typename FMIndexType::bwt_type bwt = fmi.bwt();
442 
443  while (sa.fetch( j, suffix ) == false)
444  {
445  if (j != fmi.primary())
446  {
447  const uint8 c = j < fmi.primary() ? bwt[j] : bwt[j-1];
448  j = fmi.L2(c) + rank( fmi, j, c );
449  NVBIO_CUDA_ASSERT( j <= fmi.length() );
450  }
451  else
452  j = 0;
453 
454  ++t;
455  }
456  return make_vector( j, t );
457 }
458 
459 // given a suffix array index i, return its linear coordinate (or equivalently, return the
460 // linear coordinate of the suffix that prefixes the i-th row of the BWT matrix).
461 //
462 // \param fmi FM-index
463 // \param i query index
464 // \return position of the suffix that prefixes the query index
465 //
466 template <
467  typename TRankDictionary,
468  typename TSuffixArray,
469  typename TL2>
474 {
475  typedef fm_index<TRankDictionary,TSuffixArray,TL2> FMIndexType;
476  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
477 // typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
478 
479  NVBIO_CUDA_ASSERT( i <= fmi.length() );
480  index_type j = i;
481  index_type t = 0;
482  index_type suffix;
483 
484  typename FMIndexType::suffix_array_type sa = fmi.sa();
485  typename FMIndexType::bwt_type bwt = fmi.bwt();
486 
487  while (sa.fetch( j, suffix ) == false)
488  {
489  if (j != fmi.primary())
490  {
491  const uint8 c = j < fmi.primary() ? bwt[j] : bwt[j-1];
492  j = fmi.L2(c) + rank( fmi, j, c );
493  NVBIO_CUDA_ASSERT( j <= fmi.length() );
494  }
495  else
496  j = 0;
497 
498  ++t;
499  }
500  return (suffix + t) /*% (fmi.length()+1)*/;
501 }
502 
503 // given a suffix array index i, return the position of the closest suffix in the sampled SA,
504 // and its relative offset
505 //
506 // \param fmi FM-index
507 // \param i query index
508 // \return a pair formed by the position of the suffix that prefixes the query index
509 // in the sampled SA and its relative offset
510 //
511 template <
512  typename TRankDictionary,
513  typename TSuffixArray,
514  typename TL2>
519 {
520  typedef fm_index<TRankDictionary,TSuffixArray,TL2> FMIndexType;
521  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
522 // typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
523 
524  NVBIO_CUDA_ASSERT( i <= fmi.length() );
525  index_type j = i;
526  index_type t = 0;
527 
528  typename FMIndexType::suffix_array_type sa = fmi.sa();
529  typename FMIndexType::bwt_type bwt = fmi.bwt();
530 
531  while (sa.has( j ) == false)
532  {
533  if (j != fmi.primary())
534  {
535  const uint8 c = j < fmi.primary() ? bwt[j] : bwt[j-1];
536  j = fmi.L2(c) + rank( fmi, j, c );
537  NVBIO_CUDA_ASSERT( j <= fmi.length() );
538  }
539  else
540  j = 0;
541 
542  ++t;
543  }
544  return make_vector( j, t );
545 }
546 
547 // given a sampled suffix array index i and an offset j, return the corresponding linear coordinate SSA[i]+j
548 //
549 // \param fmi FM-index
550 // \param iter iterator to the sampled SA
551 // \return final linear coordinate
552 //
553 template <
554  typename TRankDictionary,
555  typename TSuffixArray,
556  typename TL2>
561 {
562  typedef fm_index<TRankDictionary,TSuffixArray,TL2> FMIndexType;
563  typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::index_type index_type;
564 // typedef typename fm_index<TRankDictionary,TSuffixArray,TL2>::range_type range_type;
565 
566  typename FMIndexType::suffix_array_type sa = fmi.sa();
567  index_type suffix; sa.fetch( it.x, suffix );
568  return suffix + it.y;
569 }
570 
571 #ifdef __CUDACC__
572 #if defined(MOD_NAMESPACE)
573 MOD_NAMESPACE_BEGIN
574 #endif
575 // indexing operator
576 //
577 NVBIO_FORCEINLINE NVBIO_DEVICE uint32 count_table_texture::operator[] (const uint32 i) const
578 {
579 #if USE_TEX
580  return binary_cast<uint32>( tex1Dfetch( s_count_table_tex, i ) );
581 #else
582  return 0;
583 #endif
584 }
585 
586 // bind texture
587 NVBIO_FORCEINLINE NVBIO_HOST void count_table_texture::bind(const uint32* count_table)
588 {
589 #if USE_TEX
590  cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<uint32>();
591  s_count_table_tex.normalized = false;
592  s_count_table_tex.filterMode = cudaFilterModePoint;
593  cudaBindTexture( 0, &s_count_table_tex, count_table, &channel_desc, 256*sizeof(uint32) );
594 #endif
595 }
596 
597 // unbind texture
598 NVBIO_FORCEINLINE NVBIO_HOST void count_table_texture::unbind()
599 {
600 #if USE_TEX
601  cudaUnbindTexture( &s_count_table_tex );
602 #endif
603 }
604 #if defined(MOD_NAMESPACE)
605 MOD_NAMESPACE_END
606 #endif
607 #endif
608 
609 } // namespace nvbio