NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
suffix.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
31 
32 
33 namespace nvbio {
34 
37 
40 
44 
48 
52 
56 
59 
66 template <
67  typename StringType,
68  typename CoordType,
69  uint32 CoordDim>
70 struct SuffixCore {};
71 
77 template <
78  typename StringType,
79  typename CoordType>
80 struct SuffixCore<StringType,CoordType,1u>
81 {
82  typedef StringType string_type;
83  typedef CoordType coord_type;
85 
86  typedef typename std::iterator_traits<string_type>::value_type symbol_type;
87  typedef typename std::iterator_traits<string_type>::value_type value_type;
88  typedef typename std::iterator_traits<string_type>::reference reference;
89 
93 
98 
103  const string_type string,
104  const coord_type suffix) :
105  m_string( string ),
106  m_coords( suffix ) {}
107 
111  uint32 size() const { return nvbio::length( m_string ) - m_coords; }
112 
116  uint32 length() const { return size(); }
117 
121  symbol_type operator[] (const uint32 i) const { return m_string[ m_coords + i ]; }
122 
126  reference operator[] (const uint32 i) { return m_string[ m_coords + i ]; }
127 
131  coord_type coords() const { return m_coords; }
132 
135  iterator begin() { return m_string.begin() + m_coords; }
136 
139  iterator end() { return m_string.end(); }
140 
143  const_iterator begin() const { return m_string.begin() + m_coords; }
144 
147  const_iterator end() const { return m_string.end(); }
148 
151 };
152 
158 template <
159  typename StringType,
160  typename CoordType>
161 struct SuffixCore<StringType,CoordType,2u>
162 {
163  typedef StringType string_type;
164  typedef CoordType coord_type;
166 
167  typedef typename std::iterator_traits<string_type>::value_type symbol_type;
168  typedef typename std::iterator_traits<string_type>::value_type value_type;
169  typedef typename std::iterator_traits<string_type>::reference reference;
170 
174 
179 
184  const string_type string,
185  const coord_type suffix) :
186  m_string( string ),
187  m_coords( suffix ) {}
188 
192  uint32 size() const { return nvbio::length( m_string ) - m_coords.y; }
193 
197  uint32 length() const { return size(); }
198 
202  symbol_type operator[] (const uint32 i) const { return m_string[ m_coords.y + i ]; }
203 
207  reference operator[] (const uint32 i) { return m_string[ m_coords.y + i ]; }
208 
212  coord_type coords() const { return m_coords; }
213 
216  iterator begin() { return m_string.begin() + m_coords.y; }
217 
220  iterator end() { return m_string.end(); }
221 
224  const_iterator begin() const { return m_string.begin() + m_coords.y; }
225 
228  const_iterator end() const { return m_string.end(); }
229 
232 };
233 
234 
236 
243 template <
244  typename StringType,
245  typename CoordType>
246 struct Suffix : SuffixCore< StringType, CoordType, vector_traits<CoordType>::DIM >
247 {
249 
251  typedef StringType string_type;
252  typedef CoordType coord_type;
254 
255  typedef typename std::iterator_traits<string_type>::value_type symbol_type;
256  typedef typename std::iterator_traits<string_type>::value_type value_type;
257  typedef typename std::iterator_traits<string_type>::reference reference;
258 
262 
266  Suffix() {}
267 
272  const string_type string,
273  const coord_type infix) : core_type( string, infix ) {}
274 };
275 
284 template <typename StringType, typename CoordType>
286 Suffix<StringType,CoordType> make_suffix(const StringType string, const CoordType coords)
287 {
288  return Suffix<StringType,CoordType>( string, coords );
289 }
290 
293 
300 template <
301  typename SequenceType,
302  typename SuffixIterator,
303  uint32 CoordDim>
304 struct SuffixSetCore {};
305 
311 template <
312  typename SequenceType,
313  typename SuffixIterator>
314 struct SuffixSetCore<SequenceType,SuffixIterator,1u>
315 {
316  typedef SequenceType sequence_type;
317  typedef SuffixIterator suffix_iterator;
318 
319  typedef typename std::iterator_traits<SuffixIterator>::value_type coord_type;
321 
326 
331  const uint32 size,
332  const sequence_type sequence,
333  const suffix_iterator suffixes) :
334  m_size( size ),
335  m_sequence( sequence ),
336  m_suffixes( suffixes ) {}
337 
341  uint32 size() const { return m_size; }
342 
346  string_type operator[] (const uint32 i) const
347  {
348  const coord_type coords = m_suffixes[i];
349  return string_type( m_sequence, coords );
350  }
351 
355 };
356 
362 template <
363  typename SequenceType,
364  typename SuffixIterator>
365 struct SuffixSetCore<SequenceType,SuffixIterator,2u>
366 {
367  typedef SequenceType sequence_type;
368  typedef SuffixIterator suffix_iterator;
369 
370  typedef typename sequence_type::string_type base_string_type;
371  typedef typename std::iterator_traits<SuffixIterator>::value_type coord_type;
373 
378 
383  const uint32 size,
384  const sequence_type sequence,
385  const suffix_iterator suffixes) :
386  m_size( size ),
387  m_sequence( sequence ),
388  m_suffixes( suffixes ) {}
389 
393  uint32 size() const { return m_size; }
394 
398  string_type operator[] (const uint32 i) const
399  {
400  const coord_type coords = m_suffixes[i];
401  return string_type( m_sequence[ coords.x ], coords );
402  }
403 
407 };
408 
410 
413 template <typename StringType, typename CoordType>
416 
419 template <typename StringType, typename CoordType, uint32 CoordDim>
421 uint32 length(const SuffixCore<StringType,CoordType,CoordDim>& suffix) { return suffix.length(); }
422 
438 template <
439  typename SequenceType,
440  typename SuffixIterator>
441 struct SuffixSet : public SuffixSetCore<
442  SequenceType,
443  SuffixIterator,
444  vector_traits<typename std::iterator_traits<SuffixIterator>::value_type>::DIM>
445 {
446  typedef SuffixSetCore<
447  SequenceType,
448  SuffixIterator,
450 
451  typedef SequenceType sequence_type;
452  typedef SuffixIterator suffix_iterator;
454 
455  typedef typename base_type::coord_type coord_type;
456  typedef typename base_type::string_type string_type;
457 
460 
465 
470  const uint32 size,
471  const sequence_type sequence,
472  const suffix_iterator suffixes) :
473  base_type( size, sequence, suffixes ) {}
474 
477  const_iterator begin() const { return const_iterator(*this,0u); }
478 
481  const_iterator end() const { return const_iterator(*this,base_type::size()); }
482 
485  iterator begin() { return iterator(*this,0u); }
486 
489  iterator end() { return iterator(*this,base_type::size()); }
490 };
491 
494 template <typename string_set_type>
496  const string_set_type string_set,
497  const uint2 suffix1,
498  const uint2 suffix2)
499 {
500  typedef typename string_set_type::string_type string_type;
501  typedef Suffix<string_type,uint32> suffix_type;
502 
503  // check whether both inputs represent the same suffix
504  if (suffix1.y == suffix2.y &&
505  suffix1.x == suffix2.x)
506  return 0;
507 
508  const suffix_type string1 = make_suffix( string_set[suffix1.y], suffix1.x );
509  const suffix_type string2 = make_suffix( string_set[suffix2.y], suffix2.x );
510 
511  const uint32 len1 = nvbio::length( string1 );
512  const uint32 len2 = nvbio::length( string2 );
513 
514  const uint32 min_len = nvbio::min( len1, len2 );
515 
516  // compare character by character
517  int32 cmp = 0;
518  for (uint32 j = 0; j < min_len; ++j)
519  {
520  const uint8 c_i = string1[j];
521  const uint8 c_n = string2[j];
522  if (c_i < c_n)
523  {
524  cmp = -1;
525  break;
526  }
527  if (c_i > c_n)
528  {
529  cmp = 1;
530  break;
531  }
532  }
533 
534  // break ties...
535  if (cmp == 0)
536  {
537  if (len1 < len2) // $ is smaller than any other character => s1 < s2
538  cmp = -1;
539  else if (len2 < len1) // $ is smaller than any other character => s2 < s1
540  cmp = 1;
541  else // $_1 < $_2 ? -1 : 1
542  cmp = suffix1.y < suffix2.y ? -1 : 1;
543  }
544  return cmp;
545 }
546 
549 
550 } // namespace nvbio
551 
552 //#include <nvbio/basic/suffix_inl.h>