NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Classes | Functions
Rank Dictionaries

Detailed Description

A rank dictionary is a data-structure which, given a text and a sparse occurrence table, can answer, in O(1) time, queries of the kind "how many times does character c occurr in the substring text[0:i] ?"

NOTE: the O(1) time refers to the complexity in the length n of the text; more properly, if the alphabet contains s characters (i.e. if the number of bits per symbol is b = log(s)), the complexity is O(log(s)). Similarly, the amount of space needed for a sparse occurrence table is O(n s). In other words, the amount of space is exponential in the number of bits per character.
For a more compact data structure requiring O(n log(s)) storage, useful with larger alphabets, please refer to Wavelet Trees.

Classes

struct  nvbio::rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable >
 

Functions

template<uint32 SYMBOL_SIZE, uint32 K, typename SymbolIterator , typename IndexType >
void build_occurrence_table (SymbolIterator begin, SymbolIterator end, IndexType *occ, IndexType *cnt=NULL)
 
template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE uint8 
text (const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &dict, const uint32 i)
 
template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE uint8 
text (const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &dict, const uint64 i)
 
template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable , typename IndexType >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE IndexType 
rank (const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &dict, const IndexType i, const uint32 c)
 
template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable , typename IndexType >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE vector_type
< IndexType, 2 >::type 
rank (const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &dict, const typename vector_type< IndexType, 2 >::type range, const uint32 c)
 
template<uint32 K, typename TextString , typename OccIterator , typename CountTable , typename IndexType >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE vector_type
< IndexType, 4 >::type 
rank4 (const rank_dictionary< 2, K, TextString, OccIterator, CountTable > &dict, const IndexType i)
 
template<uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
rank4 (const rank_dictionary< 2, K, TextString, OccIterator, CountTable > &dict, const uint2 range, uint4 *outl, uint4 *outh)
 
template<uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
rank4 (const rank_dictionary< 2, K, TextString, OccIterator, CountTable > &dict, const uint64_2 range, uint64_4 *outl, uint64_4 *outh)
 
template<uint32 SYMBOL_SIZE, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
rank_dictionary< SYMBOL_SIZE,
K, TextString, OccIterator,
CountTable >::vector_type 
rank_all (const rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable > &dict, const typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::index_type i)
 
template<uint32 SYMBOL_SIZE, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
rank_all (const rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable > &dict, const typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::index_type i, typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type *out)
 
template<uint32 SYMBOL_SIZE, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
rank_all (const rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable > &dict, const typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::range_type range, typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type *outl, typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type *outh)
 

Function Documentation

template<uint32 SYMBOL_SIZE, uint32 K, typename SymbolIterator , typename IndexType >
void build_occurrence_table ( SymbolIterator  begin,
SymbolIterator  end,
IndexType *  occ,
IndexType *  cnt = NULL 
)
related

Build a sampled occurrence table for a given string, storing a set of symbol counters every K elements of the original string. The table must contain ((n+K-1)/K)*N_SYMBOLS entries, where N_SYMBOLS = 2 ^ SYMBOL_SIZE.

Template Parameters
SYMBOL_SIZEsymbol size, in bits
Ksampling frequency
SymbolIteratorthe input string iterator
IndexTypethe integer type used to store indices

Optionally save the table of the global counters as well.

Parameters
beginsymbol sequence begin
endsymbol sequence end
occoutput occurrence map
cntoptional table of the global counters

Definition at line 43 of file rank_dictionary_inl.h.

template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable , typename IndexType >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE IndexType rank ( const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &  dict,
const IndexType  i,
const uint32  c 
)
related

fetch the number of occurrences of character c in the substring [0,i]

Parameters
dictthe rank dictionary
ithe end of the query range [0,i]
cthe query character

Definition at line 589 of file rank_dictionary_inl.h.

template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable , typename IndexType >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE vector_type< IndexType, 2 >::type rank ( const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &  dict,
const typename vector_type< IndexType, 2 >::type  range,
const uint32  c 
)
related

fetch the number of occurrences of character c in the substrings [0,l] and [0,r]

Parameters
dictthe rank dictionary
rangethe ends of the query ranges [0,range.x] and [0,range.y]
cthe query character
template<uint32 K, typename TextString , typename OccIterator , typename CountTable , typename IndexType >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE vector_type< IndexType, 4 >::type rank4 ( const rank_dictionary< 2, K, TextString, OccIterator, CountTable > &  dict,
const IndexType  i 
)
related

fetch the number of occurrences of all characters c in the substring [0,i]

Parameters
dictthe rank dictionary
ithe end of the query range [0,i]

this function is deprecated: please use rank_all()

Definition at line 614 of file rank_dictionary_inl.h.

template<uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void rank4 ( const rank_dictionary< 2, K, TextString, OccIterator, CountTable > &  dict,
const uint2  range,
uint4 *  outl,
uint4 *  outh 
)
related

fetch the number of occurrences of all characters in the substrings [0,l] and [0,r]

Parameters
dictthe rank dictionary
rangethe ends of the query ranges [0,range.x] and [0,range.y]
outlthe output count of all characters in the first range
outlthe output count of all characters in the second range

this function is deprecated: please use rank_all()

Definition at line 627 of file rank_dictionary_inl.h.

template<uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void rank4 ( const rank_dictionary< 2, K, TextString, OccIterator, CountTable > &  dict,
const uint64_2  range,
uint64_4 outl,
uint64_4 outh 
)
related

fetch the number of occurrences of all characters in the substrings [0,l] and [0,r]

Parameters
dictthe rank dictionary
rangethe ends of the query ranges [0,range.x] and [0,range.y]
outlthe output count of all characters in the first range
outlthe output count of all characters in the second range

this function is deprecated: please use rank_all()

Definition at line 640 of file rank_dictionary_inl.h.

template<uint32 SYMBOL_SIZE, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type rank_all ( const rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable > &  dict,
const typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::index_type  i 
)
related

fetch the number of occurrences of all characters c in the substring [0,i]

Parameters
dictthe rank dictionary
ithe end of the query range [0,i]
template<uint32 SYMBOL_SIZE, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void rank_all ( const rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable > &  dict,
const typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::index_type  i,
typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type out 
)
related

fetch the number of occurrences of all characters c in the substring [0,i]

Parameters
dictthe rank dictionary
ithe end of the query range [0,i]
outthe output count of all characters
template<uint32 SYMBOL_SIZE, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void rank_all ( const rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable > &  dict,
const typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::range_type  range,
typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type outl,
typename rank_dictionary< SYMBOL_SIZE, K, TextString, OccIterator, CountTable >::vector_type outh 
)
related

fetch the number of occurrences of all characters in the substrings [0,l] and [0,r]

Parameters
dictthe rank dictionary
rangethe ends of the query ranges [0,range.x] and [0,range.y]
outlthe output count of all characters in the first range
outlthe output count of all characters in the second range
template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint8 text ( const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &  dict,
const uint32  i 
)
related

fetch the text character at position i in the rank dictionary

Definition at line 185 of file rank_dictionary_inl.h.

template<uint32 SYMBOL_SIZE_T, uint32 K, typename TextString , typename OccIterator , typename CountTable >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint8 text ( const rank_dictionary< SYMBOL_SIZE_T, K, TextString, OccIterator, CountTable > &  dict,
const uint64  i 
)
related

fetch the text character at position i in the rank dictionary

Definition at line 193 of file rank_dictionary_inl.h.