NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Modules | Classes | Functions
FMIndex

Detailed Description

Modules

 Rank Dictionaries
 
 Sampled Suffix Arrays
 

Classes

struct  nvbio::FMIndexFilter< system_tag, fm_index_type >
 
struct  nvbio::FMIndexFilter< host_tag, fm_index_type >
 
struct  nvbio::FMIndexFilter< device_tag, fm_index_type >
 
struct  nvbio::FMIndexFilterHost< fm_index_type >
 
struct  nvbio::FMIndexFilterDevice< fm_index_type >
 
struct  nvbio::fm_index< TRankDictionary, TSuffixArray, TL2 >
 
struct  nvbio::MEMFilter< system_tag, fm_index_type >
 
struct  nvbio::MEMRange< coord_type >
 
struct  nvbio::MEMHit< coord_type >
 
struct  nvbio::MEMFilter< host_tag, fm_index_type >
 
struct  nvbio::MEMFilter< device_tag, fm_index_type >
 
struct  nvbio::MEMFilterHost< fm_index_type >
 
struct  nvbio::MEMFilterDevice< fm_index_type >
 

Functions

template<typename FMIndex , typename String , typename Stack , typename Delegate >
NVBIO_HOST_DEVICE void nvbio::hamming_backtrack (const FMIndex fmi, const String pattern, const uint32 len, const uint32 seed, const uint32 mismatches, Stack stack, Delegate &delegate)
 
template<typename TRankDictionary1 , typename TSuffixArray1 , typename TRankDictionary2 , typename TSuffixArray2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
nvbio::extend_forward (const fm_index< TRankDictionary1, TSuffixArray1 > &f_fmi, const fm_index< TRankDictionary2, TSuffixArray2 > &r_fmi, typename fm_index< TRankDictionary1, TSuffixArray1 >::range_type &f_range, typename fm_index< TRankDictionary2, TSuffixArray2 >::range_type &r_range, uint8 c)
 
template<typename TRankDictionary1 , typename TSuffixArray1 , typename TRankDictionary2 , typename TSuffixArray2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
nvbio::extend_backwards (const fm_index< TRankDictionary1, TSuffixArray1 > &f_fmi, const fm_index< TRankDictionary2, TSuffixArray2 > &r_fmi, typename fm_index< TRankDictionary1, TSuffixArray1 >::range_type &f_range, typename fm_index< TRankDictionary2, TSuffixArray2 >::range_type &r_range, uint8 c)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
nvbio::rank_all (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type k, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::vector_type *out)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
nvbio::rank_all (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type range, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::vector_type *outl, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::vector_type *outh)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::index_type 
nvbio::basic_inv_psi (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type i)
 
template<typename pattern_type , typename fm_index_type , typename delegate_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE uint32 
nvbio::find_kmems (const uint32 pattern_len, const pattern_type pattern, const uint32 x, const fm_index_type f_index, const fm_index_type r_index, delegate_type &handler, const uint32 min_intv=1u, const uint32 min_span=1u)
 
template<typename pattern_type , typename fm_index_type , typename delegate_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE uint32 
nvbio::find_threshold_kmems (const uint32 pattern_len, const pattern_type pattern, const uint32 x, const fm_index_type f_index, const fm_index_type r_index, delegate_type &handler, const uint32 min_intv=1u, const uint32 min_span=1u)
 
template<typename system_tag , typename fm_index_type >
uint32 nvbio::string_batch_bound (const MEMFilter< system_tag, fm_index_type > &filter, const uint32 mem_count)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::index_type 
rank (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type k, uint8 c)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::range_type 
rank (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type range, uint8 c)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
TRankDictionary::vec4_type 
rank4 (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type k)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE void 
rank4 (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type range, typename TRankDictionary::vec4_type *outl, typename TRankDictionary::vec4_type *outh)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 , typename Iterator >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::range_type 
match (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const Iterator pattern, const uint32 pattern_len)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 , typename Iterator >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::range_type 
match (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const Iterator pattern, const uint32 pattern_len, const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type range)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 , typename Iterator >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::range_type 
match_reverse (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const Iterator pattern, const uint32 pattern_len)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::range_type 
inv_psi (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type i)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::index_type 
locate (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type i)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::range_type 
locate_ssa_iterator (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type i)
 
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE fm_index
< TRankDictionary,
TSuffixArray, TL2 >
::index_type 
lookup_ssa_iterator (const fm_index< TRankDictionary, TSuffixArray, TL2 > &fmi, const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type it)
 

Function Documentation

template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index<TRankDictionary,TSuffixArray,TL2>::index_type nvbio::basic_inv_psi ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type  i 
)
template<typename TRankDictionary1 , typename TSuffixArray1 , typename TRankDictionary2 , typename TSuffixArray2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void nvbio::extend_backwards ( const fm_index< TRankDictionary1, TSuffixArray1 > &  f_fmi,
const fm_index< TRankDictionary2, TSuffixArray2 > &  r_fmi,
typename fm_index< TRankDictionary1, TSuffixArray1 >::range_type &  f_range,
typename fm_index< TRankDictionary2, TSuffixArray2 >::range_type &  r_range,
uint8  c 
)

backwards extension using a bidirectional FM-index, extending the range of a pattern P to the pattern cP

Note: extension can be performed without a sampled suffix array, so that there's no need to store two of them; in practice, the FM-indices can also be of type fm_index <RankDictionary,null_type>.
Parameters
f_fmiforward FM-index
r_fmireverse FM-index
f_rangecurrent forward range
r_rangecurrent reverse range
cquery character

Definition at line 105 of file bidir_inl.h.

template<typename TRankDictionary1 , typename TSuffixArray1 , typename TRankDictionary2 , typename TSuffixArray2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void nvbio::extend_forward ( const fm_index< TRankDictionary1, TSuffixArray1 > &  f_fmi,
const fm_index< TRankDictionary2, TSuffixArray2 > &  r_fmi,
typename fm_index< TRankDictionary1, TSuffixArray1 >::range_type &  f_range,
typename fm_index< TRankDictionary2, TSuffixArray2 >::range_type &  r_range,
uint8  c 
)

forward extension using a bidirectional FM-index, extending the range of a pattern P to the pattern Pc.

Note: extension can be performed without a sampled suffix array, so that there's no need to store two of them; in practice, the FM-indices can also be of type fm_index <RankDictionary,null_type>.
Parameters
f_fmiforward FM-index
r_fmireverse FM-index
f_rangecurrent forward range
r_rangecurrent reverse range
cquery character

Definition at line 48 of file bidir_inl.h.

template<typename pattern_type , typename fm_index_type , typename delegate_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint32 nvbio::find_kmems ( const uint32  pattern_len,
const pattern_type  pattern,
const uint32  x,
const fm_index_type  f_index,
const fm_index_type  r_index,
delegate_type &  handler,
const uint32  min_intv = 1u,
const uint32  min_span = 1u 
)

find all SMEMs (Super Maximal Extension Matches) covering a given base of a pattern

Template Parameters
pattern_typethe pattern string type
delegate_typethe delegate output handler, must implement the following interface:
interface MEMHandler
{
typedef typename fm_index_type::range_type range_type;
// output an FM-index range referring to the forward index,
// together with its corresponding span on the pattern
void output(
const range_type range, // output SA range
const uint2 span); // output pattern span
};
Parameters
pattern_lenthe length of the query pattern
patternthe query pattern
xthe base of the query pattern to cover with MEMs
f_indexthe forward FM-index to match against
r_indexthe reverse FM-index to match against
handlerthe output handler
min_intvthe minimum SA interval size
min_spanthe minimum pattern span size
Returns
the right-most end of the MEMs covering x, or x itself if no MEM was found

Definition at line 38 of file mem_inl.h.

template<typename pattern_type , typename fm_index_type , typename delegate_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint32 nvbio::find_threshold_kmems ( const uint32  pattern_len,
const pattern_type  pattern,
const uint32  x,
const fm_index_type  f_index,
const fm_index_type  r_index,
delegate_type &  handler,
const uint32  min_intv = 1u,
const uint32  min_span = 1u 
)

find k-MEMs (k-Maximal Extension Matches) covering a given base of a pattern, for all the "threshold" values of k, i.e. the values assumed by the number k of occurrences in each MEM's SA range. In other words, this function saves all k-MEMs encountered when extending a span covering x either left or right causes a change in the number of occurrences k, for each possible value of k.

Template Parameters
pattern_typethe pattern string type
delegate_typethe delegate output handler, must implement the following interface:
interface MEMHandler
{
typedef typename fm_index_type::range_type range_type;
// output an FM-index range referring to the forward index,
// together with its corresponding span on the pattern
void output(
const range_type range, // output SA range
const uint2 span); // output pattern span
};
Parameters
pattern_lenthe length of the query pattern
patternthe query pattern
xthe base of the query pattern to cover with MEMs
f_indexthe forward FM-index to match against
r_indexthe reverse FM-index to match against
handlerthe output handler
min_intvthe minimum SA interval size
min_spanthe minimum pattern span size
Returns
the right-most end of the MEMs covering x, or x itself if no MEM was found

Definition at line 179 of file mem_inl.h.

template<typename FMIndex , typename String , typename Stack , typename Delegate >
NVBIO_HOST_DEVICE void nvbio::hamming_backtrack ( const FMIndex  fmi,
const String  pattern,
const uint32  len,
const uint32  seed,
const uint32  mismatches,
Stack  stack,
Delegate &  delegate 
)

perform approximate matching using the Hamming distance by backtracking

Template Parameters
Stringa string iterator
Stackan uint4 array to be used as a backtracking stack
Delegatea delegate functor used to process hits, must implement the following interface:
struct Delegate
{
// process a series of hits identified by their SA range
void operator() (const uint2 range);
}
Parameters
fmithe FM-index
patternthe search pattern
lenthe pattern length
seedthe length of the seed to search with exact matching
mismatchesthe maximum number of mismatches allowed
stackthe stack storage
delegatethe delegate functor invoked on hits

Definition at line 62 of file backtrack.h.

template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type inv_psi ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type  i 
)
related

computes the inverse psi function at a given index

Parameters
fmiFM-index
iquery index
Returns
base inverse psi function value and offset
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type locate ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type  i 
)
related

return the linear coordinate of the suffix that prefixes the i-th row of the BWT matrix.

Parameters
fmiFM-index
iquery index
Returns
position of the suffix that prefixes the query index
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type locate_ssa_iterator ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type  i 
)
related

return the position of the suffix that prefixes the i-th row of the BWT matrix in the sampled SA, and its relative offset

Parameters
fmiFM-index
iquery index
Returns
a pair formed by the position of the suffix that prefixes the query index in the sampled SA and its relative offset
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type lookup_ssa_iterator ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type  it 
)
related

return the position of the suffix that prefixes the i-th row of the BWT matrix.

Parameters
fmiFM-index
iteriterator to the sampled SA
Returns
final linear coordinate
template<typename TRankDictionary , typename TSuffixArray , typename TL2 , typename Iterator >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type match ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const Iterator  pattern,
const uint32  pattern_len 
)
related

return the range of occurrences of a pattern in the given FM-index.

Parameters
fmiFM-index
patternquery string
pattern_lenquery string length

Definition at line 286 of file fmindex_inl.h.

template<typename TRankDictionary , typename TSuffixArray , typename TL2 , typename Iterator >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type match ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const Iterator  pattern,
const uint32  pattern_len,
const typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type  range 
)
related

return the range of occurrences of a pattern in the given FM-index.

Parameters
fmiFM-index
patternquery string
pattern_lenquery string length
rangestart range
template<typename TRankDictionary , typename TSuffixArray , typename TL2 , typename Iterator >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type match_reverse ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
const Iterator  pattern,
const uint32  pattern_len 
)
related

return the range of occurrences of a reversed pattern in the given FM-index.

Parameters
fmiFM-index
patternquery string
pattern_lenquery string length

Definition at line 355 of file fmindex_inl.h.

template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type rank ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type  k,
uint8  c 
)
related

return the number of occurrences of c in the range [0,k] of the given FM-index.

Parameters
fmiFM-index
krange search delimiter
cquery character
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type rank ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type  range,
uint8  c 
)
related

return the number of occurrences of c in the ranges [0,l] and [0,r] of the given FM-index.

Parameters
fmiFM-index
rangerange query [l,r]
cquery character
template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE TRankDictionary::vec4_type rank4 ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type  k 
)
related

return the number of occurrences of all characters in the range [0,k] of the given FM-index.

Parameters
fmiFM-index
krange search delimiter

this function is deprecated: please use rank_all()

template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void rank4 ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type  range,
typename TRankDictionary::vec4_type *  outl,
typename TRankDictionary::vec4_type *  outh 
)
related

return the number of occurrences of all characters in the ranges [0,l] and [0,r] of the given FM-index.

Parameters
fmiFM-index
rangerange query [l,r]
outlfirst output
outhsecond output

this function is deprecated: please use rank_all()

template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void nvbio::rank_all ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::index_type  k,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::vector_type *  out 
)

return the number of occurrences of all characters in the range [0,k] of the given FM-index.

Parameters
fmiFM-index
krange search delimiter

Definition at line 199 of file fmindex_inl.h.

template<typename TRankDictionary , typename TSuffixArray , typename TL2 >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void nvbio::rank_all ( const fm_index< TRankDictionary, TSuffixArray, TL2 > &  fmi,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::range_type  range,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::vector_type *  outl,
typename fm_index< TRankDictionary, TSuffixArray, TL2 >::vector_type *  outh 
)

return the number of occurrences of all characters in the ranges [0,l] and [0,r] of the given FM-index.

Parameters
fmiFM-index
rangerange query [l,r]
outlfirst output
outhsecond output

Definition at line 237 of file fmindex_inl.h.

template<typename system_tag , typename fm_index_type >
uint32 nvbio::string_batch_bound ( const MEMFilter< system_tag, fm_index_type > &  filter,
const uint32  mem_count 
)

find the index i of the furthermost string such that filter.first_hit( j ) <= mem_count for each j < i

Definition at line 1537 of file mem_inl.h.