NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Modules | Namespaces | Classes | Enumerations | Functions
Alignment Module

Detailed Description

This module contains a series of functions to perform string alignment. The kind of alignment performed is selected according to an Aligner.

Modules

 Alignment Type
 
 Algorithm Tags
 
 Aligner Tags
 
 Aligners
 
 Batch Schedulers
 
 Batch Alignments
 
 Utilities
 
 Alignment Sinks
 
 AlignmentDetail
 

Namespaces

 nvbio::aln
 

Classes

struct  nvbio::aln::Alignment< ScoreType >
 
struct  nvbio::bowtie2::cuda::RoundedQualCost< T >
 
struct  nvbio::bowtie2::cuda::QualCost< T >
 
struct  nvbio::bowtie2::cuda::ConstantCost< T >
 
struct  nvbio::bowtie2::cuda::EditDistanceScoringScheme
 
struct  nvbio::bowtie2::cuda::SmithWatermanScoringScheme< MMCost, NCost >
 
struct  nvbio::bowtie2::cuda::make_aligner_dispatch< TYPE, scheme_type >
 
struct  nvbio::bowtie2::cuda::make_aligner_dispatch< LocalAlignment, scheme_type >
 
struct  nvbio::bowtie2::cuda::make_aligner_dispatch< EndToEndAlignment, scheme_type >
 
struct  nvbio::bowtie2::cuda::UberScoringScheme
 
struct  nvbio::bowtie2::cuda::ScoringSchemeSelector< ScoringTagType >
 
struct  nvbio::bowtie2::cuda::ScoringSchemeSelector< edit_distance_scoring_tag >
 
struct  nvbio::bowtie2::cuda::ScoringSchemeSelector< smith_waterman_scoring_tag >
 
struct  nvbio::bowtie2::cuda::ScoringSchemeTag< ScoringSchemeType >
 
struct  nvbio::bowtie2::cuda::ScoringSchemeTag< EditDistanceScoringScheme >
 
struct  nvbio::bowtie2::cuda::ScoringSchemeTag< SmithWatermanScoringScheme< MMCost, NCost > >
 

Enumerations

enum  nvbio::aln::DirectionVector {
  nvbio::aln::SUBSTITUTION = 0u, nvbio::aln::INSERTION = 1u, nvbio::aln::DELETION = 2u, nvbio::aln::SINK = 3u,
  nvbio::aln::INSERTION_EXT = 4u, nvbio::aln::DELETION_EXT = 8u, nvbio::aln::HMASK = 3u, nvbio::aln::EMASK = 4u,
  nvbio::aln::FMASK = 8u
}
 
enum  nvbio::aln::State { nvbio::aln::HSTATE = 0, nvbio::aln::ESTATE = 1, nvbio::aln::FSTATE = 2 }
 
enum  nvbio::bowtie2::cuda::AlignmentType { nvbio::bowtie2::cuda::EndToEndAlignment = 0, nvbio::bowtie2::cuda::LocalAlignment = 1 }
 
enum  nvbio::bowtie2::cuda::CostType { nvbio::bowtie2::cuda::ROUNDED_QUAL_COST = 1, nvbio::bowtie2::cuda::QUAL_COST = 2, nvbio::bowtie2::cuda::CONSTANT_COST = 3 }
 

Functions

template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE bool 
nvbio::aln::banded_alignment_score (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score, sink_type &sink)
 
template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE bool 
nvbio::aln::banded_alignment_score (const aligner_type aligner, const pattern_string pattern, const text_string text, const int32 min_score, sink_type &sink)
 
template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE int32 
nvbio::aln::banded_alignment_score (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score)
 
template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE int32 
nvbio::aln::banded_alignment_score (const aligner_type aligner, const pattern_string pattern, const text_string text, const int32 min_score)
 
template<uint32 BAND_LEN, uint32 CHECKPOINTS, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename backtracer_type , typename checkpoints_type , typename submatrix_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE Alignment
< int32 > 
nvbio::aln::banded_alignment_traceback (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score, backtracer_type &backtracer, checkpoints_type checkpoints, submatrix_type submatrix)
 
template<typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type , typename column_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE bool 
nvbio::aln::alignment_score (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score, sink_type &sink, column_type column)
 
template<uint32 MAX_TEXT_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE bool 
nvbio::aln::alignment_score (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score, sink_type &sink)
 
template<uint32 CHECKPOINTS, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename backtracer_type , typename checkpoints_type , typename submatrix_type , typename column_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE Alignment
< int32 > 
nvbio::aln::alignment_traceback (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score, backtracer_type &backtracer, checkpoints_type checkpoints, submatrix_type submatrix, column_type column)
 
template<uint32 MAX_PATTERN_LEN, uint32 MAX_TEXT_LEN, uint32 CHECKPOINTS, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename backtracer_type >
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE Alignment
< int32 > 
nvbio::aln::alignment_traceback (const aligner_type aligner, const pattern_string pattern, const qual_string quals, const text_string text, const int32 min_score, backtracer_type &backtracer)
 
template<AlignmentType TYPE, typename scheme_type >
make_aligner_dispatch< TYPE,
scheme_type >::type 
nvbio::bowtie2::cuda::make_aligner (const scheme_type &scheme)
 
SmithWatermanScoringScheme nvbio::bowtie2::cuda::load_scoring_scheme (const char *name, const AlignmentType type)
 

Enumeration Type Documentation

Enumerator
EndToEndAlignment 
LocalAlignment 

Definition at line 53 of file scoring.h.

Enumerator
ROUNDED_QUAL_COST 
QUAL_COST 
CONSTANT_COST 

Definition at line 58 of file scoring.h.

A representation of the DP direction flow vectors

Enumerator
SUBSTITUTION 
INSERTION 
DELETION 
SINK 
INSERTION_EXT 
DELETION_EXT 
HMASK 
EMASK 
FMASK 

Definition at line 139 of file alignment_base.h.

A representation of the Gotoh traceback matrix states

Enumerator
HSTATE 
ESTATE 
FSTATE 

Definition at line 154 of file alignment_base.h.

Function Documentation

template<typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type , typename column_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE bool nvbio::aln::alignment_score ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score,
sink_type &  sink,
column_type  column 
)

Compute the alignment score between a pattern and a text string with full DP alignment.

This is a low-level function, requiring all needed temporary storage to be passed in by the caller. The purpose is to allow the caller to allocate such storage (possibly among kernel threads) using different strategies.

Example:

const_string text = make_string("AAAAGGGTGCTCAA");
const_string pattern = make_string("GGGTGCTCAA");
typedef aln::GotohAligner<aln::LOCAL,aln::SimpleGotohScheme> aligner_type;
typedef aln::column_storage_type<aligner_type>::type cell_type;
cell_type column[14]; // a column as big as the text
const aln::SimpleGotohScheme scoring( // build a Gotoh scoring scheme
2, // match bonus
-1, // mismatch penalty
-1, // gap open penalty
-1 ); // gap extension penalty
aln::Best2Sink<int32> best2; // keep the best 2 scores
aln::alignment_score(
aln::make_gotoh_aligner<aln::LOCAL>( scoring ), // build a local Gotoh aligner
pattern, // pattern string
aln::trivial_quality_string(), // pattern qualities
text, // text string
-255, // minimum accepted score
best2, // alignment sink
column ); // temporary column storage
Template Parameters
aligner_type: an Aligner algorithm
pattern_string: a string representing the pattern.
qual_string: an array representing the pattern qualities.
text_string: a string representing the text.
sink_type: an Alignment Sink.
column_type: an array-like class defining operator[], used to represent a matrix column; the type of the matrix cells depends on the aligner, and can be obtained as typename column_storage_type<aligner_type>::type
Parameters
aligner: alignment algorithm
pattern: pattern string
quals: quality string
text: text string
min_score: threshold alignment score
sink: output sink
column: temporary storage for a matrix column, must be at least as large as the text

Definition at line 65 of file alignment_inl.h.

template<uint32 MAX_TEXT_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE bool nvbio::aln::alignment_score ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score,
sink_type &  sink 
)

Compute the alignment score between a pattern and a text string with full DP alignment.

This is a high-level function, allocating all necessary temporary storage in local memory. In order to size the allocations properly, the function requires compile-time knowledge of the maximum text length.

Example:

const_string text = make_string("AAAAGGGTGCTCAA");
const_string pattern = make_string("GGGTGCTCAA");
const aln::SimpleGotohScheme scoring( // build a Gotoh scoring scheme
2, // match bonus
-1, // mismatch penalty
-1, // gap open penalty
-1 ); // gap extension penalty
aln::Best2Sink<int32> best2; // keep the best 2 scores
aln::alignment_score<14>( // MAX_TEXT_LEN
aln::make_gotoh_aligner<aln::LOCAL>( scoring ), // build a local Gotoh aligner
pattern, // pattern string
aln::trivial_quality_string(), // pattern qualities
text, // text string
-255, // minimum accepted score
best2 ); // alignment sink
Template Parameters
MAX_TEXT_LEN: specifies the maximum text length, used to statically allocate the necessary temporary storage.
Parameters
aligner: alignment algorithm
pattern: pattern string
quals: quality string
text: text string
min_score: threshold alignment score
sink: output sink

Definition at line 103 of file alignment_inl.h.

template<uint32 CHECKPOINTS, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename backtracer_type , typename checkpoints_type , typename submatrix_type , typename column_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Alignment< int32 > nvbio::aln::alignment_traceback ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score,
backtracer_type &  backtracer,
checkpoints_type  checkpoints,
submatrix_type  submatrix,
column_type  column 
)

Backtrace an optimal alignment using a full DP algorithm.

This is a low-level function, requiring all needed temporary storage to be passed in by the caller. The purpose is to allow the caller to allocate such storage (possibly among kernel threads) using different strategies.

Example:

const uint32 TEXT_LEN = 64;
const uint32 PATTERN_LEN = 16;
const_string text = make_random_string(TEXT_LEN);
const_string pattern = make_random_string(PATTERN_LEN);
typedef aln::GotohAligner<aln::LOCAL,aln::SimpleGotohScheme> aligner_type;
typedef aln::column_storage_type<aligner_type>::type cell_type;
const uint32 CHECKPOINTS = 16;
// column storage
cell_type column[TEXT_LEN]; // a column as big as the text
// checkpoints storage
cell_type checkpoints[TEXT_LEN*util::divide_ri(PATTERN_LEN,CHECKPOINTS) ];
// submatrix storage (packed using a few bits per cell)
typedef PackedStream<uint32*,uint8,SUBMATRIX_BITS,false> submatrix_type;
uint32 submatrix_storage[ util::divide_ri(TEXT_LEN*CHECKPOINTS*SUBMATRIX_BITS,32) ];
submatrix_type submatrix( submatrix_storage );
const aln::SimpleGotohScheme scoring( // build a Gotoh scoring scheme
2, // match bonus
-1, // mismatch penalty
-1, // gap open penalty
-1 ); // gap extension penalty
MyBacktracer backtracer; // my backtracing context
aln::alignment_traceback<CHECKPOINTS>(
aln::make_gotoh_aligner<aln::LOCAL>( scoring ), // build a local Gotoh aligner
pattern, // pattern string
aln::trivial_quality_string(), // pattern qualities
text, // text string
-255, // minimum accepted score
backtracer, // backtracing context
checkpoints, // temporary checkpoints storage
submatrix, // temporary submatrix storage
column ); // temporary column storage
Template Parameters
CHECKPOINTS: number of DP rows between each checkpoint
aligner_type: an Aligner algorithm
pattern_string: a string representing the pattern.
qual_string: an array representing the pattern qualities.
text_string: a string representing the text.
checkpoints_type: an array-like class defining operator[], used to represent a reduced DP score matrix, containing all the matrix columns whose index is a multiple of CHECKPOINTS; the type of the matrix cells depends on the aligner, and can be obtained as typename checkpoint_storage_type<aligner_type>::type; the array must contain at least text.length()*((pattern.length() + CHECKPOINTS-1)/CHECKPOINTS) entries
submatrix_type: an array-like class defining operator[], used to represent a temporary DP flow submatrix, containing all the matrix flow cells between two checkpointed columns; the number of bits needed for the submatrix cells depends on the aligner, and can be obtained as direction_vector_traits<aligner_type>::BITS
backtracer_type: a model of Backtracer Model.
column_type: an array-like class defining operator[], used to represent a matrix column; the type of the matrix cells depends on the aligner, and can be obtained as typename column_storage_type<aligner_type>::type
Parameters
aligner: alignment algorithm
pattern: pattern to be aligned
quals: pattern quality scores
text: text to align the pattern to
min_score: minimum accepted score
backtracer: backtracking delegate
checkpoints: temporary checkpoints storage
submatrix: temporary submatrix storage
column: temporary storage for a matrix column, must be at least as large as the text
Returns
reported alignment

Definition at line 365 of file alignment_inl.h.

template<uint32 MAX_PATTERN_LEN, uint32 MAX_TEXT_LEN, uint32 CHECKPOINTS, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename backtracer_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Alignment< int32 > nvbio::aln::alignment_traceback ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score,
backtracer_type &  backtracer 
)

Backtrace an optimal alignment using a full DP algorithm.

This is a high-level function, allocating all necessary temporary storage in local memory. In order to size the allocations properly, the function requires compile-time knowledge of the maximum pattern and text length.

Template Parameters
MAX_PATTERN_LEN: maximum pattern length
MAX_TEXT_LEN: maximum text length
CHECKPOINTS: number of DP rows between each checkpoint
aligner_type: an Aligner algorithm
pattern_string: a string representing the pattern.
qual_string: an array representing the pattern qualities.
text_string: a string representing the text.
backtracer_type: a model of Backtracer Model.
Parameters
aligner: alignment algorithm
pattern: pattern to be aligned
quals: pattern quality scores
text: text to align the pattern to
min_score: minimum accepted score
backtracer: backtracking delegate
Returns
reported alignment

Definition at line 498 of file alignment_inl.h.

template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE bool nvbio::aln::banded_alignment_score ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score,
sink_type &  sink 
)

Compute the alignment score between a pattern and a text string with banded DP alignment.

Template Parameters
BAND_LEN: size of the DP band
Parameters
aligner: alignment algorithm
pattern: pattern string
quals: quality string
text: text string
min_score: threshold alignment score
sink: output sink

Definition at line 58 of file banded_inl.h.

template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE bool nvbio::aln::banded_alignment_score ( const aligner_type  aligner,
const pattern_string  pattern,
const text_string  text,
const int32  min_score,
sink_type &  sink 
)

Compute the alignment score between a pattern and a text string with banded DP alignment.

Template Parameters
BAND_LEN: size of the DP band
Parameters
aligner: alignment algorithm
pattern: pattern string
text: text string
min_score: threshold alignment score
sink: output sink

Definition at line 92 of file banded_inl.h.

template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 nvbio::aln::banded_alignment_score ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score 
)

Compute the alignment score between a pattern and a text string with banded DP alignment.

Template Parameters
BAND_LEN: size of the DP band
Parameters
aligner: alignment algorithm
pattern: pattern string
quals: quality string
text: text string
min_score: threshold alignment score
Returns
best alignment score

Definition at line 126 of file banded_inl.h.

template<uint32 BAND_LEN, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename sink_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 nvbio::aln::banded_alignment_score ( const aligner_type  aligner,
const pattern_string  pattern,
const text_string  text,
const int32  min_score 
)

Compute the alignment score between a pattern and a text string with banded DP alignment.

Template Parameters
BAND_LEN: size of the DP band
Parameters
aligner: alignment algorithm
pattern: pattern string
text: text string
min_score: threshold alignment score
Returns
best alignment score

Definition at line 160 of file banded_inl.h.

template<uint32 BAND_LEN, uint32 CHECKPOINTS, typename aligner_type , typename pattern_string , typename qual_string , typename text_string , typename backtracer_type , typename checkpoints_type , typename submatrix_type >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Alignment< int32 > nvbio::aln::banded_alignment_traceback ( const aligner_type  aligner,
const pattern_string  pattern,
const qual_string  quals,
const text_string  text,
const int32  min_score,
backtracer_type &  backtracer,
checkpoints_type  checkpoints,
submatrix_type  submatrix 
)

Backtrace an alignment using a banded DP algorithm.

Template Parameters
BAND_LEN: size of the DP band
CHECKPOINTS: number of DP rows between each checkpoint
aligner_type: an Aligner algorithm
pattern_string: a string representing the pattern.
qual_string: an array representing the pattern qualities.
text_string: a string representing the text.
checkpoints_type: an array-like class defining operator[], used to represent a reduced DP score matrix, containing all the matrix columns whose index is a multiple of CHECKPOINTS; the type of the matrix cells depends on the aligner, and can be obtained as typename checkpoint_storage_type<aligner_type>::type
submatrix_type: an array-like class defining operator[], used to represent a temporary DP flow submatrix, containing all the matrix flow cells between two checkpointed columns; the number of bits needed for the submatrix cells depends on the aligner, and can be obtained as direction_vector_traits<aligner_type>::BITS
backtracer_type: a model of Backtracer Model.
Parameters
aligner: alignment algorithm
pattern: pattern to be aligned
quals: pattern quality scores
text: text to align the pattern to
min_score: minimum accepted score
backtracer: backtracking delegate
checkpoints: temporary checkpoints storage
submatrix: temporary submatrix storage
Returns
reported alignment

Definition at line 363 of file banded_inl.h.

SmithWatermanScoringScheme nvbio::bowtie2::cuda::load_scoring_scheme ( const char *  name,
const AlignmentType  type 
)
inline

load a Smith-Waterman scoring scheme from disk

Definition at line 37 of file scoring_inl.h.

template<AlignmentType TYPE, typename scheme_type >
make_aligner_dispatch<TYPE,scheme_type>::type nvbio::bowtie2::cuda::make_aligner ( const scheme_type &  scheme)

a helper function to make an aligner given the AlignmentType (LocalAlignment|EndToEndAlignment) and the scheme (EditDistanceScoringScheme|SmithWatermanScoringScheme)

Definition at line 374 of file scoring.h.