NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
assembly_graph.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 #include "assembly_types.h"
30 
31 using namespace nvbio;
32 
33 #define ASSEMBLY_MIN_BASE_QUALITY 6
34 
36 {
37  uint32 kmer_size; // kmer_size used to build the graph
41 
42  // global CSR representation
48  D_VectorU32 edge_counts; // edge multiplicities
49  D_VectorU32 edge_weights; // transitions probabilities
50  D_VectorU8 edge_ref_flags; // flags each edge if the reference haplotype contains it
51 
54 
55  // local active region subgraphs
59 
60  // topological sort
61  D_VectorU32 topo_sorted_uids; // topologically sorted nodes
63 
64  // properties
66  D_VectorU8 cycle_presence; // each sub-graph is marked 1 if it contains a cycle
67 
68 
69  // kernel view
70  struct view
71  {
72  D_VectorU32::plain_view_type node_adjancency_map;
73  D_VectorU32::plain_view_type node_adj_offests;
74  D_VectorU32::plain_view_type node_out_degrees;
75  D_VectorU32::plain_view_type node_in_degrees;
76  D_VectorU32::plain_view_type edge_counts;
77  D_VectorU32::plain_view_type edge_weights;
78 
79  D_VectorU32::plain_view_type subgraph_source_ids;
80  D_VectorU32::plain_view_type subgraph_sink_ids;
81  D_VectorU32::plain_view_type subgraph_n_nodes;
82 
83  D_VectorU32::plain_view_type topo_sorted_uids;
84  D_VectorU32::plain_view_type topo_sorted_offsets;
85 
86  D_VectorU8::plain_view_type cycle_presence;
87 
88  D_VectorU32::plain_view_type node_in_adjancency_map;
89  D_VectorU32::plain_view_type node_in_adj_offests;
90  };
91 
92  operator view() {
93  view v = {
94  plain_view(node_adjancency_map),
95  plain_view(node_adj_offests),
96  plain_view(node_out_degrees),
97  plain_view(node_in_degrees),
98  plain_view(edge_counts),
99  plain_view(edge_weights),
100  plain_view(subgraph_source_ids),
101  plain_view(subgraph_sink_ids),
102  plain_view(subgraph_n_nodes),
103  plain_view(topo_sorted_uids),
104  plain_view(topo_sorted_offsets),
105  plain_view(cycle_presence),
106  plain_view(node_in_adjancency_map),
107  plain_view(node_in_adj_offests)
108  };
109  return v;
110  }
111 
112 
113  // construction
114  bool construct_graph(const D_SequenceSet& sequence_set, const D_VectorU32& d_active_region_ids,
115  const uint32 kmer_size, const bool allow_low_complexity);
116  void compute_edge_weights();
117  void compute_in_degrees();
118  void compute_in_adjacencies();
119  void compute_subgraph_n_nodes();
120  void compute_complexity();
121 
122  // validation
123  bool is_low_complexity(const uint32 subgraph_id) { return low_complexity[subgraph_id];}
124  bool has_cycles(const uint32 subgraph_id) { return cycle_presence[subgraph_id];}
125 
126  // search
127  void topological_sort();
128  void find_k_best_paths(const uint32 k);
129 
130  // debugging
131  void print_dot_graph(const D_SequenceSet& sequence_set);
132 };
133 
134 #include "assembly_graph_inl.h"
135