NVBIO
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
assembly_graph_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <stdio.h>
29 #include <fstream>
30 #include "kmers.h"
31 
32 #define TOPO_SORT_LOCAL_NODE_SET_SIZE 200
33 #define MAX_IN_DEGREE 20
34 
36 {
40 };
41 
43 {
46 
48  void operator() (const uint32 graph_id)
49  {
50  const uint32 source = graph.subgraph_source_ids[graph_id];
51  const uint32 n_nodes = graph.subgraph_n_nodes[graph_id];
52  uint32 L_offset = graph.topo_sorted_offsets[graph_id]; // ordered UID output offset
53  uint32 n_sorted = 0; // number of nodes sorted so far
54 
55  uint32 S[TOPO_SORT_LOCAL_NODE_SET_SIZE]; // set of nodes with in-degree 0
56  S[0] = source;
57  uint32 s_first = 0;
58  uint32 s_last = 1;
59 
60  while(s_last - s_first != 0) {
61  const uint32 n = S[s_first];
62  // remove from S
63  s_first++;
64 
65  // add to ordered list
66  graph.topo_sorted_uids[L_offset + n_sorted] = n;
67  n_sorted++;
68 
69  // traverse the out neighbors
70  uint32 out_degree = graph.node_out_degrees[n];
71  uint32 nbr_offset = graph.node_adj_offests[n];
72  for(uint32 i = 0; i < out_degree; i++) {
73  uint32 m = graph.node_adjancency_map[nbr_offset + i];
75  if(graph.node_in_degrees[m] == 0) {
76  // add to S
77  if(s_first > 0) {
78  s_first--;
79  S[s_first] = m;
80  } else {
81  if(s_last >= TOPO_SORT_LOCAL_NODE_SET_SIZE) {
82  printf("ERROR: Topological sort exceeded max locally allocated set size!\n");
83  graph.cycle_presence[graph_id] = true; //TODO: handle this case
84  }
85  S[s_last] = m;
86  s_last++;
87  }
88  }
89  }
90  }
91 
92  if(n_sorted != n_nodes) {
93  printf("Found cycle! %u %u \n", n_sorted, n_nodes);
94  graph.cycle_presence[graph_id] = true; // found a cycle!
95  }
96  graph.cycle_presence[graph_id] = false;
97  }
98 };
99 
101 {
102  const uint32 k;
104 
106  find_k_best_paths_functor(debruijn_graph::view _graph, const uint32 _k, uint32* _topk_scores) :
107  graph_functor(_graph), k(_k), topk_scores(_topk_scores) { }
108 
110  void operator() (const uint32 graph_id)
111  {
112  const uint32 n_nodes = graph.subgraph_n_nodes[graph_id];
113  const uint32 order_offset = graph.topo_sorted_offsets[graph_id];
114 
115  for(uint32 i = 0; i < n_nodes; i++) { // for each node in topological order
116  const uint32 n = graph.topo_sorted_uids[order_offset + i];
117  const uint32 in_degree = graph.node_in_degrees[n];
118  const uint32 in_nbr_offset = graph.node_in_adj_offests[n];
119 
120  // compute the top k paths to n
121  uint8 nbr_top_offsets[MAX_IN_DEGREE]; // keep track of how many paths consumed per in-neighbor
122  for(uint32 j = 0; j < k; j++) {
123  uint32 max_score = 0;
124  bool set_score = false;
125  for(uint32 v = 0; v < in_degree; v++) {
126  const uint32 m = graph.node_in_adjancency_map[in_nbr_offset + v];
127  uint32 j_score_offset = nbr_top_offsets[v]*n_nodes + m;
128  if(topk_scores[j_score_offset] == 0) continue;
129  set_score = true;
130 
131  // find edge weight (TODO: pre-compute this)
132  uint32 edge_weight = 0;
133  for(uint32 l = 0; l < graph.node_out_degrees[m]; l++) {
135  edge_weight = graph.edge_weights[graph.node_adj_offests[m + l]];
136  break;
137  }
138  }
139 
140  uint32 new_score = topk_scores[j_score_offset] + edge_weight;
141  if(new_score > max_score) {
142  max_score = new_score;
143  nbr_top_offsets[v]++;
144  }
145  }
146  if(set_score) {
147  topk_scores[j*n_nodes + n] = max_score;
148  // TODO: keep pointers
149  } else {
150  break; // consumed all the paths to this node
151  }
152  }
153  }
154  }
155 };
156 
158 {
161 
163  void operator() (const uint32 node_uid)
164  {
165  const uint32 nbr_offset = graph.node_adjancency_map[node_uid];
166  const uint32 out_degree = graph.node_in_degrees[node_uid];
167 
168  uint32 total_support = 0; // TODO: compute as a reduction
169  for(uint32 i = 0; i < out_degree; i++) {
170  total_support += graph.edge_counts[nbr_offset + i];
171  }
172 
173  for(uint32 i = 0; i < out_degree; i++) {
174 
175  graph.edge_weights[nbr_offset + i] = log10((float)graph.edge_counts[nbr_offset + i]) - log10((float)total_support);
176  }
177 
178  }
179 };
180 
181 struct flip_edges : public graph_functor
182 {
185 
186  NVBIO_DEVICE // TODO: add atomicCAS to atomics.h for both host and device
187  void operator() (const uint32 node_uid)
188  {
189  const uint32 nbr_offset = graph.node_adjancency_map[node_uid];
190  const uint32 n_nbrs = graph.node_in_degrees[node_uid];
191 
192  for(uint32 i = 0; i < n_nbrs; i++) {
193  const uint32 nbr_uid = graph.node_adjancency_map[nbr_offset + i];
194  const uint32 offset = graph.node_in_adj_offests[nbr_uid];
195  // atomically get an index
196  uint32 idx = 0;
197  uint32 val = atomicCAS(&graph.node_in_adjancency_map[idx], (uint32) -1, node_uid);
198  while(val != (uint32) -1) {
199  idx++;
200  val = atomicCAS(&graph.node_in_adjancency_map[idx], (uint32) -1, node_uid);
201  }
202  }
203 
204  }
205 };
206 
207 // converts a coordinate to the corresponding sequence -- used for debugging
208 template <typename string_set_type>
210 {
211  typedef typename string_set_type::string_type sequence;
212  const string_set_type string_set;
216 
218  coord_to_seq(const uint32 _kmer_size, const string_set_type _string_set, const SequenceSetKmerCoord* _nodes, uint8* _sequences) :
219  kmer_size(_kmer_size), string_set(_string_set), nodes(_nodes),
220  sequences(_sequences) {}
221 
223  void operator() (const uint64 node_idx) const
224  {
225  const SequenceSetKmerCoord node_coord = nodes[node_idx];
226  const sequence seq = string_set[node_coord.x];
227  const uint32 seq_pos = node_coord.y;
228  const uint64 offset = kmer_size * node_idx;
229 
230  for (uint32 i = 0; i < kmer_size; i++) {
231  uint8 c = dna_to_char(seq[seq_pos + i]);
232  sequences[offset + i] = c;
233  }
234  }
235 };
236 
237 
238 
239 template <typename string_set_type>
241 {
242  const string_set_type string_set;
250 
252  extract_active_region_source_sink(const string_set_type _string_set, const uint32 _kmer_size,
253  const SequenceSetKmerCoord* _coords,
254  const uint32* _seq_active_region_ids, const uint32* _global_to_UID_map, const uint32* _global_to_sorted_map,
255  uint32* _source_ids, uint32* _sink_ids):
256  string_set(_string_set), kmer_size(_kmer_size),
257  coords(_coords), seq_active_region_ids(_seq_active_region_ids),
258  global_to_UID_map(_global_to_UID_map), global_to_sorted_map(_global_to_sorted_map),
259  source_ids(_source_ids), sink_ids(_sink_ids) { }
260 
262  void operator() (const uint32 global_coord_idx) const
263  {
264  const SequenceSetKmerCoord source_coord = coords[global_to_sorted_map[global_coord_idx]];
265 
266  // determine if the coord is the source coord of a reference sequence (first seq in each region)
267  if(global_coord_idx != 0 && coords[global_to_sorted_map[global_coord_idx - 1]].w == source_coord.w) return;
268  // set the source
269  source_ids[source_coord.w] = global_to_UID_map[source_coord.z];
270 
271  // find and set the sink
272  const uint32 seq_len = string_set[source_coord.x].length();
273  const uint32 n_kmers = seq_len - kmer_size + 1;
274  const SequenceSetKmerCoord sink_coord = coords[global_to_sorted_map[global_coord_idx + n_kmers - 1]]; // last coord of the ref
275  sink_ids[sink_coord.w] = global_to_UID_map[sink_coord.z];
276  }
277 };
278 
279 // construct a debruijn assembly graph with the given kmer length
280 bool debruijn_graph::construct_graph(const D_SequenceSet& d_sequence_set, const D_VectorU32& d_active_region_ids,
281  const uint32 _kmer_size, bool allow_low_complexity)
282 {
283  kmer_size = _kmer_size;
284  D_KmerSet<D_SequenceSet> graph_kmer_set(d_sequence_set, d_active_region_ids);
285 
286  // --- NODE extraction ---
287  graph_kmer_set.kmer_size = kmer_size;
288  graph_kmer_set.gen_kmer_coords();
289  graph_kmer_set.init_alloc_temp_space();
290  graph_kmer_set.gen_kmer_64b_keys();
291  graph_kmer_set.sort_kmers_by_64b_keys();
292  graph_kmer_set.partition_kmers_by_uniqueness();
293  graph_kmer_set.gen_global_to_sorted_id_map();
294  graph_kmer_set.mark_unique_kmers();
295  graph_kmer_set.extract_super_kmers(); // handle kmers that are repeats
297  graph_kmer_set.string_set,
298  graph_kmer_set.n_super_coords,
299  graph_kmer_set.super_coords,
300  graph_kmer_set.super_prefix_uids,
301  graph_kmer_set.super_suffix_uids);
302 
303  // extract repeat kmer nodes (as well as U-R, R-R, and R-U repeat edges)
304  D_VectorU32 repeat_edges_from_uids;
305  D_VectorU32 repeat_edges_to_uids;
306  D_VectorU32 repeat_edge_counts;
307  graph_kmer_set.collapse_and_extract_non_overlaps(nodes, repeat_edges_from_uids, repeat_edges_to_uids, repeat_edge_counts);
308 
309  // extract unique kmer nodes
310  thrust::copy_n(
311  thrust::make_permutation_iterator(
312  thrust::make_permutation_iterator( // unique coordinates
313  graph_kmer_set.coords.begin(),
314  graph_kmer_set.kmers_64b_unique_idxs.begin()),
315  thrust::make_permutation_iterator(
316  graph_kmer_set.global_to_UID_map.begin(), // global id -> UID
317  thrust::make_transform_iterator( // coord -> global ids
318  thrust::make_permutation_iterator( // unique coordinates
319  graph_kmer_set.coords.begin(),
320  graph_kmer_set.kmers_64b_unique_idxs.begin()),
321  get_global_id()))),
322  graph_kmer_set.n_unique,
323  nodes.begin());
324 
325  // extract the source and sink subgraph nodes (for each active region)
326  /*n_subgraphs = d_active_region_ids[d_active_region_ids.size()-1] + 1;
327  subgraph_source_ids.resize(n_subgraphs);
328  subgraph_sink_ids.resize(n_subgraphs);
329  thrust::for_each(
330  thrust::make_counting_iterator(0u),
331  thrust::make_counting_iterator(0u) + graph_kmer_set.n_kmers,
332  extract_active_region_source_sink<D_SequenceSet>(d_sequence_set, kmer_size,
333  plain_view(graph_kmer_set.coords), plain_view(d_active_region_ids),
334  plain_view(graph_kmer_set.global_to_UID_map), plain_view(graph_kmer_set.global_to_sorted_id_map),
335  plain_view(subgraph_source_ids), plain_view(subgraph_sink_ids)));*/
336 
337  printf("Number of kmers %u \n", graph_kmer_set.n_kmers);
338  printf("Number distinct kmers %u \n", graph_kmer_set.n_distinct);
339  printf("Number unique kmers %u \n", graph_kmer_set.n_unique);
340 
341  // --- ADJACENCY MAP construction ---
342  uint32 n_unique_nodes = graph_kmer_set.n_unique;
343  n_nodes = graph_kmer_set.n_unique + graph_kmer_set.n_repeat;
344  node_adj_offests.resize(n_nodes + 1);
345  node_out_degrees.resize(n_nodes); // number of outgoing edges per nodes
346 
347  // generate edge (k+1)mers
348  graph_kmer_set.kmer_size = kmer_size + 1;
349  graph_kmer_set.gen_kmer_coords();
350  graph_kmer_set.filter_coords_by_prefix_uniqueness(graph_kmer_set.global_unique_flag_map); // keep only the edges starting with a unique kmer
351  graph_kmer_set.gen_kmer_64b_keys();
352  graph_kmer_set.segmented_sort_kmers_by_64b_keys(); // to guarantee same prefix edges are consecutive by region
353  graph_kmer_set.count_kmers(); // U-U and U-R edge counts
354 
355  // get the prefix node UID of each U-U and U-R edge
356  D_VectorU32 unique_prefix_node_uids(graph_kmer_set.n_unique);
359  graph_kmer_set.kmers_64b_unique_idxs.begin(),
362  graph_kmer_set.kmers_64b_unique_idxs.begin() + graph_kmer_set.n_unique,
364  unique_prefix_node_uids.begin(),
365  global_to_uid(plain_view(graph_kmer_set.global_to_UID_map)));
366 
367  D_VectorU32 unique_suffix_node_uids(graph_kmer_set.n_unique); // TODO: fuse with above
370  graph_kmer_set.kmers_64b_unique_idxs.begin(),
373  graph_kmer_set.kmers_64b_unique_idxs.begin() + graph_kmer_set.n_unique,
375  unique_suffix_node_uids.begin(),
376  global_to_uid(plain_view(graph_kmer_set.global_to_UID_map)));
377 
378  // count the number of outgoing edges per unique kmer (i.e. of type U-U and U-R)
379  // reduce by UID (edges starting with the same unique kmer will be consecutive)
380  D_VectorU32 unique_prefix_uids_idx(n_unique_nodes);
381  D_VectorU32 unique_prefix_counts(n_unique_nodes);
382  uint32 n_unique_prefix_uids = thrust::reduce_by_key(
383  thrust::make_counting_iterator(0),
384  thrust::make_counting_iterator(0) + graph_kmer_set.n_unique,
385  thrust::constant_iterator<uint32>(1),
386  unique_prefix_uids_idx.begin(),
387  unique_prefix_counts.begin(),
388  kmer_uid_eq(plain_view(unique_prefix_node_uids))).first - unique_prefix_uids_idx.begin();
389 
390  // scatter counts based on UIDs
391  thrust::scatter(
392  unique_prefix_counts.begin(),
393  unique_prefix_counts.begin() + n_unique_prefix_uids,
394  thrust::make_permutation_iterator(
395  unique_prefix_node_uids.begin(),
396  unique_prefix_uids_idx.begin()),
397  node_out_degrees.begin());
398 
399  // count the number of outgoing edges per repeat kmer (i.e. of type U-R, R-R and R-U)
400  thrust::sort_by_key(
401  repeat_edges_from_uids.begin(),
402  repeat_edges_from_uids.end(),
403  thrust::make_zip_iterator(thrust::make_tuple(repeat_edges_to_uids.begin(), repeat_edge_counts.begin())));
404 
405  D_VectorU32 repeat_prefix_uids_idx(repeat_edges_from_uids.size());
406  D_VectorU32 repeat_prefix_counts(repeat_edges_from_uids.size());
407  uint32 n_repeat_prefix_uids = thrust::reduce_by_key(
408  thrust::make_counting_iterator(0),
409  thrust::make_counting_iterator(0) + repeat_edges_from_uids.size(),
410  thrust::constant_iterator<uint32>(1),
411  repeat_prefix_uids_idx.begin(),
412  repeat_prefix_counts.begin(),
413  kmer_uid_eq (plain_view(repeat_edges_from_uids))).first - repeat_prefix_uids_idx.begin();
414 
415  // we need to discard the beginning of this vector corresponding to U-R edges
416  // by counting the number of unique prefixes
417  thrust::device_vector<uint8> temp_storage;
418  uint32 n_unique_prefixes = cuda::reduce(
419  n_repeat_prefix_uids,
421  thrust::make_permutation_iterator(
422  repeat_edges_from_uids.begin(),
423  repeat_prefix_uids_idx.begin()),
424  is_unique_uid(n_unique_nodes)),
425  thrust::plus<uint32>(),
426  temp_storage);
427 
428  // scatter counts based on UIDs
429  thrust::scatter(
430  repeat_prefix_counts.begin() + n_unique_prefixes,
431  repeat_prefix_counts.begin() + n_repeat_prefix_uids,
432  thrust::make_permutation_iterator(
433  repeat_edges_from_uids.begin(),
434  repeat_prefix_uids_idx.begin() + n_unique_prefixes),
435  node_out_degrees.begin());
436 
437  // prefix sum to get the offsets
439  n_nodes,
440  node_out_degrees.begin(),
441  node_adj_offests.begin() + 1,
442  thrust::plus<uint32>(),
443  temp_storage);
444 
445  // fill out the adjacency map: U-R, R-R, R-U edges
448  edge_counts.resize(n_edges);
450  repeat_prefix_uids_idx.begin(),
451  repeat_prefix_uids_idx.begin() + n_repeat_prefix_uids,
454  plain_view(repeat_edges_from_uids),
455  plain_view(repeat_edges_to_uids),
456  plain_view(repeat_edge_counts),
459 
460  // fill out U-U edges
462  unique_prefix_uids_idx.begin(),
463  unique_prefix_uids_idx.begin() + n_unique_prefix_uids,
466  plain_view(unique_prefix_node_uids),
467  plain_view(unique_suffix_node_uids),
468  plain_view(graph_kmer_set.kmers_counts),
471 
472 // // TODO: prune low weight chains
473 // // TODO: merge chains
474 
475  return true;
476 }
477 
478 // --- note: the functionality below has not yet been fully tested
479 
481 {
482  edge_weights.resize(n_edges);
484  thrust::make_counting_iterator(0u),
485  thrust::make_counting_iterator(0u) + n_nodes,
486  edge_weights_functor(*this));
487 }
488 
489 // compute the in-degree of each node in the graph
490 // the number of times a node UID occurs in the adjacency map
491 // is the node's in-degree
493 {
494  // sort the adjacency map
496  thrust::sort(adj_map.begin(), adj_map.end());
497 
498  D_VectorU32 distinct_node_UIDs(n_nodes);
499  D_VectorU32 counts(n_nodes);
500  uint32 n_distinct = thrust::reduce_by_key(
501  adj_map.begin(),
502  adj_map.end(),
503  thrust::make_constant_iterator<uint32>(1),
504  distinct_node_UIDs.begin(),
505  counts.begin()).first - distinct_node_UIDs.begin();
506 
507  node_in_degrees.resize(n_nodes);
508  thrust::scatter(
509  counts.begin(),
510  counts.begin() + n_distinct,
511  distinct_node_UIDs.begin(),
512  node_in_degrees.begin());
513 
514  // prefix sum to get the offsets
515  thrust::device_vector<uint8> temp_storage;
516  node_in_adj_offests.resize(n_nodes + 1);
518  n_nodes,
519  node_in_degrees.begin(),
520  node_in_adj_offests.begin() + 1,
521  thrust::plus<uint32>(),
522  temp_storage);
523 }
524 
526 {
527  // sort the adjacency map
529  thrust::fill(node_in_adjancency_map.begin(), node_in_adjancency_map.end(), (uint32) -1);
530 
532  thrust::make_counting_iterator(0u),
533  thrust::make_counting_iterator(0u) + n_nodes,
534  flip_edges(*this));
535 }
536 
538 {
539  // sort the adjacency map
540  D_VectorU32 active_region_ids(n_nodes);
542  nodes.begin(),
543  nodes.begin() + n_nodes,
544  active_region_ids.begin(),
545  get_kmer_reg_id());
546 
547  thrust::sort(active_region_ids.begin(), active_region_ids.end());
548 
549  D_VectorU32 active_regions(n_subgraphs);
550  D_VectorU32 counts(n_subgraphs);
551  uint32 n_distinct = thrust::reduce_by_key(
552  active_region_ids.begin(),
553  active_region_ids.end(),
554  thrust::make_constant_iterator<uint32>(1),
555  active_regions.begin(),
556  counts.begin()).first - active_regions.begin();
557 
559  thrust::scatter(
560  counts.begin(),
561  counts.begin() + n_distinct,
562  active_regions.begin(),
563  subgraph_n_nodes.begin());
564 
565  // prefix sum to get the offsets
566  thrust::device_vector<uint8> temp_storage;
567  topo_sorted_offsets.resize(n_subgraphs + 1);
569  n_subgraphs,
570  subgraph_n_nodes.begin(),
571  topo_sorted_offsets.begin() + 1,
572  thrust::plus<uint32>(),
573  temp_storage);
574 }
575 
577 {
578  //returns true if n_repeats * 4 > n_unique
579 }
580 
581 // performs a topological sort of the nodes in each subgraph
582 // for each subgraph: all nodes are on a path from source to sink
584 {
587 
588  topo_sorted_uids.resize(n_nodes);
589  cycle_presence.resize(n_subgraphs);
591  thrust::make_counting_iterator(0u),
592  thrust::make_counting_iterator(0u) + n_subgraphs,
593  topological_sort_functor(*this));
594 }
595 
597 {
600 
601  // allocate top k space for each node
602  D_VectorU32 topk_scores(k*n_nodes);
604  thrust::make_counting_iterator(0u),
605  thrust::make_counting_iterator(0u) + n_subgraphs,
606  find_k_best_paths_functor(*this, k, plain_view(topk_scores)));
607 }
608 
609 // writes a DOT graph representation
611 {
612  D_VectorU8 d_labels(kmer_size*n_nodes);
614  thrust::make_counting_iterator<uint64>(0u),
615  thrust::make_counting_iterator<uint64>(0u) + n_nodes,
617  H_VectorU8 labels = d_labels;
618 
619  std::ofstream dot_file;
620  dot_file.open("cuda_test_graph.txt");
621  for(uint64 i = 0; i < n_nodes; i++) {
622  uint32 n_out_edges = node_adj_offests[i+1] - node_adj_offests[i];
623  uint64 offset = node_adj_offests[i];
624 
625  uint64 label_offset = i*kmer_size;
626  std::string node_seq(labels.begin() + label_offset, labels.begin() + label_offset + kmer_size);
627 
628  for(uint32 j = 0; j < n_out_edges; j++) {
629  uint64 nbr_uid = node_adjancency_map[offset + j];
630  //uint32 count = edge_counts[offset + j];
631 
632  label_offset = nbr_uid*kmer_size;
633  std::string nbr_seq(labels.begin() + label_offset, labels.begin() + label_offset + kmer_size);
634 
635  dot_file << node_seq << "_" << i << "\t" << nbr_seq << "_" << nbr_uid << "\n";
636  }
637  }
638  dot_file.close();
639 
640  dot_file.open("cuda_test_graph.dot");
641 
642  dot_file << "digraph assemblyGraphs { \n";
643  for(uint64 i = 0; i < n_nodes; i++) {
644  uint32 n_out_edges = node_adj_offests[i+1] - node_adj_offests[i];
645  uint64 offset = node_adj_offests[i];
646 
647  uint64 label_offset = i*kmer_size;
648  std::string node_seq(labels.begin() + label_offset, labels.begin() + label_offset + kmer_size);
649 
650  for(uint32 j = 0; j < n_out_edges; j++) {
651  uint64 nbr_uid = node_adjancency_map[offset + j];
652  uint32 count = edge_counts[offset + j];
653 
654  label_offset = nbr_uid*kmer_size;
655  std::string nbr_seq(labels.begin() + label_offset, labels.begin() + label_offset + kmer_size);
656 
657  dot_file << node_seq << "_" << i << " -> " << nbr_seq << "_" << nbr_uid << " [label=\"" << count<< "\"];\n";
658  }
659  }
660 
661  for(uint64 i = 0; i < n_nodes; i++) {
662  uint64 offset = i*kmer_size;
663  std::string node_seq(labels.begin() + offset, labels.begin() + offset + kmer_size);
664  dot_file << node_seq << "_" << i << " [label=\"" << node_seq << "\",shape=box];\n";
665  }
666  dot_file << "}";
667  dot_file.close();
668 }