41 struct get_kmer_seq_id :
public thrust::unary_function<SequenceSetKmerCoord,uint32>
60 struct get_kmer_size :
public thrust::unary_function<SequenceSetKmerCoord,uint32>
65 return kmer_coord.z - kmer_coord.y;
69 struct get_global_id :
public thrust::unary_function<SequenceSetKmerCoord,uint32>
119 return coord.z + coord.x;
137 return coord.z + coord.x;
155 return coord.z + coord.x + 1;
219 void operator() (
const thrust::tuple<uint32, SequenceSetKmerCoord> idx_coord)
const
221 const uint32 idx = thrust::get<0>(idx_coord);
228 template <
typename string_set_type>
231 typedef typename string_set_type::string_type
sequence;
246 const uint32 seq_pos = kmer_coord.y;
247 const uint32 seq_len = seq.length();
300 return ci.y == cj.y && ci.x == cj.x;
320 template <
typename string_set_type>
323 typedef typename string_set_type::string_type
sequence;
343 const uint32 kmer_pos = kmer_coord.y;
344 const uint32 kmer_size = kmer_coord.z - kmer_coord.y;
346 const uint32 seq_len = seq.length();
349 for (
uint32 j = 0; j < kmer_size; j++) {
362 thrust::tuple<uint32, uint32>
operator() (
const thrust::tuple<uint32, uint32>& x,
const thrust::tuple<uint32, uint32>& y)
const
364 return thrust::tuple<uint32, uint32>(thrust::get<0>(x) + thrust::get<0>(y), thrust::get<1>(x) + thrust::get<1>(y));
374 bool operator() (
const thrust::tuple<uint32, uint32>& nSeq_nOcc)
const
376 return thrust::get<0>(nSeq_nOcc) >= thrust::get<1>(nSeq_nOcc);
418 void operator() (
const thrust::tuple<uint32, uint32>& uid_kid)
const
420 const uint32 uid = thrust::get<0>(uid_kid);
421 const uint32 kid = thrust::get<1>(uid_kid);
483 const uint32* _unique_kmer_idxs,
uint8* _unique_flag_map,
uint32* _unique_UID_map) :
496 for(
uint32 i = 1; i < count; i++) {
514 void operator() (
const thrust::tuple<uint8, uint64>& uflag_prefix)
const
516 const uint8 is_unique_prefix = thrust::get<0>(uflag_prefix);
517 const uint32 kmer_id = thrust::get<1>(uflag_prefix);
519 if((is_unique_prefix == 1) && (kmer_id != (
uint32) -1)) {
531 template <
typename string_set_type>
534 typedef typename string_set_type::string_type
sequence;
550 const uint8* _is_unique_map,
const uint32* _global_UID_map,
const uint32* _sorted_idx_map,
563 const uint32 seq_len = seq.length();
570 while(i <= coord.y) {
572 super_coord.y = coord.y - i;
580 if(super_coord.y != (
uint32) -1) {
583 while(i < seq_len - coord.y -
kmer_size + 1) {
599 template <
typename string_set_type>
602 typedef typename string_set_type::string_type
sequence;
614 const uint32 kmer_pos = kmer_coord.y;
615 const uint32 kmer_size = kmer_coord.z - kmer_coord.y;
617 const uint32 seq_len = seq.length();
620 for (
uint32 k = 0; k < kmer_size; k++) {
624 kmer_seq[kmer_size] =
'\0';
626 printf(
"id: %llu, reg %u, seq %s \n", kmer_idx, kmer_coord.w, kmer_seq);
637 return chain_prefix_id == (
uint32) -1;
646 return suffix_id != (
uint32) -1;
677 thrust::tuple<SequenceSetKmerCoord, uint32, uint32>
operator() (
678 const thrust::tuple<SequenceSetKmerCoord, uint32, uint32>& x,
679 const thrust::tuple<SequenceSetKmerCoord, uint32, uint32>& y)
const
695 template <
typename string_set_type>
698 typedef typename string_set_type::string_type
sequence;
713 if(coord_curr.w != coord_prev.w)
return 0;
717 const uint32 max_overlap =
718 (coord_curr.z - coord_curr.y) > (coord_prev.z - coord_prev.y) ?
719 (coord_prev.z - coord_prev.y) : (coord_curr.z - coord_curr.y);
722 for (
uint32 j = 0; j < max_overlap; j++)
724 if(seq_curr[coord_curr.y + j] == seq_prev[coord_prev.y +j]) {
748 const uint32* _suffix_kmer_ids,
757 uint32 len = coord.z - coord.y;
768 return n_kmers -
overlaps[idx] - n_unique;
781 const uint32* _suffix_kmer_ids,
790 uint32 len = coord.z - coord.y;
794 if(overlaps[idx] == 0) {
795 n_repeat_kmers_to_extract--;
798 n_repeat_kmers_to_extract--;
802 uint32 n_adj = n_repeat_kmers_to_extract;
811 template <
typename string_set_type>
814 typedef typename string_set_type::string_type
sequence;
833 const uint32* _chain_pref_gids,
const uint32* _chain_suffix_gids,
834 const uint32* _chain_offsets,
835 const uint32* _adj_extraction_offsets,
836 const uint32* _chain_overlaps,
849 seeder(_kmer_size, 1u) { }
856 const uint32 seq_len = seq.length();
863 uint32 len = chain_coord.z - chain_coord.y;
867 if(
chain_overlaps[idx] == n_kmers || (n_kmers_to_extract + unique_suffix == 0))
return;
869 const uint32 kmer_id = (chain_coord.z - unique_suffix) - n_kmers_to_extract -
kmer_size + 1;
870 for (
uint32 j = 0; j < n_kmers_to_extract; j++) {
871 const uint2 kmer =
seeder.
seed(seq_len, kmer_id + j);
872 kmers_out[offset + j] = make_uint4(chain_coord.x, kmer.x, kmer.y, chain_coord.w);
875 if(j < n_kmers_to_extract - 1) {
878 if(unique_suffix == 1) {
883 if(unique_suffix == 1) {
893 uint32 pred_chain_idx = idx - 1;
923 if(n_to_update <= 0)
return;
925 uint32 pred_chain_idx = idx - 1;
926 while(n_to_update > 0) {
928 if(pred_overlap >= n_to_update) {
932 const uint32 n_update_pred = n_to_update - (pred_overlap > 0 ? pred_overlap : 0);
934 for(
uint32 i = 0; i < n_update_pred; i++) {
937 n_to_update -= n_update_pred;
971 while(i < num_edges) {
972 if(prefix_idx < i)
break;
974 if(uid != from_uid)
break;
1011 while(i < num_edges) {
1012 if(prefix_idx < i)
break;
1014 if(uid != from_uid)
break;
1028 template <
typename string_set_type>
1031 typedef typename string_set_type::string_type
sequence;
1040 uint8* _sequences) :
1049 const uint32 seq_pos = coord.y;
1052 for (
uint32 i = 0; i < coord.z - coord.y; i++) {
1060 template <
typename string_set_type>
1070 thrust::make_counting_iterator<uint32>(0u),
1071 thrust::make_counting_iterator<uint32>(0u) + n_kmers,
1077 template <
typename string_set_type>
1080 kmers_64b.resize(n_kmers);
1083 coords.begin() + n_kmers,
1090 template <
typename string_set_type>
1095 thrust::stable_sort_by_key(
1097 kmers_64b.begin() + n_kmers,
1104 template <
typename string_set_type>
1105 template <
typename meta_iterator_type>
1108 thrust::stable_sort_by_key(
1110 kmers_64b.begin() + n_kmers,
1111 thrust::make_zip_iterator(thrust::make_tuple(coords.begin(), meta_data)));
1116 template <
typename string_set_type>
1121 thrust::stable_sort_by_key(
1123 kmers_64b.begin() + n_kmers,
1126 D_VectorU32::iterator region_ids = get_scratch_space(n_kmers);
1129 coords.begin() + n_kmers,
1133 thrust::stable_sort_by_key(
1135 region_ids + n_kmers,
1136 thrust::make_zip_iterator(thrust::make_tuple(coords.begin(), kmers_64b.begin())));
1138 reset_scratch_space();
1144 template <
typename string_set_type>
1147 thrust::stable_sort_by_key(
1149 kmers_64b.begin() + n_kmers,
1154 coords.begin() + n_kmers,
1157 thrust::stable_sort_by_key(
1159 seq_ids.begin() + n_kmers,
1160 thrust::make_zip_iterator(thrust::make_tuple(kmers_64b.begin(), coords.begin())));
1167 template <
typename string_set_type>
1168 template <
typename meta_iterator_type>
1171 thrust::stable_sort_by_key(
1173 kmers_64b.begin() + n_kmers,
1174 thrust::make_zip_iterator(thrust::make_tuple(coords.begin(), meta_data)));
1178 coords.begin() + n_kmers,
1181 thrust::stable_sort_by_key(
1183 seq_ids.begin() + n_kmers,
1184 thrust::make_zip_iterator(thrust::make_tuple(kmers_64b.begin(), coords.begin(), meta_data)));
1190 template <
typename string_set_type>
1197 const uint32 SYMBOL_SIZE = 4u;
1198 const uint32 SYMBOLS_PER_WORD = WORD_BITS/SYMBOL_SIZE;
1201 uint32 max_length = fixed_kmer_size;
1202 if(fixed_kmer_size == 0u) {
1207 thrust::maximum<uint32>());
1211 const uint32 max_words = (max_length + SYMBOLS_PER_WORD-1)/SYMBOLS_PER_WORD;
1216 indices.resize(2*n_kmers);
1218 thrust::make_counting_iterator<uint32>(0u),
1219 thrust::make_counting_iterator<uint32>(n_kmers),
1225 sort_buffers.selector = 0;
1232 for (
int32 word_idx = max_words-1; word_idx >= 0; --word_idx) {
1235 indices.begin() + sort_buffers.selector * n_kmers,
1236 indices.begin() + sort_buffers.selector * n_kmers + n_kmers,
1237 radices.begin() + sort_buffers.selector * n_kmers,
1244 sort_enactor.
sort(n_kmers, sort_buffers);
1247 return indices.begin() + sort_buffers.selector * n_kmers;
1250 template <
typename string_set_type>
1263 sorted_ids + n_coords,
1265 super_coords.begin());
1268 sorted_ids + n_coords,
1269 thrust::make_zip_iterator(thrust::make_tuple(temp_pref.begin(), temp_suf.begin())),
1270 thrust::make_zip_iterator(thrust::make_tuple(super_prefix_ids.begin(), super_suffix_ids.begin())));
1275 super_coords.begin(),
1276 super_coords.begin() + n_coords,
1280 thrust::stable_sort_by_key(
1282 ids.begin() + n_coords,
1283 thrust::make_zip_iterator(thrust::make_tuple(super_coords.begin(), super_prefix_ids.begin(), super_suffix_ids.begin())));
1288 template <
typename string_set_type>
1292 kmers_64b_distinct.resize(n_kmers);
1293 kmers_counts.resize(n_kmers);
1298 kmers_64b_distinct.begin(),
1299 kmers_counts.begin(),
1305 template <
typename string_set_type>
1309 kmers_64b_unique_idxs.resize(n_kmers);
1310 kmers_counts.resize(n_kmers);
1312 thrust::counting_iterator<uint32>(0),
1313 thrust::counting_iterator<uint32>(0) + n_kmers,
1314 thrust::constant_iterator<uint32>(1),
1315 kmers_64b_unique_idxs.begin(),
1316 kmers_counts.begin(),
1323 template <
typename string_set_type>
1327 D_VectorU32::iterator distinct_idxs_per_seq = get_scratch_space(n_kmers);
1328 D_VectorU32::iterator count_per_seq = get_scratch_space(n_kmers);
1329 thrust::counting_iterator<uint64> ids(0);
1333 thrust::constant_iterator<uint32>(1),
1334 distinct_idxs_per_seq,
1338 D_VectorU32::iterator distinct_idxs = get_scratch_space(n_distinct_per_seq);
1339 D_VectorU32::iterator seq_count = get_scratch_space(n_distinct_per_seq);
1340 D_VectorU32::iterator kmer_counts = get_scratch_space(n_distinct_per_seq);
1342 distinct_idxs_per_seq,
1343 distinct_idxs_per_seq + n_distinct_per_seq,
1344 thrust::make_zip_iterator(thrust::make_tuple(thrust::constant_iterator<uint32>(1), count_per_seq)),
1346 thrust::make_zip_iterator(thrust::make_tuple(seq_count, kmer_counts)),
1351 kmers_64b_unique_idxs.resize(n_distinct);
1352 kmers_counts.resize(n_distinct);
1354 thrust::make_zip_iterator(thrust::make_tuple(distinct_idxs, kmer_counts)),
1355 thrust::make_zip_iterator(thrust::make_tuple(distinct_idxs + n_distinct, kmer_counts + n_distinct)),
1356 thrust::make_zip_iterator(thrust::make_tuple(seq_count, kmer_counts)),
1357 thrust::make_zip_iterator(thrust::make_tuple(kmers_64b_unique_idxs.begin(), kmers_counts.begin())),
1358 is_unique_kmer()) - thrust::make_zip_iterator(thrust::make_tuple(kmers_64b_unique_idxs.begin(), kmers_counts.begin()));
1360 reset_scratch_space();
1363 template <
typename string_set_type>
1368 kmers_64b_unique_idxs.begin(),
1369 kmers_64b_unique_idxs.begin() + n_unique,
1372 global_unique_flag_map.resize(n_kmers);
1374 global_to_sorted_id_map.begin(),
1375 global_to_sorted_id_map.begin() + n_kmers,
1376 unique_map_sorted.begin(),
1377 global_unique_flag_map.begin());
1380 template <
typename string_set_type>
1384 thrust::fill(id_map_sorted.begin(), id_map_sorted.begin() + n_kmers, (
uint32) -1);
1386 thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator<uint32>(0u), kmers_64b_unique_idxs.begin())),
1387 thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator<uint32>(0u) + n_unique, kmers_64b_unique_idxs.begin() + n_unique)),
1390 global_to_UID_map.resize(n_kmers);
1392 global_to_sorted_id_map.begin(),
1393 global_to_sorted_id_map.begin() + n_kmers,
1394 id_map_sorted.begin(),
1395 global_to_UID_map.begin());
1398 template <
typename string_set_type>
1401 global_to_sorted_id_map.resize(n_kmers);
1403 thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator<uint32>(0u), coords.begin())),
1404 thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator<uint32>(0u) + n_kmers, coords.begin() + n_kmers)),
1408 template <
typename string_set_type>
1411 global_unique_flag_map.resize(n_kmers);
1412 global_to_UID_map.resize(n_kmers);
1413 thrust::fill(global_to_UID_map.begin(), global_to_UID_map.begin() + n_kmers, (
uint32) -1);
1415 thrust::make_counting_iterator<uint32>(0u),
1416 thrust::make_counting_iterator<uint32>(0u) + n_unique,
1432 template <
typename string_set_type>
1438 coords.begin() + n_kmers,
1439 thrust::make_permutation_iterator(
1442 unique_pref_coords.begin(),
1443 thrust::identity<uint8>()) - unique_pref_coords.begin();
1445 thrust::copy(unique_pref_coords.begin(), unique_pref_coords.begin() + n_unique_coords, coords.begin());
1446 n_kmers = n_unique_coords;
1453 template <
typename string_set_type>
1457 kmers_64b_repeat_idxs.resize(n_kmers);
1459 thrust::make_counting_iterator<uint32>(0u),
1460 thrust::make_counting_iterator<uint32>(0u) + n_kmers,
1461 global_unique_flag_map.begin(),
1462 kmers_64b_repeat_idxs.begin(),
1463 thrust::logical_not<uint8>()) - kmers_64b_repeat_idxs.begin();
1469 thrust::make_counting_iterator(0),
1470 thrust::make_counting_iterator(0) + n_chains,
1488 super_coords.resize(n_chains);
1489 super_prefix_uids.resize(n_chains);
1490 super_suffix_uids.resize(n_chains);
1492 thrust::make_counting_iterator(0u),
1493 thrust::make_counting_iterator(0u) + n_chains,
1494 thrust::make_zip_iterator(thrust::make_tuple(super_kmers.begin(), prefix_uids.begin(), suffix_uids.begin())),
1495 thrust::make_discard_iterator(),
1496 thrust::make_zip_iterator(thrust::make_tuple(super_coords.begin(), super_prefix_uids.begin(), super_suffix_uids.begin())),
1498 collapse_same_start_kmers()).second - thrust::make_zip_iterator(thrust::make_tuple(super_coords.begin(), super_prefix_uids.begin(), super_suffix_uids.begin()));
1504 n_super_coords = thrust::remove_copy_if(
1505 thrust::make_zip_iterator(thrust::make_tuple(
1506 super_coords.begin(),
1507 super_prefix_uids.begin(),
1508 super_suffix_uids.begin())),
1509 thrust::make_zip_iterator(thrust::make_tuple(
1510 super_coords.begin() + n_collapsed_chains,
1511 super_prefix_uids.begin() + n_collapsed_chains,
1512 super_suffix_uids.begin() + n_collapsed_chains)),
1513 super_prefix_uids.begin(),
1514 thrust::make_zip_iterator(thrust::make_tuple(
1524 thrust::copy(t1.begin(), t1.begin() + n_chains, super_coords.begin());
1525 thrust::copy(t2.begin(), t2.begin() + n_chains, super_prefix_uids.begin());
1526 thrust::copy(t3.begin(), t3.begin() + n_chains, super_suffix_uids.begin());
1539 reset_scratch_space();
1545 template <
typename string_set_type>
1549 uint32 n_super_kmers = n_super_coords;
1553 D_VectorU32 max_predecessor_overlaps(n_super_kmers);
1555 thrust::make_counting_iterator<uint32>(1u),
1556 thrust::make_counting_iterator<uint32>(0u) + n_super_kmers,
1557 max_predecessor_overlaps.begin() + 1,
1563 thrust::device_vector<uint8> temp_storage;
1564 D_VectorU32 chain_extraction_offsets(n_super_kmers + 1);
1568 thrust::make_counting_iterator<uint32>(0u),
1572 plain_view(max_predecessor_overlaps), kmer_size)),
1573 chain_extraction_offsets.begin() + 1,
1574 thrust::plus<uint32>(),
1577 D_VectorU32 adj_extraction_offsets(n_super_kmers + 1);
1581 thrust::make_counting_iterator<uint32>(0u),
1585 plain_view(max_predecessor_overlaps), kmer_size)),
1586 adj_extraction_offsets.begin() + 1,
1587 thrust::plus<uint32>(),
1591 n_repeat = chain_extraction_offsets[n_super_kmers];
1592 uint32 n_prefsuf_to_extract = adj_extraction_offsets[n_super_kmers];
1599 kmers_out.resize(n_unique + n_repeat);
1600 prefix_ids_out.resize(n_prefsuf_to_extract);
1601 suffix_ids_out.resize(n_prefsuf_to_extract);
1603 thrust::make_counting_iterator<uint32>(0u),
1604 thrust::make_counting_iterator<uint32>(0u) + n_super_kmers,
1613 kmer_size, n_unique,
1620 adj_counts.resize(n_prefsuf_to_extract);
1621 thrust::fill(adj_counts.begin(), adj_counts.begin() + n_prefsuf_to_extract, 1);
1623 thrust::make_counting_iterator<uint32>(1u),
1624 thrust::make_counting_iterator<uint32>(0u) + n_super_kmers,