32 using namespace nvbio;
// Compile-time constant with value 5. Its use is not visible in this excerpt.
// NOTE(review): presumably an upper bound on the number of temporary-space
// batches that may be handed out at once — confirm against the allocator code.
34 #define MAX_TEMP_SPACE_BATCHES 5
37 template <
typename string_set_type>
// Default constructor: sets kmer_size, every counter (n_kmers, n_distinct,
// n_unique, n_repeat, n_super_coords), n_alloc and selector to 0.
// Members not named in this initializer list are left to their own
// default initialization.
73 D_KmerSet(): kmer_size(0), n_kmers(0), n_distinct(0), n_unique(0), n_repeat(0), n_super_coords(0),
74 n_alloc(0), selector(0) {}
76 string_set(_string_set), kmer_size(_kmer_size),
77 n_kmers(0), n_distinct(0), n_unique(0), n_repeat(0), n_super_coords(0),
78 n_alloc(0), selector(0) { }
80 string_set(_string_set), active_region_ids(_active_region_ids),
81 kmer_size(0), n_kmers(0), n_distinct(0), n_unique(0), n_repeat(0), n_super_coords(0),
82 n_alloc(0), selector(0) { }
// Member-function declarations; the definitions are not part of this excerpt.
// NOTE(review): the descriptions below are limited to what the signatures show.
// Anything about behavior is inferred from the names only and must be confirmed
// against the definitions.

// No parameters, no return value. Presumably generates k-mer coordinates — confirm.
84 void gen_kmer_coords();
// No parameters, no return value. Presumably builds 64-bit k-mer keys — confirm.
85 void gen_kmer_64b_keys();
// Sorting entry points keyed on the 64-bit keys (per their names — confirm).
86 void sort_kmers_by_64b_keys();
87 void segmented_sort_kmers_by_64b_keys();
// Templated on the iterator type of the caller-supplied meta data, which is
// taken by (const) value.
88 template <
typename meta_iterator_type>
89 void sort_kmers_by_64b_keys_meta(
const meta_iterator_type meta_data);
90 void sort_kmers_by_64b_keys_seqid();
// Same template parameter and argument shape as sort_kmers_by_64b_keys_meta.
91 template <
typename meta_iterator_type>
92 void sort_kmers_by_64b_keys_seqid_meta(
const meta_iterator_type meta_data);
93 void count_kmers_rle();
// Overload taking a mutable D_VectorU32 reference; a parameterless overload is
// declared further down. Whether prefix_unique_id_map is an input, an output,
// or both cannot be told from the declaration — TODO confirm.
95 void count_distinct_by_prefix(
D_VectorU32& prefix_unique_id_map);
96 void partition_kmers_by_uniqueness();
97 void gen_prefix_map();
98 void gen_global_unique_map();
99 void gen_global_UID_map();
100 void gen_global_to_sorted_id_map();
101 void mark_unique_kmers();
// Takes a read-only D_VectorU8 reference (unique_map is not modified through
// this parameter).
102 void filter_coords_by_prefix_uniqueness(
const D_VectorU8& unique_map);
103 void extract_super_kmers();
// Parameterless overload of count_distinct_by_prefix.
106 void count_distinct_by_prefix();
112 scratch_u32.resize(n_alloc*n_kmers);
118 printf(
"Requested more memory than batch size \n");
122 return scratch_u32.begin() + (selector-1)*n_kmers;