NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
string_set.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/strings/string.h>
35 #include <nvbio/basic/iterator.h>
36 
37 
38 namespace nvbio {
39 
209 
216 
219 
225 
228 
233 
/// Iterator over the strings of a string set: it is constructed from a string set (stored by
/// value in `string_set`) and the index of the current string (`idx`).
///
/// NOTE(review): this is an extracted listing; the `struct StringSetIterator` declarator and
/// several members (including the declaration of `idx`) are missing from it.
///
/// \tparam StringSetType    the underlying string set; must expose `system_tag` and `string_type`
///
234 template <typename StringSetType>
236 {
237  typedef typename StringSetType::system_tag system_tag;
 // value_type and reference are the same type: no reference-to-string type is used here
238  typedef typename StringSetType::string_type value_type;
239  typedef typename StringSetType::string_type reference;
240  typedef typename StringSetType::string_type* pointer;
 // NOTE(review): the remainder of this typedef is cut off in the listing
242  typedef typename if_equal<
246 
250 
 /// constructor
 ///
 /// \param _string_set   the string set to iterate over (copied into the iterator)
 /// \param _idx          index of the string this iterator points to
 ///
253  NVBIO_FORCEINLINE NVBIO_HOST_DEVICE StringSetIterator(StringSetType _string_set, const uint32 _idx) : string_set(_string_set), idx(_idx) {}
254 
259 
264 
268 
272 
 // the string set being iterated, held by value
273  StringSetType string_set;
275 };
276 
/// Difference between the positions of two iterators: returns `it1.idx - it2.idx`.
/// Only the indices are used; the string sets held by the iterators are not compared.
///
/// NOTE(review): the declarator line (return type and name, presumably a binary operator-)
/// is missing from this listing.
///
279 template <typename StringSet>
282  const StringSetIterator<StringSet>& it1,
283  const StringSetIterator<StringSet>& it2)
284 {
285  return it1.idx - it2.idx;
286 }
287 
/// Returns a new iterator over the same string set as `it`, positioned at index `it.idx + d`.
///
/// NOTE(review): the declarator and the first parameter (`it`) are missing from this listing;
/// presumably this is operator+.
///
/// \param d     signed offset added to the index
///
290 template <typename StringSet>
294  const int32 d)
295 {
296  return StringSetIterator<StringSet>( it.string_set, it.idx + d );
297 }
298 
/// Returns a new iterator over the same string set as `it`, positioned at index `it.idx - d`.
///
/// NOTE(review): the declarator and the first parameter (`it`) are missing from this listing;
/// presumably this is the iterator/offset form of operator-.
///
/// \param d     signed offset subtracted from the index
///
301 template <typename StringSet>
305  const int32 d)
306 {
307  return StringSetIterator<StringSet>( it.string_set, it.idx - d );
308 }
309 
/// Advances `it` in place by `d` positions and returns it.
///
/// NOTE(review): the declarator and the first parameter (`it`) are missing from this listing;
/// presumably this is operator+=.
///
/// \param d     signed offset added to the index
///
312 template <typename StringSet>
316  const int32 d)
317 {
318  it.idx += d;
319  return it;
320 }
321 
/// Moves `it` back in place by `d` positions and returns it.
///
/// NOTE(review): the declarator and the first parameter (`it`) are missing from this listing;
/// presumably this is operator-=.
///
/// \param d     signed offset subtracted from the index
///
324 template <typename StringSet>
328  const int32 d)
329 {
330  it.idx -= d;
331  return it;
332 }
333 
/// Less-than comparison: true iff `it1.idx < it2.idx`.
/// Only the indices are compared, not the string sets held by the iterators.
///
/// NOTE(review): the declarator line (presumably `bool operator<`) is missing from this listing.
///
336 template <typename StringSet>
339  const StringSetIterator<StringSet>& it1,
340  const StringSetIterator<StringSet>& it2)
341 {
342  return it1.idx < it2.idx;
343 }
344 
/// Greater-than comparison: true iff `it1.idx > it2.idx`.
/// Only the indices are compared, not the string sets held by the iterators.
///
/// NOTE(review): the declarator line (presumably `bool operator>`) is missing from this listing.
///
347 template <typename StringSet>
350  const StringSetIterator<StringSet>& it1,
351  const StringSetIterator<StringSet>& it2)
352 {
353  return it1.idx > it2.idx;
354 }
355 
/// Less-than-or-equal comparison: true iff `it1.idx <= it2.idx`.
/// Only the indices are compared, not the string sets held by the iterators.
///
/// NOTE(review): the declarator line is missing from this listing. This block sits between the
/// `>` and `>=` comparisons in the usual `<, >, <=, >=, ==, !=` sequence, so it is taken to be
/// `bool operator<=`. The previous body used a strict `<`, duplicating operator< and returning
/// false for two iterators at the same index.
///
358 template <typename StringSet>
361  const StringSetIterator<StringSet>& it1,
362  const StringSetIterator<StringSet>& it2)
363 {
364  return it1.idx <= it2.idx;
365 }
366 
/// Greater-than-or-equal comparison: true iff `it1.idx >= it2.idx`.
/// Only the indices are compared, not the string sets held by the iterators.
///
/// NOTE(review): the declarator line (presumably `bool operator>=`) is missing from this listing.
///
369 template <typename StringSet>
372  const StringSetIterator<StringSet>& it1,
373  const StringSetIterator<StringSet>& it2)
374 {
375  return it1.idx >= it2.idx;
376 }
377 
/// Equality comparison: true iff both iterators have the same index.
/// Two iterators over different string sets compare equal when their indices match, since
/// `string_set` is not inspected.
///
/// NOTE(review): the declarator line (presumably `bool operator==`) is missing from this listing.
///
380 template <typename StringSet>
383  const StringSetIterator<StringSet>& it1,
384  const StringSetIterator<StringSet>& it2)
385 {
386  return it1.idx == it2.idx;
387 }
388 
/// Inequality comparison: true iff the two iterators have different indices.
/// Only the indices are compared, not the string sets held by the iterators.
///
/// NOTE(review): the declarator line (presumably `bool operator!=`) is missing from this listing.
///
391 template <typename StringSet>
394  const StringSetIterator<StringSet>& it1,
395  const StringSetIterator<StringSet>& it2)
396 {
397  return it1.idx != it2.idx;
398 }
399 
/// A string set whose strings live back to back in one base string and are delimited by an
/// offsets vector: string i spans [offsets[i], offsets[i+1]) of the base string.
///
/// NOTE(review): the struct declarator (presumably `struct ConcatenatedStringSet`, the name
/// used with these two template parameters elsewhere in this file) and the typedefs for
/// `string_type`, `iterator` and `const_iterator` are missing from this listing.
///
/// \tparam StringIterator   iterator over the symbols of the base string
/// \tparam OffsetIterator   iterator over the string offsets
///
479 template <typename StringIterator, typename OffsetIterator>
481 {
483  typedef typename std::iterator_traits<StringIterator>::value_type symbol_type;
485  typedef StringIterator symbol_iterator;
486  typedef OffsetIterator offset_iterator;
488 
491 
496 
 /// constructor (its declarator line is missing from the listing)
 ///
 /// \param size      number of strings in the set
 /// \param string    iterator to the base string
 /// \param offsets   iterator to the offsets; operator[] reads entries i and i+1
 ///
505  const uint32 size,
506  const StringIterator string,
507  const OffsetIterator offsets) :
508  m_size( size ),
509  m_string( string ),
510  m_offsets( offsets ) {}
511 
 /// number of strings in the set
515  uint32 size() const { return m_size; }
516 
 /// access to the i-th string (declarator missing from the listing): builds a string_type of
 /// length offsets[i+1] - offsets[i] starting at base string position offsets[i]
521  {
522  const typename std::iterator_traits<OffsetIterator>::value_type offset = m_offsets[i];
523 
524  return string_type(
525  m_offsets[i+1] - offset,
526  m_string + offset );
527  }
528 
 /// iterator to the first string (index 0); constructed from *this
531  const_iterator begin() const { return const_iterator(*this,0u); }
532 
 /// iterator one past the last string (index size())
535  const_iterator end() const { return const_iterator(*this,size()); }
536 
 /// non-const counterpart of begin()
539  iterator begin() { return iterator(*this,0u); }
540 
 /// non-const counterpart of end()
543  iterator end() { return iterator(*this,size()); }
544 
 /// the base string iterator passed at construction
548  symbol_iterator base_string() const { return m_string; }
549 
 /// the offsets iterator passed at construction
553  offset_iterator offsets() const { return m_offsets; }
554 
555 private:
556  uint32 m_size;
557  StringIterator m_string;
558  OffsetIterator m_offsets;
559 };
560 
/// Factory helper forwarding (size, string, offsets) to a constructor so the template arguments
/// can be deduced from the call.
///
/// NOTE(review): the declarator (return type and function name) and the head of the return
/// expression are missing from this listing; presumably this builds a
/// ConcatenatedStringSet<StringIterator,OffsetIterator>.
///
/// \param size      number of strings
/// \param string    iterator to the base string
/// \param offsets   iterator to the string offsets
///
565 template <typename StringIterator, typename OffsetIterator>
567  const uint32 size,
568  const StringIterator string,
569  const OffsetIterator offsets)
570 {
572  size,
573  string,
574  offsets );
575 }
576 
/// A string set whose strings are sub-ranges of one base string: string i is described by a
/// `uint2` range and spans [range.x, range.y) of the base string.
///
/// NOTE(review): the struct declarator (presumably `struct SparseStringSet`, the type on which
/// `ranges()` is called elsewhere in this file) and the typedefs for `string_type`, `iterator`
/// and `const_iterator` are missing from this listing.
///
/// \tparam StringIterator   iterator over the symbols of the base string
/// \tparam RangeIterator    iterator over the per-string ranges (read as uint2)
///
612 template <typename StringIterator, typename RangeIterator>
614 {
616  typedef typename std::iterator_traits<StringIterator>::value_type symbol_type;
618  typedef StringIterator symbol_iterator;
619  typedef RangeIterator range_iterator;
621 
624 
629 
 /// constructor (its declarator line is missing from the listing)
 ///
 /// \param size      number of strings in the set
 /// \param string    iterator to the base string
 /// \param ranges    iterator to the per-string ranges
 ///
638  const uint32 size,
639  const StringIterator string,
640  const RangeIterator ranges) :
641  m_size( size ),
642  m_string( string ),
643  m_ranges( ranges ) {}
644 
 /// number of strings in the set
648  uint32 size() const { return m_size; }
649 
 /// access to the i-th string (declarator missing from the listing): builds a string_type of
 /// length range.y - range.x starting at base string position range.x
654  {
655  const uint2 range = m_ranges[i];
656 
657  return string_type(
658  range.y - range.x,
659  m_string + range.x );
660  }
661 
 /// iterator to the first string (index 0); constructed from *this
664  const_iterator begin() const { return const_iterator(*this,0u); }
665 
 /// iterator one past the last string (index size())
668  const_iterator end() const { return const_iterator(*this,size()); }
669 
 /// non-const counterpart of begin()
672  iterator begin() { return iterator(*this,0u); }
673 
 /// non-const counterpart of end()
676  iterator end() { return iterator(*this,size()); }
677 
 /// the base string iterator passed at construction
681  symbol_iterator base_string() const { return m_string; }
682 
 /// the ranges iterator passed at construction
686  range_iterator ranges() const { return m_ranges; }
687 
688 private:
689  uint32 m_size;
690  StringIterator m_string;
691  RangeIterator m_ranges;
692 };
693 
/// Factory helper forwarding (size, string, ranges) to a constructor so the template arguments
/// can be deduced from the call.
///
/// NOTE(review): the declarator (return type and function name) and the head of the return
/// expression are missing from this listing; presumably this builds a
/// SparseStringSet<StringIterator,RangeIterator>.
///
/// \param size      number of strings
/// \param string    iterator to the base string
/// \param offsets   iterator to the per-string ranges (the parameter is named `offsets`
///                  although its type is RangeIterator)
///
698 template <typename StringIterator, typename RangeIterator>
700  const uint32 size,
701  const StringIterator string,
702  const RangeIterator offsets)
703 {
705  size,
706  string,
707  offsets );
708 }
709 
/// A string set over a packed symbol stream accessed with a stride: string i is read through a
/// strided iterator that starts at `stream + i` and steps by `stride`, wrapped in a packed
/// stream; its length comes from `lengths[i]`.
///
/// NOTE(review): the struct declarator (presumably `struct StridedPackedStringSet`, the name
/// used with this template parameter list elsewhere in this file) and the typedefs for
/// `strided_stream_iterator`, `packed_stream_type` and `string_type` are missing from this listing.
///
/// \tparam StreamIterator   iterator over the underlying stream
/// \tparam SymbolType       type of the symbols
/// \tparam SYMBOL_SIZE_T    exported as SYMBOL_SIZE
/// \tparam BIG_ENDIAN_T     exported as BIG_ENDIAN
/// \tparam LengthIterator   iterator over the string lengths
///
728 template <
729  typename StreamIterator,
730  typename SymbolType,
731  uint32 SYMBOL_SIZE_T,
732  bool BIG_ENDIAN_T,
733  typename LengthIterator>
735 {
737  typedef SymbolType symbol_type;
738 
739  typedef StreamIterator stream_iterator;
744  typedef LengthIterator length_iterator;
746 
747  static const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
 // NOTE(review): BIG_ENDIAN is also defined as a macro by some system headers
 // (e.g. glibc <endian.h>) - confirm there is no clash on the supported platforms
748  static const bool BIG_ENDIAN = BIG_ENDIAN_T;
749 
754 
 /// constructor (its declarator line is missing from the listing)
 ///
 /// \param size      number of strings in the set
 /// \param stride    step between consecutive elements of one string in the stream
 /// \param stream    iterator to the underlying stream
 /// \param lengths   iterator to the string lengths
 ///
764  const uint32 size,
765  const uint32 stride,
766  const StreamIterator stream,
767  const LengthIterator lengths) :
768  m_size( size ),
769  m_stride( stride ),
770  m_stream( stream ),
771  m_lengths( lengths ) {}
772 
 /// number of strings in the set
776  uint32 size() const { return m_size; }
777 
 /// the stride passed at construction
781  uint32 stride() const { return m_stride; }
782 
 /// access to the i-th string (declarator missing from the listing)
787  {
788  const uint32 length = m_lengths[i];
789 
 // strided view of the stream beginning at element i
790  const strided_stream_iterator base_iterator( m_stream + i, m_stride );
791  const packed_stream_type packed_stream( base_iterator );
792 
793  return string_type(
794  length,
795  packed_stream );
796  }
797 
 /// the stream iterator passed at construction
801  stream_iterator base_stream() const { return m_stream; }
802 
 /// the lengths iterator passed at construction
806  length_iterator lengths() const { return m_lengths; }
807 
808 private:
809  uint32 m_size;
810  uint32 m_stride;
811  StreamIterator m_stream;
812  LengthIterator m_lengths;
813 };
814 
/// A string set over a plain symbol array accessed with a stride: string i is read through a
/// strided iterator that starts at `string + i` and steps by `stride`; its length comes from
/// `lengths[i]`.
///
/// NOTE(review): the struct declarator (presumably `struct StridedStringSet`, the name used
/// with these two template parameters elsewhere in this file) and the typedefs for
/// `strided_symbol_iterator`, `string_type`, `iterator` and `const_iterator` are missing from
/// this listing.
///
/// \tparam StringIterator   iterator over the symbols
/// \tparam LengthIterator   iterator over the string lengths
///
833 template <
834  typename StringIterator,
835  typename LengthIterator>
837 {
839  typedef typename std::iterator_traits<StringIterator>::value_type symbol_type;
840 
841  typedef StringIterator symbol_iterator;
844  typedef LengthIterator length_iterator;
846 
849 
854 
 /// constructor (its declarator line is missing from the listing)
 ///
 /// \param size      number of strings in the set
 /// \param stride    step between consecutive symbols of one string
 /// \param string    iterator to the symbols
 /// \param lengths   iterator to the string lengths
 ///
864  const uint32 size,
865  const uint32 stride,
866  const StringIterator string,
867  const LengthIterator lengths) :
868  m_size( size ),
869  m_stride( stride ),
870  m_string( string ),
871  m_lengths( lengths ) {}
872 
 /// number of strings in the set
876  uint32 size() const { return m_size; }
877 
 /// the stride passed at construction
881  uint32 stride() const { return m_stride; }
882 
 /// access to the i-th string (declarator missing from the listing)
887  {
888  const uint32 length = m_lengths[i];
889 
 // strided view of the symbols beginning at element i
890  const strided_symbol_iterator base_iterator( m_string + i, m_stride );
891 
892  return string_type(
893  length,
894  base_iterator );
895  }
896 
 /// iterator to the first string (index 0); constructed from *this
899  const_iterator begin() const { return const_iterator(*this,0u); }
900 
 /// iterator one past the last string (index size())
903  const_iterator end() const { return const_iterator(*this,size()); }
904 
 /// non-const counterpart of begin()
907  iterator begin() { return iterator(*this,0u); }
908 
 /// non-const counterpart of end()
911  iterator end() { return iterator(*this,size()); }
912 
 /// the symbol iterator passed at construction
916  symbol_iterator base_string() const { return m_string; }
917 
 /// the lengths iterator passed at construction
921  length_iterator lengths() const { return m_lengths; }
922 
923 private:
924  uint32 m_size;
925  uint32 m_stride;
926  StringIterator m_string;
927  LengthIterator m_lengths;
928 };
929 
/// Functor mapping a string index to the length of that string in a given string set.
///
/// NOTE(review): the struct declarator line is missing from this listing; the name is taken
/// from the constructor.
///
/// \tparam string_set_type  the string set type; `operator[]` must return an object with `length()`
///
932 template <typename string_set_type>
934 {
937 
 /// constructor: stores a copy of the string set
941  string_set_length_functor(const string_set_type _string_set) : string_set(_string_set) {}
942 
 /// returns the length of the i-th string
946  uint32 operator() (const uint32 i) const { return string_set[i].length(); }
947 
 // const data member: instances can be copy-constructed but not assigned
948  const string_set_type string_set;
949 };
950 
953 
/// cuda:: overloads of copy().
///
/// NOTE(review): in this listing the second parameter of each declaration is missing. The
/// template parameter lists match, in order, the three nvbio::copy declarations that follow
/// this namespace, whose outputs are ConcatenatedStringSet, StridedStringSet and
/// StridedPackedStringSet; presumably these take the same output types - confirm against the
/// original header.
///
954 namespace cuda {
955 
958 
961 
// overload parameterized on <InStringSet, StringIterator, OffsetIterator>
967 template <
968  typename InStringSet,
969  typename StringIterator,
970  typename OffsetIterator>
971 void copy(
972  const InStringSet& in_string_set,
974 
// overload parameterized on <InStringSet, StringIterator, LengthIterator>
980 template <
981  typename InStringSet,
982  typename StringIterator,
983  typename LengthIterator>
984 void copy(
985  const InStringSet& in_string_set,
987 
// overload parameterized on <InStringSet, StreamIterator, SymbolType, SYMBOL_SIZE_T, BIG_ENDIAN_T, LengthIterator>
993 template <
994  typename InStringSet,
995  typename StreamIterator,
996  typename SymbolType,
997  uint32 SYMBOL_SIZE_T,
998  bool BIG_ENDIAN_T,
999  typename LengthIterator>
1000 void copy(
1001  const InStringSet& in_string_set,
1003 
1006 
1007 } // namespace cuda
1008 
1011 
1014 
/// copy() overload whose output is a ConcatenatedStringSet (declaration only; the definition
/// is not part of this listing).
///
/// \param in_string_set     input string set, taken by const reference
/// \param out_string_set    output string set, taken by mutable reference; presumably it
///                          receives the contents of the input - confirm against the definition
///
1020 template <
1021  typename InStringSet,
1022  typename StringIterator,
1023  typename OffsetIterator>
1024 void copy(
1025  const InStringSet& in_string_set,
1026  ConcatenatedStringSet<StringIterator,OffsetIterator>& out_string_set);
1027 
/// copy() overload whose output is a StridedStringSet (declaration only; the definition is
/// not part of this listing).
///
/// \param in_string_set     input string set, taken by const reference
/// \param out_string_set    output string set, taken by mutable reference; presumably it
///                          receives the contents of the input - confirm against the definition
///
1033 template <
1034  typename InStringSet,
1035  typename StringIterator,
1036  typename LengthIterator>
1037 void copy(
1038  const InStringSet& in_string_set,
1039  StridedStringSet<StringIterator,LengthIterator>& out_string_set);
1040 
/// copy() overload whose output is a StridedPackedStringSet (declaration only; the definition
/// is not part of this listing).
///
/// \param in_string_set     input string set, taken by const reference
/// \param out_string_set    output string set, taken by mutable reference; presumably it
///                          receives the contents of the input - confirm against the definition
///
1046 template <
1047  typename InStringSet,
1048  typename StreamIterator,
1049  typename SymbolType,
1050  uint32 SYMBOL_SIZE_T,
1051  bool BIG_ENDIAN_T,
1052  typename LengthIterator>
1053 void copy(
1054  const InStringSet& in_string_set,
1055  StridedPackedStringSet<StreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T,LengthIterator>& out_string_set);
1056 
1059 
1060 
/// Primary template with an empty body; the last parameter (`value_type`) is what the
/// specializations that follow in this file (for uint4 and uint32) select on.
///
/// NOTE(review): the struct declarator line is missing from this listing; presumably this is
/// `struct CachedPackedConcatStringSet`, the six-parameter template named by the
/// ConcatenatedStringSet factory at the end of this file.
///
1061 template <
1062  typename StreamIterator,
1063  typename SymbolType,
1064  uint32 SYMBOL_SIZE_T,
1065  bool BIG_ENDIAN_T,
1066  typename LengthIterator,
1067  typename value_type>
1069 {
1070 };
1071 
/// Partial specialization for a value type of uint4, taking a ConcatenatedStringSet over a
/// packed stream.
///
/// NOTE(review): the struct declarator, the typedefs (`cached_base_iterator`, `uint4_iterator`,
/// `cached_packed_stream_type`, `cached_string_set`), the declarator of the member function
/// (presumably a static `make`, as called through `Adapter::make` elsewhere in this file) and
/// parts of its expressions are missing from this listing.
///
1072 template <
1073  typename StreamIterator,
1074  typename SymbolType,
1075  uint32 SYMBOL_SIZE_T,
1076  bool BIG_ENDIAN_T,
1077  typename LengthIterator>
1079  StreamIterator,
1080  SymbolType,
1081  SYMBOL_SIZE_T,
1082  BIG_ENDIAN_T,
1083  LengthIterator,
1084  uint4>
1085 {
1091 
 /// builds the cached string set from a plain one, taken by value
1093  const ConcatenatedStringSet<
1095  LengthIterator> string_set)
1096  {
 // re-wrap the raw stream of the base string: cached_base_iterator first, then uint4_iterator
1097  cached_packed_stream_type cached_packed_stream(
1099  uint4_iterator( cached_base_iterator( string_set.base_string().stream() ) )
1100  )
1101  );
1102 
 // same size and offsets as the input set, with the re-wrapped stream as base string
1103  return cached_string_set(
1104  string_set.size(),
1105  cached_packed_stream,
1106  string_set.offsets() );
1107  }
1108 };
1109 
/// Partial specialization for a value type of uint32, taking a ConcatenatedStringSet over a
/// packed stream.
///
/// NOTE(review): the struct declarator, the typedefs (`cached_packed_stream_type`,
/// `cached_string_set`), the declarator of the member function (presumably a static `make`,
/// as called through `Adapter::make` elsewhere in this file) and part of the stream
/// construction expression are missing from this listing.
///
1110 template <
1111  typename StreamIterator,
1112  typename SymbolType,
1113  uint32 SYMBOL_SIZE_T,
1114  bool BIG_ENDIAN_T,
1115  typename LengthIterator>
1117  StreamIterator,
1118  SymbolType,
1119  SYMBOL_SIZE_T,
1120  BIG_ENDIAN_T,
1121  LengthIterator,
1122  uint32>
1123 {
1127 
 /// builds the cached string set from a plain one, taken by value
1129  const ConcatenatedStringSet<
1131  LengthIterator> string_set)
1132  {
 // re-wrap the raw stream of the base string
1133  cached_packed_stream_type cached_packed_stream(
1135  string_set.base_string().stream() )
1136  );
1137 
 // same size and offsets as the input set, with the re-wrapped stream as base string
1138  return cached_string_set(
1139  string_set.size(),
1140  cached_packed_stream,
1141  string_set.offsets() );
1142  }
1143 };
1144 
/// Primary template with an empty body; the last parameter (`value_type`) is what the
/// specializations that follow in this file (for uint4 and uint32) select on.
///
/// NOTE(review): the struct declarator line is missing from this listing; presumably this is
/// `struct CachedPackedSparseStringSet`, the six-parameter template named by the
/// SparseStringSet factory at the end of this file.
///
1145 template <
1146  typename StreamIterator,
1147  typename SymbolType,
1148  uint32 SYMBOL_SIZE_T,
1149  bool BIG_ENDIAN_T,
1150  typename LengthIterator,
1151  typename value_type>
1153 {
1154 };
1155 
/// Partial specialization for a value type of uint4, taking a SparseStringSet over a packed
/// stream.
///
/// NOTE(review): the struct declarator, the typedefs (`cached_base_iterator`, `uint4_iterator`,
/// `cached_packed_stream_type`, `cached_string_set`), the declarator of the member function
/// (presumably a static `make`, as called through `Adapter::make` elsewhere in this file) and
/// parts of its expressions are missing from this listing.
///
1156 template <
1157  typename StreamIterator,
1158  typename SymbolType,
1159  uint32 SYMBOL_SIZE_T,
1160  bool BIG_ENDIAN_T,
1161  typename LengthIterator>
1163  StreamIterator,
1164  SymbolType,
1165  SYMBOL_SIZE_T,
1166  BIG_ENDIAN_T,
1167  LengthIterator,
1168  uint4>
1169 {
1175 
 /// builds the cached string set from a plain one, taken by value
1177  const SparseStringSet<
1179  LengthIterator> string_set)
1180  {
 // re-wrap the raw stream of the base string: cached_base_iterator first, then uint4_iterator
1181  cached_packed_stream_type cached_packed_stream(
1183  uint4_iterator( cached_base_iterator( string_set.base_string().stream() ) )
1184  )
1185  );
1186 
 // same size and ranges as the input set, with the re-wrapped stream as base string
1187  return cached_string_set(
1188  string_set.size(),
1189  cached_packed_stream,
1190  string_set.ranges() );
1191  }
1192 };
1193 
/// Partial specialization for a value type of uint32, taking a SparseStringSet over a packed
/// stream.
///
/// NOTE(review): the struct declarator, the typedefs (`cached_packed_stream_type`,
/// `cached_string_set`), the declarator of the member function (presumably a static `make`,
/// as called through `Adapter::make` elsewhere in this file) and part of the stream
/// construction expression are missing from this listing.
///
1194 template <
1195  typename StreamIterator,
1196  typename SymbolType,
1197  uint32 SYMBOL_SIZE_T,
1198  bool BIG_ENDIAN_T,
1199  typename LengthIterator>
1201  StreamIterator,
1202  SymbolType,
1203  SYMBOL_SIZE_T,
1204  BIG_ENDIAN_T,
1205  LengthIterator,
1206  uint32>
1207 {
1211 
 /// builds the cached string set from a plain one, taken by value
1213  const SparseStringSet<
1215  LengthIterator> string_set)
1216  {
 // re-wrap the raw stream of the base string
1217  cached_packed_stream_type cached_packed_stream(
1219  string_set.base_string().stream() )
1220  );
1221 
 // same size and ranges as the input set, with the re-wrapped stream as base string
1222  return cached_string_set(
1223  string_set.size(),
1224  cached_packed_stream,
1225  string_set.ranges() );
1226  }
1227 };
1228 
/// Factory turning a SparseStringSet over a packed stream into its cached counterpart.
/// The CachedPackedSparseStringSet specialization is picked from
/// std::iterator_traits<StreamIterator>::value_type, and the work is delegated to its `make`.
///
/// NOTE(review): the function name line, the second template argument line of the parameter
/// type and the head of the `Adapter` typedef are missing from this listing.
///
/// \param string_set    the input string set, taken by value
/// \return              the `cached_string_set` type of the selected specialization
///
1232 template <
1233  typename StreamIterator,
1234  typename SymbolType,
1235  uint32 SYMBOL_SIZE_T,
1236  bool BIG_ENDIAN_T,
1237  typename LengthIterator>
1238 typename CachedPackedSparseStringSet<
1239  StreamIterator,
1240  SymbolType,
1241  SYMBOL_SIZE_T,
1242  BIG_ENDIAN_T,
1243  LengthIterator,
1244  typename std::iterator_traits<StreamIterator>::value_type>::cached_string_set
1246  const SparseStringSet<
1248  LengthIterator> string_set)
1249 {
 // Adapter: the specialization matching the stream's value type
1251  StreamIterator,
1252  SymbolType,
1253  SYMBOL_SIZE_T,
1254  BIG_ENDIAN_T,
1255  LengthIterator,
1256  typename std::iterator_traits<StreamIterator>::value_type> Adapter;
1257 
1258  return Adapter::make( string_set );
1259 }
1260 
/// Factory turning a ConcatenatedStringSet over a packed stream into its cached counterpart.
/// The CachedPackedConcatStringSet specialization is picked from
/// std::iterator_traits<StreamIterator>::value_type, and the work is delegated to its `make`.
///
/// NOTE(review): the function name line, the second template argument line of the parameter
/// type and the head of the `Adapter` typedef are missing from this listing.
///
/// \param string_set    the input string set, taken by value
/// \return              the `cached_string_set` type of the selected specialization
///
1264 template <
1265  typename StreamIterator,
1266  typename SymbolType,
1267  uint32 SYMBOL_SIZE_T,
1268  bool BIG_ENDIAN_T,
1269  typename LengthIterator>
1270 typename CachedPackedConcatStringSet<
1271  StreamIterator,
1272  SymbolType,
1273  SYMBOL_SIZE_T,
1274  BIG_ENDIAN_T,
1275  LengthIterator,
1276  typename std::iterator_traits<StreamIterator>::value_type>::cached_string_set
1278  const ConcatenatedStringSet<
1280  LengthIterator> string_set)
1281 {
 // Adapter: the specialization matching the stream's value type
1283  StreamIterator,
1284  SymbolType,
1285  SYMBOL_SIZE_T,
1286  BIG_ENDIAN_T,
1287  LengthIterator,
1288  typename std::iterator_traits<StreamIterator>::value_type> Adapter;
1289 
1290  return Adapter::make( string_set );
1291 }
1292 
1293 } // namespace nvbio
1294