NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sequence_encoder.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
31 
32 namespace nvbio {
33 namespace io {
34 
40 {
/// Operators that can be requested for a strand.
/// The non-zero enumerators occupy distinct bits (0x0001 and 0x0002).
/// NOTE(review): the rendered listing this was taken from skips a numbered
/// line just before the closing brace - confirm against the real header
/// that no enumerator is missing.
enum StrandOp
{
    NO_OP         = 0,        // 0x0000: no bit set
    REVERSE_OP    = 1 << 0,   // 0x0001
    COMPLEMENT_OP = 1 << 1,   // 0x0002
};
50 
53  SequenceDataEncoder(const Alphabet alphabet) : m_alphabet( alphabet ) {}
54 
57  virtual ~SequenceDataEncoder() {}
58 
61  virtual void reserve(const uint32 n_reads, const uint32 n_bps) {}
62 
73  virtual void push_back(
74  const uint32 in_sequence_len,
75  const char* name,
76  const uint8* base_pairs,
77  const uint8* quality,
78  const QualityEncoding quality_encoding,
79  const uint32 max_sequence_len,
80  const uint32 trim3,
81  const uint32 trim5,
82  const StrandOp conversion_flags)
83  {
84  const uint32 trimmed_len = in_sequence_len > trim3 + trim5 ?
85  in_sequence_len - trim3 - trim5 : 0u;
86 
87  // truncate sequence
88  const uint32 sequence_len = nvbio::min( trimmed_len, max_sequence_len );
89 
90  // keep stats, needed for the implementation of io::skip()
91  m_info.m_sequence_stream_len += sequence_len;
92  m_info.m_n_seqs++;
93  }
94 
97  virtual void begin_batch(void) { m_info = SequenceDataInfo(); }
98 
101  virtual void end_batch(void) {}
102 
105  virtual const SequenceDataInfo* info() const { return &m_info; }
106 
109  Alphabet alphabet() const { return m_alphabet; }
110 
111 private:
112  Alphabet m_alphabet;
113  SequenceDataInfo m_info;
114 };
115 
118 SequenceDataEncoder* create_encoder(const Alphabet alphabet, SequenceDataHost* data);
119 
120 } // namespace io
121 } // namespace nvbio