NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
output_debug.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
29 #include <nvbio/basic/numbers.h>
30 #include <crc/crc.h>
31 
32 namespace nvbio {
33 namespace io {
34 
35 DebugOutput::DebugOutput(const char *file_name, AlignmentType alignment_type, BNT bnt)
36  : OutputFile(file_name, alignment_type, bnt)
37 {
38  if (alignment_type == PAIRED_END)
39  {
40  char fname[256];
41  char *mate_number;
42 
43  NVBIO_CUDA_ASSERT(strlen(file_name) < sizeof(fname));
44  strncpy(fname, file_name, sizeof(fname));
45 
46  mate_number = strstr(fname, "#");
47  if (mate_number)
48  {
49  *mate_number = '1';
50  }
51 
52  fp = gzopen(fname, "wb");
53 
54  if (mate_number)
55  {
56  *mate_number = '2';
57  fp_opposite_mate = gzopen(fname, "wb");
58  } else {
59  fp_opposite_mate = fp;
60  }
61  } else {
62  fp = gzopen(file_name, "wb");
63  fp_opposite_mate = NULL;
64  }
65 }
66 
68 {
69  if (fp)
70  {
71  gzclose(fp);
72  }
73 
74  if (fp_opposite_mate && fp != fp_opposite_mate)
75  {
76  gzclose(fp_opposite_mate);
77  }
78 
79  fp = NULL;
80  fp_opposite_mate = NULL;
81 }
82 
83 
85 {
86  for(uint32 c = 0; c < batch.count; c++)
87  {
88  AlignmentData mate_1 = get(batch, c);
90 
91  process_one_alignment(mate_1, mate_2);
92  }
93 }
94 
96 {
97  for(uint32 c = 0; c < batch.count; c++)
98  {
99  AlignmentData mate_1 = get_mate(batch, c, MATE_1);
100  AlignmentData mate_2 = get_mate(batch, c, MATE_2);
101 
102  process_one_alignment(mate_1, mate_2);
103  }
104 }
105 
107 {
108  if (fp)
109  {
110  gzclose(fp);
111  }
112 
113  if (fp_opposite_mate && fp != fp_opposite_mate)
114  {
115  gzclose(fp_opposite_mate);
116  }
117 
118  fp = NULL;
119  fp_opposite_mate = NULL;
120 }
121 
122 void DebugOutput::output_alignment(gzFile& fp, const struct DbgAlignment& al, const struct DbgInfo& info)
123 {
124  gzwrite(fp, &al, sizeof(al));
125  if (al.alignment_pos)
126  {
127  gzwrite(fp, &info, sizeof(info));
128  }
129 }
130 
131 void DebugOutput::process_one_alignment(const AlignmentData& mate_1, const AlignmentData& mate_2)
132 {
133  DbgAlignment al;
134  DbgInfo info;
135 
136  const uint32 mapq = mate_1.mapq;
137 
138  // process the first mate
139  process_one_mate(al, info, mate_1, mate_2, mapq);
140  output_alignment(fp, al, info);
141 
142  if (alignment_type == PAIRED_END)
143  {
144  // process the second mate
145  process_one_mate(al, info, mate_2, mate_1, mapq);
146  output_alignment(fp_opposite_mate, al, info);
147  }
148 }
149 
150 // fill out al and info for a given mate
151 // this does *not* fill in mapq, as that requires knowledge of which mate is the anchor as well as second-best scores
152 void DebugOutput::process_one_mate(DbgAlignment& al,
153  DbgInfo& info,
154  const AlignmentData& alignment,
155  const AlignmentData& mate,
156  const uint32 mapq)
157 {
158  // output read_id as CRC of read name
159  al.read_id = crcCalc(alignment.read_name, strlen(alignment.read_name));
160  al.read_len = alignment.read_len;
161 
162  if (alignment.aln->is_aligned())
163  {
164  // setup alignment information
165  const uint32 seq_index = uint32(std::upper_bound(
168  alignment.cigar_pos ) - bnt.sequence_index) - 1u;
169 
170  al.alignment_pos = alignment.cigar_pos - int32(bnt.sequence_index[ seq_index ]) + 1u;
171  info.flag = (alignment.aln->mate() ? DbgInfo::READ_2 : DbgInfo::READ_1) |
172  (alignment.aln->is_rc() ? DbgInfo::REVERSE : 0u);
173 
174  if (alignment_type == PAIRED_END)
175  {
176  if (alignment.aln->is_paired()) // FIXME: this should be other_mate.is_concordant()
177  info.flag |= DbgInfo::PROPER_PAIR;
178 
179  if (mate.aln->is_aligned() == false)
180  info.flag |= DbgInfo::MATE_UNMAPPED;
181  }
182 
183  const uint32 ref_cigar_len = reference_cigar_length(alignment.cigar, alignment.cigar_len);
184  if (alignment.cigar_pos + ref_cigar_len > bnt.sequence_index[ seq_index+1 ])
185  {
186  // flag UNMAPPED as this alignment bridges two adjacent reference sequences
187  info.flag |= DbgInfo::UNMAPPED;
188  }
189 
190  uint32 n_mm;
191  uint32 n_gapo;
192  uint32 n_gape;
193 
194  analyze_md_string(alignment.mds_vec, n_mm, n_gapo, n_gape);
195 
196  info.ref_id = seq_index;
197  info.mate = alignment.aln->mate();
198  info.score = alignment.aln->score();
199  info.mapQ = mapq;
200  info.ed = alignment.aln->ed();
201  info.subs = count_symbols(Cigar::SUBSTITUTION, alignment.cigar, alignment.cigar_len);
202  info.ins = count_symbols(Cigar::INSERTION, alignment.cigar, alignment.cigar_len);
203  info.dels = count_symbols(Cigar::DELETION, alignment.cigar, alignment.cigar_len);
204  info.mms = n_mm;
205  info.gapo = n_gapo;
206  info.gape = n_gape;
207  info.sec_score = Field_traits<int16>::min(); // TODO!
208  info.has_second = 0;
209  /*
210  if (info.sec_score == alignment.second_best->is_aligned())
211  {
212  info.sec_score = alignment.second_best->score();
213  info.has_second = 1;
214  } else {
215  info.sec_score = Field_traits<int16>::min();
216  info.has_second = 0;
217  }
218  */
219 
220  info.pad = 0x69;
221  } else {
222  // unmapped alignment
223  al.alignment_pos = 0;
224  }
225 }
226 
227 } // namespace io
228 } // namespace nvbio