NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pe_analyzer.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
29 #include <nvbio-aln-diff/html.h>
30 #include <nvbio-aln-diff/utils.h>
31 
32 namespace nvbio {
33 namespace alndiff {
34 
35 PEAnalyzer::PEAnalyzer(Filter& filter, const bool id_check) :
36  m_filter( filter ),
37  m_id_check( id_check )
38 {
39  n = 0;
40  n_mismatched = 0;
41 }
42 
44  const AlignmentPair& alnL,
45  const AlignmentPair& alnR)
46 {
47  if ((m_id_check && alnL[0].read_id != alnR[0].read_id) ||
48  (m_id_check && alnL[1].read_id != alnR[1].read_id) ||
49  alnL[0].read_len != alnR[0].read_len ||
50  alnL[1].read_len != alnR[1].read_len)
51  {
52  n_mismatched++;
53  return;
54  }
55 
56  mapped.push( alnL.is_mapped(), alnR.is_mapped() );
57  paired.push( alnL.is_mapped_paired(), alnR.is_mapped_paired() );
58  unique.push( alnL.is_unique_paired(), alnR.is_unique_paired() );
60 
61  if ((alnL.is_mapped_paired() == true) && (alnR.is_mapped_paired() == false)) paired_L_not_R_by_mapQ.push( log_bin( alnL.mapQ() ) );
62  if ((alnR.is_mapped_paired() == true) && (alnL.is_mapped_paired() == false)) paired_R_not_L_by_mapQ.push( log_bin( alnR.mapQ() ) );
63  if ((alnL.is_unique_paired() == true) && (alnR.is_unique_paired() == false)) unique_L_not_R_by_mapQ.push( log_bin( alnL.mapQ() ) );
64  if ((alnR.is_unique_paired() == true) && (alnL.is_unique_paired() == false)) unique_R_not_L_by_mapQ.push( log_bin( alnR.mapQ() ) );
65  if ((alnL.is_ambiguous_paired() == true) && (alnR.is_ambiguous_paired() == false)) ambiguous_L_not_R_by_mapQ.push( log_bin( alnL.mapQ() ) );
66  if ((alnR.is_ambiguous_paired() == true) && (alnL.is_ambiguous_paired() == false)) ambiguous_R_not_L_by_mapQ.push( log_bin( alnR.mapQ() ) );
67 
68  // edit-distance correlation stats
69  //if (alnL.is_mapped_paired()) sec_ed_by_ed_l.push( alnL.ed()+1, alnL.has_second() ? alnL.sec_ed()+1 : 0u );
70  //else sec_ed_by_ed_l.push( 0u, 0u );
71 
72  // edit-distance correlation stats
73  //if (alnR.is_mapped_paired()) sec_ed_by_ed_r.push( alnR.ed()+1, alnR.has_second() ? alnR.sec_ed()+1 : 0u );
74  //else sec_ed_by_ed_r.push( 0u, 0u );
75 
76  if (alnL.is_mapped_paired() && alnR.is_mapped_paired())
77  {
78  const uint32 mapQ_bin = log_bin( alnR.mapQ() );
79 
80  uint32 read_flags = 0u;
81 
82  if ((alnL[0].ref_id != alnR[0].ref_id) &&
83  (alnL[1].ref_id != alnR[1].ref_id))
84  {
85  n_different_ref12.push( mapQ_bin );
86  n_different_ref.push( mapQ_bin );
87  n_distant12.push( mapQ_bin );
88  n_distant.push( mapQ_bin );
89 
90  if ((alnL.is_unique_paired() == true) &&
91  (alnR.is_unique_paired() == true))
92  {
93  n_different_ref_unique.push( mapQ_bin );
94  n_distant_unique.push( mapQ_bin );
95  }
96  if ((alnL.is_ambiguous_paired() == false) &&
97  (alnR.is_ambiguous_paired() == false))
98  {
100  n_distant_not_ambiguous.push( mapQ_bin );
101  }
102 
103  read_flags |= Filter::DISTANT;
104  read_flags |= Filter::DIFFERENT_REF;
105  }
106  else if (alnL[0].ref_id != alnR[0].ref_id)
107  {
108  n_different_ref1.push( mapQ_bin );
109  n_different_ref.push( mapQ_bin );
110  n_distant1.push( mapQ_bin );
111  n_distant.push( mapQ_bin );
112 
113  if ((alnL.is_unique_paired() == true) &&
114  (alnR.is_unique_paired() == true))
115  {
116  n_different_ref_unique.push( mapQ_bin );
117  n_distant_unique.push( mapQ_bin );
118  }
119  if ((alnL.is_ambiguous_paired() == false) &&
120  (alnR.is_ambiguous_paired() == false))
121  {
123  n_distant_not_ambiguous.push( mapQ_bin );
124  }
125 
126  read_flags |= Filter::DISTANT;
127  read_flags |= Filter::DIFFERENT_REF;
128  }
129  else if (alnL[1].ref_id != alnR[1].ref_id)
130  {
131  n_different_ref2.push( mapQ_bin );
132  n_different_ref.push( mapQ_bin );
133  n_distant2.push( mapQ_bin );
134  n_distant.push( mapQ_bin );
135 
136  if ((alnL.is_unique_paired() == true) &&
137  (alnR.is_unique_paired() == true))
138  {
139  n_different_ref_unique.push( mapQ_bin );
140  n_distant_unique.push( mapQ_bin );
141  }
142  if ((alnL.is_ambiguous_paired() == false) &&
143  (alnR.is_ambiguous_paired() == false))
144  {
146  n_distant_not_ambiguous.push( mapQ_bin );
147  }
148 
149  read_flags |= Filter::DISTANT;
150  read_flags |= Filter::DIFFERENT_REF;
151  }
152  else if (alndiff::distant( alnL[0], alnR[0] ) && alndiff::distant( alnL[1], alnR[1] ))
153  {
154  n_distant12.push( mapQ_bin );
155  n_distant.push( mapQ_bin );
156 
157  if ((alnL.is_unique_paired() == true) &&
158  (alnR.is_unique_paired() == true))
159  n_distant_unique.push( mapQ_bin );
160  if ((alnL.is_ambiguous_paired() == false) &&
161  (alnR.is_ambiguous_paired() == false))
162  n_distant_not_ambiguous.push( mapQ_bin );
163 
164  read_flags |= Filter::DISTANT;
165  }
166  else if (alndiff::distant( alnL[0], alnR[0] ))
167  {
168  n_distant1.push( mapQ_bin );
169  n_distant.push( mapQ_bin );
170 
171  if ((alnL.is_unique_paired() == true) &&
172  (alnR.is_unique_paired() == true))
173  n_distant_unique.push( mapQ_bin );
174  if ((alnL.is_ambiguous_paired() == false) &&
175  (alnR.is_ambiguous_paired() == false))
176  n_distant_not_ambiguous.push( mapQ_bin );
177 
178  read_flags |= Filter::DISTANT;
179  }
180  else if (alndiff::distant( alnL[1], alnR[1] ))
181  {
182  n_distant2.push( mapQ_bin );
183  n_distant.push( mapQ_bin );
184 
185  if ((alnL.is_unique_paired() == true) &&
186  (alnR.is_unique_paired() == true))
187  n_distant_unique.push( mapQ_bin );
188  if ((alnL.is_ambiguous_paired() == false) &&
189  (alnR.is_ambiguous_paired() == false))
190  n_distant_not_ambiguous.push( mapQ_bin );
191 
192  read_flags |= Filter::DISTANT;
193  }
194 
195  if ((alnL[0].is_rc() != alnR[0].is_rc()) &&
196  (alnL[1].is_rc() != alnR[1].is_rc()))
197  {
198  n_discordant12.push( mapQ_bin );
199  n_discordant.push( mapQ_bin );
200 
201  if ((alnL.is_unique_paired() == true) &&
202  (alnR.is_unique_paired() == true))
203  n_discordant_unique.push( mapQ_bin );
204  if ((alnL.is_ambiguous_paired() == false) &&
205  (alnR.is_ambiguous_paired() == false))
206  n_discordant_not_ambiguous.push( mapQ_bin );
207 
208  read_flags |= Filter::DISCORDANT;
209  }
210  if (alnL[0].is_rc() != alnR[0].is_rc())
211  {
212  n_discordant1.push( mapQ_bin );
213  n_discordant.push( mapQ_bin );
214 
215  if ((alnL.is_unique_paired() == true) &&
216  (alnR.is_unique_paired() == true))
217  n_discordant_unique.push( mapQ_bin );
218  if ((alnL.is_ambiguous_paired() == false) &&
219  (alnR.is_ambiguous_paired() == false))
220  n_discordant_not_ambiguous.push( mapQ_bin );
221 
222  read_flags |= Filter::DISCORDANT;
223  }
224  else if (alnL[1].is_rc() != alnR[1].is_rc())
225  {
226  n_discordant2.push( mapQ_bin );
227  n_discordant.push( mapQ_bin );
228 
229  if ((alnL.is_unique_paired() == true) &&
230  (alnR.is_unique_paired() == true))
231  n_discordant_unique.push( mapQ_bin );
232  if ((alnL.is_ambiguous_paired() == false) &&
233  (alnR.is_ambiguous_paired() == false))
234  n_discordant_not_ambiguous.push( mapQ_bin );
235 
236  read_flags |= Filter::DISCORDANT;
237  }
238 
239  const uint32 length_bin = read_length_bin( alnL.read_len() );
240 
241  // generic stats
242  {
243  // score
244  m_filter( al_stats.higher_score.push( alnL.score(), alnR.score(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::SCORE, alnL.read_id() );
245 
246  // ed
247  m_filter( al_stats.lower_ed.push( alnL.ed(), alnR.ed(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::ED, alnL.read_id() );
248 
249  // mapQ
250  m_filter( al_stats.higher_mapQ.push( alnL.mapQ(), alnR.mapQ(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::MAPQ, alnL.read_id() );
251 
252  // longer mapping
253  al_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ(), alnR.mapQ() );
254 
255  al_stats.lower_subs.push( alnL.subs(), alnR.subs(), length_bin, alnL.mapQ(), alnR.mapQ() );
256  m_filter( al_stats.lower_mms.push( alnL.n_mm(), alnR.n_mm(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::MMS, alnL.read_id() );
257  m_filter( al_stats.lower_ins.push( alnL.ins(), alnR.ins(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::INS, alnL.read_id() );
258  m_filter( al_stats.lower_dels.push( alnL.dels(), alnR.dels(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::DELS, alnL.read_id() );
259 
260  al_stats.higher_pos.push( alnL[0].pos, alnR[0].pos, length_bin, alnL.mapQ(), alnR.mapQ() );
261  // TODO: this is a 2d quantity
262 
263  //if (alnL.has_second())
264  sec_score_by_score_l.push( log_bin( alnL.score() ), log_bin( alnL.score() - alnL.sec_score() ) );
265 
266  //if (alnR.has_second())
267  sec_score_by_score_r.push( log_bin( alnR.score() ), log_bin( alnR.score() - alnR.sec_score() ) );
268  }
269  if (read_flags & Filter::DISTANT)
270  {
271  // ed
272  distant_stats.lower_ed.push( alnL.ed(), alnR.ed(), length_bin, alnL.mapQ(), alnR.mapQ() );
273 
274  // mapQ
275  distant_stats.higher_mapQ.push( alnL.mapQ(), alnR.mapQ(), length_bin, alnL.mapQ(), alnR.mapQ() );
276 
277  // longer mapping
278  distant_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ(), alnR.mapQ() );
279 
280  distant_stats.lower_subs.push( alnL.subs(), alnR.subs(), length_bin, alnL.mapQ(), alnR.mapQ() );
281  distant_stats.lower_mms.push( alnL.n_mm(), alnR.n_mm(), length_bin, alnL.mapQ(), alnR.mapQ() );
282  distant_stats.lower_ins.push( alnL.ins(), alnR.ins(), length_bin, alnL.mapQ(), alnR.mapQ() );
283  distant_stats.lower_dels.push( alnL.dels(), alnR.dels(), length_bin, alnL.mapQ(), alnR.mapQ() );
284 
285  distant_stats.higher_pos.push( alnL[0].pos, alnR[0].pos, length_bin, alnL.mapQ(), alnR.mapQ() );
286  // TODO: this is a 2d quantity
287  }
288  if (read_flags & Filter::DISCORDANT)
289  {
290  // ed
291  discordant_stats.lower_ed.push( alnL.ed(), alnR.ed(), length_bin, alnL.mapQ(), alnR.mapQ() );
292 
293  // mapQ
294  discordant_stats.higher_mapQ.push( alnL.mapQ(), alnR.mapQ(), length_bin, alnL.mapQ(), alnR.mapQ() );
295 
296  // longer mapping
297  discordant_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ(), alnR.mapQ() );
298 
299  discordant_stats.lower_subs.push( alnL.subs(), alnR.subs(), length_bin, alnL.mapQ(), alnR.mapQ() );
300  discordant_stats.lower_mms.push( alnL.n_mm(), alnR.n_mm(), length_bin, alnL.mapQ(), alnR.mapQ() );
301  discordant_stats.lower_ins.push( alnL.ins(), alnR.ins(), length_bin, alnL.mapQ(), alnR.mapQ() );
302  discordant_stats.lower_dels.push( alnL.dels(), alnR.dels(), length_bin, alnL.mapQ(), alnR.mapQ() );
303 
304  discordant_stats.higher_pos.push( alnL[0].pos, alnR[0].pos, length_bin, alnL.mapQ(), alnR.mapQ() );
305  // TODO: this is a 2d quantity
306  }
307  }
308 
309  ++n;
310 }
311 
312 namespace {
313 
314 void generate_summary_header(FILE* html_output)
315 {
316  html::tr_object tr( html_output, NULL );
317  html::th_object( html_output, html::FORMATTED, NULL, "" );
318  html::th_object( html_output, html::FORMATTED, NULL, "better" );
319  html::th_object( html_output, html::FORMATTED, "class", "red", NULL, "worse" );
320 }
321 template <typename StatsType>
322 void generate_summary_cell(FILE* html_output, const std::string file_name, const char* type, const StatsType& stats, const uint32 n, const char* cls)
323 {
324  char link_name[1024];
325  sprintf( link_name, "<a href=\"%s\">%s</a>", local_file( file_name ), type );
326  html::tr_object tr( html_output, "class", cls, NULL );
327  html::th_object( html_output, html::FORMATTED, NULL, link_name );
328  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(stats.l.diff_hist.all_but(0))/float(n) );
329  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(stats.r.diff_hist.all_but(0))/float(n) );
330 }
331 
332 } // anonymous namespace
333 
334 void PEAnalyzer::generate_report(const char* aln_file_nameL, const char* aln_file_nameR, const char* report)
335 {
336  if (report == NULL)
337  return;
338 
339  const std::string mapped_bps_name = generate_file_name( report, "mapped-bps" );
340  const std::string score_name = generate_file_name( report, "score" );
341  const std::string ed_name = generate_file_name( report, "ed" );
342  const std::string mapQ_name = generate_file_name( report, "mapQ" );
343  const std::string mms_name = generate_file_name( report, "mms" );
344  const std::string ins_name = generate_file_name( report, "ins" );
345  const std::string dels_name = generate_file_name( report, "dels" );
346  const std::string pos_name = generate_file_name( report, "pos" );
347 
348  const std::string distant_mapped_bps_name = generate_file_name( report, "distant_stats.mapped-bps" );
349  const std::string distant_ed_name = generate_file_name( report, "distant_stats.ed" );
350  const std::string distant_mapQ_name = generate_file_name( report, "distant_stats.mapQ" );
351  const std::string distant_mms_name = generate_file_name( report, "distant_stats.mms" );
352  const std::string distant_ins_name = generate_file_name( report, "distant_stats.ins" );
353  const std::string distant_dels_name = generate_file_name( report, "distant_stats.dels" );
354  const std::string distant_pos_name = generate_file_name( report, "distant_stats.pos" );
355 
356  const std::string discordant_mapped_bps_name = generate_file_name( report, "discordant_stats.mapped-bps" );
357  const std::string discordant_ed_name = generate_file_name( report, "discordant_stats.ed" );
358  const std::string discordant_mapQ_name = generate_file_name( report, "discordant_stats.mapQ" );
359  const std::string discordant_mms_name = generate_file_name( report, "discordant_stats.mms" );
360  const std::string discordant_ins_name = generate_file_name( report, "discordant_stats.ins" );
361  const std::string discordant_dels_name = generate_file_name( report, "discordant_stats.dels" );
362  const std::string discordant_pos_name = generate_file_name( report, "discordant_stats.pos" );
363 
364  generate_table( mapped_bps_name.c_str(), "paired L & R", "mapped bps", "mapped bps diff", al_stats.longer_mapping.bin_type(), al_stats.longer_mapping.l, al_stats.longer_mapping.r, n, paired.L_and_R );
365  generate_table( score_name.c_str(), "paired L & R", "score", "score diff", al_stats.higher_score.bin_type(), al_stats.higher_score.l, al_stats.higher_score.r, n, paired.L_and_R );
366  generate_table( ed_name.c_str(), "paired L & R", "edit distance", "edit distance diff", al_stats.lower_ed.bin_type(), al_stats.lower_ed.l, al_stats.lower_ed.r, n, paired.L_and_R );
367  generate_table( mapQ_name.c_str(), "paired L & R", "mapQ", "mapQ diff", al_stats.higher_mapQ.bin_type(), al_stats.higher_mapQ.l, al_stats.higher_mapQ.r, n, paired.L_and_R );
368  generate_table( mms_name.c_str(), "paired L & R", "mms", "mms diff", al_stats.lower_mms.bin_type(), al_stats.lower_mms.l, al_stats.lower_mms.r, n, paired.L_and_R );
369  generate_table( ins_name.c_str(), "paired L & R", "ins", "ins diff", al_stats.lower_ins.bin_type(), al_stats.lower_ins.l, al_stats.lower_ins.r, n, paired.L_and_R );
370  generate_table( dels_name.c_str(), "paired L & R", "dels", "dels diff", al_stats.lower_dels.bin_type(), al_stats.lower_dels.l, al_stats.lower_dels.r, n, paired.L_and_R );
371  generate_table( pos_name.c_str(), "paired L & R", "position", "distance", al_stats.higher_pos.bin_type(), al_stats.higher_pos.l, al_stats.higher_pos.r, n, paired.L_and_R, false );
372 
373  generate_table( distant_mapped_bps_name.c_str(), "distant", "mapped bps", "mapped bps diff", distant_stats.longer_mapping.bin_type(), distant_stats.longer_mapping.l, distant_stats.longer_mapping.r, n, n_distant.count );
374  generate_table( distant_ed_name.c_str(), "distant", "edit distance", "edit distance diff", distant_stats.lower_ed.bin_type(), distant_stats.lower_ed.l, distant_stats.lower_ed.r, n, n_distant.count );
375  generate_table( distant_mapQ_name.c_str(), "distant", "mapQ", "mapQ diff", distant_stats.higher_mapQ.bin_type(), distant_stats.higher_mapQ.l, distant_stats.higher_mapQ.r, n, n_distant.count );
376  generate_table( distant_mms_name.c_str(), "distant", "mms", "mms diff", distant_stats.lower_mms.bin_type(), distant_stats.lower_mms.l, distant_stats.lower_mms.r, n, n_distant.count );
377  generate_table( distant_ins_name.c_str(), "distant", "ins", "ins diff", distant_stats.lower_ins.bin_type(), distant_stats.lower_ins.l, distant_stats.lower_ins.r, n, n_distant.count );
378  generate_table( distant_dels_name.c_str(), "distant", "dels", "dels diff", distant_stats.lower_dels.bin_type(), distant_stats.lower_dels.l, distant_stats.lower_dels.r, n, n_distant.count );
379  generate_table( distant_pos_name.c_str(), "distant", "position", "distance", distant_stats.higher_pos.bin_type(), distant_stats.higher_pos.l, distant_stats.higher_pos.r, n, n_distant.count, false );
380 
381  generate_table( discordant_mapped_bps_name.c_str(), "discordant", "mapped bps", "mapped bps diff", discordant_stats.longer_mapping.bin_type(), discordant_stats.longer_mapping.l, discordant_stats.longer_mapping.r, n, n_discordant.count );
382  generate_table( discordant_ed_name.c_str(), "discordant", "edit distance","edit distance diff", discordant_stats.lower_ed.bin_type(), discordant_stats.lower_ed.l, discordant_stats.lower_ed.r, n, n_discordant.count );
383  generate_table( discordant_mapQ_name.c_str(), "discordant", "mapQ", "mapQ diff", discordant_stats.higher_mapQ.bin_type(), discordant_stats.higher_mapQ.l, discordant_stats.higher_mapQ.r, n, n_discordant.count );
384  generate_table( discordant_mms_name.c_str(), "discordant", "mms", "mms diff", discordant_stats.lower_mms.bin_type(), discordant_stats.lower_mms.l, discordant_stats.lower_mms.r, n, n_discordant.count );
385  generate_table( discordant_ins_name.c_str(), "discordant", "ins", "ins diff", discordant_stats.lower_ins.bin_type(), discordant_stats.lower_ins.l, discordant_stats.lower_ins.r, n, n_discordant.count );
386  generate_table( discordant_dels_name.c_str(), "discordant", "dels", "dels diff", discordant_stats.lower_dels.bin_type(), discordant_stats.lower_dels.l, discordant_stats.lower_dels.r, n, n_discordant.count );
387  generate_table( discordant_pos_name.c_str(), "discordant", "position", "distance", discordant_stats.higher_pos.bin_type(), discordant_stats.higher_pos.l, discordant_stats.higher_pos.r, n, n_discordant.count, false );
388 
389  FILE* html_output = fopen( report, "w" );
390  if (html_output == NULL)
391  {
392  log_warning( stderr, "unable to write HTML report \"%s\"\n", report );
393  return;
394  }
395 
396  std::vector<std::string> log_bins;
397  for (uint32 y = 0; y < 11; ++y)
398  {
399  if (y == 0)
400  log_bins.push_back( "0" );
401  else if (y == 1)
402  log_bins.push_back( "1" );
403  else if (y == 2)
404  log_bins.push_back( "2" );
405  else
406  {
407  char buffer[16];
408  sprintf(buffer, "2^%u", y-1);
409  log_bins.push_back( buffer );
410  }
411  }
412  log_bins.push_back( "inf" );
413 
414  std::vector<std::string> ed_bins;
415  ed_bins.push_back("-");
416  for (uint32 y = 0; y < 16; ++y)
417  {
418  char buffer[16];
419  sprintf(buffer, "%u", y);
420  ed_bins.push_back( buffer );
421  }
422 
423  const Histogram<8> cum_different_ref = reverse_cumulative( n_different_ref );
424  const Histogram<8> cum_different_ref12 = reverse_cumulative( n_different_ref12 );
425  const Histogram<8> cum_different_ref1 = reverse_cumulative( n_different_ref1 );
426  const Histogram<8> cum_different_ref2 = reverse_cumulative( n_different_ref2 );
427  const Histogram<8> cum_different_ref_unique = reverse_cumulative( n_different_ref_unique );
428  const Histogram<8> cum_different_ref_not_ambiguous = reverse_cumulative( n_different_ref_not_ambiguous );
429 
430  const Histogram<8> cum_distant = reverse_cumulative( n_distant );
431  const Histogram<8> cum_distant12 = reverse_cumulative( n_distant12 );
432  const Histogram<8> cum_distant1 = reverse_cumulative( n_distant1 );
433  const Histogram<8> cum_distant2 = reverse_cumulative( n_distant2 );
434  const Histogram<8> cum_distant_unique = reverse_cumulative( n_distant_unique );
435  const Histogram<8> cum_distant_not_ambiguous = reverse_cumulative( n_distant_not_ambiguous );
436 
437  const Histogram<8> cum_discordant = reverse_cumulative( n_discordant );
438  const Histogram<8> cum_discordant12 = reverse_cumulative( n_discordant12 );
439  const Histogram<8> cum_discordant1 = reverse_cumulative( n_discordant1 );
440  const Histogram<8> cum_discordant2 = reverse_cumulative( n_discordant2 );
441  const Histogram<8> cum_discordant_unique = reverse_cumulative( n_discordant_unique );
442  const Histogram<8> cum_discordant_not_ambiguous = reverse_cumulative( n_discordant_not_ambiguous );
443 
444  const uint32 HI_MAPQ_BIN = 6; // >= 32
445  {
446  html::html_object html( html_output );
447  {
448  const char* meta_list = "<meta http-equiv=\"refresh\" content=\"5\" />";
449 
450  html::header_object hd( html_output, "nv-aln-diff report", html::style(), meta_list );
451  {
452  html::body_object body( html_output );
453 
454  //
455  // alignment stats
456  //
457  {
458  html::table_object table( html_output, "alignment-stats", "stats", "alignment stats" );
459  {
460  html::tr_object tr( html_output, NULL );
461  html::th_object( html_output, html::FORMATTED, NULL, "" );
462  html::th_object( html_output, html::FORMATTED, NULL, "L = %s", aln_file_nameL );
463  html::th_object( html_output, html::FORMATTED, NULL, "R = %s", aln_file_nameR );
464  html::th_object( html_output, html::FORMATTED, NULL, "L & R" );
465  html::th_object( html_output, html::FORMATTED, NULL, "L \\ R" );
466  html::th_object( html_output, html::FORMATTED, NULL, "R \\ L" );
467  }
468  {
469  html::tr_object tr( html_output, "class", "alt", NULL );
470  html::th_object( html_output, html::FORMATTED, NULL, "mapped");
471  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(mapped.L) / float(n) );
472  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(mapped.R) / float(n) );
473  html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(mapped.L_and_R) / float(n) );
474  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(mapped.L_not_R) / float(n) );
475  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(mapped.R_not_L) / float(n) );
476  }
477  {
478  html::tr_object tr( html_output, NULL );
479  html::th_object( html_output, html::FORMATTED, NULL, "paired");
480  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(paired.L) / float(n) );
481  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(paired.R) / float(n) );
482  html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(paired.L_and_R) / float(n) );
483  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(paired.L_not_R) / float(n), 100.0f * float(paired_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
484  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(paired.R_not_L) / float(n), 100.0f * float(paired_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
485  }
486  {
487  html::tr_object tr( html_output, "class", "alt", NULL );
488  html::th_object( html_output, html::FORMATTED, NULL, "unique");
489  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(unique.L) / float(n) );
490  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(unique.R) / float(n) );
491  html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(unique.L_and_R) / float(n) );
492  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(unique.L_not_R) / float(n), 100.0f * float(unique_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
493  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(unique.R_not_L) / float(n), 100.0f * float(unique_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
494  }
495  {
496  html::tr_object tr( html_output, NULL );
497  html::th_object( html_output, html::FORMATTED, NULL, "ambiguous");
498  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(ambiguous.L) / float(n) );
499  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(ambiguous.R) / float(n) );
500  html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(ambiguous.L_and_R) / float(n) );
501  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(ambiguous.L_not_R) / float(n), 100.0f * float(ambiguous_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
502  html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(ambiguous.R_not_L) / float(n), 100.0f * float(ambiguous_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
503  }
504  }
505  //
506  // discordance stats
507  //
508  {
509  html::table_object table( html_output, "discordance-stats", "stats", "discordance stats" );
510  {
511  html::tr_object tr( html_output, NULL );
512  html::th_object( html_output, html::FORMATTED, NULL, "" );
513  html::th_object( html_output, html::FORMATTED, NULL, "items" );
514  html::th_object( html_output, html::FORMATTED, NULL, "%% of total" );
515  html::th_object( html_output, html::FORMATTED, NULL, "mate 1" );
516  html::th_object( html_output, html::FORMATTED, NULL, "mate 2" );
517  html::th_object( html_output, html::FORMATTED, NULL, "mate 1 & 2" );
518  html::th_object( html_output, html::FORMATTED, NULL, "unique" );
519  html::th_object( html_output, html::FORMATTED, NULL, "not-ambiguous" );
520  }
521  {
522  html::tr_object tr( html_output, NULL );
523  html::th_object( html_output, html::FORMATTED, NULL, "different reference" );
524  html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_different_ref[0]) * 1.0e-6f, float(cum_different_ref[HI_MAPQ_BIN]) * 1.0e-6f );
525  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref[0]) / float(n), 100.0f * float(cum_different_ref[HI_MAPQ_BIN]) / float(n) );
526  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref1[0]) / float(n), 100.0f * float(cum_different_ref1[HI_MAPQ_BIN]) / float(n) );
527  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref2[0]) / float(n), 100.0f * float(cum_different_ref2[HI_MAPQ_BIN]) / float(n) );
528  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref12[0]) / float(n), 100.0f * float(cum_different_ref12[HI_MAPQ_BIN]) / float(n) );
529  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref_unique[0]) / float(n), 100.0f * float(cum_different_ref_unique[HI_MAPQ_BIN]) / float(n) );
530  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref_not_ambiguous[0]) / float(n), 100.0f * float(cum_different_ref_not_ambiguous[HI_MAPQ_BIN]) / float(n) );
531  }
532  {
533  html::tr_object tr( html_output, "class", "alt", NULL );
534  html::th_object( html_output, html::FORMATTED, NULL, "distant" );
535  html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_distant[0]) * 1.0e-6f, float(cum_distant[HI_MAPQ_BIN]) * 1.0e-6f );
536  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant[0]) / float(n), 100.0f * float(cum_distant[HI_MAPQ_BIN]) / float(n) );
537  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant1[0]) / float(n), 100.0f * float(cum_distant1[HI_MAPQ_BIN]) / float(n) );
538  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant2[0]) / float(n), 100.0f * float(cum_distant2[HI_MAPQ_BIN]) / float(n) );
539  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant12[0]) / float(n), 100.0f * float(cum_distant12[HI_MAPQ_BIN]) / float(n) );
540  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant_unique[0]) / float(n), 100.0f * float(cum_distant_unique[HI_MAPQ_BIN]) / float(n) );
541  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant_not_ambiguous[0]) / float(n), 100.0f * float(cum_distant_not_ambiguous[HI_MAPQ_BIN]) / float(n) );
542  }
543  {
544  html::tr_object tr( html_output, NULL );
545  html::th_object( html_output, html::FORMATTED, NULL, "discordant" );
546  html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_discordant[0]) * 1.0e-6f, float(cum_discordant[HI_MAPQ_BIN]) * 1.0e-6f );
547  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant[0]) / float(n), 100.0f * float(cum_discordant[HI_MAPQ_BIN]) / float(n) );
548  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant1[0]) / float(n), 100.0f * float(cum_discordant1[HI_MAPQ_BIN]) / float(n) );
549  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant2[0]) / float(n), 100.0f * float(cum_discordant2[HI_MAPQ_BIN]) / float(n) );
550  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant12[0]) / float(n), 100.0f * float(cum_discordant12[HI_MAPQ_BIN]) / float(n) );
551  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant_unique[0]) / float(n), 100.0f * float(cum_discordant_unique[HI_MAPQ_BIN]) / float(n) );
552  html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant_not_ambiguous[0]) / float(n), 100.0f * float(cum_discordant_not_ambiguous[HI_MAPQ_BIN]) / float(n) );
553  }
554  }
555  //
556  // summary stats
557  //
558  {
559  html::table_object table( html_output, "summary-stats", "stats", "summary stats" );
560  // ------------------------------------------- generic -------------------------------------------------- //
561  generate_summary_header( html_output );
562  generate_summary_cell( html_output, mapped_bps_name, "mapped bases", al_stats.longer_mapping, n, "none" );
563  generate_summary_cell( html_output, score_name, "score", al_stats.higher_score, n, "alt" );
564  generate_summary_cell( html_output, ed_name, "edit distance",al_stats.lower_ed, n, "none" );
565  generate_summary_cell( html_output, mapQ_name, "mapQ", al_stats.higher_mapQ, n, "alt" );
566  generate_summary_cell( html_output, mms_name, "mismatches", al_stats.lower_mms, n, "none" );
567  generate_summary_cell( html_output, ins_name, "insertions", al_stats.lower_ins, n, "alt" );
568  generate_summary_cell( html_output, dels_name, "deletions", al_stats.lower_dels, n, "none" );
569  generate_summary_cell( html_output, pos_name, "distance", al_stats.higher_pos, n, "alt" );
570  // ------------------------------------------- distant -------------------------------------------------- //
571  generate_summary_header( html_output );
572  generate_summary_cell( html_output, distant_mapped_bps_name, "mapped bases [distant]", distant_stats.longer_mapping, n, "none" );
573  generate_summary_cell( html_output, distant_ed_name, "edit distance [distant]",distant_stats.lower_ed, n, "alt" );
574  generate_summary_cell( html_output, distant_mapQ_name, "mapQ [distant]", distant_stats.higher_mapQ, n, "none" );
575  generate_summary_cell( html_output, distant_mms_name, "mismatches [distant]", distant_stats.lower_mms, n, "alt" );
576  generate_summary_cell( html_output, distant_ins_name, "insertions [distant]", distant_stats.lower_ins, n, "none" );
577  generate_summary_cell( html_output, distant_dels_name, "deletions [distant]", distant_stats.lower_dels, n, "alt" );
578  generate_summary_cell( html_output, distant_pos_name, "distance [distant]", distant_stats.higher_pos, n, "none" );
579  // ------------------------------------------- discordant ---------------------------------------------- //
580  generate_summary_header( html_output );
581  generate_summary_cell( html_output, discordant_mapped_bps_name, "mapped bases [discordant]", discordant_stats.longer_mapping, n, "alt" );
582  generate_summary_cell( html_output, discordant_ed_name, "edit distance [discordant]",discordant_stats.lower_ed, n, "none" );
583  generate_summary_cell( html_output, discordant_mapQ_name, "mapQ [discordant]", discordant_stats.higher_mapQ, n, "alt" );
584  generate_summary_cell( html_output, discordant_mms_name, "mismatches [discordant]", discordant_stats.lower_mms, n, "none" );
585  generate_summary_cell( html_output, discordant_ins_name, "insertions [discordant]", discordant_stats.lower_ins, n, "alt" );
586  generate_summary_cell( html_output, discordant_dels_name, "deletions [discordant]", discordant_stats.lower_dels, n, "none" );
587  generate_summary_cell( html_output, discordant_pos_name, "distance [discordant]", discordant_stats.higher_pos, n, "alt" );
588  }
589 
590  // paired L not R
592  html_output,
593  "by mapQ",
594  "paired (L \\ R) vs (R \\ L)",
595  LOG,
598  n,
599  n );
600 
601  // unique L not R
603  html_output,
604  "by mapQ",
605  "unique (L \\ R) vs (R \\ L)",
606  LOG,
609  n,
610  n );
611 
612  // unique L not R
614  html_output,
615  "by mapQ",
616  "ambiguous (L \\ R) vs (R \\ L)",
617  LOG,
620  n,
621  n );
622 
623  // different reference by mapQ
625  html_output,
626  "paired L & R",
627  "different reference by mapQ",
628  LOG,
630  paired.L_and_R );
631 
632  // different reference by mapQ
634  html_output,
635  "paired L & R",
636  "distant by mapQ",
637  LOG,
638  n_distant,
639  paired.L_and_R );
640 
641  // discordant reference by mapQ
643  html_output,
644  "paired L & R",
645  "discordant by mapQ",
646  LOG,
647  n_discordant,
648  paired.L_and_R );
649 
650  // second score by score
652  html_output,
653  "paired L & R",
654  "second score",
657  "score",
658  log_bins,
659  log_bins,
660  n,
661  paired.L_and_R,
662  false );
663 
664  // second score by score
665  /*generate_table2d(
666  html_output,
667  "paired L & R",
668  "second ed",
669  sec_ed_by_ed_l,
670  sec_ed_by_ed_r,
671  "ed",
672  ed_bins,
673  ed_bins,
674  n,
675  n,
676  false );*/
677  }
678  }
679  }
680  fclose( html_output );
681 }
682 
683 } // namespace alndiff
684 } // namespace nvbio