40 fp = fopen(file_name,
"wt");
43 log_error(stderr,
"SamOutput: could not open %s for writing\n", file_name);
50 setvbuf(fp, NULL, _IOFBF, 256 * 1024);
62 void SamOutput::write_formatted_string(
const char *fmt, ...)
66 fwrite(
"\t", 1, 1, fp);
68 vfprintf(fp, fmt, args);
71 void SamOutput::write_string(
const char *str,
bool tab)
74 fwrite(
"\t", 1, 1, fp);
76 fwrite(str, 1, strlen(str), fp);
81 template <
typename T>
int itoa(
char *buf, T in)
84 bool negative =
false;
96 buf[len] =
"0123456789"[in % 10];
109 for(
int c = 0; c < len / 2; c++)
113 buf[c] = buf[len - c - 1];
114 buf[len - c - 1] = tmp;
124 void SamOutput::write_int(T i,
bool tab)
133 fwrite(
"\t", 1, 1, fp);
136 fwrite(str, len, 1, fp);
140 void SamOutput::write_tag(
const char *name, T value)
143 write_string(
":i:",
false);
144 write_int(value,
false);
148 void SamOutput::write_tag(
const char *name,
const char *value)
151 write_string(
":Z:",
false);
152 write_string(value,
false);
155 void SamOutput::linebreak(
void)
157 fwrite(
"\n", 1, 1, fp);
160 void SamOutput::output_header(
void)
162 write_string(
"@HD",
false);
163 write_string(
"VN:1.3");
169 write_string(
"@RG",
false);
170 write_formatted_string(
"ID:%s",
rg_id.c_str());
174 write_string(
rg_string.c_str(), false );
179 write_string(
"@PG",
false);
180 write_formatted_string(
"ID:%s",
pg_id.c_str());
181 write_formatted_string(
"PN:%s",
pg_name.c_str());
182 write_formatted_string(
"VN:%s",
pg_version.c_str());
183 write_formatted_string(
"CL:\"%s\"",
pg_args.c_str());
190 write_string(
"@SQ",
false);
199 uint32 SamOutput::generate_cigar_string(
struct SamAlignment& sam_align,
200 const AlignmentData& alignment)
202 char *output = sam_align.cigar;
205 for(
uint32 i = 0; i < alignment.cigar_len; i++)
207 const Cigar& cigar_entry = alignment.cigar[alignment.cigar_len - i - 1u];
208 const char cigar_op =
"MIDS"[cigar_entry.m_type];
212 len = itoa(output, cigar_entry.m_len);
219 assert((
unsigned long)(output - sam_align.cigar) < (
unsigned long) (
sizeof(sam_align.cigar) - 1));
223 read_len += cigar_entry.m_len;
233 uint32 SamOutput::generate_md_string(SamAlignment& sam_align,
const AlignmentData& alignment)
235 if (alignment.mds_vec == NULL)
237 log_warning(stderr,
" SAM: alignment %u from read %u has an empty MD string\n", alignment.aln_id, alignment.read_id);
238 sam_align.md_string[0] =
'\0';
241 const uint32 mds_len =
uint32(alignment.mds_vec[0]) | (
uint32(alignment.mds_vec[1]) << 8);
243 char *buffer = sam_align.md_string;
255 const uint8 op = alignment.mds_vec[i++];
260 uint8 l = alignment.mds_vec[i++];
263 while (i < mds_len && alignment.mds_vec[i] ==
MDS_MATCH)
264 l += alignment.mds_vec[i++];
266 buffer_len += itoa(buffer + buffer_len, l);
273 const char c =
dna_to_char(alignment.mds_vec[i++]);
274 buffer[buffer_len++] = c;
283 const uint8 l = alignment.mds_vec[i++];
287 sam_align.gape += l - 1;
294 const uint8 l = alignment.mds_vec[i++];
296 buffer[buffer_len++] =
'^';
297 for(
uint8 n = 0; n < l; n++)
299 buffer[buffer_len++] =
dna_to_char(alignment.mds_vec[i++]);
302 buffer[buffer_len++] =
'0';
305 sam_align.gape += l - 1;
310 }
while(i < mds_len);
312 buffer[buffer_len] =
'\0';
317 void SamOutput::output_alignment(
const struct SamAlignment& sam_align)
319 write_string(sam_align.qname,
false);
320 write_int((
uint32)sam_align.flags);
325 if (sam_align.flags & SAM_FLAGS_UNMAPPED)
328 write_string(
"*\t0\t0\t*\t*\t0\t0");
329 write_string(sam_align.seq);
330 write_string(sam_align.qual);
337 write_string(sam_align.rname);
338 write_int(sam_align.pos);
339 write_int(sam_align.mapq);
340 write_string(sam_align.cigar);
343 write_string(sam_align.rnext);
347 write_int(sam_align.pnext);
348 write_int(sam_align.tlen);
350 write_string(sam_align.seq);
351 write_string(sam_align.qual);
353 write_tag(
"NM", sam_align.ed);
354 write_tag(
"AS", sam_align.score);
355 if (sam_align.second_score_valid)
356 write_tag(
"XS", sam_align.second_score);
358 write_tag(
"XM", sam_align.mm);
359 write_tag(
"XO", sam_align.gapo);
360 write_tag(
"XG", sam_align.gape);
361 if (sam_align.md_string[0])
362 write_tag(
"MD", sam_align.md_string);
364 write_tag(
"MD",
"*");
369 uint32 SamOutput::process_one_alignment(
const AlignmentData& alignment,
370 const AlignmentData& mate)
373 SamAlignment sam_align;
387 sam_align.qname = alignment.read_name;
390 for(
uint32 i = 0; i < alignment.read_len; i++)
394 if (alignment.aln->m_rc)
397 s = complement(alignment.read_data[i]);
400 s = alignment.read_data[alignment.read_len - i - 1];
404 sam_align.seq[alignment.read_len] =
'\0';
407 for(
uint32 i = 0; i < alignment.read_len; i++)
411 if (alignment.aln[
MATE_1].m_rc)
412 q = alignment.qual[i];
414 q = alignment.qual[alignment.read_len - i - 1];
416 sam_align.qual[i] = q + 33;
418 sam_align.qual[alignment.read_len] =
'\0';
421 sam_align.mapq = alignment.mapq;
424 if (!(alignment.aln->is_aligned() || sam_align.mapq <
mapq_filter))
426 sam_align.flags = SAM_FLAGS_UNMAPPED;
428 sam_align.md_string[0] =
'\0';
431 output_alignment(sam_align);
436 sam_align.flags = (alignment.aln->mate() ? SAM_FLAGS_READ_2 : SAM_FLAGS_READ_1);
437 if (alignment.aln->m_rc)
438 sam_align.flags |= SAM_FLAGS_REVERSE;
444 sam_align.flags |= SAM_FLAGS_PAIRED;
446 if (mate.aln->is_concordant())
447 sam_align.flags |= SAM_FLAGS_PROPER_PAIR;
449 if (!mate.aln->is_aligned())
450 sam_align.flags |= SAM_FLAGS_MATE_UNMAPPED;
452 if (mate.aln->is_rc())
453 sam_align.flags |= SAM_FLAGS_MATE_REVERSE;
460 sam_align.flags |= SAM_FLAGS_UNMAPPED;
469 uint32 computed_cigar_len = generate_cigar_string(sam_align, alignment);
471 if (computed_cigar_len != alignment.read_len)
473 log_error(stderr,
"SAM output : cigar length doesn't match read %u (%u != %u)\n",
475 computed_cigar_len, alignment.read_len);
476 return sam_align.mapq;
481 if (mate.aln->is_aligned())
491 if (o_seq_index == seq_index)
492 sam_align.rnext =
"=";
497 if (o_seq_index != seq_index)
501 sam_align.tlen =
nvbio::max(mate.cigar_pos + o_ref_cigar_len,
502 alignment.cigar_pos + ref_cigar_len) -
503 nvbio::min(mate.cigar_pos, alignment.cigar_pos);
505 if (mate.cigar_pos < alignment.cigar_pos)
506 sam_align.tlen = -sam_align.tlen;
510 sam_align.rnext =
"=";
511 sam_align.pnext = (int)(alignment.cigar_pos -
bnt.
sequence_index[ seq_index ] + 1);
516 sam_align.rnext = NULL;
522 sam_align.ed = alignment.aln->ed();
523 sam_align.score = alignment.aln->score();
525 sam_align.second_score_valid =
false;
527 generate_md_string(sam_align, alignment);
530 output_alignment(sam_align);
532 return sam_align.mapq;
547 process_one_alignment(alignment, mate);
566 process_one_alignment(alignment, mate);
567 process_one_alignment(mate, alignment);