30 #include <zlib/zlib.h>
40 const uint32 truncate_read_len,
44 fp =
gzopen(read_file_name,
"rt");
48 log_error(stderr,
"unable to open SAM file %s\n", read_file_name);
64 bool ReadDataFile_SAM::readLine(
void)
77 linebuf = (
char *) malloc(linebuf_size);
81 log_error(stderr,
"out of memory reading SAM file\n");
88 start_file_pos =
gztell(fp);
94 linebuf[linebuf_size - 2] =
'\0';
97 ret =
gzgets(fp, &linebuf[cur_buf_pos], linebuf_size - cur_buf_pos);
105 if (linebuf[linebuf_size - 2] ==
'\0')
112 cur_buf_pos = linebuf_size - 1;
115 tmp = (
char *) realloc(linebuf, linebuf_size);
118 log_error(stderr,
"out of memory reading SAM file\n");
128 line_length =
gztell(fp) - start_file_pos;
131 if (linebuf[line_length - 1] ==
'\n')
133 assert(linebuf[line_length] ==
'\0');
134 linebuf[line_length - 1] =
'\0';
141 void ReadDataFile_SAM::rewindLine(
void)
168 if (linebuf[0] !=
'@')
174 delim = strchr(linebuf,
'\t');
178 if (strncmp(linebuf,
"@HD\t", strlen(
"@HD\t")) == 0)
180 ret = parseHeaderLine(delim + 1);
185 }
else if (strncmp(linebuf,
"@SQ\t", strlen(
"@SQ\t")) == 0)
187 ret = parseReferenceSequenceLine(delim + 1);
192 }
else if (strncmp(linebuf,
"@RG\t", strlen(
"@RG\t")) == 0) {
195 }
else if (strncmp(linebuf,
"@PG\t", strlen(
"@PG\t")) == 0) {
198 }
else if (strncmp(linebuf,
"@CO\t", strlen(
"@CO\t")) == 0) {
202 log_warning(stderr,
"SAM file warning: unknown header at line %d\n", numLines);
205 log_warning(stderr,
"SAM file warning: malformed line %d\n", numLines);
207 }
while(linebuf[0] ==
'@');
217 bool ReadDataFile_SAM::parseHeaderLine(
char *start)
219 char *version = NULL;
224 log_warning(stderr,
"SAM file warning (line %d): @HD not the first line in the header section\n", numLines);
230 delim = strchr(start,
'\t');
238 if (strncmp(start,
"VN:", strlen(
"VN:")) == 0)
241 }
else if (strncmp(start,
"SO:", strlen(
"SO:")) == 0) {
242 if(strcmp(&start[3],
"unknown") == 0)
245 }
else if (strcmp(&start[3],
"unsorted") == 0) {
247 }
else if (strcmp(&start[3],
"queryname") == 0) {
249 }
else if (strcmp(&start[3],
"coordinate") == 0) {
252 log_warning(stderr,
"SAM file warning (line %d): invalid sort order %s\n", numLines, &start[3]);
255 log_warning(stderr,
"SAM file warning (line %d): invalid tag %s in @HD\n", numLines, start);
270 log_warning(stderr,
"SAM file warning (line %d): header does not contain a version tag\n", numLines);
278 bool ReadDataFile_SAM::parseReferenceSequenceLine(
char *start)
280 char *seq_name = NULL;
281 char *seq_len = NULL;
287 delim = strchr(start,
'\t');
295 if (strncmp(start,
"SN:", strlen(
"SN:")) == 0)
297 if (seq_name != NULL)
299 log_warning(stderr,
"SAM file warning (line %d): multiple SN tags in @SQ record\n", numLines);
301 seq_name = &start[3];
303 }
else if (strncmp(start,
"LN:", strlen(
"LN:")) == 0) {
306 log_warning(stderr,
"SAM file warning (line %d): multiple LN tags in @SQ record\n", numLines);
322 if (seq_name == NULL || seq_len == NULL)
324 log_warning(stderr,
"SAM file warning (line %d): missing required tags in @SQ record\n", numLines);
330 uint64 len = strtol(seq_len, &endptr, 10);
332 uint64 len = strtoll(seq_len, &endptr, 10);
334 if (!endptr || endptr == seq_len || *endptr !=
'\0')
336 log_warning(stderr,
"SAM file warning (line %d): invalid sequence length in @SQ record\n", numLines);
339 sq_names.push_back(std::string(seq_name));
369 if (readLine() ==
false)
375 #define NEXT(prev, next) \
377 next = strchr(prev, '\t'); \
379 log_error(stderr, "Error parsing SAM file (line %d): incomplete alignment section\n", numLines); \
380 m_file_state = FILE_PARSE_ERROR; \
405 read_flags = strtol(flag, NULL, 0);