30 #include <zlib/zlib.h>
40 const char* read_file_name,
44 fp =
gzopen(read_file_name,
"rt");
48 log_error(stderr,
"unable to open SAM file %s\n", read_file_name);
62 bool SequenceDataFile_SAM::readLine(
void)
75 linebuf = (
char *) malloc(linebuf_size);
79 log_error(stderr,
"out of memory reading SAM file\n");
86 start_file_pos =
gztell(fp);
92 linebuf[linebuf_size - 2] =
'\0';
95 ret =
gzgets(fp, &linebuf[cur_buf_pos], linebuf_size - cur_buf_pos);
103 if (linebuf[linebuf_size - 2] ==
'\0')
110 cur_buf_pos = linebuf_size - 1;
113 tmp = (
char *) realloc(linebuf, linebuf_size);
116 log_error(stderr,
"out of memory reading SAM file\n");
126 line_length =
gztell(fp) - start_file_pos;
129 if (linebuf[line_length - 1] ==
'\n')
131 assert(linebuf[line_length] ==
'\0');
132 linebuf[line_length - 1] =
'\0';
139 void SequenceDataFile_SAM::rewindLine(
void)
166 if (linebuf[0] !=
'@')
172 delim = strchr(linebuf,
'\t');
176 if (strncmp(linebuf,
"@HD\t", strlen(
"@HD\t")) == 0)
178 ret = parseHeaderLine(delim + 1);
183 }
else if (strncmp(linebuf,
"@SQ\t", strlen(
"@SQ\t")) == 0)
185 ret = parseReferenceSequenceLine(delim + 1);
190 }
else if (strncmp(linebuf,
"@RG\t", strlen(
"@RG\t")) == 0) {
193 }
else if (strncmp(linebuf,
"@PG\t", strlen(
"@PG\t")) == 0) {
196 }
else if (strncmp(linebuf,
"@CO\t", strlen(
"@CO\t")) == 0) {
200 log_warning(stderr,
"SAM file warning: unknown header at line %d\n", numLines);
203 log_warning(stderr,
"SAM file warning: malformed line %d\n", numLines);
205 }
while(linebuf[0] ==
'@');
215 bool SequenceDataFile_SAM::parseHeaderLine(
char *start)
217 char *version = NULL;
222 log_warning(stderr,
"SAM file warning (line %d): @HD not the first line in the header section\n", numLines);
228 delim = strchr(start,
'\t');
236 if (strncmp(start,
"VN:", strlen(
"VN:")) == 0)
239 }
else if (strncmp(start,
"SO:", strlen(
"SO:")) == 0) {
240 if(strcmp(&start[3],
"unknown") == 0)
243 }
else if (strcmp(&start[3],
"unsorted") == 0) {
245 }
else if (strcmp(&start[3],
"queryname") == 0) {
247 }
else if (strcmp(&start[3],
"coordinate") == 0) {
250 log_warning(stderr,
"SAM file warning (line %d): invalid sort order %s\n", numLines, &start[3]);
253 log_warning(stderr,
"SAM file warning (line %d): invalid tag %s in @HD\n", numLines, start);
268 log_warning(stderr,
"SAM file warning (line %d): header does not contain a version tag\n", numLines);
276 bool SequenceDataFile_SAM::parseReferenceSequenceLine(
char *start)
278 char *seq_name = NULL;
279 char *seq_len = NULL;
285 delim = strchr(start,
'\t');
293 if (strncmp(start,
"SN:", strlen(
"SN:")) == 0)
295 if (seq_name != NULL)
297 log_warning(stderr,
"SAM file warning (line %d): multiple SN tags in @SQ record\n", numLines);
299 seq_name = &start[3];
301 }
else if (strncmp(start,
"LN:", strlen(
"LN:")) == 0) {
304 log_warning(stderr,
"SAM file warning (line %d): multiple LN tags in @SQ record\n", numLines);
320 if (seq_name == NULL || seq_len == NULL)
322 log_warning(stderr,
"SAM file warning (line %d): missing required tags in @SQ record\n", numLines);
328 uint64 len = strtol(seq_len, &endptr, 10);
330 uint64 len = strtoll(seq_len, &endptr, 10);
332 if (!endptr || endptr == seq_len || *endptr !=
'\0')
334 log_warning(stderr,
"SAM file warning (line %d): invalid sequence length in @SQ record\n", numLines);
337 sq_names.push_back(std::string(seq_name));
386 if (readLine() ==
false)
392 #define NEXT(prev, next) \
394 next = strchr(prev, '\t'); \
396 log_error(stderr, "Error parsing SAM file (line %d): incomplete alignment section\n", numLines); \
397 m_file_state = FILE_PARSE_ERROR; \
422 read_flags = strtol(flag, NULL, 0);