/* error(): printf-style diagnostic helper.  The visible lines show it taking
 * a format string plus variadic arguments and forwarding them to
 * vfprintf(stderr, ...).  The statements between the signature and the
 * vfprintf call, and anything after it, are not part of this listing.
 * NOTE(review): callers use it as `if (!x) error(...)` and then carry on
 * using x, so it presumably terminates the process -- confirm against the
 * full body. */
22 static void error(
const char *format, ...)
26 vfprintf(stderr, format, ap);
/* Mask that ORs together the IS_GFF, IS_BED, IS_SAM and IS_VCF flags.  It is
 * tested as `ftype & IS_TXT` by the query and reheader code. */
38 #define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF)
/* Classifies `fname` by its suffix, comparing case-insensitively:
 *   ".gff.gz" -> IS_GFF, ".bed.gz" -> IS_BED, ".sam.gz" -> IS_SAM,
 *   ".vcf.gz" -> IS_VCF, ".bcf" -> IS_BCF, ".bam" -> IS_BAM.
 * The `l>=7` / `l>=4` guards keep `fname+l-N` from pointing before the start
 * of the string.  The enclosing function header and the fall-through return
 * are not part of this listing.
 * NOTE(review): strlen() returns size_t; storing it in an int truncates for
 * very long names. */
42 int l = strlen(fname);
/* Local prototype for strcasecmp -- presumably because <strings.h> is not
 * included; confirm against the include block. */
43 int strcasecmp(
const char *s1,
const char *s2);
44 if (l>=7 && strcasecmp(fname+l-7,
".gff.gz") == 0)
return IS_GFF;
45 else if (l>=7 && strcasecmp(fname+l-7,
".bed.gz") == 0)
return IS_BED;
46 else if (l>=7 && strcasecmp(fname+l-7,
".sam.gz") == 0)
return IS_SAM;
47 else if (l>=7 && strcasecmp(fname+l-7,
".vcf.gz") == 0)
return IS_VCF;
48 else if (l>=4 && strcasecmp(fname+l-4,
".bcf") == 0)
return IS_BCF;
49 else if (l>=4 && strcasecmp(fname+l-4,
".bam") == 0)
return IS_BAM;
/* Flag value 1.  NOTE(review): presumably one of the bits carried in the
 * `mode` argument of query_regions() (set by -h); the line that sets it is
 * not part of this listing -- confirm. */
53 #define PRINT_HEADER 1
/* query_regions(): queries an indexed file.
 *   argv  - argv[0] is the data file name; argv[1..argc-1] are iterated by
 *           the loops below (the region strings, per the usage text).
 *   argc  - number of entries in argv.
 *   mode  - passed in by main(); its use is not visible in this listing.
 * Only a sample of the original lines is shown here; braces, declarations
 * and the statements between the numbered lines are not part of the listing.
 */
55 static int query_regions(
char **argv,
int argc,
int mode)
57 char *fname = argv[0];
/* `&` binds tighter than `||`, so this reads (ftype & IS_TXT) || !ftype:
 * one of the text formats, or an unrecognised type (ftype == 0). */
60 if ( ftype &
IS_TXT || !ftype )
63 if ( !fp ) error(
"Could not read %s\n", fname);
65 if ( !tbx ) error(
"Could not load .tbi index of %s\n", fname);
/* Index 0 is the file name, so iteration starts at 1. */
77 for (i=1; i<argc; i++)
86 if (
hts_close(fp) ) error(
"hts_close returned non-zero status: %s\n", fname);
/* Second path: uses a .csi index, reads a header and opens an `out` handle
 * on stdout.  NOTE(review): presumably the BCF branch -- the selecting
 * condition is not part of this listing. */
92 if ( !fp ) error(
"Could not read %s\n", fname);
/* The format string has no conversion, so no extra argument is passed. */
94 if ( !out ) error(
"Could not open stdout\n");
96 if ( !idx ) error(
"Could not load .csi index of %s\n", fname);
98 if ( !hdr ) error(
"Could not read the header: %s\n", fname);
106 for (i=1; i<argc; i++)
/* A region for which no iterator could be created is skipped silently. */
109 if ( !itr )
continue;
115 if (
hts_close(fp) ) error(
"hts_close returned non-zero status: %s\n", fname);
116 if (
hts_close(out) ) error(
"hts_close returned non-zero status for stdout\n");
/* BAM input is not queried here; the user is pointed at samtools instead. */
121 error(
"Please use \"samtools view\" for querying BAM files.\n");
/* query_chroms(): prints the sequence names of an indexed file to stdout,
 * one per line.
 *   fname - path of the data file.
 * Only a sample of the original lines is shown here; the statements that
 * fill `seq` and `nseq` are not part of the listing. */
124 static int query_chroms(
char *fname)
/* Reads as (ftype & IS_TXT) || !ftype: a text format or an unrecognised type.
 * This path uses the .tbi index. */
128 if ( ftype &
IS_TXT || !ftype )
131 if ( !tbx ) error(
"Could not load .tbi index of %s\n", fname);
133 for (i=0; i<nseq; i++)
134 printf(
"%s\n", seq[i]);
/* Second path: opens the file, reads its header and loads a .csi index.
 * NOTE(review): presumably the BCF branch -- the selecting condition is not
 * part of this listing. */
141 if ( !fp ) error(
"Could not read %s\n", fname);
143 if ( !hdr ) error(
"Could not read the header: %s\n", fname);
146 if ( !idx ) error(
"Could not load .csi index of %s\n", fname);
148 for (i=0; i<nseq; i++)
149 printf(
"%s\n", seq[i]);
/* Listing chromosomes of BAM files is not implemented. */
155 error(
"BAM: todo\n");
/* Header-replacement code: scans `buffer` past the existing header lines,
 * copies the replacement header file `header` to `bgzf_out` in page-sized
 * chunks, then streams the rest of the input.  The enclosing function header
 * and many intermediate statements are not part of this listing. */
/* Reads as (ftype & IS_TXT) || !ftype: a text format or an unrecognised type. */
161 if ( ftype &
IS_TXT || !ftype )
175 if ( buffer[skip_until]==
'\n' )
/* Stop at the first line that does not start with the meta character. */
184 if ( buffer[skip_until]!=conf->
meta_char )
break;
196 FILE *hdr = fopen(header,
"r");
/* Trailing newline added so this diagnostic ends its line like every other
 * error() message in the file. */
197 if ( !hdr ) error(
"%s: %s\n", header,strerror(errno));
198 int page_size = getpagesize();
/* NOTE(review): valloc() is obsolete and its result is used unchecked here;
 * consider posix_memalign()/malloc() plus a NULL check. */
199 char *buf = valloc(page_size);
/* Reads at most page_size-1 bytes so one spare byte remains for the newline
 * appended below. */
202 while ( (nread=fread(buf,1,page_size-1,hdr))>0 )
/* A short read whose last byte is not '\n' gets a newline appended. */
204 if ( nread<page_size-1 && buf[nread-1]!=
'\n' ) buf[nread++] =
'\n';
205 if (
bgzf_write(bgzf_out, buf, nread) < 0) error(
"Error: %d\n",bgzf_out->
errcode);
207 if ( fclose(hdr) ) error(
"close failed: %s\n", header);
219 if ( nread<=0 )
break;
222 if (count != nread) error(
"Write failed, wrote %d instead of %d bytes.\n", count,(
int)nread);
/* Reheadering of BCF and BAM files is not implemented. */
228 error(
"todo: reheader BCF, BAM\n");
/* usage(): writes the command-line synopsis and the option list to stderr.
 * main() returns its result directly; the return statement itself is not
 * part of this listing. */
232 static int usage(
void)
234 fprintf(stderr,
"\n");
236 fprintf(stderr,
"Usage:   tabix [OPTIONS] [FILE] [REGION [...]]\n");
237 fprintf(stderr,
"Options:\n");
238 fprintf(stderr,
"   -0, --zero-based        coordinates are zero-based\n");
239 fprintf(stderr,
"   -b, --begin INT         column number for region start [4]\n");
240 fprintf(stderr,
"   -c, --comment CHAR      skip comment lines starting with CHAR [null]\n");
241 fprintf(stderr,
"   -e, --end INT           column number for region end (if no end, set INT to -b) [5]\n");
242 fprintf(stderr,
"   -f, --force             overwrite existing index without asking\n");
243 fprintf(stderr,
"   -h, --print-header      print also the header lines\n");
244 fprintf(stderr,
"   -H, --only-header       print only the header lines\n");
245 fprintf(stderr,
"   -l, --list-chroms       list chromosome names\n");
246 fprintf(stderr,
"   -m, --min-shift INT     set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n");
247 fprintf(stderr,
"   -p, --preset STR        gff, bed, sam, vcf, bcf, bam\n");
248 fprintf(stderr,
"   -r, --reheader FILE     replace the header with the content of FILE\n");
249 fprintf(stderr,
"   -s, --sequence INT      column number for sequence names (suppressed by -p) [1]\n");
250 fprintf(stderr,
"   -S, --skip-lines INT    skip first INT lines [0]\n");
251 fprintf(stderr,
"\n");
/* main(): parses the command line, then lists chromosomes, queries regions,
 * or builds an index for the named file.
 * Only a sample of the original lines is shown here; braces, several
 * declarations (e.g. `conf`, `conf_ptr`, `ftype`) and the conditions guarding
 * each action are not part of the listing. */
255 int main(
int argc,
char *argv[])
257 int c, min_shift = -1, is_force = 0, list_chroms = 0, mode = 0;
259 char *reheader = NULL;
/* Long-option table for getopt_long; only some of its entries are listed. */
261 static struct option loptions[] =
264 {
"zero-based",0,0,
'0'},
265 {
"print-header",0,0,
'h'},
266 {
"only-header",0,0,
'H'},
272 {
"sequence",1,0,
's'},
273 {
"skip-lines",1,0,
'S'},
274 {
"list-chroms",0,0,
'l'},
275 {
"reheader",1,0,
'r'},
/* A trailing ':' in the short-option string marks an option that takes an
 * argument. */
279 while ((c = getopt_long(argc, argv,
"hH?0b:c:e:fm:p:s:S:lr:", loptions,NULL)) >= 0)
283 case 'r': reheader = optarg;
break;
286 case 'l': list_chroms = 1;
break;
/* NOTE(review): atoi() cannot report malformed or out-of-range input; the
 * numeric options below silently become 0 on bad input.  strtol() with
 * end-pointer checking would allow a diagnostic. */
288 case 'b': conf.
bc = atoi(optarg);
break;
289 case 'e': conf.
ec = atoi(optarg);
break;
/* Only the first character of the argument is used as the meta character. */
290 case 'c': conf.
meta_char = *optarg;
break;
291 case 'f': is_force = 1;
break;
292 case 'm': min_shift = atoi(optarg);
break;
/* Preset name -> predefined tbx configuration.  NOTE(review): the usage text
 * also advertises "bcf" and "bam"; the visible chain handles only gff, bed,
 * sam and vcf -- other branches may exist on lines outside this listing. */
294 if (strcmp(optarg,
"gff") == 0) conf_ptr = &
tbx_conf_gff;
295 else if (strcmp(optarg,
"bed") == 0) conf_ptr = &
tbx_conf_bed;
296 else if (strcmp(optarg,
"sam") == 0) conf_ptr = &
tbx_conf_sam;
297 else if (strcmp(optarg,
"vcf") == 0) conf_ptr = &
tbx_conf_vcf;
298 else error(
"The preset string not recognised: '%s'\n", optarg);
300 case 's': conf.
sc = atoi(optarg);
break;
301 case 'S': conf.
line_skip = atoi(optarg);
break;
302 default:
return usage();
/* No positional argument left after option parsing: print the usage text. */
306 if ( optind==argc )
return usage();
309 return query_chroms(argv[optind]);
/* The file name is argv[optind]; whatever follows it is handed over as well. */
312 return query_regions(&argv[optind], argc-optind, mode);
314 char *fname = argv[optind];
/* A non-positive min_shift is replaced by 14 on these two paths
 * (NOTE(review): presumably the BCF and BAM branches -- confirm). */
324 if ( min_shift <= 0 ) min_shift = 14;
328 if ( min_shift <= 0 ) min_shift = 14;
/* Index suffix: ".tbi" when min_shift is still non-positive, otherwise
 * ".bai" for BAM and ".csi" for everything else. */
337 char *suffix = min_shift <= 0 ?
".tbi" : (ftype==
IS_BAM ?
".bai" :
".csi");
/* +5 = four suffix characters plus the terminating NUL.
 * NOTE(review): the calloc() result is used without a NULL check. */
338 char *idx_fname = calloc(strlen(fname) + 5, 1);
339 strcat(strcpy(idx_fname, fname), suffix);
341 struct stat stat_tbi, stat_file;
/* Without -f, an existing index is only replaced when the data file is
 * newer than the index. */
342 if ( !is_force && stat(idx_fname, &stat_tbi)==0 )
/* NOTE(review): the return value of this stat() is ignored; if it fails,
 * stat_file.st_mtime is read uninitialised on the next line. */
347 stat(fname, &stat_file);
348 if ( stat_file.st_mtime <= stat_tbi.st_mtime )
349 error(
"[tabix] the index file exists. Please use '-f' to overwrite.\n");
/* Build the index with the builder matching the file type; each builder
 * reports failure through a non-zero return value. */
357 if (
bcf_index_build(fname, min_shift)!=0 ) error(
"bcf_index_build failed: %s\n", fname);
362 if (
bam_index_build(fname, min_shift)!=0 ) error(
"bam_index_build failed: %s\n", fname);
365 if (
tbx_index_build(fname, min_shift, &conf)!=0 ) error(
"tbx_index_build failed: %s\n", fname);
370 if (
tbx_index_build(fname, min_shift, &conf) ) error(
"tbx_index_build failed: %s\n", fname);