NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
nvbio-less.cpp
Go to the documentation of this file.
1 // nvbio-less.cpp : Defines the entry point for the console application.
2 //
3 
#include <nvbio/basic/types.h>
#include <nvbio/basic/dna.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
10 
11 using namespace nvbio;
12 
// Output formatting options shared by main() and read().
struct Options
{
    // Number of symbols printed per output row: read() starts a new line
    // whenever the running symbol count is a multiple of this value.
    // main() initialises it to uint32(-1), i.e. effectively "never wrap".
    unsigned int row_length;

    // When true, read() prefixes every output row with a zero-padded line number.
    bool line_nums;
};
18 
19 template <typename WordType>
20 void read(FILE* file, const uint32 begin, const uint32 end, const Options opts)
21 {
22  const uint32 bps_per_word = sizeof(WordType) * 4;
23  const uint32 begin_word = begin / bps_per_word;
24  const uint32 end_word = (end+bps_per_word-1) / bps_per_word;
25 
26  const uint32 n_bps = end - begin;
27 
28  uint32 i_bps = 0u;
29  uint32 line_num = 0u;
30 
31  fseek( file, begin_word * sizeof(WordType), SEEK_CUR );
32  for (uint32 w = begin_word; w < end_word; w += 1024)
33  {
34  const uint32 n_words = std::min( end_word - w, 1024u );
35 
36  WordType words[1024];
37  fread( words, sizeof(WordType), n_words, file );
38 
40 
41  stream_type stream( words );
42 
43  for (uint32 i = 0; i < n_words * bps_per_word && i_bps < n_bps; ++i, ++i_bps)
44  {
45  if (((i_bps) % opts.row_length) == 0)
46  {
47  fprintf(stdout,"\n");
48  if (opts.line_nums)
49  fprintf(stdout, "%03u ", line_num++ );
50  }
51 
52  const char bp = dna_to_char( stream[i] );
53  fprintf(stdout,"%c", bp);
54  }
55  }
56  fflush(stdout);
57 }
58 
59 void read_ft(FILE* file, const uint32 begin, const uint32 end)
60 {
61  fseek( file, sizeof(uint32) * begin, SEEK_CUR );
62 
63  for (uint32 i = begin; i < end; ++i)
64  {
65  uint32 read_id = 0;
66 
67  fread( &read_id, sizeof(uint32), 1, file );
68 
69  fprintf(stdout, "%u\n", read_id);
70  }
71 }
72 
73 int main(int argc, char* argv[])
74 {
75  if (argc < 3 || (argc == 2 && (strcmp(argv[1],"--help") == 0 || strcmp(argv[1],"-h") == 0)))
76  {
77  printf("nvbio-less is a program to read dna fragments from {.pac|.rpac|.bwt|.rbwt} files.\n");
78  printf("Copyright NVIDIA Corporation 2012. All rights reserved.\n");
79  printf("\nUSAGE:\n");
80  printf("nvbio-less [options] begin end filename\n");
81  printf("options:\n");
82  printf(" -r row length specify output row length\n");
83  printf(" -ln print line numbers\n");
84  printf("e.g:\n");
85  printf(" nvbio-less -r 50 0 100 mydna.pac\n");
86  }
87  else
88  {
89  uint32 begin, end;
90 
91  Options opts;
92  opts.row_length = uint32(-1);
93  opts.line_nums = false;
94 
95  uint32 n_options = 0;
96  for (; n_options < argc-4; ++n_options)
97  {
98  if (argv[n_options+1][0] != '-')
99  break;
100 
101  if (strcmp( argv[n_options+1], "-r" ) == 0)
102  opts.row_length = atoi( argv[++n_options + 1] );
103  else if (strcmp( argv[n_options+1], "-ln" ) == 0)
104  opts.line_nums = true;
105  }
106 
107  sscanf( argv[1+n_options], "%u", &begin );
108  sscanf( argv[2+n_options], "%u", &end );
109  const std::string filename = argv[3+n_options];
110 
111  FILE* file = fopen( filename.c_str(), "rb" );
112  if (file == NULL)
113  {
114  fprintf(stderr, "error: file \"%s\" not found\n", filename.c_str());
115  exit(1);
116  }
117 
118  if (filename.find(".ft") != std::string::npos)
119  {
120  // filter file produced by nvbio-aln-diff
121  read_ft( file, begin, end );
122 
123  fclose( file );
124  return 0;
125  }
126 
127  if (filename.find(".pac") != std::string::npos ||
128  filename.find(".rpac") != std::string::npos)
129  read<uint8>( file, begin, end, opts );
130  else
131  if (filename.find(".wpac") != std::string::npos ||
132  filename.find(".bwt") != std::string::npos ||
133  filename.find(".rbwt") != std::string::npos)
134  {
135  // skip header
136  if (filename.find(".wpac") != std::string::npos)
137  fseek( file, 8u, SEEK_CUR );
138  else if (filename.find(".bwt") != std::string::npos ||
139  filename.find(".rbwt") != std::string::npos)
140  fseek( file, 20u, SEEK_CUR );
141 
142  read<uint32>( file, begin, end, opts );
143  }
144  else
145  {
146  fclose( file );
147 
148  fprintf(stderr, "error: unsupported file type\n");
149  exit(1);
150  }
151 
152  fclose( file );
153  }
154  return 0;
155 }
156