libStatGen Software 1
Tabix.h
1/*
2 * Copyright (C) 2012-2013 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __TABIX_H__
19#define __TABIX_H__
20
21#include <stdint.h>
22#include <vector>
23#include <map>
24#include <stdlib.h>
25
26#include "IndexBase.h"
27
28#include "InputFile.h"
29#include "StatGenStatus.h"
30
/// Index for a tabix-indexed file. Derives from IndexBase and adds the
/// tabix format header fields plus the list of chromosome/reference names.
31class Tabix : public IndexBase
32{
33public:
34
/// File format codes.
/// NOTE(review): presumably the values getFormat() can return - confirm
/// against Tabix.cpp.
35 enum Format
36 {
37 FORMAT_GENERIC = 0,
38 FORMAT_SAM = 1,
39 FORMAT_VCF = 2
40 };
41
42 Tabix();
43 virtual ~Tabix();
44
45 /// Reset the member data for a new index file.
46 void resetIndex();
47
48 /// Read & parse the specified index file.
49 /// \param filename the tabix index file to be read.
50 /// \return the status of the read.
51 StatGenStatus::Status readIndex(const char* filename);
52
53 /// Get the starting file offset to look for the specified start position.
54 /// For an entire reference ID, set start to -1.
55 /// To start at the beginning of the region, set start to 0/-1.
/// \param refName name of the reference (chromosome) to look up.
/// \param start start position to look for (see the notes above for -1/0).
/// \param fileStartPos output: receives the file offset.
/// \return NOTE(review): presumably true when an offset was found and
/// fileStartPos was set, false otherwise - confirm in Tabix.cpp.
56 bool getStartPos(const char* refName, int32_t start,
57 uint64_t& fileStartPos) const;
58
59 /// Return the reference name at the specified index or
60 /// throws an exception if out of range.
/// \param indexNum index of the reference name to return.
61 const char* getRefName(unsigned int indexNum) const;
62
63 /// Get the format of this tabix file (the format field of myFormat).
64 inline int32_t getFormat() const { return myFormat.format; }
65
66private:
/// Format settings stored for the index.
/// NOTE(review): the field names mirror the header of the tabix index
/// format (format code, sequence/begin/end columns, meta character, lines
/// to skip) - confirm the exact meaning against the tabix specification.
67 struct TabixFormat
68 {
69 int32_t format; // format code; exposed through getFormat()
70 int32_t col_seq; // presumably the column holding the sequence name
71 int32_t col_beg; // presumably the column holding the region start
72 int32_t col_end; // presumably the column holding the region end
73 int32_t meta; // character that starts header lines
74 int32_t skip; // Number of lines to skip from putting into the index.
75 };
76
// Format settings of the current index.
77 TabixFormat myFormat;
78
// Raw character buffer for the chromosome names.
// NOTE(review): presumably the storage that myChromNamesVector points into,
// allocated and released in Tabix.cpp - confirm. This class declares a
// destructor but no copy constructor/assignment, so check whether copying
// a Tabix object is safe.
79 char* myChromNamesBuffer;
80
81 // vector pointing to the chromosome names.
82 std::vector<const char*> myChromNamesVector;
83};
84
85
86#endif
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
Tabix
Definition: Tabix.h:32
void resetIndex()
Reset the member data for a new index file.
Definition: Tabix.cpp:39
bool getStartPos(const char *refName, int32_t start, uint64_t &fileStartPos) const
Get the starting file offset to look for the specified start position.
Definition: Tabix.cpp:218
const char * getRefName(unsigned int indexNum) const
Return the reference name at the specified index or throws an exception if out of range.
Definition: Tabix.cpp:247
StatGenStatus::Status readIndex(const char *filename)
Read & parse the specified index file.
Definition: Tabix.cpp:52