libStatGen Software 1
Loading...
Searching...
No Matches
BaseAsciiMap.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef _BASE_ASCII_MAP_H
19#define _BASE_ASCII_MAP_H
20
21#include "StringBasics.h"
22
23/// Map between characters and the associated base type.
25{
26public:
27 /// Value associated with 'N' in the ascii to base map (bad read).
28 static const int baseNIndex = 004;
29 /// Value associated with any non-base character in the ascii to base
30 /// map (unknown, bad data).
31 static const int baseXIndex = 005;
32
33 // Two arrays for converting back and forth between a base pair character
34 // value (ASCII) and a base integer in the range 0..3. Note there are also
35 // the values 4 and 5: 4 for 'N' (bad read) and 5 for any other character (unknown, bad data).
36 //
37 /// Convert from int representation to the base.
38 static const char int2base[];
39 /// Convert from int representation to colorspace representation.
40 static const char int2colorSpace[];
41 static unsigned char base2complement[];
42
43 /// The type of space (color or base) to use in the mapping.
45 /// Base decision on the first raw seq character/type has yet
46 /// to be determined.
48 BASE_SPACE, ///< Bases only (A,C,G,T,N).
49 COLOR_SPACE ///< Color space only (0,1,2,3,.).
50 };
51
52 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
53 /// both base and color space.
54 /// 'A'/'a'/'0' -> 0; 'C'/'c'/'1' -> 1; 'G'/'g'/'2' -> 2; 'T'/'t'/'3' -> 3;
55 /// 'N'/'n'/'4' -> 4; anything else -> 5.
56 static unsigned char baseColor2int[256+1]; // base or color space read (ACGTN or 01234)
57 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
58 /// just base space (ACTGNactgn).
59 /// 'A'/'a' -> 0; 'C'/'c' -> 1; 'G'/'g' -> 2; 'T'/'t' -> 3;
60 /// 'N'/'n' -> 4; anything else -> 5.
61 static unsigned char base2int[256+1]; // base space read (ATCG)
62 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
63 /// just color space (0123).
64 /// '0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3; '4' -> 4; anything else -> 5.
65 static unsigned char color2int[256+1]; // color space read (01234)
66
67public:
70
71 /// Set the base type based on the passed in option.
72 inline void setBaseMapType(SPACE_TYPE spaceType)
73 {
// NOTE(review): listing line 74 is absent from this extract; confirm against
// the upstream header whether a statement belongs here before editing.
75 // Point the active lookup table at the one matching the requested space type.
76 switch (spaceType)
77 {
78 case BASE_SPACE:
79 // Base space: use the base-only table.
80 myBase2IntMapPtr = base2int;
81 break;
82 case COLOR_SPACE:
83 // Color space: use the color-only table.
84 myBase2IntMapPtr = color2int;
85 break;
86 default:
87 // Any other value (e.g. UNKNOWN): clear the pointer so the type is
88 myBase2IntMapPtr = NULL;
89 break;
90 }
91 };
92
93 /// Returns the baseIndex value for the character passed in.
94 inline int getBaseIndex(const char& letter)
95 {
96 if (myBase2IntMapPtr == NULL)
97 {
98 // Check to see if we have hit the number of primer bases.
99 if (myPrimerCount < myNumPrimerBases)
100 {
101 // Still expecting primer bases, so lookup
102 // the letter in the base map.
103 ++myPrimerCount;
104 return(base2int[(int)letter]);
105 }
106
107 // Have already processed all the primers, so determine
108 // whether this is base or color space.
109
110 // Need to determime the base type.
111 setBaseMapType(letter);
112
113 // If it is still null, return invalid. Will be set when the first
114 // letter is either color or base.
115 if (myBase2IntMapPtr == NULL)
116 {
117 return(baseXIndex);
118 }
119 }
120
121 // Also check if configured as color space that the primers are correct.
122 if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases))
123 {
124 // Still expecting primer bases, so lookup
125 // the letter in the base map.
126 ++myPrimerCount;
127 return(base2int[(int)letter]);
128 }
129
130 return myBase2IntMapPtr[(int)letter];
131 }
132
133 /// Return the space type that is currently set.
135 {
136 if (myBase2IntMapPtr == base2int)
137 {
138 return(BASE_SPACE);
139 }
140 else if (myBase2IntMapPtr == color2int)
141 {
142 return(COLOR_SPACE);
143 }
144 else
145 {
146 return(UNKNOWN);
147 }
148 }
149
150 /// Set the number of primer bases expected before the actual
151 /// base/color space type occurs for the rest of the entries.
152 void setNumPrimerBases(int numPrimerBases)
153 {
154 myNumPrimerBases = numPrimerBases;
155 }
156
157 /// Reset the number of primers to 0.
159 {
160 myPrimerCount = 0;
161 };
162
163 /// Reset the base mapping type to UNKNOWN.
165 {
166 myBase2IntMapPtr = NULL;
168 };
169
170private:
171 // Set the base type based on the passed in letter.
172 // If the letter is in neither the color space nor the base space, the
173 // map type is left undetermined so either can still be selected later.
174 inline void setBaseMapType(const char& letter)
175 {
176 //First check to see if it is in base space.
177 if (base2int[(int)letter] != baseXIndex)
178 {
179 // This is a valid base space index, so it is base space.
180 myBase2IntMapPtr = base2int;
181 }
182 else if (color2int[(int)letter] != baseXIndex)
183 {
184 // This is a valid color space index, so it is base space.
185 myBase2IntMapPtr = color2int;
186 }
187 else
188 {
189 // Unknown map type, zero the pointer.
190 myBase2IntMapPtr = NULL;
191 }
192 };
193
194
195 // The number of primer bases to expect for a color-space file.
196 unsigned int myNumPrimerBases;
197
198 // This is the number of primer bases that have been seen since
199 // the map type was set/reset.
200 unsigned int myPrimerCount;
201
202 unsigned char* myBase2IntMapPtr;
203};
204
205#endif
Map between characters and the associated base type.
static unsigned char base2complement[]
This table maps 5' base space to the 3' complement base space values, as well as 5' color space value...
void setBaseMapType(SPACE_TYPE spaceType)
Set the base type based on the passed in option.
static unsigned char color2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just color space (0123).
static const int baseXIndex
Value associated with any non-base character in the ascii to base map (unknown, bad data).
static unsigned char base2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just base space (ACTGNactgn).
void setNumPrimerBases(int numPrimerBases)
Set the number of primer bases expected before the actual base/color space type occurs for the rest o...
static const char int2colorSpace[]
Convert from int representation to colorspace representation.
void resetBaseMapType()
Reset the base mapping type to UNKNOWN.
void resetPrimerCount()
Reset the number of primers to 0.
SPACE_TYPE
The type of space (color or base) to use in the mapping.
@ COLOR_SPACE
Color space only (0,1,2,3,.).
@ UNKNOWN
Base decision on the first raw seq character/type has yet to be determined.
@ BASE_SPACE
Bases only (A,C,G,T,N).
static unsigned char baseColor2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for both base and color space.
int getBaseIndex(const char &letter)
Returns the baseIndex value for the character passed in.
SPACE_TYPE getSpaceType()
Return the space type that is currently set.
static const char int2base[]
Convert from int representation to the base.
static const int baseNIndex
Value associated with 'N' in the ascii to base map (bad read).