libStatGen Software 1
Loading...
Searching...
No Matches
CigarHelper.cpp
1/*
2 * Copyright (C) 2011 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18//////////////////////////////////////////////////////////////////////////
19
20#include "CigarHelper.h"
21
22// Soft Clip from the beginning of the read to the specified reference position.
24 int32_t refPosition0Based,
25 CigarRoller& newCigar,
26 int32_t &new0BasedPosition)
27{
28 newCigar.clear();
29 Cigar* cigar = record.getCigarInfo();
30 if(cigar == NULL)
31 {
32 // Failed to get the cigar.
33 ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
34 return(NO_CLIP);
35 }
36
37 // No cigar or position in the record, so return no clip.
38 if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
39 {
40 return(NO_CLIP);
41 }
42
43 // Check to see if the reference position occurs before the record starts,
44 // if it does, do no clipping.
45 if(refPosition0Based < record.get0BasedPosition())
46 {
47 // Not within this read, so nothing to clip.
48 newCigar.Set(record.getCigar());
49 return(NO_CLIP);
50 }
51
52 // The position falls after the read starts, so loop through until the
53 // position or the end of the read is found.
54 int32_t readClipPosition = 0;
55 bool clipWritten = false;
56 new0BasedPosition = record.get0BasedPosition();
57 for(int i = 0; i < cigar->size(); i++)
58 {
59 const Cigar::CigarOperator* op = &(cigar->getOperator(i));
60
61 if(clipWritten)
62 {
63 // Clip point has been found, so just add everything.
64 newCigar += *op;
65 // Go to the next operation.
66 continue;
67 }
68
69 // The clip point has not yet been found, so check to see if we found
70 // it now.
71
72 // Not a clip, check to see if the operation is found in the
73 // reference.
75 {
76 // match, mismatch, deletion, skip
77
78 // increment the current reference position to just past this
79 // operation.
80 new0BasedPosition += op->count;
81
82 // Check to see if this is also in the query, because otherwise
83 // the operation is still being consumed.
84 if(Cigar::foundInQuery(*op))
85 {
86 // Also in the query, determine if the entire thing should
87 // be clipped or just part of it.
88
89 uint32_t numKeep = 0;
90 // Check to see if we have hit our clip position.
91 if(refPosition0Based < new0BasedPosition)
92 {
93 // The specified clip position is in this cigar operation.
94 numKeep = new0BasedPosition - refPosition0Based - 1;
95
96 if(numKeep > op->count)
97 {
98 // Keep the entire read. This happens because
99 // we keep reading until the first match/mismatch
100 // after the clip.
101 numKeep = op->count;
102 }
103 }
104
105 // Add the part of this operation that is being clipped
106 // to the clip count.
107 readClipPosition += (op->count - numKeep);
108
109 // Only write the clip if we found a match/mismatch
110 // to write. Otherwise we will keep accumulating clips
111 // for the case of insertions.
112 if(numKeep > 0)
113 {
114 new0BasedPosition -= numKeep;
115
116 newCigar.Add(Cigar::softClip, readClipPosition);
117
118 // Add the clipped part of this cigar to the clip
119 // position.
120 newCigar.Add(op->operation, numKeep);
121
122 // Found a match after the clip point, so stop
123 // consuming cigar operations.
124 clipWritten = true;
125 continue;
126 }
127 }
128 }
129 else
130 {
131 // Only add hard clips. The softclips will be added in
132 // when the total number is found.
133 if(op->operation == Cigar::hardClip)
134 {
135 // Check if this is the first operation, if so, just write it.
136 if(i == 0)
137 {
138 newCigar += *op;
139 }
140 // Check if it is the last operation (otherwise skip it).
141 else if(i == (cigar->size() - 1))
142 {
143 // Check whether or not the clip was ever written, and if
144 // not, write it.
145 if(clipWritten == false)
146 {
147 newCigar.Add(Cigar::softClip, readClipPosition);
148 // Since no match/mismatch was ever found, set
149 // the new ref position to the original one.
150 new0BasedPosition = record.get0BasedPosition();
151 clipWritten = true;
152 }
153 // Add the hard clip.
154 newCigar += *op;
155 }
156 }
157 // Not yet to the clip position, so do not add this operation.
158 if(Cigar::foundInQuery(*op))
159 {
160 // Found in the query, so update the read clip position.
161 readClipPosition += op->count;
162 }
163 }
164 } // End loop through cigar.
165
166
167 // Check whether or not the clip was ever written, and if
168 // not, write it.
169 if(clipWritten == false)
170 {
171 newCigar.Add(Cigar::softClip, readClipPosition);
172 // Since no match/mismatch was ever found, set
173 // the new ref position to the original one.
174 new0BasedPosition = record.get0BasedPosition();
175 }
176
177 // Subtract 1 since readClipPosition atually contains the first 0based
178 // position that is not clipped.
179 return(readClipPosition - 1);
180}
181
182
183// Soft Clip from the end of the read at the specified reference position.
185 int32_t refPosition0Based,
186 CigarRoller& newCigar)
187{
188 newCigar.clear();
189 Cigar* cigar = record.getCigarInfo();
190 if(cigar == NULL)
191 {
192 // Failed to get the cigar.
193 ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
194 return(NO_CLIP);
195 }
196
197 // No cigar or position in the record, so return no clip.
198 if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
199 {
200 return(NO_CLIP);
201 }
202
203 // Check to see if the reference position occurs after the record ends,
204 // if so, do no clipping.
205 if(refPosition0Based > record.get0BasedAlignmentEnd())
206 {
207 // Not within this read, so nothing to clip.
208 newCigar.Set(record.getCigar());
209 return(NO_CLIP);
210 }
211
212 // The position falls before the read ends, so loop through until the
213 // position is found.
214 int32_t currentRefPosition = record.get0BasedPosition();
215 int32_t readClipPosition = 0;
216 for(int i = 0; i < cigar->size(); i++)
217 {
218 const Cigar::CigarOperator* op = &(cigar->getOperator(i));
219
220 // If the operation is found in the reference, increase the
221 // reference position.
223 {
224 // match, mismatch, deletion, skip
225 // increment the current reference position to just past
226 // this operation.
227 currentRefPosition += op->count;
228 }
229
230 // Check to see if we have hit our clip position.
231 if(refPosition0Based < currentRefPosition)
232 {
233 // If this read is also in the query (match/mismatch),
234 // write the partial op to the new cigar.
235 int32_t numKeep = 0;
236 if(Cigar::foundInQuery(*op))
237 {
238 numKeep = op->count - (currentRefPosition - refPosition0Based);
239 if(numKeep > 0)
240 {
241 newCigar.Add(op->operation, numKeep);
242 readClipPosition += numKeep;
243 }
244 }
245 else if(Cigar::isClip(*op))
246 {
247 // This is a hard clip, so write it.
248 newCigar.Add(op->operation, op->count);
249 }
250 else
251 {
252
253 // Not found in the query (skip/deletion),
254 // so don't write any of the operation.
255 }
256 // Found the clip point, so break.
257 break;
258 }
259 else if(refPosition0Based == currentRefPosition)
260 {
261 newCigar += *op;
262 if(Cigar::foundInQuery(*op))
263 {
264 readClipPosition += op->count;
265 }
266 }
267 else
268 {
269 // Not yet to the clip position, so add this operation/size to
270 // the new cigar.
271 newCigar += *op;
272 if(Cigar::foundInQuery(*op))
273 {
274 // Found in the query, so update the read clip position.
275 readClipPosition += op->count;
276 }
277 }
278 } // End loop through cigar.
279
280 // Before adding the softclip, read from the end of the cigar checking to
281 // see if the operations are in the query, removing operations that are
282 // not (pad/delete/skip) until a hardclip or an operation in the query is
283 // found. We do not want a pad/delete/skip right before a softclip.
284 for(int j = newCigar.size() - 1; j >= 0; j--)
285 {
286 const Cigar::CigarOperator* op = &(newCigar.getOperator(j));
287 if(!Cigar::foundInQuery(*op) && !Cigar::isClip(*op))
288 {
289 // pad/delete/skip
290 newCigar.Remove(j);
291 }
292 else if(Cigar::foundInQuery(*op) & Cigar::isClip(*op))
293 {
294 // Soft clip, so increment the clip position for the return value.
295 // Remove the softclip since the readClipPosition is used to
296 // calculate teh size of the soft clip added.
297 readClipPosition -= op->count;
298 newCigar.Remove(j);
299 }
300 else
301 {
302 // Found a cigar operation that should not be deleted, so stop deleting.
303 break;
304 }
305 }
306
307 // Determine the number of soft clips.
308 int32_t numSoftClips = record.getReadLength() - readClipPosition;
309
310 if(numSoftClips < 0)
311 {
312 // Error, read len is less than the clip position
313 ErrorHandler::handleError("Soft clipping error: readLength is less than the clip position");
314 return(NO_CLIP);
315 }
316
317 // NOTE that if the previous operation is a softclip, the CigarRoller logic
318 // will merge this with that one.
319 newCigar.Add(Cigar::softClip, numSoftClips);
320
321 // Check if an ending hard clip needs to be added.
322 if(cigar->size() != 0)
323 {
324 const Cigar::CigarOperator* lastOp =
325 &(cigar->getOperator(cigar->size() - 1));
326 if(lastOp->operation == Cigar::hardClip)
327 {
328 newCigar += *lastOp;
329 }
330 }
331
332 return(readClipPosition);
333}
static int32_t softClipBeginByRefPos(SamRecord &record, int32_t refPosition0Based, CigarRoller &newCigar, int32_t &new0BasedPosition)
Soft clip the cigar from the beginning of the read at the specified reference position.
static int32_t softClipEndByRefPos(SamRecord &record, int32_t refPosition0Based, CigarRoller &newCigar)
Soft clip the cigar from the back of the read at the specified reference position.
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Definition CigarRoller.h:67
bool Remove(int index)
Remove the operation at the specified index.
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
void clear()
Clear this object so that it has no Cigar Operations.
void Set(const char *cigarString)
Sets this object to the specified cigarString.
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition Cigar.h:84
int size() const
Return the number of cigar operations.
Definition Cigar.h:364
static bool foundInReference(Operation op)
Return true if the specified operation is found in the reference sequence, false if not.
Definition Cigar.h:177
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
Definition Cigar.h:219
@ hardClip
Hard clip on the read (clipped sequence not present in the query sequence or reference)....
Definition Cigar.h:95
@ softClip
Soft clip on the read (clipped sequence present in the query sequence, but not in reference)....
Definition Cigar.h:94
const CigarOperator & getOperator(int i) const
Return the Cigar Operation at the specified index (starting at 0).
Definition Cigar.h:354
static bool isClip(Operation op)
Return true if the specified operation is a clipping operation, false if not.
Definition Cigar.h:261
static void handleError(const char *message, HandlingType handlingType=EXCEPTION)
Handle an error based on the error handling type.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition SamRecord.h:52
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
int32_t getReadLength()
Get the length of the read.
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
const char * getCigar()
Returns the SAM formatted CIGAR string.