libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spectralalignment.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specglob/spectraalignment.cpp
3 * \date 08/11/2023
4 * \author Olivier Langella
5 * \brief petide to spectrum alignment
6 *
7 * C++ implementation of the SpecGlob algorithm described in :
8 * 1. Prunier, G. et al. Fast alignment of mass spectra in large proteomics
9 * datasets, capturing dissimilarities arising from multiple complex
10 * modifications of peptides. BMC Bioinformatics 24, 421 (2023).
11 *
12 * HAL Id : hal-04296170 , version 1
13 * Mot de passe : hxo20cl
14 * DOI : 10.1186/s12859-023-05555-y
15 */
16
17
18/*
19 * SpecGlobTool, Spectra to peptide alignment tool
20 * Copyright (C) 2023 Olivier Langella
21 * <olivier.langella@universite-paris-saclay.fr>
22 *
23 * This program is free software: you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License as published by
25 * the Free Software Foundation, either version 3 of the License, or
26 * (at your option) any later version.
27 *
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32 *
33 * You should have received a copy of the GNU General Public License
34 * along with this program. If not, see <http://www.gnu.org/licenses/>.
35 *
36 */
37
38#include "spectralalignment.h"
39#include "../../pappsoexception.h"
40#include "../../utils.h"
41
42
43namespace pappso
44{
45namespace specglob
46{
48 pappso::PrecisionPtr precision_ptr)
49{
50 m_precisionPtr = precision_ptr;
51 m_scoreValues = score_values;
52}
53
57
58
64
70void
72 ExperimentalSpectrumCsp experimental_spectrum)
73{
74 mcsp_experimentalSpectrum = experimental_spectrum;
75 mcsp_peptideSpectrum = peptide_spectrum;
76 m_maxScore = 0;
77 m_matrix.resize(mcsp_peptideSpectrum.get()->size(),
78 mcsp_experimentalSpectrum.get()->size());
79
80 // set the value of difference between Precursor mass and theoretical mass of
81 // the proposed peptide
83 mcsp_experimentalSpectrum.get()->getPrecursorMass() -
84 mcsp_peptideSpectrum.get()->getPeptideSp().get()->getMass();
85
87
88 m_itPosMax = m_matrix.end2();
89 // Make the alignment in filling the matrices
90
91 auto last_theoretical_peptide = std::prev(m_matrix.end1());
92 for(auto itmi = ++m_matrix.begin1(); itmi != m_matrix.end1(); ++itmi)
93 {
94 for(auto itmj = ++itmi.begin(); itmj != itmi.end(); itmj++)
95 {
96 // qDebug() << "i=" << itmj.index1() << " j=" << itmj.index2();
100 // keep the Max Score during matrixes filling
101 // the condition is to keep the max score in the last line (last
102 // theoretical amino acid) This is for try to keep all amino acids of
103 // the initial Theoretical sequence (but can modify alignment choice)
104 if(itmi == last_theoretical_peptide && (*itmj).score > m_maxScore)
105 {
106 m_maxScore = (*itmj).score;
107 m_itPosMax = itmj;
108 }
109 }
110 }
111}
112
113
114void
116 const boost::numeric::ublas::matrix<SpectralAlignmentDataPoint>::iterator2
117 &it_pos,
118 const PeptideSpectrum &peptide_spectrum,
119 const ExperimentalSpectrum &experimental_spectrum)
120{
121 // long theoIndiceI = it_pos.index1();
122 // long expeIndiceJ = it_pos.index2();
123 // in first time, we set score corresponding to type of peak (initial/mirror
124 // or both) we set initial score to peak mirror or initial
127 // experimental_spectrum.at(expeIndiceJ).type;
128
131 int reAlignScoreNOToAdd =
133
134 switch(expePeakType)
135 {
137 // this is a symmetric peak
140 reAlignScoreNOToAdd =
142 if(m_BETTER_END_RA &&
143 it_pos.index1() == peptide_spectrum.getPeptideSp().get()->size())
144 {
145 reAlignScoreToAdd =
148 }
149 break;
151 // this is a native peak with a symmetric corresponding
154 reAlignScoreNOToAdd =
156 if(m_BETTER_END_RA &&
157 it_pos.index1() == peptide_spectrum.getPeptideSp().get()->size())
158 {
159 reAlignScoreToAdd =
162 }
163 break;
164 default:
165 // this is a native or synthetic peak
167 reAlignScoreToAdd =
169 reAlignScoreNOToAdd =
171 if(m_BETTER_END_RA &&
172 it_pos.index1() == peptide_spectrum.getPeptideSp().get()->size())
173 {
174 reAlignScoreToAdd =
177 }
178 break;
179 }
180 SpectralAlignmentDataPoint &matrix_data_point_i_j = *it_pos;
181
182 /*
183 long k = getkValue(it_pos,
184 peptide_spectrum.at(it_pos.index1()).diff_mz,
185 experimental_spectrum);
186
187 if(it_pos.index1() == 15)
188 qDebug() << "position1" << it_pos.index1() << "position2" << it_pos.index1()
189 << " k=" << k << " " << peptide_spectrum.at(it_pos.index1()).diff_mz;
190 */
191 pappso::MzRange aaMassRange(peptide_spectrum.at(it_pos.index1()).diff_mz,
193 qDebug();
194 // long k = -1;
195 auto itKpeak =
196 experimental_spectrum.reverseFindDiffMz(it_pos.index2(), aaMassRange);
197 // if(itKpeak != experimental_spectrum.rend())
198 // k = itKpeak->indice;
199
200 if(itKpeak == experimental_spectrum.rend())
201 {
202 matrix_data_point_i_j.score =
203 m_matrix(it_pos.index1() - 1, it_pos.index2()).score +
205 matrix_data_point_i_j.origin_column_indices = it_pos.index2();
206 matrix_data_point_i_j.alignment_type = SpectralAlignmentType::nonAlign;
207 }
208 else
209 {
210 SpectralAlignmentDataPoint &matrix_data_point_previ_k =
211 m_matrix(it_pos.index1() - 1, itKpeak->indice);
212 int scoreAlignK = matrix_data_point_previ_k.score + alignScoreToAdd;
213 // if it come from non align, we must verify that there is no offset from
214 // previous align
215 if(matrix_data_point_previ_k.alignment_type ==
217 {
218 int l;
219 for(l = it_pos.index1() - 1; l > 0; l--)
220 {
221 if(m_matrix(l, itKpeak->indice).origin_column_indices !=
222 itKpeak->indice)
223 break;
224 }
225 if(std::abs(m_matrix(l, itKpeak->indice).mass_difference -
226 (*it_pos).mass_difference) > m_precisionPtr->getNominal())
227 scoreAlignK = matrix_data_point_previ_k.score + reAlignScoreToAdd;
228 }
229
230 // int[0] = the j value m and int[1] = the score value
232 it_pos, itKpeak->indice, reAlignScoreToAdd, reAlignScoreNOToAdd);
233
234 // For debug to see value for any match
235 // System.out.println("score k = " + scoreAlignK + " - score m = " +
236 // reAlignBestScore[1] + " - origin m = "
237 // + reAlignBestScore[0]);
238
239 if(scoreAlignK >= reAlignBestScore.score)
240 {
241 // setMatricesData(theoIndiceI, expeIndiceJ, scoreAlignK, k, 2);
242 matrix_data_point_i_j.score = scoreAlignK;
243 matrix_data_point_i_j.origin_column_indices = itKpeak->indice;
244 matrix_data_point_i_j.alignment_type = SpectralAlignmentType::align;
245 }
246 else
247 {
248 /*setMatricesData(theoIndiceI,
249 expeIndiceJ,
250 reAlignBestScore[1],
251 reAlignBestScore[0],
252 reAlignBestScore[2]);*/
253
254 matrix_data_point_i_j = reAlignBestScore;
255 }
256 }
257}
258
259
262 const boost::numeric::ublas::matrix<SpectralAlignmentDataPoint>::iterator2
263 &it_pos,
264 std::size_t expeIndicesK,
265 int reAlignScore,
266 int alignScoreToAdd)
267{
268
269 std::size_t previous_peptide_row = it_pos.index1() - 1;
270 SpectralAlignmentDataPoint return_data_point = *it_pos;
271 int bestScore = -10000;
272 int origin = -1;
273
274 // find the best score column indice on previous row, walking back from
275 // expeIndicesK m is a j value between 0 and k where a realign can be do if we
276 // accept mass offset
277 for(long m = expeIndicesK; m > -1; m--)
278 {
279 // the >= here is for keep the highest S value if there is multiple S with
280 // the same best score
281 if(m_matrix(previous_peptide_row, m).score > bestScore)
282 {
283 bestScore = m_matrix(previous_peptide_row, m).score;
284 origin = m;
285 }
286 }
287
288 return_data_point.origin_column_indices = origin;
289 return_data_point.score = bestScore + reAlignScore;
290 return_data_point.alignment_type = SpectralAlignmentType::reAlign; // [2] = 1;
291
292 if(origin == -1)
293 return return_data_point;
294
295 // We check for the last alignment if we have chain of Non Alignment to
296 // compare the last mass offset found
297 std::size_t lastAlignIndiceI = previous_peptide_row;
298 for(long l = previous_peptide_row; l > 0; l--)
299 {
300 if(m_matrix(l, origin).origin_column_indices != 0)
301 {
302 lastAlignIndiceI = l;
303 break;
304 }
305 }
306
307 // if the difference of mass offset between actual state and last align (or
308 // realign) is null, we consider that to an alignment
309 if((lastAlignIndiceI != (previous_peptide_row)) && (it_pos.index1() > 1) &&
310 (std::abs(m_matrix(previous_peptide_row, expeIndicesK).mass_difference -
311 m_matrix(lastAlignIndiceI, origin).mass_difference) <
313 {
314 return_data_point.score = bestScore + alignScoreToAdd;
315 return_data_point.alignment_type =
317 }
318
319 // we return the origin (value of m) and the associate calculated score and
320 // the type of Alignment
321 return return_data_point;
322}
323
324
325void
327 const PeptideSpectrum &peptide_spectrum,
328 const ExperimentalSpectrum &experimental_spectrum)
329{
330
331 auto it_peptide = peptide_spectrum.begin();
332 auto it_spectrum = experimental_spectrum.begin();
333 for(auto itr1 = m_matrix.begin1(); itr1 != m_matrix.end1();
334 ++itr1, it_peptide++)
335 {
336 it_spectrum = experimental_spectrum.begin();
337 for(auto itr2 = itr1.begin(); itr2 != itr1.end(); itr2++, it_spectrum++)
338 {
339 (*itr2).alignment_type = SpectralAlignmentType::nonAlign;
340 if(it_peptide == peptide_spectrum.begin())
341 (*itr2).alignment_type = SpectralAlignmentType::nonAlign;
342 (*itr2).origin_column_indices = 0;
343 (*itr2).score = 0;
344 (*itr2).mass_difference = it_spectrum->symetric_mz - it_peptide->mz;
345 // qDebug() << " i=" << itr2.index1() << " j=" << itr2.index2()
346 // << " mass_difference=" << (*itr2).mass_difference;
347 }
348 }
349}
350
351const matrix<SpectralAlignmentDataPoint> &
353{
354 return m_matrix;
355}
356
357std::vector<int>
358SpectralAlignment::getScoreRow(std::size_t row_indice) const
359{
360 std::vector<int> score;
361
362 auto itr1 = m_matrix.begin1() + row_indice;
363 for(auto itr2 = itr1.begin(); itr2 != itr1.end(); itr2++)
364 {
365 score.push_back((*itr2).score);
366 }
367
368 return score;
369}
370
371int
373{
374 return m_maxScore;
375}
376
377boost::numeric::ublas::matrix<SpectralAlignmentDataPoint>::iterator2
382
385 const boost::numeric::ublas::matrix<SpectralAlignmentDataPoint>::iterator2
386 &itpos) const
387{
388 return mcsp_experimentalSpectrum.get()->at(itpos.index2());
389}
390
391double
396
397
398QString
400{
401 if(m_maxScore < 1)
402 {
403 throw pappso::PappsoException("no backtrack");
404 }
405 QString pepModified = "";
406 QString theoSequence =
407 mcsp_peptideSpectrum.get()->getPeptideSp().get()->getSequence();
408 int actualI = m_itPosMax.index1();
409 int prevI;
410 int actualJ = m_itPosMax.index2();
411 int prevJ;
412 double actualDelMass;
413 double prevDelMass;
414 int modifCount = 0;
415 double totExplainMass = 0.0;
416
417 // System.out.println(theoSequence);
418 // System.out.println("actualI is : " + actualI + " and actualJ is : " +
419 // actualJ);
420
421 while(actualI > 0)
422 {
423 // System.out.println(actualI);
424 // define the actual mass delta to the actual i and j
425 QString tempPepSeq = "";
426 QString tempAA = "";
427 QString aminoAcid = "";
428 actualDelMass = m_matrix(actualI, actualJ).mass_difference;
429 prevI = actualI - 1;
430 prevJ = m_matrix(actualI, actualJ).origin_column_indices;
431 prevDelMass = m_matrix(prevI, prevJ).mass_difference;
432
433 // We checking first if last Amino acid are not aligned
434
435 if(m_matrix(actualI, actualJ).alignment_type ==
437 {
438 // System.out.println("I'm NON ALIGN");
439 tempPepSeq = QString("[%1]").arg(theoSequence.at(actualI - 1));
440
441 while(m_matrix(prevI, prevJ).alignment_type ==
443 prevI > 0)
444 {
445 tempPepSeq =
446 QString("[%1]").arg(theoSequence.at(prevI - 1)) + tempPepSeq;
447 prevI--;
448 }
449 // modifCount++;
450 actualI = prevI;
451 actualJ = prevJ;
452
453 pepModified = tempPepSeq + pepModified;
454
455 qDebug() << "a1 pepModified=" << pepModified;
456 // if not, we are in the case where there is an alignment or a
457 // realignment if there is Align or a re-align, put the letter of
458 // Amino Acid and check what we get before to see if we have a mass
459 // change
460 }
461 else
462 {
463 // we put the actual amino acid because he is found, and in function
464 // of alignment type, we can have a mass offset
465 tempPepSeq = QString("%1").arg(theoSequence.at(actualI - 1));
466 aminoAcid = tempPepSeq;
467
468 // we check if there is a deletion before the founded aminoAcid
469 if(prevI > 0 && m_matrix(prevI, prevJ).alignment_type ==
471 {
472
473 // modifCount++;
474
475 // we continue to check if this is a deletion bloc to keep the
476 // mass difference due to the deletion of the block
477 while(prevI > 0 && m_matrix(prevI, prevJ).alignment_type ==
479 {
480
481 tempPepSeq = QString("[%1]").arg(theoSequence.at(prevI - 1)) +
482 tempPepSeq;
483 tempAA =
484 QString("%1").arg(theoSequence.at(prevI - 1)) + tempAA;
485
486 prevI--;
487 prevDelMass = m_matrix(prevI, prevJ).mass_difference;
488 if(prevI == 0)
489 {
490 prevDelMass = 0.0;
491 }
492 }
493
494 // check the mass delta to avoid showing a null mass delta
495 if(std::abs(actualDelMass - prevDelMass) >
497 {
498
499 tempPepSeq = tempPepSeq.mid(0, tempPepSeq.length() - 1);
500
501 tempPepSeq +=
502 QString("[%1]").arg(actualDelMass - prevDelMass) +
503 aminoAcid;
504 totExplainMass += (actualDelMass - prevDelMass);
505 modifCount++;
506 qDebug() << "a2a1 tempPepSeq=" << tempPepSeq;
507 }
508
509 // if there this is just a re-align, we need to indicate the mass
510 // offset
511 }
512 else if(m_matrix(actualI, actualJ).alignment_type ==
514 {
515
516 tempPepSeq =
517 QString("[%1]").arg(actualDelMass - prevDelMass) + tempPepSeq;
518 modifCount++;
519 totExplainMass += (actualDelMass - prevDelMass);
520
521 // the fact when you align the first amino acid, but there is a
522 // leak of Amino acid in OMS solution before
523 }
524 else if(actualI == 1 &&
525 m_matrix(actualI, actualJ).alignment_type ==
527 (std::abs(actualDelMass) > m_precisionPtr->getNominal()))
528 {
529 tempPepSeq = QString("[%1]").arg(actualDelMass) + tempPepSeq;
530 totExplainMass += actualDelMass;
531 modifCount++;
532 }
533
534 pepModified = tempPepSeq + pepModified;
535 qDebug() << "a2 pepModified=" << pepModified;
536 // System.out.println(pepModified);
537
538 actualI = prevI;
539 actualJ = prevJ;
540 }
541 }
542
543 // setModificationNumber(modifCount);
544
545 return QString("%1_[%2]")
546 .arg(pepModified)
547 .arg(m_precursorMassDelta - totExplainMass);
548}
549
550
553{
554 if(m_maxScore < 1)
555 {
557 QObject::tr("Spectral alignment failed m_maxScore == %1")
558 .arg(m_maxScore));
559 }
560 PeptideModel sg_peptide_model(
561 mcsp_experimentalSpectrum.get()->getQualifiedMassSpectrum(),
562 *(mcsp_peptideSpectrum.get()->getPeptideSp().get()));
563 int actualI = m_itPosMax.index1();
564 int prevI;
565 int actualJ = m_itPosMax.index2();
566 int prevJ;
567 double actualDelMass;
568 double prevDelMass;
569
570 // System.out.println(theoSequence);
571 // System.out.println("actualI is : " + actualI + " and actualJ is : " +
572 // actualJ);
573
574 while(actualI > 0)
575 {
576 // System.out.println(actualI);
577 // define the actual mass delta to the actual i and j
578 QString aminoAcid = "";
579 actualDelMass = m_matrix(actualI, actualJ).mass_difference;
580 prevI = actualI - 1;
581 prevJ = m_matrix(actualI, actualJ).origin_column_indices;
582 prevDelMass = m_matrix(prevI, prevJ).mass_difference;
583
584 // We checking first if last Amino acid are not aligned
585
586 if(m_matrix(actualI, actualJ).alignment_type ==
588 {
589 // System.out.println("I'm NON ALIGN");
590
591 while(m_matrix(prevI, prevJ).alignment_type ==
593 prevI > 0)
594 {
595 if(prevI > 0)
596 sg_peptide_model[prevI - 1].bracket = true;
597 prevI--;
598 }
599 // modifCount++;
600 actualI = prevI;
601 actualJ = prevJ;
602
603 // if not, we are in the case where there is an alignment or a
604 // realignment if there is Align or a re-align, put the letter of
605 // Amino Acid and check what we get before to see if we have a mass
606 // change
607 }
608 else
609 {
610 // we put the actual amino acid because he is found, and in function
611 // of alignment type, we can have a mass offset
612
613 // we check if there is a deletion before the founded aminoAcid
614 if(prevI > 0 && m_matrix(prevI, prevJ).alignment_type ==
616 {
617
618 // modifCount++;
619
620 // we continue to check if this is a deletion bloc to keep the
621 // mass difference due to the deletion of the block
622 while(prevI > 0 && m_matrix(prevI, prevJ).alignment_type ==
624 {
625
626
627 if(prevI > 0)
628 sg_peptide_model[prevI - 1].bracket = true;
629 prevI--;
630 prevDelMass = m_matrix(prevI, prevJ).mass_difference;
631 if(prevI == 0)
632 {
633 prevDelMass = 0.0;
634 }
635 }
636
637 // check the mass delta to avoid showing a null mass delta
638 if(std::abs(actualDelMass - prevDelMass) >
640 {
641
642 int mass_i = actualI - 1;
643 if(mass_i == 0)
644 {
645 sg_peptide_model.setBeginMassDelta(actualDelMass -
646 prevDelMass);
647 }
648 else
649 {
650 sg_peptide_model[mass_i - 1].mass_difference =
651 actualDelMass - prevDelMass;
652 }
653 }
654
655 // if there this is just a re-align, we need to indicate the mass
656 // offset
657 }
658 else if(m_matrix(actualI, actualJ).alignment_type ==
660 {
661
662 int mass_i = actualI - 1;
663 if(mass_i == 0)
664 {
665 sg_peptide_model.setBeginMassDelta(actualDelMass -
666 prevDelMass);
667 }
668 else
669 {
670 sg_peptide_model[mass_i - 1].mass_difference =
671 actualDelMass - prevDelMass;
672 }
673 // the fact when you align the first amino acid, but there is a
674 // leak of Amino acid in OMS solution before
675 }
676 else if(actualI == 1 &&
677 m_matrix(actualI, actualJ).alignment_type ==
679 (std::abs(actualDelMass) > m_precisionPtr->getNominal()))
680 {
681 sg_peptide_model.setBeginMassDelta(actualDelMass);
682 }
683
684 actualI = prevI;
685 actualJ = prevJ;
686 }
687 }
688
689 // setModificationNumber(modifCount);
690
691 // return QString("%1_[%2]")
692 // .arg(pepModified)
693 // .arg(m_precursorMassDelta - totExplainMass);
694 return sg_peptide_model;
695}
696} // namespace specglob
697} // namespace pappso
virtual pappso_double getNominal() const final
Definition precision.cpp:65
std::vector< ExperimentalSpectrumDataPoint >::const_reverse_iterator reverseFindDiffMz(std::size_t start_position, const pappso::MzRange &targeted_mass_range) const
find the peak for wich mass difference from rbegin corresponds to aaTheoMass Find if a peak back in t...
pappso::PeptideSp getPeptideSp() const
int get(ScoreValueType type)
SpectralAlignment(ScoreValues score_values, pappso::PrecisionPtr precision_ptr)
SpectralAlignmentDataPoint getBestRealignScore(const boost::numeric::ublas::matrix< SpectralAlignmentDataPoint >::iterator2 &it_pos, std::size_t expeIndicesK, int reAlignScore, int alignScoreToAdd)
boost::numeric::ublas::matrix< SpectralAlignmentDataPoint >::iterator2 getMaxPosIterator() const
std::vector< int > getScoreRow(std::size_t row_indice) const
void fillMassDelta(const PeptideSpectrum &peptide_spectrum, const ExperimentalSpectrum &experimental_spectrum)
const ExperimentalSpectrumDataPoint & getExperimentalSpectrumDataPoint(const boost::numeric::ublas::matrix< SpectralAlignmentDataPoint >::iterator2 &itpos) const
ExperimentalSpectrumCsp mcsp_experimentalSpectrum
PeptideSpectraCsp getPeptideSpectraCsp() const
ExperimentalSpectrumCsp getExperimentalSpectrumCsp() const
const matrix< SpectralAlignmentDataPoint > & getMatrix() const
boost::numeric::ublas::matrix< SpectralAlignmentDataPoint >::iterator2 m_itPosMax
matrix< SpectralAlignmentDataPoint > m_matrix
void fillMatricesWithScores(const boost::numeric::ublas::matrix< SpectralAlignmentDataPoint >::iterator2 &it_pos, const PeptideSpectrum &peptide_spectrum, const ExperimentalSpectrum &experimental_spectrum)
void align(PeptideSpectraCsp peptide_spectrum, ExperimentalSpectrumCsp experimental_spectrum)
std::shared_ptr< const PeptideSpectrum > PeptideSpectraCsp
@ scoreAlignNative
Score for good alignment native (int)
@ scoreReAlignSymNO
Score for re-alignment without offset symetric (int)
@ scoreNonAlign
Score for non alignment (int)
@ scoreAlignBoth
Score for good alignment both (int)
@ scoreReAlignBoth
Score for re-alignment both (int)
@ scoreReAlignBothNO
Score for re-alignment without offset both (int)
@ scoreReAlignSym
Score for re-alignment symetric (int)
@ scoreAlignSym
Score for good alignment symetric (int)
@ scoreReAlignNativeNO
Score for re-alignment without offset native (int)
std::shared_ptr< const ExperimentalSpectrum > ExperimentalSpectrumCsp
@ nonAlign
the type of alignment to put in origin matrix NON Alignment (0 - NA)
ExperimentalSpectrumDataPointType
Definition types.h:78
@ symetric
new peak : computed symetric mass from a corresponding native peak
@ both
both, the ion and the complement exists in the original spectrum
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39