libpappsomspp
Library for mass spectrometry
msrundatasettree.cpp
Go to the documentation of this file.
1// GPL 3+
2// Filippo Rusconi
3
4#include <map>
5#include <limits>
6#include <iostream>
7#include <iomanip>
8
9#include "msrundatasettree.h"
10
11#include "../pappsoexception.h"
12#include "../exception/exceptionnotpossible.h"
13
14
15namespace pappso
16{
17
18
20 : mcsp_msRunId(ms_run_id_csp)
21{
22}
23
24
26{
27 // qDebug();
28
29 for(auto &&node : m_rootNodes)
30 {
31 // Each node is responsible for freeing its children nodes!
32
33 delete node;
34 }
35
36 m_rootNodes.clear();
37
38 // Beware not to delete the node member of the map, as we have already
39 // destroyed them above!
40 //
41 // for(auto iterator = m_indexNodeMap.begin(); iterator !=
42 // m_indexNodeMap.end();
43 //++iterator)
44 //{
45 // delete(iterator->second);
46 //}
47
48 // qDebug();
49}
50
51
54 QualifiedMassSpectrumCstSPtr mass_spectrum_csp)
55{
56 // qDebug();
57
58 if(mass_spectrum_csp == nullptr)
59 qFatal("Cannot be nullptr");
60
61 if(mass_spectrum_csp.get() == nullptr)
62 qFatal("Cannot be nullptr");
63
64 // We need to get the precursor spectrum index, in case this spectrum is a
65 // fragmentation index.
66
67 MsRunDataSetTreeNode *new_node_p = nullptr;
68
69 std::size_t precursor_spectrum_index =
70 mass_spectrum_csp->getPrecursorSpectrumIndex();
71
72 // qDebug() << "The precursor_spectrum_index:" << precursor_spectrum_index;
73
74 if(precursor_spectrum_index == std::numeric_limits<std::size_t>::max())
75 {
76 // This spectrum is a full scan spectrum, not a fragmentation spectrum.
77 // Create a new node with no parent and push it back to the root nodes
78 // vector.
79
80 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, nullptr);
81
82 // Since there is no parent in this overload, it is assumed that the node
83 // to be populated with the new node is the root node.
84
85 m_rootNodes.push_back(new_node_p);
86
87 // true: with_data
88 // qDebug().noquote() << "Pushed back to the roots node vector node:"
89 //<< new_node_p->toString(true);
90 }
91 else
92 {
93 // This spectrum is a fragmentation spectrum.
94
95 // Sanity check
96
97 if(mass_spectrum_csp->getMsLevel() <= 1)
98 {
100 "msrundatasettree.cpp -- ERROR the MS level needs to be > 1 in a "
101 "fragmentation spectrum.");
102 }
103
104 // Get the node that contains the precursor ion mass spectrum.
105 MsRunDataSetTreeNode *parent_node_p = findNode(precursor_spectrum_index);
106
107 if(parent_node_p == nullptr)
108 {
110 "msrundatasettree.cpp -- ERROR could not find "
111 "a tree node matching the index.");
112 }
113
114 // qDebug() << "Fragmentation spectrum"
115 //<< "Found parent node:" << parent_node_p
116 //<< "for precursor index:" << precursor_spectrum_index;
117
118 // At this point, create a new node with the right parent.
119
120 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_node_p);
121
122 parent_node_p->m_children.push_back(new_node_p);
123 }
124
125 // And now document that addition in the node index map.
126 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
127 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
128
129 // We also want to document the new node relating to the
130 // retention time.
131
133 mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
134
135 // Likewise for the drift time.
136
138 mass_spectrum_csp->getDtInMilliSeconds(), new_node_p, DataKind::dt);
139
141
142 // qDebug() << "New index/node map:"
143 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
144 //<< new_node_p;
145
146 return new_node_p;
147}
148
149
150const std::map<std::size_t, MsRunDataSetTreeNode *> &
152{
153 return m_indexNodeMap;
154}
155
156
157std::size_t
159{
160 // We have a node and we want to get the matching mass spectrum index.
161
162 if(node == nullptr)
163 throw("Cannot be that the node pointer is nullptr");
164
165 std::map<std::size_t, MsRunDataSetTreeNode *>::const_iterator iterator =
166 std::find_if(
167 m_indexNodeMap.begin(),
168 m_indexNodeMap.end(),
169 [node](const std::pair<std::size_t, MsRunDataSetTreeNode *> pair) {
170 return pair.second == node;
171 });
172
173 if(iterator != m_indexNodeMap.end())
174 return iterator->first;
175
176 return std::numeric_limits<std::size_t>::max();
177}
178
179
180std::size_t
182 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp) const
183{
184 MsRunDataSetTreeNode *node_p = findNode(qualified_mass_spectrum_csp);
185
186 return massSpectrumIndex(node_p);
187}
188
189
190const std::vector<MsRunDataSetTreeNode *> &
192{
193 return m_rootNodes;
194}
195
196
197void
199{
200 // qDebug() << "Going to call node->accept(visitor) for each root node.";
201
202 for(auto &&node : m_rootNodes)
203 {
204 // qDebug() << "Calling accept for root node:" << node;
205
206 if(visitor.shouldStop())
207 break;
208
209 node->accept(visitor);
210 }
211}
212
213
214void
217 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_begin_iterator,
218 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_end_iterator)
219{
220 // qDebug() << "Visitor:" << &visitor << "The distance is between iterators
221 // is:"
222 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
223
224 using Iterator = std::vector<MsRunDataSetTreeNode *>::const_iterator;
225
226 Iterator iter = nodes_begin_iterator;
227
228 // Inform the visitor of the number of nodes to work on.
229
230 std::size_t node_count =
231 std::distance(nodes_begin_iterator, nodes_end_iterator);
232
233 visitor.setNodesToProcessCount(node_count);
234
235 while(iter != nodes_end_iterator)
236 {
237 // qDebug() << "Visitor:" << &visitor
238 //<< "The distance is between iterators is:"
239 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
240
241 // qDebug() << "Node visited:" << (*iter)->toString();
242
243 if(visitor.shouldStop())
244 break;
245
246 (*iter)->accept(visitor);
247 ++iter;
248 }
249}
250
251
254{
255 // qDebug();
256
257 for(auto &node : m_rootNodes)
258 {
259 // qDebug() << "In one node of the root nodes.";
260
261 MsRunDataSetTreeNode *iterNode = node->findNode(mass_spectrum_csp);
262 if(iterNode != nullptr)
263 return iterNode;
264 }
265
266 return nullptr;
267}
268
269
271MsRunDataSetTree::findNode(std::size_t spectrum_index) const
272{
273 // qDebug();
274
275 for(auto &node : m_rootNodes)
276 {
277 // qDebug() << "In one node of the root nodes.";
278
279 MsRunDataSetTreeNode *iterNode = node->findNode(spectrum_index);
280 if(iterNode != nullptr)
281 return iterNode;
282 }
283
284 return nullptr;
285}
286
287
288std::vector<MsRunDataSetTreeNode *>
290{
291 // We want to push back all the nodes of the tree in a flat vector of nodes.
292
293 std::vector<MsRunDataSetTreeNode *> nodes;
294
295 for(auto &&node : m_rootNodes)
296 {
297 // The node will store itself and all of its children.
298 node->flattenedView(nodes, true /* with_descendants */);
299 }
300
301 return nodes;
302}
303
304
305std::vector<MsRunDataSetTreeNode *>
307 bool with_descendants)
308{
309 std::vector<MsRunDataSetTreeNode *> nodes;
310
311 // Logically, ms_level cannot be 0.
312
313 if(!ms_level)
314 {
316 "msrundatasettree.cpp -- ERROR the MS level cannot be 0.");
317
318 return nodes;
319 }
320
321 // The depth of the tree at which we are right at this point is 0, we have not
322 // gone into the children yet.
323
324 std::size_t depth = 0;
325
326 // If ms_level is 1, then that means that we want the nodes starting right at
327 // the root nodes with or without the descendants.
328
329 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
330 //<< "ms_level: " << ms_level << " depth: " << depth << std::endl;
331
332 if(ms_level == 1)
333 {
334 for(auto &&node : m_rootNodes)
335 {
336 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__
337 //<< " () "
338 //<< "Handling one of the root nodes at ms_level = 1."
339 //<< std::endl;
340
341 node->flattenedView(nodes, with_descendants);
342 }
343
344 return nodes;
345 }
346
347 // At this point, we know that we want the descendants of the root nodes since
348 // we want ms_level > 1, so we need go to to the children of the root nodes.
349
350 // Let depth to 0, because if we go to the children of the root nodes we will
351 // still be at depth 0, that is MS level 1.
352
353 for(auto &node : m_rootNodes)
354 {
355 // std::cout
356 //<< __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
357 //<< std::setprecision(15)
358 //<< "Requesting a flattened view of the root's child nodes with depth: "
359 //<< depth << std::endl;
360
361 node->flattenedViewMsLevelNodes(ms_level, depth, nodes, with_descendants);
362 }
363
364 return nodes;
365}
366
367
370 std::size_t product_spectrum_index)
371{
372
373 // qDebug();
374
375 // Find the node that holds the mass spectrum that was acquired as the
376 // precursor that when fragmented gave a spectrum at spectrum_index;
377
378 // Get the node that contains the product_spectrum_index first.
379 MsRunDataSetTreeNode *node = nullptr;
380 node = findNode(product_spectrum_index);
381
382 // Now get the node that contains the precursor_spectrum_index.
383
384 return findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex());
385}
386
387
388std::vector<MsRunDataSetTreeNode *>
390 std::size_t precursor_spectrum_index)
391{
392 std::vector<MsRunDataSetTreeNode *> nodes;
393
394 // First get the node of the precursor spectrum index.
395
396 MsRunDataSetTreeNode *precursor_node = findNode(precursor_spectrum_index);
397
398 if(precursor_node == nullptr)
399 return nodes;
400
401 nodes.assign(precursor_node->m_children.begin(),
402 precursor_node->m_children.end());
403
404 return nodes;
405}
406
407
408std::vector<MsRunDataSetTreeNode *>
410 PrecisionPtr precision_ptr)
411{
412
413 // Find all the precursor nodes holding a mass spectrum that contained a
414 // precursor mz-value.
415
416 if(precision_ptr == nullptr)
418 "msrundatasettree.cpp -- ERROR precision_ptr cannot be nullptr.");
419
420 std::vector<MsRunDataSetTreeNode *> product_nodes;
421
422 // As a first step, find all the nodes that hold a mass spectrum that was
423 // acquired as a fragmentation spectrum of an ion of mz, that is, search all
424 // the product ion nodes for which precursor was mz.
425
426 for(auto &&node : m_rootNodes)
427 {
428 node->productNodesByPrecursorMz(mz, precision_ptr, product_nodes);
429 }
430
431 // Now, for each node found get the precursor node
432
433 std::vector<MsRunDataSetTreeNode *> precursor_nodes;
434
435 for(auto &&node : product_nodes)
436 {
437 precursor_nodes.push_back(
438 findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex()));
439 }
440
441 return precursor_nodes;
442}
443
444
445bool
447 MsRunDataSetTreeNode *node_p,
448 DataKind data_kind)
449{
450 // qDebug();
451
452 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
453 using DoubleNodeVectorMap = std::map<double, NodeVector>;
454 using MapPair = std::pair<double, NodeVector>;
455 using MapIterator = DoubleNodeVectorMap::iterator;
456
457 DoubleNodeVectorMap *map_p;
458
459 if(data_kind == DataKind::rt)
460 {
462 }
463 else if(data_kind == DataKind::dt)
464 {
466 }
467 else
468 qFatal("Programming error.");
469
470 // There are two possibilities:
471 //
472 // 1. The time was never encountered yet. We won't find it. We need to
473 // allocate a vector of Node's and set it associated to time in the map.
474 //
475 // 2. The time was encountered already, we will find it in the maps, we'll
476 // just push_back the Node in the vector of nodes.
477
478 MapIterator found_iterator = map_p->find(time);
479
480 if(found_iterator != map_p->end())
481 {
482 // The time value was encountered already.
483
484 found_iterator->second.push_back(node_p);
485
486 // qDebug() << "Found iterator for time:" << time;
487 }
488 else
489 {
490 // We need to create a new vector with the node.
491
492 NodeVector node_vector = {node_p};
493
494 map_p->insert(MapPair(time, node_vector));
495
496 // qDebug() << "Inserted new time:node_vector pair.";
497 }
498
499 return true;
500}
501
502
505 QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
506 MsRunDataSetTreeNode *parent_p)
507{
508 // qDebug();
509
510 // We want to add a mass spectrum. Either the parent_p argument is nullptr or
511 // not. If it is nullptr, then we just append the mass spectrum to the vector
512 // of root nodes. If it is not nullptr, we need to append the mass spectrum to
513 // that node.
514
515 MsRunDataSetTreeNode *new_node_p =
516 new MsRunDataSetTreeNode(mass_spectrum_csp, parent_p);
517
518 if(parent_p == nullptr)
519 {
520 m_rootNodes.push_back(new_node_p);
521
522 // qDebug() << "Pushed back" << new_node << "to root nodes:" <<
523 // &m_rootNodes;
524 }
525 else
526 {
527 parent_p->m_children.push_back(new_node_p);
528
529 // qDebug() << "Pushed back" << new_node << "with parent:" << parent_p;
530 }
531
533
534 // And now document that addition in the node index map.
535 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
536 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
537
538 // We also want to document the new node relating to the
539 // retention time.
540
542 mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
543
544 // Likewise for the drift time.
545
547 mass_spectrum_csp->getDtInMilliSeconds(), new_node_p, DataKind::dt);
548
549 // qDebug() << "New index/node map:"
550 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
551 //<< new_node;
552
553 return new_node_p;
554}
555
556
559 QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
560 std::size_t precursor_spectrum_index)
561{
562 // qDebug();
563
564 // First get the node containing the mass spectrum that was acquired at index
565 // precursor_spectrum_index.
566
567 // qDebug() << "Need to find the precursor's mass spectrum node for precursor
568 // "
569 //"spectrum index:"
570 //<< precursor_spectrum_index;
571
572 MsRunDataSetTreeNode *mass_spec_data_node_p =
573 findNode(precursor_spectrum_index);
574
575 // qDebug() << "Found node" << mass_spec_data_node_p
576 //<< "for precursor index:" << precursor_spectrum_index;
577
578 if(mass_spec_data_node_p == nullptr)
579 {
581 "msrundatasettree.cpp -- ERROR could not find a a "
582 "tree node matching the index.");
583 }
584
585 // qDebug() << "Calling addMassSpectrum with parent node:"
586 //<< mass_spec_data_node_p;
587
588 return addMassSpectrum(mass_spectrum_csp, mass_spec_data_node_p);
589}
590
591
592std::size_t
594 double end,
595 NodeVector &nodes,
596 DataKind data_kind) const
597{
598 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
599 using DoubleNodeVectorMap = std::map<double, NodeVector>;
600 using MapIterator = DoubleNodeVectorMap::const_iterator;
601
602 const DoubleNodeVectorMap *map_p;
603
604 if(data_kind == DataKind::rt)
605 {
607 }
608 else if(data_kind == DataKind::dt)
609 {
611 }
612 else
613 qFatal("Programming error.");
614
615 std::size_t added_nodes = 0;
616
617 // Get the iterator to the map item that has the key greater or equal to
618 // start.
619
620 MapIterator start_iterator = map_p->lower_bound(start);
621
622 if(start_iterator == map_p->end())
623 return 0;
624
625 // Now get the end of the map useful range of items.
626
627 MapIterator end_iterator = map_p->upper_bound(end);
628
629 // Now that we have the iterator range, iterate in it and get the mass spectra
630 // from each item's pair.second node vector.
631
632 for(MapIterator iterator = start_iterator; iterator != end_iterator;
633 ++iterator)
634 {
635 // We are iterating in MapPair items.
636
637 NodeVector node_vector = iterator->second;
638
639 // All the nodes in the node vector need to be copied to the mass_spectra
640 // vector passed as parameter.
641
642 for(auto &&node_p : node_vector)
643 {
644 nodes.push_back(node_p);
645
646 ++added_nodes;
647 }
648 }
649
650 return added_nodes;
651}
652
653
654std::size_t
656 double start, double end, NodeVector &nodes, DataKind data_kind) const
657{
658 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
659 using NodeVectorIterator = NodeVector::iterator;
660
661 using DoubleNodeVectorMap = std::map<double, NodeVector>;
662 using MapIterator = DoubleNodeVectorMap::const_iterator;
663
664 const DoubleNodeVectorMap *map_p;
665
666 if(data_kind == DataKind::rt)
667 {
669 }
670 else if(data_kind == DataKind::dt)
671 {
673 }
674 else
675 qFatal("Programming error.");
676
677 std::size_t removed_vector_items = 0;
678
679 // We want to remove from the nodes vector all the nodes that contain a mass
680 // spectrum acquired at a time range outside of [ start-end ], that is, the
681 // time values [begin() - start [ and ]end -- end()[.
682
683 // Get the iterator to the map item that has the key less to
684 // start (we want to keep the map item having key == start).
685
686 MapIterator first_end_iterator = (*map_p).upper_bound(start);
687
688 // Now that we have the first_end_iterator, we can iterate between [begin --
689 // first_end_iterator[
690
691 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
692 ++iterator)
693 {
694 // Remove from the nodes vector the nodes.
695
696 // We are iterating in MapPair items.
697
698 NodeVector node_vector = iterator->second;
699
700 // All the nodes in the node vector need to be removed from the
701 // mass_spectra vector passed as parameter if found.
702
703 for(auto &&node_p : node_vector)
704 {
705 NodeVectorIterator iterator =
706 std::find(nodes.begin(), nodes.end(), node_p);
707
708 if(iterator != nodes.end())
709 {
710 // We found the node: remove it.
711
712 nodes.erase(iterator);
713
714 ++removed_vector_items;
715 }
716 }
717 }
718
719 // Now the second begin iterator, so that we can remove all the items
720 // contained in the second range, that is, ]end--end()[.
721
722 // The second_first_iterator will point to the item having its time value less
723 // or equal to end. But we do not want to get items having their time equal to
724 // end, only < end. So, if the iterator is not begin(), we just need to
725 // decrement it once.
726 MapIterator second_first_iterator = map_p->upper_bound(end);
727 if(second_first_iterator != map_p->begin())
728 --second_first_iterator;
729
730 for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
731 ++iterator)
732 {
733 // We are iterating in MapPair items.
734
735 NodeVector node_vector = iterator->second;
736
737 // All the nodes in the node vector need to be removed from the
738 // mass_spectra vector passed as parameter if found.
739
740 for(auto &&node_p : node_vector)
741 {
742 NodeVectorIterator iterator =
743 std::find(nodes.begin(), nodes.end(), node_p);
744
745 if(iterator != nodes.end())
746 {
747 // We found the node: remove it.
748
749 nodes.erase(iterator);
750
751 ++removed_vector_items;
752 }
753 }
754 }
755
756 return removed_vector_items;
757}
758
759
760std::size_t
762 double start,
763 double end,
764 QualMassSpectraVector &mass_spectra,
765 DataKind data_kind) const
766{
767 // qDebug() << "With start:" << start << "and end:" << end;
768
769 if(start == end)
770 qDebug() << "Special case, start and end are equal:" << start;
771
772 // We will use the maps that relate rt | dt to a vector of data tree nodes.
773 // Indeed, we may have more than one mass spectrum acquired for a given rt, in
774 // case of ion mobility mass spectrometry. Same for dt: we will have as many
775 // spectra for each dt as there are retention time values...
776
777 using DoubleNodeVectorMap = std::map<double, NodeVector>;
778 using MapIterator = DoubleNodeVectorMap::const_iterator;
779
780 const DoubleNodeVectorMap *map_p;
781
782 if(data_kind == DataKind::rt)
783 {
785
786 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
787 // start
788 //<< "end:" << end;
789 }
790 else if(data_kind == DataKind::dt)
791 {
793
794 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
795 // start
796 //<< "end:" << end;
797 }
798 else
799 qFatal("Programming error.");
800
801 // qDebug() << "The rt |dt / mass spectra map has size:" << map_p->size()
802 //<< "The start:" << start << "the end:" << end;
803
804 std::size_t added_mass_spectra = 0;
805
806 // Get the iterator to the map item that has the key greater or equal to
807 // start.
808
809 MapIterator start_iterator = map_p->lower_bound(start);
810
811 if(start_iterator == map_p->end())
812 {
813 qDebug() << "The start iterator is end()!";
814 return 0;
815 }
816
817 // qDebug() << "The start_iterator points to:" << start_iterator->first
818 //<< "as a rt|dt time.";
819
820 // Now get the end of the map's useful range of items.
821
822 // Returns an iterator pointing to the first element in the container whose
823 // key is considered to go after 'end'.
824
825 MapIterator end_iterator = map_p->upper_bound(end);
826
827 // Immediately verify if there is no distance between start and end.
828 if(!std::distance(start_iterator, end_iterator))
829 {
830 qDebug() << "No range of mass spectra could be selected.";
831 return 0;
832 }
833
834 if(end_iterator == map_p->end())
835 {
836 // qDebug() << "The end_iterator points to the end of the map."
837 //<< "The last map item is prev() at key value: "
838 //<< std::prev(end_iterator)->first;
839 }
840 else
841 {
842 // qDebug() << "The end_iterator points to:" << end_iterator->first
843 //<< "as a rt|dt time and the accounted key value is actually"
844 //<< std::prev(end_iterator)->first;
845 }
846
847 // qDebug() << "The number of time values to iterate through:"
848 //<< std::distance(start_iterator, end_iterator)
849 //<< "with values: start: " << start_iterator->first
850 //<< "and end: " << std::prev(end_iterator)->first;
851
852 // Now that we have the iterator range, iterate in it and get the mass
853 // spectra from each item's pair.second node vector.
854
855 for(MapIterator iterator = start_iterator; iterator != end_iterator;
856 ++iterator)
857 {
858 // We are iterating in MapPair items.
859
860 NodeVector node_vector = iterator->second;
861
862 // All the nodes' mass spectra in the node vector need to be copied to
863 // the mass_spectra vector passed as parameter.
864
865 for(auto &&node_p : node_vector)
866 {
867 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp =
868 node_p->getQualifiedMassSpectrum();
869
870#if 0
871 // Sanity check only for deep debugging.
872
873 if(qualified_mass_spectrum_csp == nullptr ||
874 qualified_mass_spectrum_csp.get() == nullptr)
875 {
877 "The QualifiedMassSpectrumCstSPtr cannot be nullptr.");
878 }
879 else
880 {
881 //qDebug() << "Current mass spectrum is valid with rt:"
882 //<< qualified_mass_spectrum_csp->getRtInMinutes();
883 }
884#endif
885
886 mass_spectra.push_back(qualified_mass_spectrum_csp);
887
888 ++added_mass_spectra;
889 }
890 }
891
892 // qDebug() << "Returning added_mass_spectra:" << added_mass_spectra;
893
894 return added_mass_spectra;
895}
896
897
898std::size_t
900 double start,
901 double end,
902 QualMassSpectraVector &mass_spectra,
903 DataKind data_kind) const
904{
905 using QualMassSpectraVectorIterator = QualMassSpectraVector::iterator;
906
907 using DoubleNodeVectorMap = std::map<double, NodeVector>;
908 using MapIterator = DoubleNodeVectorMap::const_iterator;
909
910 const DoubleNodeVectorMap *map_p;
911
912 if(data_kind == DataKind::rt)
913 {
915
916 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
917 // start
918 //<< "end:" << end;
919 }
920 else if(data_kind == DataKind::dt)
921 {
923
924 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
925 // start
926 //<< "end:" << end;
927 }
928 else
929 qFatal("Programming error.");
930
931 std::size_t removed_vector_items = 0;
932
933 // We want to remove from the nodes vector all the nodes that contain a mass
934 // spectrum acquired at a time range outside of [ start-end ], that is, the
935 // time values [begin() - start [ and ]end -- end()[.
936
937 // Looking for an iterator that points to an item having a time < start.
938
939 // lower_bound returns an iterator pointing to the first element in the
940 // range [first, last) that is not less than (i.e. greater or equal to)
941 // value, or last if no such element is found.
942
943 MapIterator first_end_iterator = (*map_p).lower_bound(start);
944
945 // first_end_iterator points to the item that has the next time value with
946 // respect to start. This is fine because we'll not remove that point
947 // because the for loop below will stop one item short of
948 // first_end_iterator. That means that we effectively remove all the items
949 // [begin() -> start[ (start not include). Exactly what we want.
950
951 // qDebug() << "lower_bound for start:" << first_end_iterator->first;
952
953 // Now that we have the first_end_iterator, we can iterate between [begin --
954 // first_end_iterator[
955
956 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
957 ++iterator)
958 {
959 // Remove from the nodes vector the nodes.
960
961 // We are iterating in MapPair items.
962
963 NodeVector node_vector = iterator->second;
964
965 // All the nodes in the node vector need to be removed from the
966 // mass_spectra vector passed as parameter if found.
967
968 for(auto &&node_p : node_vector)
969 {
970 QualMassSpectraVectorIterator iterator =
971 std::find(mass_spectra.begin(),
972 mass_spectra.end(),
973 node_p->getQualifiedMassSpectrum());
974
975 if(iterator != mass_spectra.end())
976 {
977 // We found the mass spectrum: remove it.
978
979 mass_spectra.erase(iterator);
980
981 ++removed_vector_items;
982 }
983 }
984 }
985
986 // Now the second begin iterator, so that we can remove all the items
987 // contained in the second range, that is, ]end--end()[.
988
989 // The second_first_iterator will point to the item having its time value
990 // less or equal to end. But we do not want to get items having their time
991 // equal to end, only < end. So, if the iterator is not begin(), we just
992 // need to decrement it once.
993
994 MapIterator second_first_iterator = map_p->upper_bound(end);
995
996 // second_first_iterator now points to the item after the one having time
997 // end. Which is exactly what we want: we want to remove ]end--end()[ and
998 // this is exactly what the loop starting a the point after end below.
999
1000 // qDebug() << "second_first_iterator for end:" <<
1001 // second_first_iterator->first;
1002
1003 for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
1004 ++iterator)
1005 {
1006 // We are iterating in MapPair items.
1007
1008 NodeVector node_vector = iterator->second;
1009
1010 // All the nodes in the node vector need to be removed from the
1011 // mass_spectra vector passed as parameter if found.
1012
1013 for(auto &&node_p : node_vector)
1014 {
1015 QualMassSpectraVectorIterator iterator =
1016 std::find(mass_spectra.begin(),
1017 mass_spectra.end(),
1018 node_p->getQualifiedMassSpectrum());
1019
1020 if(iterator != mass_spectra.end())
1021 {
1022 // We found the node: remove it.
1023
1024 mass_spectra.erase(iterator);
1025
1026 ++removed_vector_items;
1027 }
1028 }
1029 }
1030
1031 return removed_vector_items;
1032}
1033
1034
1035std::size_t
1037{
1038 // We want to know what is the depth of the tree, that is the highest level
1039 // of MSn, that is, n.
1040
1041 if(!m_rootNodes.size())
1042 return 0;
1043
1044 // qDebug() << "There are" << m_rootNodes.size() << "root nodes";
1045
1046 // By essence, we are at MS0: only if we have at least one root node do we
1047 // know we have MS1 data. So we already know that we have at least one
1048 // child, so start with depth 1.
1049
1050 std::size_t depth = 1;
1051 std::size_t tmp_depth = 0;
1052 std::size_t greatest_depth = 0;
1053
1054 for(auto &node : m_rootNodes)
1055 {
1056 tmp_depth = node->depth(depth);
1057
1058 // qDebug() << "Returned depth:" << tmp_depth;
1059
1060 if(tmp_depth > greatest_depth)
1061 greatest_depth = tmp_depth;
1062 }
1063
1064 return greatest_depth;
1065}
1066
1067
1068std::size_t
1070{
1071
1072 std::size_t cumulative_node_count = 0;
1073
1074 for(auto &node : m_rootNodes)
1075 {
1076 node->size(cumulative_node_count);
1077
1078 // qDebug() << "Returned node_count:" << node_count;
1079 }
1080
1081 return cumulative_node_count;
1082}
1083
1084
1085std::size_t
1087{
1088 return m_indexNodeMap.size();
1089}
1090
1091
1092std::size_t
1094{
1095 return m_spectrumCount;
1096}
1097
1098
1099} // namespace pappso
virtual void setNodesToProcessCount(std::size_t)=0
QualifiedMassSpectrumCstSPtr mcsp_massSpectrum
MsRunDataSetTreeNode * findNode(std::size_t spectrum_index)
std::vector< MsRunDataSetTreeNode * > m_children
MsRunDataSetTreeNode * findNode(QualifiedMassSpectrumCstSPtr mass_spectrum_csp) const
const std::vector< MsRunDataSetTreeNode * > & getRootNodes() const
std::vector< QualifiedMassSpectrumCstSPtr > QualMassSpectraVector
std::vector< MsRunDataSetTreeNode * > flattenedViewMsLevel(std::size_t ms_level, bool with_descendants=false)
std::size_t indexNodeMapSize() const
void accept(MsRunDataSetTreeNodeVisitorInterface &visitor)
MsRunDataSetTree(MsRunIdCstSPtr ms_run_id_csp)
bool documentNodeInDtRtMap(double time, MsRunDataSetTreeNode *node_p, DataKind data_kind)
std::vector< MsRunDataSetTreeNode * > flattenedView()
std::size_t getSpectrumCount() const
std::map< std::size_t, MsRunDataSetTreeNode * > m_indexNodeMap
std::vector< MsRunDataSetTreeNode * > m_rootNodes
std::size_t addDataSetTreeNodesInsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
std::map< double, NodeVector > DoubleNodeVectorMap
std::size_t removeDataSetTreeNodesOutsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
std::vector< MsRunDataSetTreeNode * > precursorNodesByPrecursorMz(pappso_double mz, PrecisionPtr precision_ptr)
std::vector< MsRunDataSetTreeNode * > NodeVector
std::size_t addDataSetQualMassSpectraInsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
MsRunDataSetTreeNode * precursorNodeByProductSpectrumIndex(std::size_t product_spectrum_index)
const std::map< std::size_t, MsRunDataSetTreeNode * > & getIndexNodeMap() const
MsRunDataSetTreeNode * addMassSpectrum(QualifiedMassSpectrumCstSPtr mass_spectrum)
std::size_t removeDataSetQualMassSpectraOutsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
std::size_t massSpectrumIndex(const MsRunDataSetTreeNode *node) const
DoubleNodeVectorMap m_rtDoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > productNodesByPrecursorSpectrumIndex(std::size_t precursor_spectrum_index)
DoubleNodeVectorMap m_dtDoubleNodeVectorMap
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:46
double pappso_double
A type definition for doubles.
Definition: types.h:50
std::shared_ptr< const QualifiedMassSpectrum > QualifiedMassSpectrumCstSPtr
DataKind
Definition: types.h:211
@ dt
Drift time.
@ rt
Retention time.