IsoSpec 2.2.1
fixedEnvelopes.cpp
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17#include "fixedEnvelopes.h"
18#include <limits>
19#include "isoMath.h"
20
21namespace IsoSpec
22{
23
24FixedEnvelope::FixedEnvelope(const FixedEnvelope& other) :
25_masses(array_copy<double>(other._masses, other._confs_no)),
26_probs(array_copy<double>(other._probs, other._confs_no)),
27_confs(array_copy_nptr<int>(other._confs, other._confs_no*other.allDim)),
28_confs_no(other._confs_no),
29allDim(other.allDim),
30sorted_by_mass(other.sorted_by_mass),
31sorted_by_prob(other.sorted_by_prob),
32total_prob(other.total_prob)
33{}
34
35FixedEnvelope::FixedEnvelope(FixedEnvelope&& other) :
36_masses(other._masses),
37_probs(other._probs),
38_confs(other._confs),
39_confs_no(other._confs_no),
40allDim(other.allDim),
41sorted_by_mass(other.sorted_by_mass),
42sorted_by_prob(other.sorted_by_prob),
43total_prob(other.total_prob)
44{
45other._masses = nullptr;
46other._probs = nullptr;
47other._confs = nullptr;
48other._confs_no = 0;
49other.total_prob = 0.0;
50}
51
52FixedEnvelope::FixedEnvelope(double* in_masses, double* in_probs, size_t in_confs_no, bool masses_sorted, bool probs_sorted, double _total_prob) :
53_masses(in_masses),
54_probs(in_probs),
55_confs(nullptr),
56_confs_no(in_confs_no),
57allDim(0),
58sorted_by_mass(masses_sorted),
59sorted_by_prob(probs_sorted),
60total_prob(_total_prob)
61{}
62
63FixedEnvelope FixedEnvelope::operator+(const FixedEnvelope& other) const
64{
65 double* nprobs = reinterpret_cast<double*>(malloc(sizeof(double) * (_confs_no+other._confs_no)));
66 if(nprobs == nullptr)
67 throw std::bad_alloc();
68 double* nmasses = reinterpret_cast<double*>(malloc(sizeof(double) * (_confs_no+other._confs_no)));
69 if(nmasses == nullptr)
70 {
71 free(nprobs);
72 throw std::bad_alloc();
73 }
74
75 memcpy(nprobs, _probs, sizeof(double) * _confs_no);
76 memcpy(nmasses, _masses, sizeof(double) * _confs_no);
77
78 memcpy(nprobs+_confs_no, other._probs, sizeof(double) * other._confs_no);
79 memcpy(nmasses+_confs_no, other._masses, sizeof(double) * other._confs_no);
80
81 return FixedEnvelope(nmasses, nprobs, _confs_no + other._confs_no);
82}
83
84FixedEnvelope FixedEnvelope::operator*(const FixedEnvelope& other) const
85{
86 double* nprobs = reinterpret_cast<double*>(malloc(sizeof(double) * _confs_no * other._confs_no));
87 if(nprobs == nullptr)
88 throw std::bad_alloc();
89 // deepcode ignore CMemoryLeak: It's not a memleak: the memory is passed to FixedEnvelope which
90 // deepcode ignore CMemoryLeak: takes ownership of it, and will properly free() it in destructor.
91 double* nmasses = reinterpret_cast<double*>(malloc(sizeof(double) * _confs_no * other._confs_no));
92 if(nmasses == nullptr)
93 {
94 free(nprobs);
95 throw std::bad_alloc();
96 }
97
98 size_t tgt_idx = 0;
99
100 for(size_t ii = 0; ii < _confs_no; ii++)
101 for(size_t jj = 0; jj < other._confs_no; jj++)
102 {
103 nprobs[tgt_idx] = _probs[ii] * other._probs[jj];
104 nmasses[tgt_idx] = _masses[ii] + other._masses[jj];
105 tgt_idx++;
106 }
107
108 return FixedEnvelope(nmasses, nprobs, tgt_idx);
109}
110
111void FixedEnvelope::sort_by_mass()
112{
113 if(sorted_by_mass)
114 return;
115
116 sort_by(_masses);
117
118 sorted_by_mass = true;
119 sorted_by_prob = false;
120}
121
122
123void FixedEnvelope::sort_by_prob()
124{
125 if(sorted_by_prob)
126 return;
127
128 sort_by(_probs);
129
130 sorted_by_prob = true;
131 sorted_by_mass = false;
132}
133
134template<typename T> void reorder_array(T* arr, size_t* order, size_t size, bool can_destroy = false)
135{
136 if(!can_destroy)
137 {
138 size_t* order_c = new size_t[size];
139 memcpy(order_c, order, sizeof(size_t)*size);
140 order = order_c;
141 }
142
143 for(size_t ii = 0; ii < size; ii++)
144 while(order[ii] != ii)
145 {
146 std::swap(arr[ii], arr[order[ii]]);
147 std::swap(order[order[ii]], order[ii]);
148 }
149
150 if(!can_destroy)
151 delete[] order;
152}
153
154void FixedEnvelope::sort_by(double* order)
155{
156 size_t* indices = new size_t[_confs_no];
157
158 if(_confs_no <= 1)
159 return;
160
161 for(size_t ii = 0; ii < _confs_no; ii++)
162 indices[ii] = ii;
163
164 std::sort<size_t*>(indices, indices + _confs_no, TableOrder<double>(order));
165
166 size_t* inverse = new size_t[_confs_no];
167
168 for(size_t ii = 0; ii < _confs_no; ii++)
169 inverse[indices[ii]] = ii;
170
171 delete[] indices;
172
173 reorder_array(_masses, inverse, _confs_no);
174 reorder_array(_probs, inverse, _confs_no, _confs == nullptr);
175 if(_confs != nullptr)
176 {
177 int* swapspace = new int[allDim];
178 for(size_t ii = 0; ii < _confs_no; ii++)
179 while(inverse[ii] != ii)
180 {
181 memcpy(swapspace, &_confs[ii*allDim], allDimSizeofInt);
182 memcpy(&_confs[ii*allDim], &_confs[inverse[ii]*allDim], allDimSizeofInt);
183 memcpy(&_confs[inverse[ii]*allDim], swapspace, allDimSizeofInt);
184 std::swap(inverse[inverse[ii]], inverse[ii]);
185 }
186 delete[] swapspace;
187 }
188 delete[] inverse;
189}
190
191
192double FixedEnvelope::get_total_prob()
193{
194 if(std::isnan(total_prob))
195 {
196 total_prob = 0.0;
197 for(size_t ii = 0; ii < _confs_no; ii++)
198 total_prob += _probs[ii];
199 }
200 return total_prob;
201}
202
203void FixedEnvelope::scale(double factor)
204{
205 for(size_t ii = 0; ii < _confs_no; ii++)
206 _probs[ii] *= factor;
207 total_prob *= factor;
208}
209
210void FixedEnvelope::normalize()
211{
212 double tp = get_total_prob();
213 if(tp != 1.0)
214 {
215 scale(1.0/tp);
216 total_prob = 1.0;
217 }
218}
219
220void FixedEnvelope::shift_mass(double value)
221{
222 for(size_t ii = 0; ii < _confs_no; ii++)
223 _masses[ii] += value;
224}
225
226void FixedEnvelope::resample(size_t samples, double beta_bias)
227{
228 if(_confs_no == 0)
229 throw std::logic_error("Resample called on an empty spectrum");
230
231 double pprob = 0.0;
232 double cprob = 0.0;
233 size_t pidx = -1; // Overflows - but it doesn't matter.
234
235 _probs[_confs_no-1] = (std::numeric_limits<double>::max)();
236
237 while(samples > 0)
238 {
239 pprob += _probs[++pidx];
240 _probs[pidx] = 0.0;
241 double covered_part = (pprob - cprob) / (1.0 - cprob);
242 while(samples * covered_part < beta_bias && samples > 0)
243 {
244 cprob += rdvariate_beta_1_b(samples) * (1.0 - cprob);
245 while(pprob < cprob)
246 {
247 pprob += _probs[++pidx];
248 _probs[pidx] = 0.0;
249 }
250 _probs[pidx] += 1.0;
251 samples--;
252 covered_part = (pprob - cprob) / (1.0 - cprob);
253 }
254 if(samples <= 0)
255 break;
256 size_t nrtaken = rdvariate_binom(samples, covered_part);
257 _probs[pidx] += static_cast<double>(nrtaken);
258 samples -= nrtaken;
259 cprob = pprob;
260 }
261
262 pidx++;
263 memset(_probs + pidx, 0, sizeof(double)*(_confs_no - pidx));
264}
265
266FixedEnvelope FixedEnvelope::LinearCombination(const std::vector<const FixedEnvelope*>& spectra, const std::vector<double>& intensities)
267{
268 return LinearCombination(spectra.data(), intensities.data(), spectra.size());
269}
270
271FixedEnvelope FixedEnvelope::LinearCombination(const FixedEnvelope* const * spectra, const double* intensities, size_t size)
272{
273 size_t ret_size = 0;
274 for(size_t ii = 0; ii < size; ii++)
275 ret_size += spectra[ii]->_confs_no;
276
277 double* newprobs = reinterpret_cast<double*>(malloc(sizeof(double)*ret_size));
278 if(newprobs == nullptr)
279 throw std::bad_alloc();
280 double* newmasses = reinterpret_cast<double*>(malloc(sizeof(double)*ret_size));
281 if(newmasses == nullptr)
282 {
283 free(newprobs);
284 throw std::bad_alloc();
285 }
286
287 size_t cntr = 0;
288 for(size_t ii = 0; ii < size; ii++)
289 {
290 double mul = intensities[ii];
291 for(size_t jj = 0; jj < spectra[ii]->_confs_no; jj++)
292 newprobs[jj+cntr] = spectra[ii]->_probs[jj] * mul;
293 memcpy(newmasses + cntr, spectra[ii]->_masses, sizeof(double) * spectra[ii]->_confs_no);
294 cntr += spectra[ii]->_confs_no;
295 }
296 return FixedEnvelope(newmasses, newprobs, cntr);
297}
298
299double FixedEnvelope::WassersteinDistance(FixedEnvelope& other)
300{
301 double ret = 0.0;
302 if((get_total_prob()*0.999 > other.get_total_prob()) || (other.get_total_prob() > get_total_prob()*1.001))
303 throw std::logic_error("Spectra must be normalized before computing Wasserstein Distance");
304
305 if(_confs_no == 0 || other._confs_no == 0)
306 return 0.0;
307
308 sort_by_mass();
309 other.sort_by_mass();
310
311 size_t idx_this = 0;
312 size_t idx_other = 0;
313
314 double acc_prob = 0.0;
315 double last_point = 0.0;
316
317
318 while(idx_this < _confs_no && idx_other < other._confs_no)
319 {
320 if(_masses[idx_this] < other._masses[idx_other])
321 {
322 ret += (_masses[idx_this] - last_point) * std::abs(acc_prob);
323 acc_prob += _probs[idx_this];
324 last_point = _masses[idx_this];
325 idx_this++;
326 }
327 else
328 {
329 ret += (other._masses[idx_other] - last_point) * std::abs(acc_prob);
330 acc_prob -= other._probs[idx_other];
331 last_point = other._masses[idx_other];
332 idx_other++;
333 }
334 }
335
336 acc_prob = std::abs(acc_prob);
337
338 while(idx_this < _confs_no)
339 {
340 ret += (_masses[idx_this] - last_point) * acc_prob;
341 acc_prob -= _probs[idx_this];
342 last_point = _masses[idx_this];
343 idx_this++;
344 }
345
346 while(idx_other < other._confs_no)
347 {
348 ret += (other._masses[idx_other] - last_point) * acc_prob;
349 acc_prob -= other._probs[idx_other];
350 last_point = other._masses[idx_other];
351 idx_other++;
352 }
353
354 return ret;
355}
356
357
358double FixedEnvelope::OrientedWassersteinDistance(FixedEnvelope& other)
359{
360 double ret = 0.0;
361 if((get_total_prob()*0.999 > other.get_total_prob()) || (other.get_total_prob() > get_total_prob()*1.001))
362 throw std::logic_error("Spectra must be normalized before computing Wasserstein Distance");
363
364 if(_confs_no == 0 || other._confs_no == 0)
365 return 0.0;
366
367 sort_by_mass();
368 other.sort_by_mass();
369
370 size_t idx_this = 0;
371 size_t idx_other = 0;
372
373 double acc_prob = 0.0;
374 double last_point = 0.0;
375
376
377 while(idx_this < _confs_no && idx_other < other._confs_no)
378 {
379 if(_masses[idx_this] < other._masses[idx_other])
380 {
381 ret += (_masses[idx_this] - last_point) * acc_prob;
382 acc_prob += _probs[idx_this];
383 last_point = _masses[idx_this];
384 idx_this++;
385 }
386 else
387 {
388 ret += (other._masses[idx_other] - last_point) * acc_prob;
389 acc_prob -= other._probs[idx_other];
390 last_point = other._masses[idx_other];
391 idx_other++;
392 }
393 }
394
395 while(idx_this < _confs_no)
396 {
397 ret += (_masses[idx_this] - last_point) * acc_prob;
398 acc_prob -= _probs[idx_this];
399 last_point = _masses[idx_this];
400 idx_this++;
401 }
402
403 while(idx_other < other._confs_no)
404 {
405 ret += (other._masses[idx_other] - last_point) * acc_prob;
406 acc_prob -= other._probs[idx_other];
407 last_point = other._masses[idx_other];
408 idx_other++;
409 }
410
411 return ret;
412}
413
414double FixedEnvelope::AbyssalWassersteinDistance(FixedEnvelope& other, double abyss_depth, double other_scale)
415{
416 sort_by_mass();
417 other.sort_by_mass();
418
419 std::vector<std::pair<double, double>> carried;
420
421 size_t idx_this = 0;
422 size_t idx_other = 0;
423
424 //std::cout << "AAA" << std::endl;
425
426 auto finished = [&]() -> bool { return idx_this >= _confs_no && idx_other >= other._confs_no; };
427 auto next = [&]() -> std::pair<double, double> {
428 if(idx_this >= _confs_no || (idx_other < other._confs_no && _masses[idx_this] > other._masses[idx_other]))
429 {
430 std::pair<double, double> res = std::pair<double, double>(other._masses[idx_other], other._probs[idx_other]*other_scale);
431 idx_other++;
432 return res;
433 }
434 else
435 {
436 std::pair<double, double> res = std::pair<double, double>(_masses[idx_this], -_probs[idx_this]);
437 idx_this++;
438 return res;
439 }
440 };
441 double accd = 0.0;
442 double condemned = 0.0;
443
444 while(!finished())
445 {
446 auto [m, p] = next();
447 if(!carried.empty() && carried[0].second * p > 0.0)
448 {
449 carried.emplace_back(m, p);
450 continue;
451 }
452
453 while(!carried.empty())
454 {
455 auto& [cm, cp] = carried.back();
456 if(m - cm >= abyss_depth)
457 {
458 for(auto it = carried.cbegin(); it != carried.cend(); it++)
459 condemned += fabs(it->second);
460 carried.clear();
461 break;
462 }
463 if((cp+p)*p > 0.0)
464 {
465 accd += fabs((m-cm)*cp);
466 p += cp;
467 carried.pop_back();
468 }
469 else
470 {
471 accd += fabs((m-cm)*p);
472 cp += p;
473 p = 0.0;
474 break;
475 }
476 }
477 if(p != 0.0)
478 carried.emplace_back(m, p);
479 //std::cout << m << " " << p << std::endl;
480 }
481
482 for(auto it = carried.cbegin(); it != carried.cend(); it++)
483 condemned += fabs(it->second);
484
485 return accd + condemned * abyss_depth * 0.5;
486}
487
488#if 0
489double FixedEnvelope::ScaledAbyssalWassersteinDistance(FixedEnvelope * const * others, double abyss_depth, const double* other_scales, const size_t N)
490{
491 sort_by_mass();
492
493 std::priority_queue<std::pair<double, size_t>> PQ;
494 std::unique_ptr<size_t[]> indices = std::make_unique<size_t[]>(N);
495 memset(indices.get(), 0, sizeof(size_t)*N);
496
497 for(size_t ii=0; ii<N; ii++)
498 {
499 others[ii]->sort_by_mass();
500 if(others[ii]->_confs_no > 0)
501 PQ.emplace_back({others._probs[0], ii});
502 }
503
504
505
506
507 std::vector<std::pair<double, double>> carried;
508
509 size_t idx_this = 0;
510 size_t idx_other = 0;
511
512 //std::cout << "AAA" << std::endl;
513
514 auto finished = [&]() -> bool { return idx_this >= _confs_no && PQ.empty(); };
515 auto next = [&]() -> std::tuple<double, double, size_t> {
516 if(idx_this >= _confs_no || (idx_other < other._confs_no && _masses[idx_this] > other._masses[idx_other]))
517 {
518 std::pair<double, double> res = std::pair<double, double>(other._masses[idx_other], other._probs[idx_other]*other_scale);
519 idx_other++;
520 return res;
521 }
522 else
523 {
524 std::pair<double, double> res = std::pair<double, double>(_masses[idx_this], -_probs[idx_this]);
525 idx_this++;
526 return res;
527 }
528 };
529 double accd = 0.0;
530 double condemned = 0.0;
531
532 while(!finished())
533 {
534 auto [m, p] = next();
535 if(!carried.empty() && carried[0].second * p > 0.0)
536 {
537 carried.emplace_back(m, p);
538 continue;
539 }
540
541 while(!carried.empty())
542 {
543 auto& [cm, cp] = carried.back();
544 if(m - cm >= abyss_depth)
545 {
546 for(auto it = carried.cbegin(); it != carried.cend(); it++)
547 condemned += fabs(it->second);
548 carried.clear();
549 break;
550 }
551 if((cp+p)*p > 0.0)
552 {
553 accd += fabs((m-cm)*cp);
554 p += cp;
555 carried.pop_back();
556 }
557 else
558 {
559 accd += fabs((m-cm)*p);
560 cp += p;
561 p = 0.0;
562 break;
563 }
564 }
565 if(p != 0.0)
566 carried.emplace_back(m, p);
567 //std::cout << m << " " << p << std::endl;
568 }
569
570 for(auto it = carried.cbegin(); it != carried.cend(); it++)
571 condemned += fabs(it->second);
572
573 return accd + condemned * abyss_depth * 0.5;
574}
575
576#endif
577
578#if 0
579double AbyssalWassersteinDistanceGrad(FixedEnvelope* const* envelopes, const double* scales, double* ret_gradient, size_t N, double abyss_depth_exp, double abyss_depth_the)
580{
581return 0.0;
582 std::unique_ptr<size_t[]> env_idx = std::make_unique<size_t[]>(N+1);
583 memset(env_idx.get(), 0, (N+1)*sizeof(size_t));
584 memset(ret_gradient, 0, (N+1)*sizeof(double));
585
586 auto pq_cmp = [](std::pair<double, size_t>& p1, std::pair<double, size_t>& p2) { return p1.first > p2.first; };
587 std::priority_queue<std::pair<double, size_t>, std::vector<std::pair<double, size_t>>, decltype(pq_cmp)> PQ(pq_cmp);
588
589 for(size_t ii=0; ii<=N; ii++)
590 {
591 envelopes[ii]->sort_by_mass();
592 if(envelopes[ii]->_confs_no > 0)
593 {
594 std::cout << "Initial push: " << envelopes[ii]->_masses[0] << " " << ii << '\n';
595 PQ.push({envelopes[ii]->_masses[0], ii});
596 }
597 }
598
599 std::vector<std::tuple<double, double, size_t>> carried;
600
601 auto next = [&]() -> std::tuple<double, double, size_t> {
602 auto [mass, eidx] = PQ.top();
603 double prob = envelopes[eidx]->_probs[env_idx[eidx]];
604 PQ.pop();
605 if(eidx == 0)
606 prob = -prob;
607 else
608 prob = prob * scales[eidx];
609 std::cout << "Next: " << mass << " " << prob << " " << eidx << '\n';
610 env_idx[eidx]++;
611 if(env_idx[eidx] < envelopes[eidx]->_confs_no)
612 PQ.push({envelopes[eidx]->_masses[env_idx[eidx]], eidx});
613
614 return {mass, prob, eidx};
615 };
616 double accd = 0.0;
617 double condemned = 0.0;
618 //double flow;
619 const double max_flow_dist = abyss_depth_exp + abyss_depth_the;
620 max_flow_dist *= 2.0;
621
622 while(!PQ.empty())
623 {
624 auto [m, p, eidx] = next();
625 if(!carried.empty() && std::get<1>(carried[0]) * p > 0.0)
626 {
627 carried.emplace_back(m, p, eidx);
628 continue;
629 }
630
631 while(!carried.empty())
632 {
633 auto& [cm, cp, ceidx] = carried.back();
634 if(m - cm >= max_flow_dist)
635 {
636 for(auto it = carried.cbegin(); it != carried.cend(); it++)
637 condemned += fabs(std::get<1>(*it));
638 carried.clear();
639 break;
640 }
641 std::cout << "accing\n";
642 if((cp+p)*p > 0.0)
643 {
644 accd += fabs((m-cm)*cp);
645 p += cp;
646 carried.pop_back();
647 std::cout << "cprob:" << cp << '\n';
648 }
649 else
650 {
651 accd += fabs((m-cm)*p);
652 cp += p;
653 std::cout << "prob:" << p << '\n';
654 p = 0.0;
655 break;
656 }
657 }
658 if(p != 0.0)
659 carried.emplace_back(m, p, eidx);
660 //std::cout << m << " " << p << std::endl;
661 }
662
663 for(auto it = carried.cbegin(); it != carried.cend(); it++)
664 condemned += fabs(std::get<1>(*it));
665
666 std::cout << "ISO:" << accd << " " << condemned << '\n';
667 return accd + condemned * max_flow_dist * 0.5;
668 while(!PQ.empty())
669 {
670 auto [m, p, eidx] = next();
671 if(!carried.empty() && (std::get<1>(carried[0]) * p > 0.0))
672 {
673 carried.emplace_back(m, p, eidx);
674 continue;
675 }
676
677 while(!carried.empty())
678 {
679 auto& [cm, cp, ceidx] = carried.back();
680 if(m - cm >= max_flow_dist)
681 {
682 for(auto it = carried.cbegin(); it != carried.cend(); it++)
683 {
684 flow = fabs(std::get<1>(*it));
685 const size_t target_idx = std::get<2>(*it);
686 flow *= target_idx == 0 ? abyss_depth_exp : abyss_depth_the;
687 ret_gradient[target_idx] += flow;
688 condemned += flow;
689 std::cout << "condemnin': " << m << " " << p << " " << eidx << " | " << cm << " " << cp << " " << ceidx << '\n';
690 }
691 carried.clear();
692 break;
693 }
694 if((cp+p)*p > 0.0)
695 {
696 flow = fabs((m-cm)*cp);
697 accd += flow;
698 p += cp;
699 ret_gradient[ceidx] += flow;
700 carried.pop_back();
701 }
702 else
703 {
704 flow = fabs((m-cm)*p);
705 accd += flow;
706 cp += p;
707 ret_gradient[eidx] += flow;
708 p = 0.0;
709 break;
710 }
711 }
712 if(p != 0.0)
713 carried.emplace_back(m, p, eidx);
714 //std::cout << m << " " << p << std::endl;
715 }
716
717 for(auto it = carried.cbegin(); it != carried.cend(); it++)
718 condemned += fabs(std::get<1>(*it));
719
720
721 return accd + condemned * (abyss_depth_exp + abyss_depth_the) * 0.5;
722}
723#endif
724
725
726std::tuple<double, double, double> FixedEnvelope::WassersteinMatch(FixedEnvelope& other, double flow_distance, double other_scale)
727{
728 if(_confs_no == 0)
729 return {0.0, other.get_total_prob() * other_scale, 0.0};
730
731 double unmatched1 = 0.0;
732 double unmatched2 = 0.0;
733 double massflow = 0.0;
734
735 sort_by_mass();
736 other.sort_by_mass();
737
738 size_t idx_this = 0;
739 size_t idx_other = 0;
740 double used_prob_this = 0.0;
741 double used_prob_other = 0.0;
742
743 while(idx_this < _confs_no && idx_other < other._confs_no)
744 {
745 bool moved = true;
746 while(moved && idx_this < _confs_no && idx_other < other._confs_no)
747 {
748 moved = false;
749 if(_masses[idx_this] < other._masses[idx_other] - flow_distance)
750 {
751 unmatched1 += _probs[idx_this] - used_prob_this;
752 used_prob_this = 0.0;
753 idx_this++;
754 moved = true;
755 }
756 if(other._masses[idx_other] < _masses[idx_this] - flow_distance)
757 {
758 unmatched2 += other._probs[idx_other]*other_scale - used_prob_other;
759 used_prob_other = 0.0;
760 idx_other++;
761 moved = true;
762 }
763 }
764 if(idx_this < _confs_no && idx_other < other._confs_no)
765 {
766 assert(_probs[idx_this] - used_prob_this >= 0.0);
767 assert(other._probs[idx_other]*other_scale - used_prob_other >= 0.0);
768
769 if(_probs[idx_this] - used_prob_this < other._probs[idx_other]*other_scale - used_prob_other)
770 {
771 massflow += _probs[idx_this] - used_prob_this;
772 used_prob_other += _probs[idx_this] - used_prob_this;
773 assert(used_prob_other >= 0.0);
774 used_prob_this = 0.0;
775 idx_this++;
776 }
777 else
778 {
779 massflow += other._probs[idx_other]*other_scale - used_prob_other;
780 used_prob_this += other._probs[idx_other]*other_scale - used_prob_other;
781 assert(used_prob_this >= 0.0);
782 used_prob_other = 0.0;
783 idx_other++;
784 }
785 }
786 }
787
788 unmatched1 -= used_prob_this;
789 unmatched2 -= used_prob_other;
790
791 for(; idx_this < _confs_no; idx_this++)
792 unmatched1 += _probs[idx_this];
793 for(; idx_other < other._confs_no; idx_other++)
794 unmatched2 += other._probs[idx_other]*other_scale;
795
796 return {unmatched1, unmatched2, massflow};
797}
798
799FixedEnvelope FixedEnvelope::bin(double bin_width, double middle)
800{
801 sort_by_mass();
802
803 FixedEnvelope ret;
804
805 if(_confs_no == 0)
806 return ret;
807
808 ret.reallocate_memory<false>(ISOSPEC_INIT_TABLE_SIZE);
809
810 if(bin_width == 0)
811 {
812 double curr_mass = _masses[0];
813 double accd_prob = _probs[0];
814 for(size_t ii = 1; ii<_confs_no; ii++)
815 {
816 if(curr_mass != _masses[ii])
817 {
818 ret.store_conf(curr_mass, accd_prob);
819 curr_mass = _masses[ii];
820 accd_prob = _probs[ii];
821 }
822 else
823 accd_prob += _probs[ii];
824 }
825 ret.store_conf(curr_mass, accd_prob);
826 return ret;
827 }
828
829 size_t ii = 0;
830
831 double half_width = 0.5*bin_width;
832 double hwmm = half_width-middle;
833
834 while(ii < _confs_no)
835 {
836 double current_bin_middle = floor((_masses[ii]+hwmm)/bin_width)*bin_width + middle;
837 double current_bin_end = current_bin_middle + half_width;
838 double bin_prob = 0.0;
839
840 while(ii < _confs_no && _masses[ii] <= current_bin_end)
841 {
842 bin_prob += _probs[ii];
843 ii++;
844 }
845 ret.store_conf(current_bin_middle, bin_prob);
846 }
847
848 return ret;
849}
850
851template<bool tgetConfs> void FixedEnvelope::reallocate_memory(size_t new_size)
852{
853 current_size = new_size;
854 // FIXME: Handle overflow gracefully here. It definitely could happen for people still stuck on 32 bits...
855 _masses = reinterpret_cast<double*>(realloc(_masses, new_size * sizeof(double)));
856 if(_masses == nullptr)
857 throw std::bad_alloc();
858 tmasses = _masses + _confs_no;
859
860 _probs = reinterpret_cast<double*>(realloc(_probs, new_size * sizeof(double)));
861 if(_probs == nullptr)
862 throw std::bad_alloc();
863 tprobs = _probs + _confs_no;
864
865 constexpr_if(tgetConfs)
866 {
867 _confs = reinterpret_cast<int*>(realloc(_confs, new_size * allDimSizeofInt));
868 if(_confs == nullptr)
869 throw std::bad_alloc();
870 tconfs = _confs + (allDim * _confs_no);
871 }
872}
873
874void FixedEnvelope::slow_reallocate_memory(size_t new_size)
875{
876 current_size = new_size;
877 // FIXME: Handle overflow gracefully here. It definitely could happen for people still stuck on 32 bits...
878 _masses = reinterpret_cast<double*>(realloc(_masses, new_size * sizeof(double)));
879 if(_masses == nullptr)
880 throw std::bad_alloc();
881 tmasses = _masses + _confs_no;
882
883 _probs = reinterpret_cast<double*>(realloc(_probs, new_size * sizeof(double)));
884 if(_probs == nullptr)
885 throw std::bad_alloc();
886 tprobs = _probs + _confs_no;
887
888 if(_confs != nullptr)
889 {
890 _confs = reinterpret_cast<int*>(realloc(_confs, new_size * allDimSizeofInt));
891 if(_confs == nullptr)
892 throw std::bad_alloc();
893 tconfs = _confs + (allDim * _confs_no);
894 }
895}
896
897template<bool tgetConfs> void FixedEnvelope::threshold_init(Iso&& iso, double threshold, bool absolute)
898{
899 IsoThresholdGenerator generator(std::move(iso), threshold, absolute);
900
901 size_t tab_size = generator.count_confs();
902 this->allDim = generator.getAllDim();
903 this->allDimSizeofInt = this->allDim * sizeof(int);
904
905 this->reallocate_memory<tgetConfs>(tab_size);
906
907 double* ttmasses = this->_masses;
908 double* ttprobs = this->_probs;
909 ISOSPEC_MAYBE_UNUSED int* ttconfs;
910 constexpr_if(tgetConfs)
911 ttconfs = _confs;
912
913 while(generator.advanceToNextConfiguration())
914 {
915 *ttmasses = generator.mass(); ttmasses++;
916 *ttprobs = generator.prob(); ttprobs++;
917 constexpr_if(tgetConfs) { generator.get_conf_signature(ttconfs); ttconfs += allDim; }
918 }
919
920 this->_confs_no = tab_size;
921}
922
923template void FixedEnvelope::threshold_init<true>(Iso&& iso, double threshold, bool absolute);
924template void FixedEnvelope::threshold_init<false>(Iso&& iso, double threshold, bool absolute);
925
926
927template<bool tgetConfs> void FixedEnvelope::total_prob_init(Iso&& iso, double target_total_prob, bool optimize)
928{
929 if(target_total_prob <= 0.0)
930 return;
931
932 if(target_total_prob >= 1.0)
933 {
934 threshold_init<tgetConfs>(std::move(iso), 0.0, true);
935 return;
936 }
937
938 IsoLayeredGenerator generator(std::move(iso), 1000, 1000, true, std::min<double>(target_total_prob, 0.9999));
939
940 this->allDim = generator.getAllDim();
941 this->allDimSizeofInt = this->allDim*sizeof(int);
942
943
944 this->reallocate_memory<tgetConfs>(ISOSPEC_INIT_TABLE_SIZE);
945
946 size_t last_switch = 0;
947 double prob_at_last_switch = 0.0;
948 double prob_so_far = 0.0;
949 double layer_delta;
950
951 const double sum_above = log1p(-target_total_prob) - 2.3025850929940455; // log(0.1);
952
953 do
954 { // Store confs until we accumulate more prob than needed - and, if optimizing,
955 // store also the rest of the last layer
956 while(generator.advanceToNextConfigurationWithinLayer())
957 {
958 this->template addConfILG<tgetConfs>(generator);
959 prob_so_far += *(tprobs-1); // The just-stored probability
960 if(prob_so_far >= target_total_prob)
961 {
962 if (optimize)
963 {
964 while(generator.advanceToNextConfigurationWithinLayer())
965 this->template addConfILG<tgetConfs>(generator);
966 break;
967 }
968 else
969 return;
970 }
971 }
972 if(prob_so_far >= target_total_prob)
973 break;
974
975 last_switch = this->_confs_no;
976 prob_at_last_switch = prob_so_far;
977
978 layer_delta = sum_above - log1p(-prob_so_far);
979 layer_delta = (std::max)((std::min)(layer_delta, -0.1), -5.0);
980 } while(generator.nextLayer(layer_delta));
981
982 if(!optimize || prob_so_far <= target_total_prob)
983 return;
984
985 // Right. We have extra configurations and we have been asked to produce an optimal p-set, so
986 // now we shall trim unneeded configurations, using an algorithm dubbed "quicktrim"
987 // - similar to the quickselect algorithm, except that we use the cumulative sum of elements
988 // left of pivot to decide whether to go left or right, instead of the positional index.
989 // We'll be sorting by the prob array, permuting the other ones in parallel.
990
991 int* conf_swapspace = nullptr;
992 constexpr_if(tgetConfs)
993 conf_swapspace = reinterpret_cast<int*>(malloc(this->allDimSizeofInt));
994
995 size_t start = last_switch;
996 size_t end = this->_confs_no;
997 double sum_to_start = prob_at_last_switch;
998
999 while(start < end)
1000 {
1001 // Partition part
1002 size_t len = end - start;
1003#if ISOSPEC_BUILDING_R
1004 size_t pivot = len/2 + start;
1005#else
1006 size_t pivot = random_gen() % len + start; // Using Mersenne twister directly - we don't
1007 // need a very uniform distribution just for pivot
1008 // selection
1009#endif
1010 double pprob = this->_probs[pivot];
1011 swap<tgetConfs>(pivot, end-1, conf_swapspace);
1012
1013 double new_csum = sum_to_start;
1014
1015 size_t loweridx = start;
1016 for(size_t ii = start; ii < end-1; ii++)
1017 if(this->_probs[ii] > pprob)
1018 {
1019 swap<tgetConfs>(ii, loweridx, conf_swapspace);
1020 new_csum += this->_probs[loweridx];
1021 loweridx++;
1022 }
1023
1024 swap<tgetConfs>(end-1, loweridx, conf_swapspace);
1025
1026 // Selection part
1027 if(new_csum < target_total_prob)
1028 {
1029 start = loweridx + 1;
1030 sum_to_start = new_csum + this->_probs[loweridx];
1031 }
1032 else
1033 end = loweridx;
1034 }
1035
1036 constexpr_if(tgetConfs)
1037 free(conf_swapspace);
1038
1039 if(end <= current_size/2)
1040 // Overhead in memory of 2x or more, shrink to fit
1041 this->template reallocate_memory<tgetConfs>(end);
1042
1043 this->_confs_no = end;
1044}
1045
1046template void FixedEnvelope::total_prob_init<true>(Iso&& iso, double target_total_prob, bool optimize);
1047template void FixedEnvelope::total_prob_init<false>(Iso&& iso, double target_total_prob, bool optimize);
1048
1049template<bool tgetConfs> void FixedEnvelope::stochastic_init(Iso&& iso, size_t _no_molecules, double _precision, double _beta_bias)
1050{
1051 IsoStochasticGenerator generator(std::move(iso), _no_molecules, _precision, _beta_bias);
1052
1053 this->allDim = generator.getAllDim();
1054 this->allDimSizeofInt = this->allDim * sizeof(int);
1055
1056 this->reallocate_memory<tgetConfs>(ISOSPEC_INIT_TABLE_SIZE);
1057
1058 while(generator.advanceToNextConfiguration())
1059 addConfILG<tgetConfs, IsoStochasticGenerator>(generator);
1060}
1061
1062template void FixedEnvelope::stochastic_init<true>(Iso&& iso, size_t _no_molecules, double _precision, double _beta_bias);
1063template void FixedEnvelope::stochastic_init<false>(Iso&& iso, size_t _no_molecules, double _precision, double _beta_bias);
1064
1065double FixedEnvelope::empiric_average_mass()
1066{
1067 double ret = 0.0;
1068 for(size_t ii = 0; ii < _confs_no; ii++)
1069 {
1070 ret += _masses[ii] * _probs[ii];
1071 }
1072 return ret / get_total_prob();
1073}
1074
1075double FixedEnvelope::empiric_variance()
1076{
1077 double ret = 0.0;
1078 double avg = empiric_average_mass();
1079 for(size_t ii = 0; ii < _confs_no; ii++)
1080 {
1081 double msq = _masses[ii] - avg;
1082 ret += msq * msq * _probs[ii];
1083 }
1084
1085 return ret / get_total_prob();
1086}
1087
1088FixedEnvelope FixedEnvelope::Binned(Iso&& iso, double target_total_prob, double bin_width, double bin_middle)
1089{
1090 FixedEnvelope ret;
1091
1092 double min_mass = iso.getLightestPeakMass();
1093 double range_len = iso.getHeaviestPeakMass() - min_mass;
1094 size_t no_bins = static_cast<size_t>(range_len / bin_width) + 2;
1095 double half_width = 0.5*bin_width;
1096 double hwmm = half_width-bin_middle;
1097 size_t idx_min = floor((min_mass + hwmm) / bin_width);
1098 size_t idx_max = idx_min + no_bins;
1099
1100 double* acc;
1101# if ISOSPEC_GOT_MMAN
1102 acc = reinterpret_cast<double*>(mmap(nullptr, sizeof(double)*no_bins, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
1103#else
1104 // This will probably crash for large molecules and high resolutions...
1105 acc = reinterpret_cast<double*>(calloc(no_bins, sizeof(double)));
1106#endif
1107 if(acc == NULL)
1108 throw std::bad_alloc();
1109
1110 acc -= idx_min;
1111
1112 IsoLayeredGenerator ITG(std::move(iso));
1113
1114
1115 bool non_empty;
1116 while((non_empty = ITG.advanceToNextConfiguration()) && ITG.prob() == 0.0)
1117 {}
1118
1119 if(non_empty)
1120 {
1121 double accum_prob = ITG.prob();
1122 size_t nonzero_idx = floor((ITG.mass() + hwmm)/bin_width);
1123 acc[nonzero_idx] = accum_prob;
1124
1125 while(ITG.advanceToNextConfiguration() && accum_prob < target_total_prob)
1126 {
1127 double prob = ITG.prob();
1128 accum_prob += prob;
1129 size_t bin_idx = floor((ITG.mass() + hwmm)/bin_width);
1130 acc[bin_idx] += prob;
1131 }
1132
1133 // Making the assumption that there won't be gaps of more than 10 Da in the spectrum. This is true for all
1134 // molecules made of natural elements.
1135 size_t distance_10da = static_cast<size_t>(10.0/bin_width) + 1;
1136
1137 size_t empty_steps = 0;
1138
1139 ret.reallocate_memory<false>(ISOSPEC_INIT_TABLE_SIZE);
1140
1141 for(size_t ii = nonzero_idx; ii >= idx_min && empty_steps < distance_10da; ii--)
1142 {
1143 if(acc[ii] > 0.0)
1144 {
1145 empty_steps = 0;
1146 ret.store_conf(static_cast<double>(ii)*bin_width + bin_middle, acc[ii]);
1147 }
1148 else
1149 empty_steps++;
1150 }
1151
1152 empty_steps = 0;
1153 for(size_t ii = nonzero_idx+1; ii < idx_max && empty_steps < distance_10da; ii++)
1154 {
1155 if(acc[ii] > 0.0)
1156 {
1157 empty_steps = 0;
1158 ret.store_conf(static_cast<double>(ii)*bin_width + bin_middle, acc[ii]);
1159 }
1160 else
1161 empty_steps++;
1162 }
1163 }
1164
1165 acc += idx_min;
1166
1167# if ISOSPEC_GOT_MMAN
1168 munmap(acc, sizeof(double)*no_bins);
1169#else
1170 free(acc);
1171#endif
1172
1173 return ret;
1174}
1175
1176} // namespace IsoSpec