29#include "marginalTrek++.h"
34#include "element_tables.h"
58 for(
int i = 0; i < isotopeNo; ++i)
59 res[i] =
static_cast<int>( atomCnt * exp(lprobs[i]) ) + 1;
64 for(
int i = 0; i < isotopeNo; ++i) s += res[i];
66 int diff = atomCnt - s;
78 int coordDiff = res[i] - diff;
86 diff = abs(coordDiff);
94 double LP = unnormalized_logProb(res, lprobs, isotopeNo);
100 for(
int ii = 0; ii < isotopeNo; ii++)
101 for(
int jj = 0; jj < isotopeNo; jj++)
102 if(ii != jj && res[ii] > 0)
106 NLP = unnormalized_logProb(res, lprobs, isotopeNo);
107 if(NLP > LP || (NLP == LP && ii > jj))
129 for(
int ii = 0; ii < isoNo; ii++)
130 if(probs[ii] <= 0.0 || probs[ii] > 1.0)
131 throw std::invalid_argument(
"All isotope probabilities p must fulfill: 0.0 < p <= 1.0");
133 double* ret =
new double[isoNo];
136 for(
int i = 0; i < isoNo; i++)
138 ret[i] = log(probs[i]);
139 for(
int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
140 if(elem_table_probability[j] == probs[i])
142 ret[i] = elem_table_log_probability[j];
149double get_loggamma_nominator(
int x)
152 double ret = lgamma(x+1);
156int verify_atom_cnt(
int atomCnt)
158 #if !ISOSPEC_BUILDING_OPENMS
159 if(ISOSPEC_G_FACT_TABLE_SIZE-1 <= atomCnt)
160 throw std::length_error(
"Subisotopologue too large, size limit (that is, the maximum number of atoms of a single element in a molecule) is: " + std::to_string(ISOSPEC_G_FACT_TABLE_SIZE-1));
166 const double* _masses,
167 const double* _probs,
172isotopeNo(_isotopeNo),
173atomCnt(verify_atom_cnt(_atomCnt)),
175atom_masses(array_copy<double>(_masses, _isotopeNo)),
176loggamma_nominator(get_loggamma_nominator(_atomCnt)),
183isotopeNo(other.isotopeNo),
184atomCnt(other.atomCnt),
185atom_lProbs(array_copy<double>(other.atom_lProbs, isotopeNo)),
186atom_masses(array_copy<double>(other.atom_masses, isotopeNo)),
187loggamma_nominator(other.loggamma_nominator)
204disowned(other.disowned),
205isotopeNo(other.isotopeNo),
206atomCnt(other.atomCnt),
207atom_lProbs(other.atom_lProbs),
208atom_masses(other.atom_masses),
209loggamma_nominator(other.loggamma_nominator)
211 other.disowned =
true;
212 if(other.mode_conf ==
nullptr)
242void Marginal::setupMode()
244 ISOSPEC_IMPOSSIBLE(
mode_conf !=
nullptr);
252 double ret_mass = std::numeric_limits<double>::infinity();
253 for(
unsigned int ii = 0; ii <
isotopeNo; ii++)
261 double ret_mass = 0.0;
262 for(
unsigned int ii = 0; ii <
isotopeNo; ii++)
270 double found_prob = -std::numeric_limits<double>::infinity();
271 double found_mass = 0.0;
272 for(
unsigned int ii = 0; ii <
isotopeNo; ii++)
284 for(
unsigned int ii = 0; ii <
isotopeNo; ii++)
304 return -std::numeric_limits<double>::infinity();
306 const double i =
static_cast<double>(
isotopeNo);
307 const double k = i - 1.0;
308 const double n =
static_cast<double>(
atomCnt);
310 double sum_lprobs = 0.0;
311 for(
int jj = 0; jj < i; jj++)
314 double log_V_simplex = k * log(n) - lgamma(i);
315 double log_N_simplex = lgamma(n+i) - lgamma(n+1.0) - lgamma(i);
316 double log_V_ellipsoid = (k * (log(n) + logpi + logEllipsoidRadius) + sum_lprobs) * 0.5 - lgamma((i+1)*0.5);
318 return log_N_simplex + log_V_ellipsoid - log_V_simplex;
330orderMarginal(atom_lProbs, isotopeNo),
332allocator(isotopeNo, tabSize),
333min_lprob(*std::min_element(atom_lProbs, atom_lProbs+isotopeNo))
335 int* initialConf = allocator.makeCopy(
mode_conf);
341 fringe.resize_and_wipe(1);
344 initialized_until = 1;
350bool MarginalTrek::add_next_conf()
359 while(current_bucket < initialized_until && fringe[current_bucket].empty())
367 if(current_bucket >= initialized_until)
371 pq = std::priority_queue<ProbAndConfPtr, pod_vector<ProbAndConfPtr> >(std::less<ProbAndConfPtr>(),
pod_vector<ProbAndConfPtr>(std::move(fringe[current_bucket])));
374 double logprob = pq.top().first;
375 Conf topConf = pq.top().second;
380 _confs.push_back(topConf);
383 _conf_lprobs.push_back(logprob);
385 for(
unsigned int j = 0; j <
isotopeNo; ++j )
392 for(
unsigned int i = 0; i <
isotopeNo; ++i )
399 Conf acceptedCandidate = allocator.makeCopy(topConf);
401 ++acceptedCandidate[i];
402 --acceptedCandidate[j];
404 double new_prob = logProb(acceptedCandidate);
405 size_t bucket_nr = bucket_no(new_prob);
407 if(bucket_nr >= initialized_until)
410 initialized_until = bucket_nr+1;
411 fringe.resize_and_wipe(initialized_until);
414 ISOSPEC_IMPOSSIBLE(bucket_nr < current_bucket);
415 if(bucket_nr == current_bucket)
416 pq.push({new_prob, acceptedCandidate});
418 fringe[bucket_nr].push_back({new_prob, acceptedCandidate});
434MarginalTrek::~MarginalTrek()
436 const size_t fringe_size = fringe.size();
437 for(
size_t ii = 0; ii < fringe_size; ii++)
449allocator(isotopeNo, tabSize)
451 Conf currentConf = allocator.makeCopy(
mode_conf);
452 if(logProb(currentConf) >= lCutOff)
454 configurations.push_back(currentConf);
458 unsigned int idx = 0;
460 std::unique_ptr<double[]> prob_partials(
new double[
isotopeNo]);
461 std::unique_ptr<double[]> prob_part_acc(
new double[
isotopeNo+1]);
464 while(idx < configurations.size())
466 currentConf = configurations[idx];
470 prob_partials[ii] = minuslogFactorial(currentConf[ii]) + currentConf[ii] *
atom_lProbs[ii];
472 for(
unsigned int ii = 0; ii <
isotopeNo; ii++ )
477 if(currentConf[ii] != 0)
479 double prev_partial_ii = prob_partials[ii];
481 prob_partials[ii] = minuslogFactorial(currentConf[ii]) + currentConf[ii] *
atom_lProbs[ii];
483 for(
unsigned int jj = 0; jj <
isotopeNo; jj++ )
485 prob_part_acc[jj+1] = prob_part_acc[jj] + prob_partials[jj];
492 double logp = prob_part_acc[jj] + minuslogFactorial(1+currentConf[jj]) + (1+currentConf[jj]) *
atom_lProbs[jj];
493 for(
size_t kk = jj+1; kk <
isotopeNo; kk++)
494 logp += prob_partials[kk];
498 auto tmp = allocator.makeCopy(currentConf);
500 configurations.push_back(tmp);
501 lProbs.push_back(logp);
505 prob_part_acc[jj+1] = prob_part_acc[jj] + prob_partials[jj];
511 prob_partials[ii] = prev_partial_ii;
519 no_confs = configurations.size();
520 confs = configurations.data();
522 if(sort && no_confs > 0)
524 std::unique_ptr<size_t[]> order_arr(get_inverse_order(lProbs.data(), no_confs));
525 impose_order(order_arr.get(), no_confs, lProbs.data(), confs);
528 probs =
new double[no_confs];
529 masses =
new double[no_confs];
532 for(
unsigned int ii = 0; ii < no_confs; ii++)
534 probs[ii] = exp(lProbs[ii]);
538 lProbs.push_back(-std::numeric_limits<double>::infinity());
544 if(masses !=
nullptr)
557:
Marginal(std::move(m)), current_threshold(1.0), allocator(isotopeNo, tabSize),
558equalizer(isotopeNo), keyHasher(isotopeNo)
561 lProbs.push_back(std::numeric_limits<double>::infinity());
563 lProbs.push_back(-std::numeric_limits<double>::infinity());
564 guarded_lProbs = lProbs.data()+1;
578 while(!fringe.empty())
580 Conf currentConf = fringe.back();
583 double opc = fringe_unn_lprobs.back();
585 fringe_unn_lprobs.pop_back();
586 if(opc < new_threshold)
588 new_fringe.push_back(currentConf);
589 new_fringe_unn_lprobs.push_back(opc);
594 configurations.push_back(currentConf);
596 for(
unsigned int ii = 0; ii <
isotopeNo; ii++ )
601 if(currentConf[ii] > 0)
604 for(
unsigned int jj = 0; jj <
isotopeNo; jj++ )
611 Conf nc = allocator.makeCopy(currentConf);
615 if(lpc >= new_threshold)
617 fringe.push_back(nc);
618 fringe_unn_lprobs.push_back(lpc);
622 new_fringe.push_back(nc);
623 new_fringe_unn_lprobs.push_back(lpc);
639 current_threshold = new_threshold;
640 fringe.swap(new_fringe);
641 fringe_unn_lprobs.swap(new_fringe_unn_lprobs);
645 size_t to_sort_size = configurations.size() - probs.size();
648 std::unique_ptr<size_t[]> order_arr(get_inverse_order(lProbs.data()+1+probs.size(), to_sort_size));
649 double* P = lProbs.data()+1+probs.size();
650 Conf* C = configurations.data()+probs.size();
651 size_t* O = order_arr.get();
652 impose_order(O, to_sort_size, P, C);
656 if(probs.capacity() * 2 < configurations.size() + 2)
659 probs.reserve(configurations.size());
660 masses.reserve(configurations.size());
665 for(
unsigned int ii = probs.size(); ii < configurations.size(); ii++)
667 probs.push_back(exp(lProbs[ii+1]));
671 lProbs.push_back(-std::numeric_limits<double>::infinity());
673 guarded_lProbs = lProbs.data()+1;
681 double ret = std::numeric_limits<double>::infinity();
691 double ret = -std::numeric_limits<double>::infinity();
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
double get_max_mass() const
Get the maximal mass in current layer.
double get_min_mass() const
Get the minimal mass in current layer.
LayeredMarginal(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
The marginal distribution class (a subisotopologue).
Conf computeModeConf() const
The the probability of the mode subisotopologue.
Marginal(const double *_masses, const double *_probs, int _isotopeNo, int _atomCnt)
Class constructor.
const unsigned int atomCnt
double getLightestConfMass() const
Get the mass of the lightest subisotopologue.
const unsigned int isotopeNo
const double *const atom_masses
double variance() const
Calculate the variance of the theoretical distribution describing the subisotopologue.
ISOSPEC_FORCE_INLINE double unnormalized_logProb(Conf conf) const
Calculate the log-probability of a given subisotopologue.
const double loggamma_nominator
double getHeaviestConfMass() const
Get the mass of the heaviest subisotopologue.
double getAtomAverageMass() const
The average mass of a single atom.
double getMonoisotopicConfMass() const
Get the mass of the monoisotopic subisotopologue.
virtual ~Marginal()
Destructor.
double getLogSizeEstimate(double logEllipsoidRadius) const
Return estimated logarithm of size of the marginal at a given ellipsoid radius.
const double *const atom_lProbs
MarginalTrek(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
virtual ~PrecalculatedMarginal()
Destructor.
PrecalculatedMarginal(Marginal &&m, double lCutOff, bool sort=true, int tabSize=1000, int hashSize=1000)
The move constructor (disowns the Marginal).
double * getMLogProbs(const double *probs, int isoNo)
void writeInitialConfiguration(const int atomCnt, const int isotopeNo, const double *lprobs, int *res)
Find one of the most probable subisotopologues.