ProteoWizard
Digestion.hpp
Go to the documentation of this file.
1//
2// $Id$
3//
4//
5// Original author: Matt Chambers <matt.chambers .@. vanderbilt.edu>
6//
7// Copyright 2006 Louis Warschaw Prostate Cancer Center
8// Cedars Sinai Medical Center, Los Angeles, California 90048
9// Copyright 2008 Vanderbilt University - Nashville, TN 37232
10//
11// Licensed under the Apache License, Version 2.0 (the "License");
12// you may not use this file except in compliance with the License.
13// You may obtain a copy of the License at
14//
15// http://www.apache.org/licenses/LICENSE-2.0
16//
17// Unless required by applicable law or agreed to in writing, software
18// distributed under the License is distributed on an "AS IS" BASIS,
19// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20// See the License for the specific language governing permissions and
21// limitations under the License.
22//
23
24
25#ifndef _DIGESTION_HPP_
26#define _DIGESTION_HPP_
27
28
32#include "Peptide.hpp"
33#include "boost/shared_ptr.hpp"
34#include <string>
35#include <limits>
36#include <set>
37
38
39namespace pwiz {
40namespace proteome {
41
42
// NOTE(review): this using-directive sits in a header, inside pwiz::proteome, so every
// file that includes this header gets the pwiz::cv names visible in that namespace;
// unqualified names used below (e.g. CVID) presumably rely on it - confirm before removing
43using namespace pwiz::cv;
44
45
46/// peptide subclass that contains extra metadata provided by digestion:
/// the offset within the source polypeptide, the missed cleavage count,
/// per-terminus specificity, and the flanking prefix/suffix strings
48{
49 public:
50
 /// constructs from a sequence string only;
 /// NOTE(review): the digestion metadata these two overloads start with
 /// is not visible in this header - confirm against the implementation
51 DigestedPeptide(const std::string& sequence);
52 DigestedPeptide(const char* sequence);
53
 /// constructs from a range of sequence characters plus explicit digestion metadata;
 /// begin, end: iterators delimiting the peptide sequence (presumably [begin, end) - confirm)
 /// offset, missedCleavages, NTerminusIsSpecific, CTerminusIsSpecific:
 ///   presumably the values later reported by the accessors of the same names
 /// NTerminusPrefix, CTerminusSuffix: flanking strings; both default to ""
54 DigestedPeptide(std::string::const_iterator begin,
55 std::string::const_iterator end,
56 size_t offset,
57 size_t missedCleavages,
58 bool NTerminusIsSpecific,
59 bool CTerminusIsSpecific,
60 std::string NTerminusPrefix = "",
61 std::string CTerminusSuffix = "");
62
 /// constructs from an existing Peptide plus explicit digestion metadata
 /// (same metadata parameters and defaults as the iterator-range overload)
63 DigestedPeptide(const Peptide& peptide,
64 size_t offset,
65 size_t missedCleavages,
66 bool NTerminusIsSpecific,
67 bool CTerminusIsSpecific,
68 std::string NTerminusPrefix = "",
69 std::string CTerminusSuffix = "");
70
74
75 /// returns the zero-based offset of the N terminus of the peptide
76 /// in the polypeptide from which it was digested
77 size_t offset() const;
78
79 /// returns the number of missed cleavage sites in the peptide
80 size_t missedCleavages() const;
81
82 /// returns the number of termini that matched the digestion rules
83 size_t specificTermini() const;
84
85 /// returns true iff the N terminus matched the digestion rules
86 bool NTerminusIsSpecific() const;
87
88 /// returns true iff the C terminus matched the digestion rules
89 bool CTerminusIsSpecific() const;
90
91 /// returns the residue preceding the digestion site (as a string, by value)
92 std::string NTerminusPrefix() const;
93
94 /// returns the residue following the digestion site (as a string, by value)
95 std::string CTerminusSuffix() const;
96
97 /// returns true iff peptide sequences, masses, and all digestion metadata are equal
98 bool operator==(const DigestedPeptide& rhs) const;
99
100 private:
 // digestion metadata storage (member names mirror the accessors above)
101 size_t offset_;
105 std::string NTerminusPrefix_;
106 std::string CTerminusSuffix_;
107};
108
109
110/// enumerates the peptides from proteolytic digestion of a polypeptide or protein;
112{
113 public:
114
115 /// sets the number of peptide termini that must match a digestion motif
116 /// note: castable to int; i.e. non=0, semi=1, fully=2
117 enum PWIZ_API_DECL Specificity
118 {
119 NonSpecific = 0, ///< neither terminus is required to match the digestion motif(s)
120 SemiSpecific = 1, ///< at least one terminus must match the digestion motif(s)
121 FullySpecific = 2 ///< both termini must match the digestion motif(s)
122 };
123
124 /// sets constraints for valid peptides produced by iterating the digestion
126 {
128
 // mass-based constraints are disabled: both the members and the matching
 // constructor parameters are commented out
129 //double minimumMass;
130 //double maximumMass;
131
134
136
138
 /// every constraint has a default, so Config() is a valid configuration:
 /// maximumMissedCleavages = 100000, minimumLength = 0, maximumLength = 100000,
 /// minimumSpecificity = FullySpecific, clipNTerminalMethionine = true
139 Config(int maximumMissedCleavages = 100000,
140 //double minimumMass = 0,
141 //double maximumMass = 100000,
142 int minimumLength = 0,
143 int maximumLength = 100000,
144 Specificity minimumSpecificity = FullySpecific,
145 bool clipNTerminalMethionine = true);
146 };
147
148 /// returns the set of predefined cleavage agents (as CVIDs) defined in the PSI-MS CV
149 static const std::set<CVID>& getCleavageAgents();
150
151 /// returns the names of the predefined cleavage agents defined in the PSI-MS CV
152 static const std::vector<std::string>& getCleavageAgentNames();
153
154 /// returns the cvid of the specified cleavage agent using a case-insensitive search,
155 /// or CVID_Unknown if the agent is not found
156 static CVID getCleavageAgentByName(const std::string& agentName);
157
158 /// returns the cvid of the cleavage agent whose Perl regular expression is agentRegex,
159 /// or CVID_Unknown if the agent is not found (the regex pattern must match exactly)
160 static CVID getCleavageAgentByRegex(const std::string& agentRegex);
161
162 /// returns the official PSI Perl regular expression defining the places in a
163 /// polypeptide or protein that the agent will cut.
 /// NOTE(review): the behavior for a CVID that is not a cleavage agent is not
 /// documented here - confirm against the implementation
164 static const std::string& getCleavageAgentRegex(CVID agentCvid);
165
166 /// returns a modified version of a cleavage agent regex where any ambiguous AA symbols (BJXZ)
167 /// are augmented with their unambiguous counterparts (e.g. B -> [BND]);
 /// the result is returned by value and the input string is taken by const reference
168 static std::string disambiguateCleavageAgentRegex(const std::string& cleavageAgentRegex);
169
170 /// specifies digestion occurs by a commonly used cleavage agent
 /// polypeptide: the polypeptide to digest
 /// cleavageAgent: CVID of the agent
 /// config: constraints on the peptides produced (defaults to a default-constructed Config)
171 Digestion(const Peptide& polypeptide,
172 CVID cleavageAgent,
173 const Config& config = Config());
174
175 /// specifies digestion occurs by a combination of commonly used cleavage agents
 /// cleavageAgents: CVIDs of the agents to combine
 /// config: constraints on the peptides produced (defaults to a default-constructed Config)
176 Digestion(const Peptide& polypeptide,
177 const std::vector<CVID>& cleavageAgents,
178 const Config& config = Config());
179
180 /// specifies digestion occurs by a user-specified, zero-width Perl regular expression
181 /// example: "(?<=K)" means "cleaves after K"
182 /// example: "((?<=D))|((?=D))" means "cleaves before or after D"
183 /// example: "(?=[DE])" means "cleaves before D or E"
184 /// example: "(?<=[FYWLKR])(?!P)" means "cleaves after any single residue from FYWLKR except when it is followed by P"
 /// config: constraints on the peptides produced (defaults to a default-constructed Config)
185 Digestion(const Peptide& polypeptide,
186 const std::string& cleavageAgentRegex,
187 const Config& config = Config());
188
189 /// specifies digestion occurs by a combination of user-specified, zero-width Perl regular expressions
190 /// example: "(?<=K)" means "cleaves after K"
191 /// example: "((?<=D))|((?=D))" means "cleaves before or after D"
192 /// example: "(?=[DE])" means "cleaves before D or E"
193 /// example: "(?<=[FYWLKR])(?!P)" means "cleaves after any single residue from FYWLKR except when it is followed by P"
 /// config: constraints on the peptides produced (defaults to a default-constructed Config)
194 Digestion(const Peptide& polypeptide,
195 const std::vector<std::string>& cleavageAgentRegexes,
196 const Config& config = Config());
197
198 /// returns all instances of the given peptide in the polypeptide under digestion;
199 /// note: the filters set in Digestion::Config are respected!
 /// NOTE(review): presumably returns an empty vector when there is no instance - confirm
200 std::vector<DigestedPeptide> find_all(const Peptide& peptide) const;
201
202 /// returns the first instance of the given peptide in the polypeptide under digestion;
203 /// if offsetHint is provided, the search will begin at that offset (it defaults to 0);
204 /// throws runtime_error if no instance of the peptide is found;
205 /// note: the filters set in Digestion::Config are respected!
206 DigestedPeptide find_first(const Peptide& peptide, size_t offsetHint = 0) const;
207
208
210
211
212 private:
213 class Impl; // forward-declared for const_iterator
214
215 public:
216
217 /// provides forward-only, read-only iteration to enumerate peptides
 /// (iterator_category below is std::forward_iterator_tag)
219 {
220 public:
223
 /// comparison against another const_iterator
228 bool operator!=(const const_iterator& that) const;
229 bool operator==(const const_iterator& that) const;
230
 // iterator trait typedefs
231 typedef std::forward_iterator_tag iterator_category;
 // NOTE(review): size_t is unsigned, whereas the standard iterator requirements
 // describe difference_type as a signed integer type - confirm this is intentional
233 typedef size_t difference_type;
236
237 private:
 /// private: only the friends declared below (Digestion and Digestion::Impl)
 /// can construct an iterator directly from a Digestion
239 const_iterator(const Digestion& digestion);
240
241 friend class Digestion;
242 friend class Digestion::Impl;
243
 // implementation details live in const_iterator::Impl, which is only
 // declared here and is held through a boost::shared_ptr
244 class Impl;
245 boost::shared_ptr<Impl> impl_;
246 };
247
249
252
253 private:
 // the iterator and its Impl are friends, giving them access to Digestion's private members
254 friend class const_iterator;
255 friend class const_iterator::Impl;
 // handle to Digestion::Impl, which is forward-declared earlier in the class
256 boost::shared_ptr<Impl> impl_;
257};
258
259
260} // namespace proteome
261} // namespace pwiz
262
263
264#endif // _DIGESTION_HPP_
NonSpecific
SemiSpecific
neither termini must match digestion motif(s)
#define PWIZ_API_DECL
Definition Export.hpp:32
peptide subclass that contains extra metadata provided by digestion
Definition Digestion.hpp:48
DigestedPeptide(const std::string &sequence)
DigestedPeptide & operator=(const DigestedPeptide &)
bool operator==(const DigestedPeptide &rhs) const
returns true iff peptide sequences, masses, and all digestion metadata are equal
bool CTerminusIsSpecific() const
returns true iff the C terminus matched the digestion rules
size_t offset() const
returns the zero-based offset of the N terminus of the peptide in the polypeptide from which it was d...
bool NTerminusIsSpecific() const
returns true iff the N terminus matched the digestion rules
DigestedPeptide(const char *sequence)
std::string CTerminusSuffix() const
returns residue following digestion site
DigestedPeptide(const Peptide &peptide, size_t offset, size_t missedCleavages, bool NTerminusIsSpecific, bool CTerminusIsSpecific, std::string NTerminusPrefix="", std::string CTerminusSuffix="")
size_t missedCleavages() const
returns the number of missed cleavage sites in the peptide
DigestedPeptide(std::string::const_iterator begin, std::string::const_iterator end, size_t offset, size_t missedCleavages, bool NTerminusIsSpecific, bool CTerminusIsSpecific, std::string NTerminusPrefix="", std::string CTerminusSuffix="")
DigestedPeptide(const DigestedPeptide &)
std::string NTerminusPrefix() const
returns residue preceding digestion site
size_t specificTermini() const
returns the number of termini that matched to the digestion rules
provides forward-only, read-only iteration to enumerate peptides
const_iterator(const const_iterator &rhs)
bool operator!=(const const_iterator &that) const
const DigestedPeptide & operator*() const
const DigestedPeptide * operator->() const
const_iterator(const Digestion &digestion)
std::forward_iterator_tag iterator_category
bool operator==(const const_iterator &that) const
enumerates the peptides from proteolytic digestion of a polypeptide or protein;
static CVID getCleavageAgentByRegex(const std::string &agentRegex)
returns the cvid of the specified cleavage agent looking it up by the Perl regular expression,...
Digestion(const Peptide &polypeptide, CVID cleavageAgent, const Config &config=Config())
specifies digestion occurs by a commonly used cleavage agent
static CVID getCleavageAgentByName(const std::string &agentName)
returns the cvid of the specified cleavage agent using a case-insensitive search, or CVID_Unknown if ...
Digestion(const Peptide &polypeptide, const std::vector< std::string > &cleavageAgentRegexes, const Config &config=Config())
specifies digestion occurs by a combination of user-specified, zero-width Perl regular expressions ex...
std::vector< DigestedPeptide > find_all(const Peptide &peptide) const
returns all instances of the given peptide in the polypeptide under digestion; note: the filters set ...
boost::shared_ptr< Impl > impl_
const_iterator begin() const
DigestedPeptide find_first(const Peptide &peptide, size_t offsetHint=0) const
returns the first instance of the given peptide in the polypeptide under digestion; if offsetHint is ...
static const std::set< CVID > & getCleavageAgents()
returns the set of predefined cleavage agents defined in the PSI-MS CV
static const std::vector< std::string > & getCleavageAgentNames()
returns the names of the set of predefined cleavage agents defined in the PSI-MS CV
Digestion(const Peptide &polypeptide, const std::vector< CVID > &cleavageAgents, const Config &config=Config())
specifies digestion occurs by a combination of commonly used cleavage agents
const_iterator end() const
static std::string disambiguateCleavageAgentRegex(const std::string &cleavageAgentRegex)
returns a modified version of a cleavage agent regex where any ambiguous AA symbols (BJXZ) are augmen...
Digestion(const Peptide &polypeptide, const std::string &cleavageAgentRegex, const Config &config=Config())
specifies digestion occurs by a user-specified, zero-width Perl regular expression example: "(?...
static const std::string & getCleavageAgentRegex(CVID agentCvid)
returns the official PSI Perl regular expression defining the places in a polypeptide or protein that...
represents a peptide or polypeptide (a sequence of amino acids)
Definition Peptide.hpp:62
sets constraints for valid peptides produced by iterating the digestion
Config(int maximumMissedCleavages=100000, int minimumLength=0, int maximumLength=100000, Specificity minimumSpecificity=FullySpecific, bool clipNTerminalMethionine=true)