ProteoWizard
SAXParserTest.cpp
Go to the documentation of this file.
1//
2// $Id$
3//
4//
5// Original author: Darren Kessner <darren@proteowizard.org>
6//
7// Copyright 2007 Spielberg Family Center for Applied Proteomics
8// Cedars-Sinai Medical Center, Los Angeles, California 90048
9//
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13//
14// http://www.apache.org/licenses/LICENSE-2.0
15//
16// Unless required by applicable law or agreed to in writing, software
17// distributed under the License is distributed on an "AS IS" BASIS,
18// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19// See the License for the specific language governing permissions and
20// limitations under the License.
21//
22
23
25#include "SAXParser.hpp"
28#include <cstring>
29
30
31using namespace pwiz::util;
32using namespace pwiz::minimxml;
33using namespace pwiz::minimxml::SAXParser;
34
35
// Stream for verbose diagnostics. main() points it at cout when the first
// command-line argument is "-v"; every writer in this file checks it for null first.
36ostream* os_;
37
// XML fixture shared by demo() and the tests: a <RootElement> document followed
// by a second top-level element, <AnotherRoot>.
// The handlers in this file compare the stream offsets they are given against
// fixed numbers, so any change to this text also changes those expected offsets.
38// note: this tests single-quoted double quotes
39const char* sampleXML =
40 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
41 "<!DOCTYPE foo>\n"
42 "<RootElement param=\"value\">\n"
43 " <FirstElement escaped_attribute=\"&quot;&lt;&amp;lt;&gt;&quot;\">\n"
44 " Some Text with Entity References: &lt;&amp;&gt;\n"
45 " </FirstElement>\n"
46 " <SecondElement param2=\"something\" param3=\"something.else 1234-56\">\n"
47 " Pre-Text <Inline>Inlined text with <![CDATA[<&\">]]></Inline> Post-text. <br/>\n"
48 " </SecondElement>\n"
49 " <prefix:ThirdElement goober:name=\"value\">\n"
50 " <!--this is a comment-->\n"
51 " <empty_with_space />\n"
52 " </prefix:ThirdElement>\n"
53 " <FifthElement leeloo='>Leeloo > mul-\"tipass'>\n"
54 " You're a monster, Zorg.>I know.\n"
55 " </FifthElement>\n"
56 "</RootElement>\n"
57 "<AnotherRoot>The quick brown fox jumps over the lazy dog.</AnotherRoot>\n";
58
59
60//
61// demo of event handling
62//
63
64
// PrintAttribute: function object passed to for_each() by
// PrintEventHandler::startElement(). Each call appends one attribute to the
// stream given at construction, formatted as " (name,value)".
66{
67 PrintAttribute(ostream& os) : os_(os) {}
68 ostream& os_;
69
71 {
72 os_ << " (" << attr.getName() << "," << attr.getValue() << ")";
73 }
74};
75
76
// PrintEventHandler: writes one line per SAX event to the stream supplied at
// construction. Every line starts with the event's stream position in
// hexadecimal, followed by the event kind and its payload.
// All callbacks return Status::Ok.
78{
79 public:
80
81 PrintEventHandler(ostream& os)
82 : os_(os)
83 {}
84
  // Logs a processing instruction as "(name,value)".
85 virtual Status processingInstruction(const string& name,
86 const string& value,
87 stream_offset position)
88 {
89 os_ << "[0x" << hex << position << "] processingInstruction: (" << name << "," << value << ")\n";
90 return Status::Ok;
91 };
92
  // Logs the element name, then every attribute via PrintAttribute.
93 virtual Status startElement(const string& name,
94 const Attributes& attributes,
95 stream_offset position)
96 {
97 os_ << "[0x" << hex << position << "] startElement: " << name;
98 for_each(attributes.begin(), attributes.end(), PrintAttribute(os_));
99 os_ << endl;
100 return Status::Ok;
101 };
102
  // Logs the name of the element being closed.
103 virtual Status endElement(const string& name, stream_offset position)
104 {
105 os_ << "[0x" << hex << position << "] endElement: " << name << endl;
106 return Status::Ok;
107 }
108
  // Character-data callback: logs the text it was given.
110 {
111 os_ << "[0x" << hex << position << "] text: " << text << endl;
112 return Status::Ok;
113 }
114
115 private:
  // Destination for all log lines; not owned by this handler.
116 ostream& os_;
117};
118
119
// Prints the sample XML and the SAX events produced while parsing it.
// Does nothing unless verbose output is enabled (os_ is non-null).
// parse() is called twice on the same stream: the first call handles the
// <RootElement> document, the second continues with the <AnotherRoot> element
// that follows it.
120void demo()
121{
122 if (os_)
123 {
124 *os_ << "sampleXML:\n" << sampleXML << endl;
125
126 istringstream is(sampleXML);
127 PrintEventHandler handler(*os_);
128
129 *os_ << "first parse events:\n";
130 parse(is, handler);
131 *os_ << endl;
132
    // Same stream, not rewound: picks up after the first element.
133 *os_ << "second parse events:\n";
134 parse(is, handler);
135 *os_ << endl;
136 }
137}
138
139
140//
141// C++ model of the sample XML
142//
143
144
// Model of <FirstElement>; populated by FirstHandler.
145struct First
146{
  // Character data of the element, as stored by FirstHandler::characters().
148 string text;
149};
150
151
// Model of <SecondElement>; populated by SecondHandler.
152struct Second
153{
  // Values of the "param2" and "param3" attributes.
154 string param2;
155 string param3;
  // One entry per characters() callback received while SecondHandler is active.
156 vector<string> text;
157};
158
159
// Model of <FifthElement>; populated by FifthHandler.
160struct Fifth
161{
  // Value of the "leeloo" attribute.
162 string leeloo;
  // Character data of the element.
163 string mr_zorg;
164};
165
166
174
175
176//
177//
178// Handlers to connect XML to C++ model
179//
180
181
// Copies the value of the attribute called attributeName into result.
// If the attribute list has no such attribute, result is left unchanged.
182void readAttribute(const Handler::Attributes& attributes,
183 const string& attributeName,
184 string& result)
185{
186 Handler::Attributes::attribute_list::const_iterator it = attributes.find(attributeName);
187 if (it != attributes.end())
188 result = it->getValue();
189}
190
191
// Handler for <FirstElement>: fills a First object and checks the stream
// offsets reported for the element's text and end tag against sampleXML.
192class FirstHandler : public Handler
193{
194 public:
195
  // Constructor body: asks the parser for characters() callbacks and forwards
  // the two auto-unescape switches to the Handler base-class members.
197 : object_(first)
198 {
199 parseCharacters = true;
200 this->autoUnescapeAttributes = autoUnescapeAttributes;
201 this->autoUnescapeCharacters = autoUnescapeCharacters;
202 }
203
  // Reads the "escaped_attribute" attribute when the element is <FirstElement>.
204 virtual Status startElement(const string& name,
205 const Handler::Attributes& attributes,
206 stream_offset position)
207 {
208 if (name == "FirstElement")
209 readAttribute(attributes, "escaped_attribute", object_.escaped_attribute);
210 return Status::Ok;
211 }
212
  // Character-data callback: expects the text at offset 158 and stores it.
214 {
215 unit_assert_operator_equal(158, position);
216 object_.text = text.c_str();
217 return Status::Ok;
218 }
219
  // Expects the end tag at offset 210.
220 virtual Status endElement(const string& name, stream_offset position)
221 {
222 unit_assert_operator_equal(210, position);
223 return Status::Ok;
224 }
225
226 private:
228};
229
230
// Handler for <SecondElement>: stores its two attributes and collects every
// piece of character data delivered while this handler is active.
231class SecondHandler : public Handler
232{
233 public:
234
  // Constructor body: asks the parser for characters() callbacks and forwards
  // the two auto-unescape switches to the Handler base-class members.
236 : object_(object)
237 {
238 parseCharacters = true;
239 this->autoUnescapeAttributes = autoUnescapeAttributes;
240 this->autoUnescapeCharacters = autoUnescapeCharacters;
241 }
242
  // For <SecondElement>: reads "param2" and "param3", then exercises copying
  // of the attribute list.
243 virtual Status startElement(const string& name,
244 const Handler::Attributes& attributes,
245 stream_offset position)
246 {
247 if (name == "SecondElement")
248 {
249 readAttribute(attributes, "param2", object_.param2);
250 readAttribute(attributes, "param3", object_.param3);
251 // as long as we're here, verify copyability of Handler::Attributes
    // copy2 is read after copy1 has been deleted, so a copy must not depend
    // on the object it was made from.
252 Handler::Attributes *copy1 = new Handler::Attributes(attributes);
253 Handler::Attributes copy2(*copy1);
254 delete copy1;
255 std::string str;
256 readAttribute(copy2, "param2", str);
258 }
259
260 return Status::Ok;
261 }
262
  // Character-data callback: appends the text as a new entry.
264 {
265 object_.text.push_back(text.c_str());
266 return Status::Ok;
267 }
268
269 private:
271};
272
273
// Handler for <FifthElement>: stores the "leeloo" attribute and the element's
// text, and checks the stream offset reported for the end tag.
274class FifthHandler : public Handler
275{
276 public:
277
  // Constructor body: asks the parser for characters() callbacks and forwards
  // the two auto-unescape switches to the Handler base-class members.
279 : object_(object)
280 {
281 parseCharacters = true;
282 this->autoUnescapeAttributes = autoUnescapeAttributes;
283 this->autoUnescapeCharacters = autoUnescapeCharacters;
284 }
285
  // Unlike the other handlers, this one goes through getAttribute() rather than
  // the local readAttribute() helper.
286 virtual Status startElement(const string& name,
287 const Handler::Attributes& attributes,
288 stream_offset position)
289 {
290 if (name == "FifthElement")
291 {
292 getAttribute(attributes, "leeloo", object_.leeloo);
293 }
294
295 return Status::Ok;
296 }
297
  // Character-data callback: stores the text.
299 {
300 object_.mr_zorg = text.c_str();
301 return Status::Ok;
302 }
303
  // Expects the end tag at offset 625.
304 virtual Status endElement(const string& name, stream_offset position)
305 {
306 unit_assert_operator_equal(625, position);
307 return Status::Ok;
308 }
309
310 private:
312};
313
314
// Top-level handler for the sample document. It reads the root element's own
// attribute and hands the First/Second/Fifth child elements to their dedicated
// handlers by returning a Delegate status.
315class RootHandler : public Handler
316{
317 public:
318
329
  // Dispatches on the element name. Any element not listed here (for example
  // <prefix:ThirdElement>) is accepted with Status::Ok and otherwise ignored.
330 virtual Status startElement(const string& name,
331 const Attributes& attributes,
332 stream_offset position)
333 {
334 if (name == "RootElement")
335 {
336 readAttribute(attributes, "param", object_.param);
      // The root start tag is expected at offset 54 of sampleXML.
337 unit_assert_operator_equal(54, position);
338 }
339 else if (name == "FirstElement")
340 {
341 // delegate handling to a FirstHandler
342 unit_assert_operator_equal(86, position);
343 return Status(Status::Delegate, &firstHandler_);
344 }
345 else if (name == "SecondElement")
346 {
347 // delegate handling to a SecondHandler
348 return Status(Status::Delegate, &secondHandler_);
349 }
350 else if (name == "FifthElement")
351 {
352 // delegate handling to a FifthHandler
353 return Status(Status::Delegate, &fifthHandler_);
354 }
355
356 return Status::Ok;
357 }
358
359 private:
364};
365
366
// Parses sampleXML into a Root object through RootHandler, using the handler's
// default flags, and checks the resulting model: entity references arrive
// decoded, and the text inside <SecondElement> arrives as four separate pieces.
367void test()
368{
369 if (os_) *os_ << "test()\n";
370
371 istringstream is(sampleXML);
372 Root root;
373 RootHandler rootHandler(root);
374 parse(is, rootHandler);
375
  // Verbose mode only: dump the parsed model before asserting on it.
376 if (os_)
377 {
378 *os_ << "root.param: " << root.param << endl
379 << "first.escaped_attribute: " << root.first.escaped_attribute << endl
380 << "first.text: " << root.first.text << endl
381 << "second.param2: " << root.second.param2 << endl
382 << "second.param3: " << root.second.param3 << endl
383 << "second.text: ";
384 copy(root.second.text.begin(), root.second.text.end(), ostream_iterator<string>(*os_,"|"));
385 *os_ << "\nfifth.leeloo: " << root.fifth.leeloo << endl
386 << "fifth.mr_zorg: " << root.fifth.mr_zorg << endl
387 << "\n";
388 }
389
390 unit_assert_operator_equal("value", root.param);
392 unit_assert_operator_equal("Some Text with Entity References: <&>", root.first.text);
393 unit_assert_operator_equal("something", root.second.param2);
394 unit_assert_operator_equal("something.else 1234-56", root.second.param3);
  // Pieces: text before <Inline>, text inside it, the CDATA payload, text after it.
395 unit_assert_operator_equal(4, root.second.text.size());
396 unit_assert_operator_equal("Pre-Text", root.second.text[0]);
397 unit_assert_operator_equal("Inlined text with", root.second.text[1]);
398 unit_assert_operator_equal("<&\">", root.second.text[2]);
399 unit_assert_operator_equal("Post-text.", root.second.text[3]);
  // '>' and '"' inside a single-quoted attribute value must come through verbatim.
400 unit_assert_operator_equal(">Leeloo > mul-\"tipass", root.fifth.leeloo);
401 unit_assert_operator_equal("You're a monster, Zorg.>I know.", root.fifth.mr_zorg);
402}
403
404
// testNoAutoUnescape: same parse as test(), but RootHandler is constructed with
// both auto-unescape flags set to false. Entity references in attribute values
// and text are then expected to arrive exactly as written in sampleXML.
406{
407 if (os_) *os_ << "testNoAutoUnescape()\n";
408
409 istringstream is(sampleXML);
410 Root root;
411 RootHandler rootHandler(root, false, false);
412 parse(is, rootHandler);
413
  // Verbose mode only: dump the parsed model before asserting on it.
414 if (os_)
415 {
416 *os_ << "root.param: " << root.param << endl
417 << "first.escaped_attribute: " << root.first.escaped_attribute << endl
418 << "first.text: " << root.first.text << endl
419 << "second.param2: " << root.second.param2 << endl
420 << "second.param3: " << root.second.param3 << endl
421 << "second.text: ";
422 copy(root.second.text.begin(), root.second.text.end(), ostream_iterator<string>(*os_,"|"));
423 *os_ << "\n\n";
424 }
425
426 unit_assert_operator_equal("value", root.param);
  // Still escaped: no unescaping was requested.
427 unit_assert_operator_equal("&quot;&lt;&amp;lt;&gt;&quot;", root.first.escaped_attribute);
428 unit_assert_operator_equal("Some Text with Entity References: &lt;&amp;&gt;", root.first.text);
429 unit_assert_operator_equal("something", root.second.param2);
430 unit_assert_operator_equal("something.else 1234-56", root.second.param3);
431 unit_assert_operator_equal(4, root.second.text.size());
432 unit_assert_operator_equal("Pre-Text", root.second.text[0]);
433 unit_assert_operator_equal("Inlined text with", root.second.text[1]);
  // The CDATA payload contains no entity references, so it matches test().
434 unit_assert_operator_equal("<&\">", root.second.text[2]);
435 unit_assert_operator_equal("Post-text.", root.second.text[3]);
436}
437
438
// AnotherRootHandler: used by testDone(). Accepts every element until it sees
// the start of <AnotherRoot>, checks that tag's stream offset, and then returns
// Status::Done.
440{
441 public:
442
443 virtual Status startElement(const string& name,
444 const Attributes& attributes,
445 stream_offset position)
446 {
447 if (name == "AnotherRoot")
448 {
      // <AnotherRoot> is expected at offset 656 of sampleXML.
449 unit_assert_operator_equal(656, position);
450 return Status::Done;
451 }
452
453 return Status::Ok;
454 }
455};
456
457
// testDone: checks that a handler returning Status::Done stops the parser right
// after the start tag. The text of <AnotherRoot> must still be readable from
// the stream once parse() has returned.
459{
460 if (os_) *os_ << "testDone()\n";
461
462 istringstream is(sampleXML);
463 AnotherRootHandler handler;
464 parse(is, handler); // parses <RootElement> ... </RootElement>
465 parse(is, handler); // parses the <AnotherRoot> start tag and aborts
466
  // Read whatever is left on the stream up to the next '<', i.e. the element's text.
467 string buffer;
468 getline(is, buffer, '<');
469
470 if (os_) *os_ << "buffer: " << buffer << "\n\n";
471 unit_assert_operator_equal("The quick brown fox jumps over the lazy dog.", buffer);
472}
473
474
// testBadXML: feeds the parser a document whose end tags are in the wrong order
// and requires parse() to throw. Any exception derived from std::exception
// counts as success; returning normally is the failure case.
476{
477 if (os_) *os_ << "testBadXML()\n";
478
479 const char* bad = "<A><B></A></B>";
480 istringstream is(bad);
481 Handler handler;
482
483 try
484 {
485 parse(is, handler);
486 }
487 catch (exception& e)
488 {
489 if (os_) *os_ << e.what() << "\nOK: Parser caught bad XML.\n\n";
490 return;
491 }
492
  // Only reached when parse() accepted the malformed input.
493 throw runtime_error("Parser failed to catch bad XML.");
494}
495
496
// NestedHandler: counts endElement() callbacks; testNested() reads the total.
498{
  // Number of end tags seen so far.
499 int count;
501
502 virtual Status endElement(const string& name, stream_offset position)
503 {
504 count++;
505 return Status::Ok;
506 }
507};
508
509
// testNested: an element nested inside another element of the same name must
// produce two endElement() callbacks within a single parse() call.
511{
512 if (os_) *os_ << "testNested()\n";
513 const char* nested = "<a><a></a></a>";
514 istringstream is(nested);
515
516 NestedHandler nestedHandler;
517 parse(is, nestedHandler);
518 if (os_) *os_ << "count: " << nestedHandler.count << "\n\n";
519 unit_assert_operator_equal(2, nestedHandler.count);
520}
521
522
// testRootElement: checks root-element detection from a stream, from a file on
// disk and from in-memory strings, plus the error case for non-XML input.
524{
525 if (os_) *os_ << "testRootElement()\n";
526
527 string RootElement = "RootElement";
529
530 istringstream sampleXMLStream(sampleXML);
531 unit_assert_operator_equal(RootElement, xml_root_element(sampleXMLStream));
532
  // The braces close the output file before it is read back.
533 {ofstream sampleXMLFile("testRootElement.xml"); sampleXMLFile << sampleXML;}
534 unit_assert_operator_equal(RootElement, xml_root_element_from_file("testRootElement.xml"));
  // NOTE(review): if the assertion above throws, this cleanup is skipped and the
  // temporary file stays in the working directory - confirm that is acceptable.
535 bfs::remove("testRootElement.xml");
536
  // Neither input has a closing tag; the second start tag is not even terminated.
537 unit_assert_operator_equal(RootElement, xml_root_element("<?xml?><RootElement>"));
538 unit_assert_operator_equal(RootElement, xml_root_element("<?xml?><RootElement name='value'"));
539
540 unit_assert_throws(xml_root_element("not-xml"), runtime_error);
541}
542
543
// testDecoding: checks decoding of "_xHHHH_" hexadecimal escapes in XML ids,
// using both decode_xml_id() and decode_xml_id_copy().
545{
546 string id1("_x0031_invalid_x0020_ID");
  // decode_xml_id() modifies id1 itself and returns a reference to it.
548 unit_assert_operator_equal((void *)&id1, (void *)&decode_xml_id(id1)); // should return reference to id1
549 unit_assert_operator_equal("1invalid ID", id1);
550
  // Underscores that are not part of an escape sequence must be left alone.
  // The copying variant is called first, while id2 is still encoded.
551 string id2("_invalid-ID__x0023_2__x003c_3_x003e_");
552 unit_assert_operator_equal("_invalid-ID_#2_<3>", decode_xml_id_copy(id2));
553 unit_assert_operator_equal("_invalid-ID_#2_<3>", decode_xml_id(id2));
554
555 string crazyId("_x0021__x0021__x0021_");
557}
558
// testSaxParserString: exercises saxstring - construction from std::string,
// trimming of leading and trailing whitespace, element access and assignment,
// data()/c_str(), copy construction and the default-constructed (empty) state.
560{
  // Three whitespace characters before "foo" and two after it.
561 std::string str = " \t foo \n";
562 saxstring xstr = str;
565 unit_assert_operator_equal(str.length(),xstr.length());
566 xstr.trim_lead_ws();
567 unit_assert_operator_equal(xstr.length(),str.length()-3);
568 unit_assert_operator_equal(xstr,str.substr(3));
569 xstr.trim_trail_ws();
570 unit_assert_operator_equal(xstr.length(),str.length()-5);
571 unit_assert_operator_equal(xstr,str.substr(3,3));
  // operator[] must be writable as well as readable.
572 unit_assert_operator_equal(xstr[1],'o');
573 xstr[1] = '0';
574 unit_assert_operator_equal(xstr[1],'0');
575 std::string str2(xstr.data());
576 unit_assert_operator_equal(str2,"f0o");
577 std::string str3(xstr.c_str());
579 saxstring xstr2(xstr);
580 unit_assert_operator_equal(xstr2,xstr);
  // c_str() on a default-constructed saxstring must compare equal to an empty string.
581 saxstring xstr3;
582 unit_assert_operator_equal(xstr3.c_str(),std::string());
583}
584
// Test driver. Passing "-v" as the first argument turns on verbose output by
// pointing os_ at cout. Any exception that escapes a test is reported through
// TEST_FAILED.
585int main(int argc, char* argv[])
586{
587 TEST_PROLOG(argc, argv)
588
589 try
590 {
591 if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
    // demo() only produces output in verbose mode.
592 demo();
594 test();
596 testDone();
597 testBadXML();
598 testNested();
600 testDecoding();
601 }
602 catch (exception& e)
603 {
604 TEST_FAILED(e.what())
605 }
606 catch (...)
607 {
608 TEST_FAILED("Caught unknown exception.")
609 }
610
612}
613
void testRootElement()
int main(int argc, char *argv[])
void testSaxParserString()
void testBadXML()
const char * sampleXML
void testDecoding()
void testDone()
void testNested()
void testNoAutoUnescape()
void demo()
ostream * os_
void readAttribute(const Handler::Attributes &attributes, const string &attributeName, string &result)
void test()
virtual Status startElement(const string &name, const Attributes &attributes, stream_offset position)
virtual Status startElement(const string &name, const Handler::Attributes &attributes, stream_offset position)
virtual Status endElement(const string &name, stream_offset position)
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
FifthHandler(Fifth &object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
virtual Status startElement(const string &name, const Handler::Attributes &attributes, stream_offset position)
FirstHandler(First &first, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
virtual Status endElement(const string &name, stream_offset position)
virtual Status endElement(const string &name, stream_offset position)
PrintEventHandler(ostream &os)
virtual Status startElement(const string &name, const Attributes &attributes, stream_offset position)
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
virtual Status processingInstruction(const string &name, const string &value, stream_offset position)
FifthHandler fifthHandler_
virtual Status startElement(const string &name, const Attributes &attributes, stream_offset position)
FirstHandler firstHandler_
RootHandler(Root &root, bool autoUnescapeAttributes=true, bool autoUnescapeCharacters=true)
SecondHandler secondHandler_
virtual Status startElement(const string &name, const Handler::Attributes &attributes, stream_offset position)
SecondHandler(Second &object, bool autoUnescapeAttributes, bool autoUnescapeCharacters)
virtual Status characters(const SAXParser::saxstring &text, stream_offset position)
std::string getValue(XMLUnescapeBehavior_t Unescape=XMLUnescapeDefault) const
attribute_list::const_iterator begin() const
attribute_list::const_iterator end() const
attribute_list::const_iterator find(const std::string &name) const
SAX event handler interface.
bool parseCharacters
When false, no calls to characters() will be made.
bool autoUnescapeAttributes
Setting these to false will disable the auto-unescaping feature of the parser; this is useful for han...
T & getAttribute(const Attributes &attributes, const char *name, T &result, XMLUnescapeBehavior_t Unescape, T defaultValue=T()) const
boost::iostreams::stream_offset stream_offset
An extended SAX interface for custom XML stream parsing.
Definition SAXParser.hpp:54
PWIZ_API_DECL void parse(std::istream &is, Handler &handler)
Extract a single XML element from the istream, sending SAX events to the handler.
PWIZ_API_DECL std::string decode_xml_id_copy(const std::string &str)
Decodes any characters encoded with their hexadecimal value, e.g.
PWIZ_API_DECL std::string xml_root_element(const std::string &fileheader)
Returns the root element from an XML buffer; throws runtime_error if no element is found.
PWIZ_API_DECL std::string xml_root_element_from_file(const std::string &filepath)
Returns the root element from an XML file; throws runtime_error if no element is found.
PWIZ_API_DECL std::string & decode_xml_id(std::string &str)
Decodes any characters encoded with their hexadecimal value, e.g.
string mr_zorg
string leeloo
string text
string escaped_attribute
virtual Status endElement(const string &name, stream_offset position)
void operator()(const Handler::Attributes::attribute &attr)
PrintAttribute(ostream &os)
First first
Second second
Fifth fifth
string param
string param3
vector< string > text
string param2
Handler returns the Status struct as a means of changing the parser's behavior.
#define unit_assert(x)
Definition unit.hpp:85
#define unit_assert_throws(x, exception)
Definition unit.hpp:106
#define TEST_EPILOG
Definition unit.hpp:183
#define TEST_FAILED(x)
Definition unit.hpp:177
#define unit_assert_operator_equal(expected, actual)
Definition unit.hpp:92
#define TEST_PROLOG(argc, argv)
Definition unit.hpp:175