libxcks  0.1.0.1
xmlparser.cpp
Go to the documentation of this file.
1 /*
2  * libxcks
3  * Copyright (C) 2022 Julien Couot
4  *
5  * This program is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or (at your
8  * option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13  * License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this program. If not, see <https://www.gnu.org/licenses/>.
17  */
18 
24 //---------------------------------------------------------------------------
25 #include <istream>
26 #include <fstream>
27 #include <string>
28 #include <cstring>
29 #include <cassert>
30 
31 #include <expat.h>
32 #include <utf8cpp/utf8.h>
33 #if __has_include(<utf8cpp/cpp17.h>)
34  #include <utf8cpp/cpp17.h>
35  #define LIBXCKS_UTF8CPP_HAS_STRING_VIEW
36 #endif
37 
38 #include "xmlparser.hpp"
39 //---------------------------------------------------------------------------
40 
41 
43 using namespace std;
44 //---------------------------------------------------------------------------
45 
46 
47 namespace libxcks
48 {
49 //###########################################################################
50 // An exception class for the XML parser.
51 //###########################################################################
52 
56 XMLParserException::XMLParserException() noexcept
57 {
58 }
59 //---------------------------------------------------------------------------
60 
61 
67 XMLParserException::XMLParserException(const XMLParserException& source) noexcept
68 {
69  if (this != &source)
70  {
71  this->message = source.message;
72  }
73 }
74 //---------------------------------------------------------------------------
75 
76 
83 XMLParserException& XMLParserException::operator=(const XMLParserException& source) noexcept
84 {
85  if (this != &source)
86  {
87  this->message = source.message;
88  }
89  return *this;
90 }
91 //---------------------------------------------------------------------------
92 
93 
99 XMLParserException::XMLParserException(const std::string& msg) noexcept
100 {
101  setMessage(msg);
102 }
103 //---------------------------------------------------------------------------
104 
105 
109 XMLParserException::~XMLParserException()
110 {
111 }
112 //---------------------------------------------------------------------------
113 
114 
122 string XMLParserException::what() const noexcept
123 {
124  return getMessage();
125 }
126 //---------------------------------------------------------------------------
127 
128 
136 string XMLParserException::getMessage() const noexcept
137 {
138  return message;
139 }
140 //---------------------------------------------------------------------------
141 
142 
148 void XMLParserException::setMessage(const string& msg) noexcept
149 {
150  message = msg;
151 }
152 //---------------------------------------------------------------------------
153 
154 
155 
156 //###########################################################################
157 // Manages elements' attributes.
158 //###########################################################################
168 void XMLParserAttributes::addAttribute(const std::string& name, const std::string& value)
169 {
170  addAttribute(XMLParserAttribute{name, value});
171 }
172 //---------------------------------------------------------------------------
173 
174 
183 void XMLParserAttributes::addAttribute(const XMLParserAttribute&& attribute)
184 {
185  attributes.emplace_back(attribute);
186 }
187 //---------------------------------------------------------------------------
188 
189 
193 void XMLParserAttributes::clear()
194 {
195  attributes.clear();
196  attributes.shrink_to_fit();
197 }
198 //---------------------------------------------------------------------------
199 
200 
207 int XMLParserAttributes::getIndex(const string& name) const
208 {
209  bool notFound = true;
210  const int s = attributes.size();
211  int i = 0;
212  while (notFound && i < s)
213  {
214  if (name == attributes[i].first)
215  notFound = false;
216  else
217  i++;
218  }
219 
220  return notFound ? Not_Found : i;
221 }
222 //---------------------------------------------------------------------------
223 
224 
230 size_t XMLParserAttributes::getCount() const
231 {
232  return attributes.size();
233 }
234 //---------------------------------------------------------------------------
235 
236 
245 bool XMLParserAttributes::getName(size_t index, string& name) const
246 {
247  if (index < 0 || index >= getCount())
248  return false;
249 
250  name = attributes[index].first;
251  return true;
252 }
253 //---------------------------------------------------------------------------
254 
255 
264 bool XMLParserAttributes::getValue(size_t index, std::string& value) const
265 {
266  if (index < 0 || index >= getCount())
267  return false;
268 
269  value = attributes[index].second;
270  return true;
271 }
272 //---------------------------------------------------------------------------
273 
274 
284 bool XMLParserAttributes::getValue(const std::string& name, std::string& value) const
285 {
286  int idx = getIndex(name);
287  if (idx != Not_Found)
288  {
289  value = attributes[idx].second;
290  return true;
291  }
292 
293  return false;
294 }
295 //---------------------------------------------------------------------------
296 
297 
298 
299 //###########################################################################
300 // XMLParser's implementation.
301 //###########################################################################
302 
303 
/// Size, in bytes, of the buffer used to read the input stream chunk by chunk.
static constexpr std::size_t DEF_BUFF_SIZE{4096};
306 
307 
314 inline static string convXMLCharStrtoUTF8Charset(const XML_Char* xlmStr)
315 {
316  #if defined(XML_UNICODE) // XML_Char* is UTF-16 encoded.
317  assert(sizeof(XML_Char) == 2);
318  #if defined(LIBXCKS_UTF8CPP_HAS_STRING_VIEW)
319  return utf8::utf16to8(u16string_view(reinterpret_cast<const char16_t*>(xlmStr)));
320  #else
321  return utf8::utf16to8(u16string(reinterpret_cast<const char16_t*>(xlmStr)));
322  #endif
323  #else // XML_Char* is UTF-8 encoded.
324  return string(xlmStr);
325  #endif
326 }
327 //---------------------------------------------------------------------------
328 
329 
333 XMLParser::XMLParser()
334 {
335  parser = nullptr;
336 }
337 //---------------------------------------------------------------------------
338 
339 
343 XMLParser::~XMLParser()
344 {
345  if (parser != nullptr)
346  XML_ParserFree(parser);
347 }
348 //---------------------------------------------------------------------------
349 
350 
366 bool XMLParser::initParser()
367 {
368  if (parser != nullptr)
369  XML_ParserFree(parser);
370 
371  setDepth(0);
372  parser = XML_ParserCreate(nullptr);
373  if (parser != nullptr)
374  {
375  // Register expat handlers.
376  XML_SetUserData(parser, this);
377  XML_SetElementHandler(parser, XMLParser::startElementHandler, XMLParser::endElementHandler);
378  XML_SetCharacterDataHandler(parser, XMLParser::characterDataHandler);
379  }
380 
381  return (parser != nullptr);
382 }
383 //---------------------------------------------------------------------------
384 
385 
394 void XMLParser::startElement(const string& name, const XMLParserAttributes& atts)
395 {
396 }
397 //---------------------------------------------------------------------------
398 
399 
407 void XMLParser::endElement(const string& name)
408 {
409 }
410 //---------------------------------------------------------------------------
411 
412 
420 void XMLParser::characters(const string& chars)
421 {
422 }
423 //---------------------------------------------------------------------------
424 
425 
436 void XMLParser::fatalError(XML_Error errorCode, const string& errorMessage, int line, int column) noexcept
437 {
438 }
439 //---------------------------------------------------------------------------
440 
441 
442 // Static methods for binding expat handlers
452 void XMLCALL XMLParser::startElementHandler(void* userData, const XML_Char* name, const XML_Char** atts)
453 {
454  // userData can't (shouldn't) be nullptr
455  XMLParser* p = reinterpret_cast<XMLParser*>(userData);
457 
458  // Get the elements' attributes
459  for (int i = 0; atts[i] != nullptr; i += 2)
460  a.addAttribute(XMLParserAttribute{convXMLCharStrtoUTF8Charset(atts[i]), convXMLCharStrtoUTF8Charset(atts[i + 1])});
461 
462  // Call start element callback.
463  p->startElement(convXMLCharStrtoUTF8Charset(name), a);
464 
465  // Change the current depth
466  p->setDepth(p->getDepth() + 1);
467 }
468 //---------------------------------------------------------------------------
469 
470 
475 void XMLCALL XMLParser::endElementHandler(void* userData, const XML_Char* name)
476 {
477  // userData can't (shouldn't) be nullptr
478  XMLParser* p = reinterpret_cast<XMLParser*>(userData);
479 
480  // Change the current depth
481  p->setDepth(p->getDepth() - 1);
482 
483  // Call start element callback.
484  p->endElement(convXMLCharStrtoUTF8Charset(name));
485 }
486 //---------------------------------------------------------------------------
487 
488 
493 void XMLCALL XMLParser::characterDataHandler(void* userData, const XML_Char* s, int len)
494 {
495  // userData can't (shouldn't) be nullptr
496  XMLParser* p = reinterpret_cast<XMLParser*>(userData);
497 
498  // Creates a temporary buffer
499  XML_Char* str = new XML_Char[len + 1];
500  memcpy(str, s, sizeof(XML_Char) * len);
501  #if defined(XML_UNICODE)
502  str[len] = L'\0';
503  #else
504  str[len] = '\0';
505  #endif
506  string chData = convXMLCharStrtoUTF8Charset(str);
507  delete[] str;
508  p->characters(chData);
509 }
510 //---------------------------------------------------------------------------
511 
512 
518 void XMLParser::setDepth(int depth)
519 {
520  xmlDepth = depth;
521 }
522 //---------------------------------------------------------------------------
523 
524 
530 int XMLParser::getDepth() const
531 {
532  return xmlDepth;
533 }
534 //---------------------------------------------------------------------------
535 
536 
544 bool XMLParser::parse(const std::string& filename)
545 {
546  ifstream is(filename, ios::in | ios::binary);
547  return parse(is); // parse(istream&) calls initParser()
548 }
549 //---------------------------------------------------------------------------
550 
551 
561 bool XMLParser::parse(std::istream& is)
562 {
563  if (!is.good())
564  return false;
565 
566  if (!initParser())
567  return false;
568 
569  // Initializes the buffer.
570  char buff[DEF_BUFF_SIZE];
571 
572  // Reads data from the input stream.
573  size_t read;
574  ios::iostate errorState = ios::goodbit;
575  XML_Status lastXMLError = XML_STATUS_OK;
576  bool parserExceptionLaunched = false;
577  string exceptionMessage;
578  try
579  {
580  while (!is.eof() && (errorState & ios::goodbit) == ios::goodbit && lastXMLError == XML_STATUS_OK)
581  {
582  read = is.read(buff, sizeof(char) * DEF_BUFF_SIZE).gcount();
583  if (read > 0 && read <= sizeof(char) * DEF_BUFF_SIZE)
584  lastXMLError = XML_Parse(parser, buff, static_cast<int>(read), static_cast<int>(XML_FALSE));
585  errorState = is.rdstate();
586  }
587  }
588  catch (const XMLParserException& e)
589  {
590  exceptionMessage = e.what();
591  parserExceptionLaunched = true;
592  }
593 
594  // Finishing parsing.
595  if (parserExceptionLaunched || ((errorState & ios::goodbit) != ios::goodbit && (errorState & ios::eofbit) != ios::eofbit))
596  XML_StopParser(parser, XML_FALSE);
597 
598  if (lastXMLError == XML_STATUS_OK)
599  lastXMLError = XML_Parse(parser, buff, 0, static_cast<int>(XML_TRUE));
600 
601  if (lastXMLError != XML_STATUS_OK)
602  {
603  XML_Error err = XML_GetErrorCode(parser);
604  if (exceptionMessage.empty())
605  fatalError(err, convXMLCharStrtoUTF8Charset(XML_ErrorString(err)), XML_GetCurrentLineNumber(parser), XML_GetCurrentColumnNumber(parser));
606  else
607  fatalError(err, exceptionMessage, XML_GetCurrentLineNumber(parser), XML_GetCurrentColumnNumber(parser));
608  }
609 
610  return ((!parserExceptionLaunched) && (lastXMLError == XML_STATUS_OK));
611 }
612 //---------------------------------------------------------------------------
613 } // namespace libxcks
614 //---------------------------------------------------------------------------
Manages elements' attributes.
Definition: xmlparser.hpp:84
void addAttribute(const std::string &name, const std::string &value)
Adds an attribute to the end of the list.
Definition: xmlparser.cpp:168
An exception class for the XML parser.
Definition: xmlparser.hpp:42
virtual std::string what() const noexcept
Returns the explanatory string.
Definition: xmlparser.cpp:122
std::string message
The message of the exception.
Definition: xmlparser.hpp:44
A very simple XML parser.
Definition: xmlparser.hpp:138
virtual void startElement(const std::string &name, const XMLParserAttributes &atts) noexcept(false)
Receives notification of the beginning of an element.
Definition: xmlparser.cpp:394
int getDepth() const
Returns the current depth in XML tree structure.
Definition: xmlparser.cpp:530
A very simple XML parser.
std::pair< std::string, std::string > XMLParserAttribute
An attribute and its value.
Definition: xmlparser.hpp:77