Orcus
sax_parser_base.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
9 #define INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
10 
11 #include "env.hpp"
12 #include "pstring.hpp"
13 #include "cell_buffer.hpp"
14 #include "parser_global.hpp"
15 #include "parser_base.hpp"
16 
17 #include <cassert>
18 #include <cstdlib>
19 #include <exception>
20 #include <sstream>
21 #include <memory>
22 
23 #define ORCUS_DEBUG_SAX_PARSER 0
24 
25 #if ORCUS_DEBUG_SAX_PARSER
26 #include <iostream>
27 using std::cout;
28 using std::endl;
29 #endif
30 
31 namespace orcus { namespace sax {
32 
33 class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error
34 {
35 public:
36  malformed_xml_error() = delete;
37  malformed_xml_error(const std::string& msg, std::ptrdiff_t offset);
38  virtual ~malformed_xml_error() throw();
39 };
40 
// NOTE(review): the line that names this aggregate (listing lines 41-45) is
// missing from this extract; only the body survives.  The members look like
// the components of an XML DOCTYPE declaration -- confirm against the
// original header.
46 {
47  enum class keyword_type { dtd_public, dtd_private }; // the two keyword kinds a record can carry
48 
49  keyword_type keyword; // which of the two keyword kinds applies to this record
50  pstring root_element; // presumably the name of the document's root element -- TODO confirm
51  pstring fpi; // presumably the formal public identifier -- TODO confirm
52  pstring uri; // presumably the location of the DTD -- TODO confirm
53 };
54 
// Maps a character sequence of length n starting at p to a single char.
// NOTE(review): judging by the name, p/n presumably hold the name of an XML
// encoded character (the text between '&' and ';'); what is returned for an
// unrecognised name is not visible from this declaration -- confirm against
// the implementation.
66 ORCUS_PSR_DLLPUBLIC char decode_xml_encoded_char(const char* p, size_t n);
67 
// NOTE(review): the line that names this aggregate (listing lines 68-72) is
// missing from this extract.  It is presumably the parser_element type that
// parser_base::element_name() fills in -- confirm against the original header.
73 {
74  pstring ns; // namespace of the element (optional)
75  pstring name; // name of the element
76  const char* begin_pos; // position of the opening angle bracket '<'.
77  const char* end_pos; // position of the char right after the closing angle bracket '>'.
78 };
79 
// NOTE(review): the line that names this aggregate (listing lines 79-87) is
// missing from this extract.  The members describe a single attribute of an
// element -- confirm the type name against the original header.
88 {
89  pstring ns; // namespace of the attribute (optional)
90  pstring name; // name of the attribute
91  pstring value; // value of the attribute
92  bool transient; // true when the attribute value lives on a temporary buffer.
93 };
94 
95 class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
96 {
97  struct impl;
98  std::unique_ptr<impl> mp_impl;
99 
100  parser_base() = delete;
101  parser_base(const parser_base&) = delete;
102  parser_base& operator=(const parser_base&) = delete;
103 protected:
104  size_t m_nest_level;
105  size_t m_buffer_pos;
106  bool m_root_elem_open:1;
107 
108 protected:
109  parser_base(const char* content, size_t size);
110  ~parser_base();
111 
112  void next_check()
113  {
114  next();
115  if (!has_char())
116  throw malformed_xml_error("xml stream ended prematurely.", offset());
117  }
118 
119  void nest_up() { ++m_nest_level; }
120  void nest_down()
121  {
122  assert(m_nest_level > 0);
123  --m_nest_level;
124  }
125 
126  void inc_buffer_pos();
127  void reset_buffer_pos() { m_buffer_pos = 0; }
128 
129  void has_char_throw(const char* msg) const
130  {
131  if (!has_char())
132  throw malformed_xml_error(msg, offset());
133  }
134 
142  inline size_t remains() const
143  {
144 #if ORCUS_DEBUG_SAX_PARSER
145  if (mp_char >= mp_end)
146  throw malformed_xml_error("xml stream ended prematurely.", offset());
147 #endif
148  return mp_end - mp_char;
149  }
150 
151  char cur_char_checked() const
152  {
153  if (!has_char())
154  throw malformed_xml_error("xml stream ended prematurely.", offset());
155 
156  return *mp_char;
157  }
158 
159  char next_and_char()
160  {
161  next();
162 #if ORCUS_DEBUG_SAX_PARSER
163  if (mp_char >= mp_end)
164  throw malformed_xml_error("xml stream ended prematurely.", offset());
165 #endif
166  return *mp_char;
167  }
168 
169  char next_char_checked()
170  {
171  next();
172  if (!has_char())
173  throw malformed_xml_error("xml stream ended prematurely.", offset());
174 
175  return *mp_char;
176  }
177 
178  cell_buffer& get_cell_buffer();
179 
180  void blank();
181  void comment();
182 
186  void skip_bom();
187 
188  void expects_next(const char* p, size_t n);
189 
190  void parse_encoded_char(cell_buffer& buf);
191  void value_with_encoded_char(cell_buffer& buf, pstring& str);
192 
201  bool value(pstring& str, bool decode);
202 
203  void name(pstring& str);
204  void element_name(parser_element& elem, const char* begin_pos);
205  void attribute_name(pstring& attr_ns, pstring& attr_name);
206  void characters_with_encoded_char(cell_buffer& buf);
207 };
208 
209 }}
210 
211 #endif
212 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: pstring.hpp:24
Definition: cell_buffer.hpp:21
Definition: sax_parser_base.hpp:33
Definition: sax_parser_base.hpp:87
Definition: parser_base.hpp:34
Definition: sax_parser_base.hpp:72
Definition: sax_parser_base.hpp:45
size_t remains() const
Definition: sax_parser_base.hpp:142
Definition: parser_base.hpp:20
Definition: base64.hpp:15
Definition: sax_parser_base.hpp:95