Orcus
sax_ns_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9 #define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10 
11 #include "sax_parser.hpp"
12 #include "xml_namespace.hpp"
13 #include "global.hpp"
14 
15 #include <unordered_set>
16 #include <vector>
17 #include <memory>
18 #include <algorithm>
19 
20 namespace orcus {
21 
23 {
24  xmlns_id_t ns; // element namespace
25  pstring ns_alias; // element namespace alias
26  pstring name; // element name
27  const char* begin_pos; // position of the opening brace '<'.
28  const char* end_pos; // position of the char after the closing brace '>'.
29 };
30 
32 {
33  xmlns_id_t ns; // attribute namespace
34  pstring ns_alias; // attribute namespace alias
35  pstring name; // attribute name
36  pstring value; // attribute value
37  bool transient; // whether or not the attribute value is transient.
38 };
39 
40 namespace __sax {
41 
43 {
44  pstring ns;
45  pstring name;
46 
47  entity_name(const pstring& _ns, const pstring& _name) :
48  ns(_ns), name(_name) {}
49 
50  bool operator== (const entity_name& other) const
51  {
52  return other.ns == ns && other.name == name;
53  }
54 
55  struct hash
56  {
57  size_t operator() (const entity_name& v) const
58  {
59  static pstring::hash hasher;
60  return hasher(v.ns) + hasher(v.name);
61  }
62  };
63 };
64 
65 typedef std::unordered_set<pstring, pstring::hash> ns_keys_type;
66 typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
67 
68 struct elem_scope
69 {
70  xmlns_id_t ns;
71  pstring name;
72  ns_keys_type ns_keys;
73 };
74 
75 typedef std::vector<std::unique_ptr<elem_scope>> elem_scopes_type;
76 
77 class pop_ns_by_key : std::unary_function<pstring, void>
78 {
79  xmlns_context& m_cxt;
80 public:
81  pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {}
82  void operator() (const pstring& key)
83  {
84  m_cxt.pop(key);
85  }
86 };
87 
88 }
89 
93 template<typename _Handler>
95 {
96 public:
97  typedef _Handler handler_type;
98 
99  sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler);
100  ~sax_ns_parser();
101 
102  void parse();
103 
104 private:
109  class handler_wrapper
110  {
111  __sax::elem_scopes_type m_scopes;
112  __sax::ns_keys_type m_ns_keys;
113  __sax::entity_names_type m_attrs;
114 
115  sax_ns_parser_element m_elem;
117 
118  xmlns_context& m_ns_cxt;
119  handler_type& m_handler;
120 
121  bool m_declaration;
122 
123  public:
124  handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
125 
126  void doctype(const sax::doctype_declaration& dtd)
127  {
128  m_handler.doctype(dtd);
129  }
130 
131  void start_declaration(const pstring& name)
132  {
133  m_declaration = true;
134  m_handler.start_declaration(name);
135  }
136 
137  void end_declaration(const pstring& name)
138  {
139  m_declaration = false;
140  m_handler.end_declaration(name);
141  }
142 
143  void start_element(const sax::parser_element& elem)
144  {
145  m_scopes.push_back(orcus::make_unique<__sax::elem_scope>());
146  __sax::elem_scope& scope = *m_scopes.back();
147  scope.ns = m_ns_cxt.get(elem.ns);
148  scope.name = elem.name;
149  scope.ns_keys.swap(m_ns_keys);
150 
151  m_elem.ns = scope.ns;
152  m_elem.ns_alias = elem.ns;
153  m_elem.name = scope.name;
154  m_elem.begin_pos = elem.begin_pos;
155  m_elem.end_pos = elem.end_pos;
156  m_handler.start_element(m_elem);
157 
158  m_attrs.clear();
159  }
160 
161  void end_element(const sax::parser_element& elem)
162  {
163  __sax::elem_scope& scope = *m_scopes.back();
164  if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
165  throw sax::malformed_xml_error("mis-matching closing element.", -1);
166 
167  m_elem.ns = scope.ns;
168  m_elem.ns_alias = elem.ns;
169  m_elem.name = scope.name;
170  m_elem.begin_pos = elem.begin_pos;
171  m_elem.end_pos = elem.end_pos;
172  m_handler.end_element(m_elem);
173 
174  // Pop all namespaces declared in this scope.
175  std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt));
176 
177  m_scopes.pop_back();
178  }
179 
180  void characters(const pstring& val, bool transient)
181  {
182  m_handler.characters(val, transient);
183  }
184 
185  void attribute(const sax::parser_attribute& attr)
186  {
187  if (m_declaration)
188  {
189  // XML declaration attribute. Pass it through to the handler without namespace.
190  m_handler.attribute(attr.name, attr.value);
191  return;
192  }
193 
194  if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0)
196  "You can't define two attributes of the same name in the same element.", -1);
197 
198  m_attrs.insert(__sax::entity_name(attr.ns, attr.name));
199 
200  if (attr.ns.empty() && attr.name == "xmlns")
201  {
202  // Default namespace
203  m_ns_cxt.push(pstring(), attr.value);
204  m_ns_keys.insert(pstring());
205  return;
206  }
207 
208  if (attr.ns == "xmlns")
209  {
210  // Namespace alias
211  if (!attr.name.empty())
212  {
213  m_ns_cxt.push(attr.name, attr.value);
214  m_ns_keys.insert(attr.name);
215  }
216  return;
217  }
218 
219  m_attr.ns = m_ns_cxt.get(attr.ns);
220  m_attr.ns_alias = attr.ns;
221  m_attr.name = attr.name;
222  m_attr.value = attr.value;
223  m_attr.transient = attr.transient;
224  m_handler.attribute(m_attr);
225  }
226  };
227 
228 private:
229  handler_wrapper m_wrapper;
231 };
232 
233 template<typename _Handler>
235  const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) :
236  m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper)
237 {
238 }
239 
240 template<typename _Handler>
242 {
243 }
244 
245 template<typename _Handler>
247 {
248  m_parser.parse();
249 }
250 
251 }
252 
253 #endif
254 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: pstring.hpp:76
Definition: sax_ns_parser.hpp:68
Definition: pstring.hpp:24
Definition: sax_ns_parser.hpp:55
Definition: sax_ns_parser.hpp:42
Definition: sax_ns_parser.hpp:31
Definition: sax_parser_base.hpp:33
Definition: sax_ns_parser.hpp:94
Definition: xml_namespace.hpp:80
Definition: sax_ns_parser.hpp:22
Definition: sax_parser_base.hpp:87
Definition: sax_parser_base.hpp:72
Definition: sax_parser_base.hpp:45
Definition: sax_ns_parser.hpp:77
Definition: base64.hpp:15