8 #ifndef ORCUS_SAX_PARSER_HPP 9 #define ORCUS_SAX_PARSER_HPP 11 #include "sax_parser_base.hpp" 30 template<
typename _Handler,
typename _Config = sax_parser_default_config>
34 typedef _Handler handler_type;
35 typedef _Config config_type;
37 sax_parser(
const char* content,
const size_t size, handler_type& handler);
51 void element_open(
const char* begin_pos);
52 void element_close(
const char* begin_pos);
54 void declaration(
const char* name_check);
61 handler_type& m_handler;
64 template<
typename _Handler,
typename _Config>
66 const char* content,
const size_t size, handler_type& handler) :
72 template<
typename _Handler,
typename _Config>
77 template<
typename _Handler,
typename _Config>
86 assert(m_buffer_pos == 0);
89 template<
typename _Handler,
typename _Config>
95 if (!has_char() || cur_char() !=
'<')
98 if (config_type::strict_xml_declaration)
100 if (next_char_checked() !=
'?')
107 template<
typename _Handler,
typename _Config>
112 if (cur_char() ==
'<')
115 if (!m_root_elem_open)
119 else if (m_nest_level)
127 template<
typename _Handler,
typename _Config>
130 assert(cur_char() ==
'<');
131 const char* pos = mp_char;
132 char c = next_char_checked();
151 template<
typename _Handler,
typename _Config>
154 assert(is_alpha(cur_char()));
157 element_name(elem, begin_pos);
166 if (next_and_char() !=
'>')
169 elem.end_pos = mp_char;
170 m_handler.start_element(elem);
172 m_handler.end_element(elem);
173 #if ORCUS_DEBUG_SAX_PARSER 174 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
182 elem.end_pos = mp_char;
184 m_handler.start_element(elem);
186 #if ORCUS_DEBUG_SAX_PARSER 187 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
196 template<
typename _Handler,
typename _Config>
199 assert(cur_char() ==
'/');
203 element_name(elem, begin_pos);
205 if (cur_char() !=
'>')
208 elem.end_pos = mp_char;
210 m_handler.end_element(elem);
211 #if ORCUS_DEBUG_SAX_PARSER 212 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
215 m_root_elem_open =
false;
218 template<
typename _Handler,
typename _Config>
221 assert(cur_char() ==
'!');
223 size_t len = remains();
227 switch (next_and_char())
232 if (next_and_char() !=
'-')
246 expects_next(
"CDATA[", 6);
254 expects_next(
"OCTYPE", 6);
265 template<
typename _Handler,
typename _Config>
268 assert(cur_char() ==
'?');
274 #if ORCUS_DEBUG_SAX_PARSER 275 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
278 if (name_check && decl_name != name_check)
280 std::ostringstream os;
281 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
285 m_handler.start_declaration(decl_name);
289 while (cur_char_checked() !=
'?')
294 if (next_char_checked() !=
'>')
297 m_handler.end_declaration(decl_name);
300 #if ORCUS_DEBUG_SAX_PARSER 301 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
305 template<
typename _Handler,
typename _Config>
308 size_t len = remains();
312 const char* p0 = mp_char;
313 size_t i = 0, match = 0;
314 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
328 else if (c ==
'>' && match == 2)
331 size_t cdata_len = i - 2;
332 m_handler.characters(
pstring(p0, cdata_len),
false);
342 template<
typename _Handler,
typename _Config>
347 name(param.root_element);
351 size_t len = remains();
355 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
359 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
362 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
366 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
372 has_char_throw(
"DOCTYPE section too short.");
375 value(param.fpi,
false);
377 has_char_throw(
"DOCTYPE section too short.");
379 has_char_throw(
"DOCTYPE section too short.");
381 if (cur_char() ==
'>')
384 #if ORCUS_DEBUG_SAX_PARSER 385 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
387 m_handler.doctype(param);
393 value(param.uri,
false);
395 has_char_throw(
"DOCTYPE section too short.");
397 has_char_throw(
"DOCTYPE section too short.");
399 if (cur_char() !=
'>')
402 #if ORCUS_DEBUG_SAX_PARSER 403 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
405 m_handler.doctype(param);
409 template<
typename _Handler,
typename _Config>
412 const char* p0 = mp_char;
413 for (; has_char(); next())
415 if (cur_char() ==
'<')
418 if (cur_char() ==
'&')
423 buf.append(p0, mp_char-p0);
424 characters_with_encoded_char(buf);
426 m_handler.characters(
pstring(),
false);
428 m_handler.characters(
pstring(buf.get(), buf.size()),
true);
436 m_handler.characters(val,
false);
440 template<
typename _Handler,
typename _Config>
444 pstring attr_ns_name, attr_name, attr_value;
445 attribute_name(attr.ns, attr.name);
447 #if ORCUS_DEBUG_SAX_PARSER 448 std::ostringstream os;
449 os <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'";
455 std::ostringstream os;
456 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
461 attr.transient = value(attr.value,
true);
466 #if ORCUS_DEBUG_SAX_PARSER 467 os <<
" value='" << attr.value <<
"'" << endl;
471 m_handler.attribute(attr);
Definition: pstring.hpp:24
Definition: cell_buffer.hpp:21
Definition: sax_parser.hpp:15
static const bool strict_xml_declaration
Definition: sax_parser.hpp:23
Definition: sax_parser_base.hpp:87
Definition: sax_parser_base.hpp:72
Definition: sax_parser_base.hpp:45
Definition: base64.hpp:15
Definition: sax_parser.hpp:31
Definition: sax_parser_base.hpp:95