Orcus
yaml_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9 #define INCLUDED_ORCUS_YAML_PARSER_HPP
10 
11 #include "orcus/yaml_parser_base.hpp"
12 #include "orcus/parser_global.hpp"
13 
14 namespace orcus {
15 
16 template<typename _Handler>
18 {
19 public:
20  typedef _Handler handler_type;
21 
22  yaml_parser(const char* p, size_t n, handler_type& hdl);
23 
24  void parse();
25 
26 private:
27  size_t end_scope();
28  void check_or_begin_document();
29  void check_or_begin_map();
30  void check_or_begin_sequence();
31  void parse_value(const char* p, size_t len);
32  void push_value(const char* p, size_t len);
33  void parse_line(const char* p, size_t len);
34  void parse_map_key(const char* p, size_t len);
35 
36 private:
37  handler_type& m_handler;
38 };
39 
40 template<typename _Handler>
41 yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) :
42  yaml::parser_base(p, n), m_handler(hdl) {}
43 
44 template<typename _Handler>
46 {
47  m_handler.begin_parse();
48 
49  while (has_char())
50  {
51  size_t indent = parse_indent();
52  if (indent == parse_indent_end_of_stream)
53  break;
54 
55  if (indent == parse_indent_blank_line)
56  continue;
57 
58  size_t cur_scope = get_scope();
59 
60  if (cur_scope <= indent)
61  {
62  if (in_literal_block())
63  {
64  handle_line_in_literal(indent);
65  continue;
66  }
67 
68  if (has_line_buffer())
69  {
70  // This line is part of multi-line string. Push the line to the
71  // buffer as-is.
72  handle_line_in_multi_line_string();
73  continue;
74  }
75  }
76 
77  if (cur_scope == scope_empty || indent > cur_scope)
78  {
79  push_scope(indent);
80  }
81  else if (indent < cur_scope)
82  {
83  // Current indent is less than the current scope level.
84  do
85  {
86  cur_scope = end_scope();
87  if (cur_scope < indent)
88  throw yaml::parse_error("parse: invalid indent level.");
89  }
90  while (indent < cur_scope);
91  }
92 
93  // Parse the rest of the line.
95  line = line.trim();
96 
97  assert(!line.empty());
98  parse_line(line.get(), line.size());
99  }
100 
101  // End all remaining scopes.
102  size_t cur_scope = get_scope();
103  while (cur_scope != scope_empty)
104  cur_scope = end_scope();
105 
106  if (get_doc_hash())
107  m_handler.end_document();
108 
109  m_handler.end_parse();
110 }
111 
112 template<typename _Handler>
114 {
115  switch (get_scope_type())
116  {
117  case yaml::scope_t::map:
118  m_handler.end_map();
119  break;
120  case yaml::scope_t::sequence:
121  m_handler.end_sequence();
122  break;
123  case yaml::scope_t::multi_line_string:
124  {
125  pstring merged = merge_line_buffer();
126  m_handler.string(merged.get(), merged.size());
127  }
128  break;
129  default:
130  {
131  if (has_line_buffer())
132  {
133  assert(get_line_buffer_count() == 1);
134  pstring line = pop_line_front();
135  parse_value(line.get(), line.size());
136  }
137  }
138  }
139  return pop_scope();
140 }
141 
142 template<typename _Handler>
144 {
145  if (!get_doc_hash())
146  {
147  set_doc_hash(mp_char);
148  m_handler.begin_document();
149  }
150 }
151 
152 template<typename _Handler>
154 {
155  if (get_scope_type() == yaml::scope_t::unset)
156  {
157  check_or_begin_document();
158  set_scope_type(yaml::scope_t::map);
159  m_handler.begin_map();
160  }
161 }
162 
163 template<typename _Handler>
165 {
166  if (get_scope_type() == yaml::scope_t::unset)
167  {
168  check_or_begin_document();
169  set_scope_type(yaml::scope_t::sequence);
170  m_handler.begin_sequence();
171  }
172 }
173 
174 template<typename _Handler>
175 void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
176 {
177  check_or_begin_document();
178 
179  const char* p0 = p;
180  const char* p_end = p + len;
181  double val = parse_numeric(p, len);
182  if (p == p_end)
183  {
184  m_handler.number(val);
185  return;
186  }
187 
188  yaml::keyword_t kw = parse_keyword(p0, len);
189 
190  if (kw != yaml::keyword_t::unknown)
191  {
192  switch (kw)
193  {
194  case yaml::keyword_t::null:
195  m_handler.null();
196  break;
197  case yaml::keyword_t::boolean_true:
198  m_handler.boolean_true();
199  break;
200  case yaml::keyword_t::boolean_false:
201  m_handler.boolean_false();
202  break;
203  default:
204  ;
205  }
206 
207  return;
208  }
209 
210  // Failed to parse it as a number or a keyword. It must be a string.
211  m_handler.string(p0, len);
212 }
213 
214 template<typename _Handler>
215 void yaml_parser<_Handler>::push_value(const char* p, size_t len)
216 {
217  check_or_begin_document();
218 
219  if (has_line_buffer() && get_scope_type() == yaml::scope_t::unset)
220  set_scope_type(yaml::scope_t::multi_line_string);
221 
222  push_line_back(p, len);
223 }
224 
225 template<typename _Handler>
226 void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
227 {
228  const char* p_end = p + len;
229  const char* p0 = p; // Save the original head position.
230 
231  if (*p == '-')
232  {
233  ++p;
234  if (p == p_end)
235  {
236  // List item start.
237  check_or_begin_sequence();
238  return;
239  }
240 
241  switch (*p)
242  {
243  case '-':
244  {
245  // start of a document
246  ++p;
247  if (p == p_end)
248  throw yaml::parse_error("parse_line: line ended with '--'.");
249 
250  if (*p != '-')
251  yaml::parse_error::throw_with("parse_line: '-' expected but '", *p, "' found.");
252 
253  ++p; // Skip the '-'.
254  set_doc_hash(p);
255  m_handler.begin_document();
256 
257  if (p != p_end)
258  {
259  skip_blanks(p, p_end-p);
260 
261  // Whatever comes after '---' is equivalent of first node.
262  assert(p != p_end);
263  push_scope(0);
264  parse_line(p, p_end-p);
265  }
266  }
267  break;
268  case ' ':
269  {
270  check_or_begin_sequence();
271 
272  // list item start with inline first item content.
273  ++p;
274  if (p == p_end)
275  throw yaml::parse_error("parse_line: list item expected, but the line ended prematurely.");
276 
277  skip_blanks(p, p_end-p);
278 
279  size_t scope_width = get_scope() + (p-p0);
280  push_scope(scope_width);
281  parse_line(p, p_end-p);
282  }
283  break;
284  }
285 
286  return;
287  }
288 
289  // If the line doesn't start with a '-', it must be a dictionary key.
290  parse_map_key(p, len);
291 }
292 
293 template<typename _Handler>
294 void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
295 {
296  const char* p_end = p + len;
297  const char* p0 = p; // Save the original head position.
298 
299  switch (*p)
300  {
301  case '"':
302  {
303  pstring quoted_str = parse_double_quoted_string_value(p, len);
304 
305  if (p == p_end)
306  {
307  m_handler.string(quoted_str.get(), quoted_str.size());
308  return;
309  }
310 
311  skip_blanks(p, p_end-p);
312 
313  if (*p != ':')
314  throw yaml::parse_error("parse_map_key: ':' is expected after the quoted string key.");
315 
316  check_or_begin_map();
317  m_handler.begin_map_key();
318  m_handler.string(quoted_str.get(), quoted_str.size());
319  m_handler.end_map_key();
320 
321  ++p; // skip the ':'.
322  if (p == p_end)
323  return;
324 
325  // Skip all white spaces.
326  skip_blanks(p, p_end-p);
327  }
328  break;
329  case '\'':
330  {
331  pstring quoted_str = parse_single_quoted_string_value(p, len);
332 
333  if (p == p_end)
334  {
335  m_handler.string(quoted_str.get(), quoted_str.size());
336  return;
337  }
338 
339  skip_blanks(p, p_end-p);
340 
341  if (*p != ':')
342  throw yaml::parse_error("parse_map_key: ':' is expected after the quoted string key.");
343 
344  check_or_begin_map();
345  m_handler.begin_map_key();
346  m_handler.string(quoted_str.get(), quoted_str.size());
347  m_handler.end_map_key();
348 
349  ++p; // skip the ':'.
350  if (p == p_end)
351  return;
352 
353  skip_blanks(p, p_end-p);
354  }
355  break;
356  default:
357  {
358  key_value kv = parse_key_value(p, p_end-p);
359 
360  if (kv.key.empty())
361  {
362  // No map key found.
363  if (*p == '|')
364  {
365  start_literal_block();
366  return;
367  }
368 
369  push_value(p, len);
370  return;
371  }
372 
373  check_or_begin_map();
374  m_handler.begin_map_key();
375  parse_value(kv.key.get(), kv.key.size());
376  m_handler.end_map_key();
377 
378  if (kv.value.empty())
379  return;
380 
381  p = kv.value.get();
382  }
383  }
384 
385  if (*p == '|')
386  {
387  start_literal_block();
388  return;
389  }
390 
391  // inline map item.
392  if (*p == '-')
393  throw yaml::parse_error("parse_map_key: sequence entry is not allowed as an inline map item.");
394 
395  size_t scope_width = get_scope() + (p-p0);
396  push_scope(scope_width);
397  parse_line(p, p_end-p);
398 }
399 
400 }
401 
402 #endif
403 
404 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: yaml_parser_base.hpp:59
Definition: pstring.hpp:24
Definition: yaml_parser.hpp:17
const char * get_doc_hash() const
void set_doc_hash(const char *hash)
Definition: yaml_parser_base.hpp:19
Definition: base64.hpp:15
Definition: yaml_parser_base.hpp:44