Orcus
css_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9 #define INCLUDED_ORCUS_CSS_PARSER_HPP
10 
11 #define ORCUS_DEBUG_CSS 0
12 
13 #include "parser_global.hpp"
14 #include "css_parser_base.hpp"
15 
16 #include <cassert>
17 
18 #if ORCUS_DEBUG_CSS
19 #include <iostream>
20 using std::cout;
21 using std::endl;
22 #endif
23 
24 namespace orcus {
25 
26 template<typename _Handler>
28 {
29 public:
30  typedef _Handler handler_type;
31 
32  css_parser(const char* p, size_t n, handler_type& hdl);
33  void parse();
34 
35 private:
36  // Handlers - at the time a handler is called the current position is
37  // expected to point to the first unprocessed non-blank character, and
38  // each handler must set the current position to the next unprocessed
39  // non-blank character when it finishes.
40  void rule();
41  void at_rule_name();
42  void simple_selector_name();
43  void property_name();
44  void property();
45  void quoted_value();
46  void value();
47  void function_value(const char* p, size_t len);
48  void function_rgb(bool alpha);
49  void function_hsl(bool alpha);
50  void function_url();
51  void name_sep();
52  void property_sep();
53  void block();
54 
55  handler_type& m_handler;
56 };
57 
58 template<typename _Handler>
59 css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) :
60  css::parser_base(p, n), m_handler(hdl) {}
61 
62 template<typename _Handler>
64 {
65  shrink_stream();
66 
67 #if ORCUS_DEBUG_CSS
68  std::cout << "compressed: '";
69  const char* p = mp_char;
70  for (; p != mp_end; ++p)
71  std::cout << *p;
72  std::cout << "'" << std::endl;
73 #endif
74  m_handler.begin_parse();
75  while (has_char())
76  rule();
77  m_handler.end_parse();
78 }
79 
80 template<typename _Handler>
82 {
83  // <selector name> , ... , <selector name> <block>
84  while (has_char())
85  {
86  if (skip_comment())
87  continue;
88 
89  char c = cur_char();
90  if (is_alpha(c))
91  {
92  simple_selector_name();
93  continue;
94  }
95 
96  switch (c)
97  {
98  case '>':
99  set_combinator(c, css::combinator_t::direct_child);
100  break;
101  case '+':
102  set_combinator(c, css::combinator_t::next_sibling);
103  break;
104  case '.':
105  case '#':
106  case '@':
107  simple_selector_name();
108  break;
109  case ',':
110  name_sep();
111  break;
112  case '{':
113  reset_before_block();
114  block();
115  break;
116  default:
117  css::parse_error::throw_with("rule: failed to parse '", c, "'");
118  }
119  }
120 }
121 
122 template<typename _Handler>
124 {
125  assert(has_char());
126  assert(cur_char() == '@');
127  next();
128  char c = cur_char();
129  if (!is_alpha(c))
130  throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet.");
131 
132  const char* p;
133  size_t len;
134  identifier(p, len);
135  skip_blanks();
136 
137  m_handler.at_rule_name(p, len);
138 #if ORCUS_DEBUG_CSS
139  std::string foo(p, len);
140  std::cout << "at-rule name: " << foo.c_str() << std::endl;
141 #endif
142 }
143 
144 template<typename _Handler>
146 {
147  assert(has_char());
148  char c = cur_char();
149  if (c == '@')
150  {
151  // This is the name of an at-rule.
152  at_rule_name();
153  return;
154  }
155 
156  if (m_simple_selector_count)
157  {
158 #if ORCUS_DEBUG_CSS
159  cout << "combinator: " << m_combinator << endl;
160 #endif
161  m_handler.combinator(m_combinator);
162  m_combinator = css::combinator_t::descendant;
163  }
164  assert(is_alpha(c) || c == '.' || c == '#');
165 
166  const char* p = NULL;
167  size_t n = 0;
168 
169 #if ORCUS_DEBUG_CSS
170  cout << "simple_selector_name: (" << m_simple_selector_count << ")";
171 #endif
172 
173  if (c != '.' && c != '#')
174  {
175  identifier(p, n);
176 #if ORCUS_DEBUG_CSS
177  std::string s(p, n);
178  cout << " type=" << s;
179 #endif
180  m_handler.simple_selector_type(p, n);
181  }
182 
183  bool in_loop = true;
184  while (in_loop && has_char())
185  {
186  switch (cur_char())
187  {
188  case '.':
189  {
190  next();
191  identifier(p, n);
192  m_handler.simple_selector_class(p, n);
193 #if ORCUS_DEBUG_CSS
194  std::string s(p, n);
195  std::cout << " class=" << s;
196 #endif
197  }
198  break;
199  case '#':
200  {
201  next();
202  identifier(p, n);
203  m_handler.simple_selector_id(p, n);
204 #if ORCUS_DEBUG_CSS
205  std::string s(p, n);
206  std::cout << " id=" << s;
207 #endif
208  }
209  break;
210  case ':':
211  {
212  // This could be either a pseudo element or pseudo class.
213  next();
214  if (cur_char() == ':')
215  {
216  // pseudo element.
217  next();
218  identifier(p, n);
219  css::pseudo_element_t elem = css::to_pseudo_element(p, n);
220  if (!elem)
221  css::parse_error::throw_with(
222  "selector_name: unknown pseudo element '", p, n, "'");
223 
224  m_handler.simple_selector_pseudo_element(elem);
225  }
226  else
227  {
228  // pseudo class (or pseudo element in the older version of CSS).
229  identifier(p, n);
230  css::pseudo_class_t pc = css::to_pseudo_class(p, n);
231  if (!pc)
232  css::parse_error::throw_with(
233  "selector_name: unknown pseudo class '", p, n, "'");
234 
235  m_handler.simple_selector_pseudo_class(pc);
236  }
237  }
238  break;
239  default:
240  in_loop = false;
241  }
242  }
243 
244  m_handler.end_simple_selector();
245  skip_comments_and_blanks();
246 
247  ++m_simple_selector_count;
248 
249 #if ORCUS_DEBUG_CSS
250  std::cout << std::endl;
251 #endif
252 }
253 
254 template<typename _Handler>
256 {
257  // <identifier>
258 
259  assert(has_char());
260  char c = cur_char();
261  if (!is_alpha(c) && c != '.')
262  css::parse_error::throw_with(
263  "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'");
264 
265  const char* p;
266  size_t len;
267  identifier(p, len);
268  skip_comments_and_blanks();
269 
270  m_handler.property_name(p, len);
271 #if ORCUS_DEBUG_CSS
272  std::string foo(p, len);
273  std::cout << "property name: " << foo.c_str() << std::endl;
274 #endif
275 }
276 
277 template<typename _Handler>
279 {
280  // <property name> : <value> , ... , <value>
281 
282  m_handler.begin_property();
283  property_name();
284  if (cur_char() != ':')
285  throw css::parse_error("property: ':' expected.");
286  next();
287  skip_comments_and_blanks();
288 
289  bool in_loop = true;
290  while (in_loop && has_char())
291  {
292  value();
293  char c = cur_char();
294  switch (c)
295  {
296  case ',':
297  {
298  // separated by commas.
299  next();
300  skip_comments_and_blanks();
301  }
302  break;
303  case ';':
304  case '}':
305  in_loop = false;
306  break;
307  default:
308  ;
309  }
310  }
311 
312  skip_comments_and_blanks();
313  m_handler.end_property();
314 }
315 
316 template<typename _Handler>
318 {
319  // Parse until the the end quote is reached.
320  const char* p = NULL;
321  size_t len = 0;
322  literal(p, len, '"');
323  next();
324  skip_blanks();
325 
326  m_handler.value(p, len);
327 #if ORCUS_DEBUG_CSS
328  std::string foo(p, len);
329  std::cout << "quoted value: " << foo.c_str() << std::endl;
330 #endif
331 }
332 
333 template<typename _Handler>
335 {
336  assert(has_char());
337  char c = cur_char();
338  if (c == '"')
339  {
340  quoted_value();
341  return;
342  }
343 
344  if (!is_alpha(c) && !is_numeric(c) && !is_in(c, "-+.#"))
345  css::parse_error::throw_with("value:: illegal first character of a value '", c, "'");
346 
347  const char* p = NULL;
348  size_t len = 0;
349  identifier(p, len, ".%");
350  if (cur_char() == '(')
351  {
352  function_value(p, len);
353  return;
354  }
355 
356  m_handler.value(p, len);
357 
358  skip_comments_and_blanks();
359 
360 #if ORCUS_DEBUG_CSS
361  std::string foo(p, len);
362  std::cout << "value: " << foo.c_str() << std::endl;
363 #endif
364 }
365 
366 template<typename _Handler>
367 void css_parser<_Handler>::function_value(const char* p, size_t len)
368 {
369  assert(cur_char() == '(');
370  css::property_function_t func = css::to_property_function(p, len);
371  if (func == css::property_function_t::unknown)
372  css::parse_error::throw_with("function_value: unknown function '", p, len, "'");
373 
374  // Move to the first character of the first argument.
375  next();
376  skip_comments_and_blanks();
377 
378  switch (func)
379  {
380  case css::property_function_t::rgb:
381  function_rgb(false);
382  break;
383  case css::property_function_t::rgba:
384  function_rgb(true);
385  break;
386  case css::property_function_t::hsl:
387  function_hsl(false);
388  break;
389  case css::property_function_t::hsla:
390  function_hsl(true);
391  break;
392  case css::property_function_t::url:
393  function_url();
394  break;
395  default:
396  css::parse_error::throw_with("function_value: unhandled function '", p, len, "'");
397  }
398 
399  char c = cur_char();
400  if (c != ')')
401  css::parse_error::throw_with("function_value: ')' expected but '", c, "' found.");
402 
403  next();
404  skip_comments_and_blanks();
405 }
406 
407 template<typename _Handler>
408 void css_parser<_Handler>::function_rgb(bool alpha)
409 {
410  // rgb(num, num, num) rgba(num, num, num, float)
411 
412  uint8_t vals[3];
413  uint8_t* p = vals;
414  const uint8_t* plast = p + 2;
415  char c = 0;
416 
417  for (; ; ++p)
418  {
419  *p = parse_uint8();
420 
421  skip_comments_and_blanks();
422 
423  if (p == plast)
424  break;
425 
426  c = cur_char();
427 
428  if (c != ',')
429  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
430 
431  next();
432  skip_comments_and_blanks();
433  }
434 
435  if (alpha)
436  {
437  c = cur_char();
438  if (c != ',')
439  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
440 
441  next();
442  skip_comments_and_blanks();
443 
444  double alpha_val = parse_double_or_throw();
445 
446  alpha_val = clip(alpha_val, 0.0, 1.0);
447  m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
448  }
449  else
450  m_handler.rgb(vals[0], vals[1], vals[2]);
451 
452 #if ORCUS_DEBUG_CSS
453  std::cout << "rgb";
454  if (alpha)
455  std::cout << 'a';
456  std::cout << '(';
457  p = vals;
458  const uint8_t* pend = plast + 1;
459  for (; p != pend; ++p)
460  std::cout << ' ' << (int)*p;
461  std::cout << " )" << std::endl;
462 #endif
463 }
464 
465 template<typename _Handler>
466 void css_parser<_Handler>::function_hsl(bool alpha)
467 {
468  // hsl(num, percent, percent) hsla(num, percent, percent, float)
469 
470  double hue = parse_double_or_throw(); // casted to uint8_t eventually.
471  hue = clip(hue, 0.0, 360.0);
472  skip_comments_and_blanks();
473 
474  char c = cur_char();
475  if (c != ',')
476  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
477 
478  next();
479  skip_comments_and_blanks();
480 
481  double sat = parse_percent();
482  sat = clip(sat, 0.0, 100.0);
483  skip_comments_and_blanks();
484 
485  c = cur_char();
486  if (c != ',')
487  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
488 
489  next();
490  skip_comments_and_blanks();
491 
492  double light = parse_percent();
493  light = clip(light, 0.0, 100.0);
494  skip_comments_and_blanks();
495 
496  if (!alpha)
497  {
498  m_handler.hsl(hue, sat, light);
499  return;
500  }
501 
502  c = cur_char();
503  if (c != ',')
504  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
505 
506  next();
507  skip_comments_and_blanks();
508 
509  double alpha_val = parse_double_or_throw();
510  alpha_val = clip(alpha_val, 0.0, 1.0);
511  skip_comments_and_blanks();
512  m_handler.hsla(hue, sat, light, alpha_val);
513 }
514 
515 template<typename _Handler>
517 {
518  char c = cur_char();
519 
520  if (c == '"' || c == '\'')
521  {
522  // Quoted URL value.
523  const char* p;
524  size_t len;
525  literal(p, len, c);
526  next();
527  skip_comments_and_blanks();
528  m_handler.url(p, len);
529 #if ORCUS_DEBUG_CSS
530  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
531 #endif
532  return;
533  }
534 
535  // Unquoted URL value.
536  const char* p;
537  size_t len;
538  skip_to_or_blank(p, len, ")");
539  skip_comments_and_blanks();
540  m_handler.url(p, len);
541 #if ORCUS_DEBUG_CSS
542  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
543 #endif
544 }
545 
546 template<typename _Handler>
548 {
549  assert(cur_char() == ',');
550 #if ORCUS_DEBUG_CSS
551  std::cout << "," << std::endl;
552 #endif
553  next();
554  skip_blanks();
555  m_handler.end_selector();
556 }
557 
558 template<typename _Handler>
560 {
561 #if ORCUS_DEBUG_CSS
562  std::cout << ";" << std::endl;
563 #endif
564  next();
565  skip_comments_and_blanks();
566 }
567 
568 template<typename _Handler>
570 {
571  // '{' <property> ';' ... ';' <property> ';'(optional) '}'
572 
573  assert(cur_char() == '{');
574 #if ORCUS_DEBUG_CSS
575  std::cout << "{" << std::endl;
576 #endif
577  m_handler.end_selector();
578  m_handler.begin_block();
579 
580  next();
581  skip_comments_and_blanks();
582 
583  // parse properties.
584  while (has_char())
585  {
586  property();
587  if (cur_char() != ';')
588  break;
589  property_sep();
590  if (cur_char() == '}')
591  // ';' after the last property. This is optional but allowed.
592  break;
593  }
594 
595  if (cur_char() != '}')
596  throw css::parse_error("block: '}' expected.");
597 
598  m_handler.end_block();
599 
600  next();
601  skip_comments_and_blanks();
602 
603 #if ORCUS_DEBUG_CSS
604  std::cout << "}" << std::endl;
605 #endif
606 }
607 
608 }
609 
610 #endif
611 
612 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:30
Definition: css_parser_base.hpp:21
Definition: css_parser.hpp:27
Definition: base64.hpp:15
void skip_to_or_blank(const char *&p, size_t &len, const char *chars)