libdap  Updated for version 3.17.2
D4ParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG 1
28 
29 #include <iostream>
30 #include <sstream>
31 
32 #include <cstring>
33 #include <cstdarg>
34 #include <cassert>
35 
36 #include <libxml/parserInternals.h>
37 
38 #include "DMR.h"
39 
40 #include "BaseType.h"
41 #include "Array.h"
42 #include "D4Group.h"
43 #include "D4Attributes.h"
44 #include "D4Maps.h"
45 #include "D4Enum.h"
46 
47 #include "D4BaseTypeFactory.h"
48 
49 #include "D4ParserSax2.h"
50 
51 #include "util.h"
52 #include "debug.h"
53 
54 namespace libdap {
55 
// Printable names for the parser states. The table is indexed with the value
// returned by get_state() when the parser writes its debugging output.
// NOTE(review): the order presumably mirrors the ParseState enumeration
// declared in D4ParserSax2.h -- confirm before adding or moving an entry.
static const char *states[] = {
    "parser_start",

    "inside_dataset",

    // State entered right after the start of a Group element has been parsed.
    "inside_group",

    // Attribute handling
    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    // Enumeration definitions and their constants
    "inside_enum_def",
    "inside_enum_const",

    // Dimension definitions
    "inside_dim_def",

    // Covers Byte, ..., Url, Opaque
    "inside_simple_type",

    // "inside_array",
    "inside_dim",
    "inside_map",

    "inside_constructor",

    // Unknown/error states
    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
90 
/**
 * Test whether an element name differs from an expected tag.
 *
 * @param name The element name to test (must not be null).
 * @param tag The tag to compare against (must not be null).
 * @return True when the two C strings are not byte-for-byte equal.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
95 
104 D4EnumDef *
105 D4ParserSax2::enum_def()
106 {
107  if (!d_enum_def) d_enum_def = new D4EnumDef;
108 
109  return d_enum_def;
110 }
111 
118 D4Dimension *
119 D4ParserSax2::dim_def() {
120  if (!d_dim_def) d_dim_def = new D4Dimension;
121 
122  return d_dim_def;
123 }
124 
130 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
131 {
132  if (!xml_attrs.empty())
133  xml_attrs.clear(); // erase old attributes
134 
135  // Make a value using the attribute name and the prefix, namespace URI
136  // and the value. The prefix might be null.
137  unsigned int index = 0;
138  for (int i = 0; i < nb_attributes; ++i, index += 5) {
139  xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
140  XMLAttribute(attributes + index + 1)));
141 
142  DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
143  << xml_attrs[(const char *)attributes[index]].value << endl);
144  }
145 }
146 
153 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
154 {
155  // make a value with the prefix and namespace URI. The prefix might be null.
156  for (int i = 0; i < nb_namespaces; ++i) {
157  namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
158  (const char *)namespaces[i * 2 + 1]));
159  }
160 }
161 
168 bool D4ParserSax2::check_required_attribute(const string & attr)
169 {
170  if (xml_attrs.find(attr) == xml_attrs.end()) {
171  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
172  return false;
173  }
174  else
175  return true;
176 }
177 
184 bool D4ParserSax2::check_attribute(const string & attr)
185 {
186  return (xml_attrs.find(attr) != xml_attrs.end());
187 }
188 
189 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
190 {
191  if (is_not(name, "Dimension"))
192  return false;
193 
194  transfer_xml_attrs(attrs, nb_attributes);
195 
196  if (!(check_required_attribute("name") && check_required_attribute("size"))) {
197  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
198  return false;
199  }
200 
201  // This getter (dim_def) allocates a new object if needed.
202  dim_def()->set_name(xml_attrs["name"].value);
203  try {
204  dim_def()->set_size(xml_attrs["size"].value);
205  }
206  catch (Error &e) {
207  dmr_error(this, e.get_error_message().c_str());
208  return false;
209  }
210 
211  return true;
212 }
213 
231 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
232 {
233  if (is_not(name, "Dim"))
234  return false;
235 
236  transfer_xml_attrs(attrs, nb_attributes);
237 
238  if (check_attribute("size") && check_attribute("name")) {
239  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
240  return false;
241  }
242  if (!(check_attribute("size") || check_attribute("name"))) {
243  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
244  return false;
245  }
246 
247  if (!top_basetype()->is_vector_type()) {
248  // Make the top BaseType* an array
249  BaseType *b = top_basetype();
250  pop_basetype();
251 
252  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
253  a->set_is_dap4(true);
254  a->add_var_nocopy(b);
255  a->set_attributes_nocopy(b->attributes());
256  // trick: instead of popping b's attributes, copying them and then pushing
257  // a's copy, just move the pointer (but make sure there's only one object that
258  // references that pointer).
259  b->set_attributes_nocopy(0);
260 
261  push_basetype(a);
262  }
263 
264  assert(top_basetype()->is_vector_type());
265 
266  Array *a = static_cast<Array*>(top_basetype());
267  if (check_attribute("size")) {
268  a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
269  return true;
270  }
271  else if (check_attribute("name")) {
272  string name = xml_attrs["name"].value;
273 
274  D4Dimension *dim = 0;
275  if (name[0] == '/') // lookup the Dimension in the root group
276  dim = dmr()->root()->find_dim(name);
277  else // get enclosing Group and lookup Dimension there
278  dim = top_group()->find_dim(name);
279 
280  if (!dim)
281  throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
282  a->append_dim(dim);
283  return true;
284  }
285 
286  return false;
287 }
288 
289 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
290 {
291  if (is_not(name, "Map"))
292  return false;
293 
294  transfer_xml_attrs(attrs, nb_attributes);
295 
296  if (!check_attribute("name")) {
297  dmr_error(this, "The 'name' attribute must be used in a Map element.");
298  return false;
299  }
300 
301  if (!top_basetype()->is_vector_type()) {
302  // Make the top BaseType* an array
303  BaseType *b = top_basetype();
304  pop_basetype();
305 
306  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
307  a->set_is_dap4(true);
308  a->add_var_nocopy(b);
309  a->set_attributes_nocopy(b->attributes());
310  // trick: instead of popping b's attributes, copying them and then pushing
311  // a's copy, just move the pointer (but make sure there's only one object that
312  // references that pointer).
313  b->set_attributes_nocopy(0);
314 
315  push_basetype(a);
316  }
317 
318  assert(top_basetype()->is_vector_type());
319 
320  Array *a = static_cast<Array*>(top_basetype());
321 
322  string map_name = xml_attrs["name"].value;
323  if (xml_attrs["name"].value[0] != '/')
324  map_name = top_group()->FQN() + map_name;
325 
326  Array *map_source = 0; // The array variable that holds the data for the Map
327 
328  if (map_name[0] == '/') // lookup the Map in the root group
329  map_source = dmr()->root()->find_map_source(map_name);
330  else // get enclosing Group and lookup Map there
331  map_source = top_group()->find_map_source(map_name);
332 
333  if (!map_source)
334  throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
335 
336  a->maps()->add_map(new D4Map(map_name, map_source));
337 
338  return true;
339 }
340 
341 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
342 {
343  if (is_not(name, "Group"))
344  return false;
345 
346  transfer_xml_attrs(attrs, nb_attributes);
347 
348  if (!check_required_attribute("name")) {
349  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
350  return false;
351  }
352 
353  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
354  if (!btp) {
355  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
356  return false;
357  }
358 
359  D4Group *grp = static_cast<D4Group*>(btp);
360 
361  // Need to set this to get the D4Attribute behavior in the type classes
362  // shared between DAP2 and DAP4. jhrg 4/18/13
363  grp->set_is_dap4(true);
364 
365  // link it up and change the current group
366  D4Group *parent = top_group();
367  if (!parent) {
368  dmr_fatal_error(this, "No Group on the Group stack.");
369  return false;
370  }
371 
372  grp->set_parent(parent);
373  parent->add_group_nocopy(grp);
374 
375  push_group(grp);
376  push_attributes(grp->attributes());
377  return true;
378 }
379 
386 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
387 {
388  if (is_not(name, "Attribute"))
389  return false;
390 
391  // These methods set the state to parser_error if a problem is found.
392  transfer_xml_attrs(attrs, nb_attributes);
393 
394  // add error
395  if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
396  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
397  return false;
398  }
399 
400  if (xml_attrs["type"].value == "Container") {
401  push_state(inside_attribute_container);
402 
403  DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
404  D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
405 
406  D4Attributes *tos = top_attributes();
407  // add return
408  if (!tos) {
409  delete child;
410  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
411  return false;
412  }
413 
414  tos->add_attribute_nocopy(child);
415  push_attributes(child->attributes());
416  }
417  else if (xml_attrs["type"].value == "OtherXML") {
418  push_state(inside_other_xml_attribute);
419 
420  dods_attr_name = xml_attrs["name"].value;
421  dods_attr_type = xml_attrs["type"].value;
422  }
423  else {
424  push_state(inside_attribute);
425 
426  dods_attr_name = xml_attrs["name"].value;
427  dods_attr_type = xml_attrs["type"].value;
428  }
429 
430  return true;
431 }
432 
438 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
439 {
440  if (is_not(name, "Enumeration"))
441  return false;
442 
443  transfer_xml_attrs(attrs, nb_attributes);
444 
445  if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
446  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
447  return false;
448  }
449 
450  Type t = get_type(xml_attrs["basetype"].value.c_str());
451  if (!is_integer_type(t)) {
452  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
453  xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
454  return false;
455  }
456 
457  // This getter allocates a new object if needed.
458  string enum_def_path = xml_attrs["name"].value;
459 #if 0
460  // Use FQNs when things are referenced, not when they are defined
461  if (xml_attrs["name"].value[0] != '/')
462  enum_def_path = top_group()->FQN() + enum_def_path;
463 #endif
464  enum_def()->set_name(enum_def_path);
465  enum_def()->set_type(t);
466 
467  return true;
468 }
469 
470 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
471 {
472  if (is_not(name, "EnumConst"))
473  return false;
474 
475  // These methods set the state to parser_error if a problem is found.
476  transfer_xml_attrs(attrs, nb_attributes);
477 
478  if (!(check_required_attribute("name") && check_required_attribute("value"))) {
479  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
480  return false;
481  }
482 
483  istringstream iss(xml_attrs["value"].value);
484  long long value = 0;
485  iss >> skipws >> value;
486  if (iss.fail() || iss.bad()) {
487  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
488  xml_attrs["value"].value.c_str());
489  }
490  else if (!enum_def()->is_valid_enum_value(value)) {
491  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
492  xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
493  }
494  else {
495  // unfortunate choice of names... args are 'label' and 'value'
496  enum_def()->add_value(xml_attrs["name"].value, value);
497  }
498 
499  return true;
500 }
501 
507 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
508 {
509  Type t = get_type(name);
510  if (is_simple_type(t)) {
511  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
512  return true;
513  }
514  else {
515  switch(t) {
516  case dods_structure_c:
517  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
518  return true;
519 
520  case dods_sequence_c:
521  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
522  return true;
523 
524  default:
525  return false;
526  }
527  }
528 }
529 
537 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
538 {
539  transfer_xml_attrs(attrs, nb_attributes);
540 
541  if (check_required_attribute("name")) {
542  BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
543  if (!btp) {
544  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
545  return;
546  }
547 
548  if ((t == dods_enum_c) && check_required_attribute("enum")) {
549  D4EnumDef *enum_def = 0;
550  string enum_path = xml_attrs["enum"].value;
551  if (enum_path[0] == '/')
552  enum_def = dmr()->root()->find_enum_def(enum_path);
553  else
554  enum_def = top_group()->find_enum_def(enum_path);
555 
556  if (!enum_def)
557  dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
558 
559  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
560  }
561 
562  btp->set_is_dap4(true); // see comment above
563  push_basetype(btp);
564 
565  push_attributes(btp->attributes());
566 
567  push_state(s);
568  }
569 }
570 
577 
583 {
584  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
585  parser->error_msg = "";
586  parser->char_data = "";
587 
588  // Set this in intern_helper so that the loop test for the parser_end
589  // state works for the first iteration. It seems like XMLParseChunk calls this
590  // function on it's first run. jhrg 9/16/13
591  // parser->push_state(parser_start);
592 
593  parser->push_attributes(parser->dmr()->root()->attributes());
594 
595  if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
596 }
597 
601 {
602  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
603 
604  if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
605 
606  if (parser->get_state() != parser_end)
607  D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
608 
609  // If we've found any sort of error, don't make the DMR; intern() will
610  // take care of the error.
611  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
612  return;
613 
614  if (!parser->empty_basetype() || parser->empty_group())
615  D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
616 
617  parser->pop_group(); // leave the stack 'clean'
618  parser->pop_attributes();
619 }
620 
621 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
622  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
623  const xmlChar **attributes)
624 {
625  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
626  const char *localname = (const char *) l;
627 
628  if (parser->debug()) cerr << "Start element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
629 
630  switch (parser->get_state()) {
631  case parser_start:
632  if (is_not(localname, "Dataset"))
633  D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
634 
635  parser->root_ns = URI ? (const char *) URI : "";
636  parser->transfer_xml_attrs(attributes, nb_attributes);
637 
638  if (parser->check_required_attribute(string("name")))
639  parser->dmr()->set_name(parser->xml_attrs["name"].value);
640 
641  if (parser->check_attribute("dapVersion"))
642  parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
643 
644  if (parser->check_attribute("dmrVersion"))
645  parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
646 
647  if (parser->check_attribute("base"))
648  parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
649 
650  if (!parser->root_ns.empty())
651  parser->dmr()->set_namespace(parser->root_ns);
652 
653  // Push the root Group on the stack
654  parser->push_group(parser->dmr()->root());
655 
656  parser->push_state(inside_dataset);
657 
658  break;
659 
660  // Both inside dataset and inside group can have the same stuff.
661  // The difference is that the Dataset holds the root group, which
662  // must be present; other groups are optional
663  case inside_dataset:
664  case inside_group:
665  if (parser->process_enum_def(localname, attributes, nb_attributes))
666  parser->push_state(inside_enum_def);
667  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
668  parser->push_state(inside_dim_def);
669  else if (parser->process_group(localname, attributes, nb_attributes))
670  parser->push_state(inside_group);
671  else if (parser->process_variable(localname, attributes, nb_attributes))
672  // This will push either inside_simple_type or inside_structure
673  // onto the parser state stack.
674  break;
675  else if (parser->process_attribute(localname, attributes, nb_attributes))
676  // This will push either inside_attribute, inside_attribute_container
677  // or inside_otherxml_attribute onto the parser state stack
678  break;
679  else
680  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
681  break;
682 
683  case inside_attribute_container:
684  if (parser->process_attribute(localname, attributes, nb_attributes))
685  break;
686  else
687  D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
688  break;
689 
690  case inside_attribute:
691  if (parser->process_attribute(localname, attributes, nb_attributes))
692  break;
693  else if (strcmp(localname, "Value") == 0)
694  parser->push_state(inside_attribute_value);
695  else
696  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
697  break;
698 
699  case inside_attribute_value:
700  // Attribute values are processed by the end element code.
701  break;
702 
703  case inside_other_xml_attribute:
704  parser->other_xml_depth++;
705 
706  // Accumulate the elements here
707  parser->other_xml.append("<");
708  if (prefix) {
709  parser->other_xml.append((const char *) prefix);
710  parser->other_xml.append(":");
711  }
712  parser->other_xml.append(localname);
713 
714  if (nb_namespaces != 0) {
715  parser->transfer_xml_ns(namespaces, nb_namespaces);
716 
717  for (map<string, string>::iterator i = parser->namespace_table.begin();
718  i != parser->namespace_table.end(); ++i) {
719  parser->other_xml.append(" xmlns");
720  if (!i->first.empty()) {
721  parser->other_xml.append(":");
722  parser->other_xml.append(i->first);
723  }
724  parser->other_xml.append("=\"");
725  parser->other_xml.append(i->second);
726  parser->other_xml.append("\"");
727  }
728  }
729 
730  if (nb_attributes != 0) {
731  parser->transfer_xml_attrs(attributes, nb_attributes);
732  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
733  parser->other_xml.append(" ");
734  if (!i->second.prefix.empty()) {
735  parser->other_xml.append(i->second.prefix);
736  parser->other_xml.append(":");
737  }
738  parser->other_xml.append(i->first);
739  parser->other_xml.append("=\"");
740  parser->other_xml.append(i->second.value);
741  parser->other_xml.append("\"");
742  }
743  }
744 
745  parser->other_xml.append(">");
746  break;
747 
748  case inside_enum_def:
749  // process an EnumConst element
750  if (parser->process_enum_const(localname, attributes, nb_attributes))
751  parser->push_state(inside_enum_const);
752  else
753  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
754  break;
755 
756  case inside_enum_const:
757  // No content; nothing to do
758  break;
759 
760  case inside_dim_def:
761  // No content; nothing to do
762  break;
763 #if 0
764  case inside_dimension:
765  // No content.
766  break;
767 #endif
768  case inside_dim:
769  // No content.
770  break;
771 
772  case inside_map:
773  // No content.
774  break;
775 
776  case inside_simple_type:
777  if (parser->process_attribute(localname, attributes, nb_attributes))
778  break;
779  else if (parser->process_dimension(localname, attributes, nb_attributes))
780  parser->push_state(inside_dim);
781  else if (parser->process_map(localname, attributes, nb_attributes))
782  parser->push_state(inside_map);
783  else
784  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
785  break;
786 
787  case inside_constructor:
788  if (parser->process_variable(localname, attributes, nb_attributes))
789  // This will push either inside_simple_type or inside_structure
790  // onto the parser state stack.
791  break;
792  else if (parser->process_attribute(localname, attributes, nb_attributes))
793  break;
794  else if (parser->process_dimension(localname, attributes, nb_attributes))
795  parser->push_state(inside_dim);
796  else if (parser->process_map(localname, attributes, nb_attributes))
797  parser->push_state(inside_map);
798  else
799  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
800  break;
801 
802  case parser_unknown:
803  // FIXME?
804  // *** Never used? If so remove/error
805  parser->push_state(parser_unknown);
806  break;
807 
808  case parser_error:
809  case parser_fatal_error:
810  break;
811 
812  case parser_end:
813  // FIXME Error?
814  break;
815  }
816 
817  if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
818 }
819 
820 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
821 {
822  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
823  const char *localname = (const char *) l;
824 
825  if (parser->debug())
826  cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
827 
828  switch (parser->get_state()) {
829  case parser_start:
830  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
831  break;
832 
833  case inside_dataset:
834  if (is_not(localname, "Dataset"))
835  D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
836 
837  parser->pop_state();
838  if (parser->get_state() != parser_start)
839  dmr_fatal_error(parser, "Unexpected state, expected start state.");
840  else {
841  parser->pop_state();
842  parser->push_state(parser_end);
843  }
844  break;
845 
846  case inside_group: {
847  if (is_not(localname, "Group"))
848  D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
849 
850  if (!parser->empty_basetype() || parser->empty_group())
851  D4ParserSax2::dmr_error(parser,
852  "The document did not contain a valid root Group or contained unbalanced tags.");
853 
854  parser->pop_group();
855  parser->pop_state();
856  break;
857  }
858 
859  case inside_attribute_container:
860  if (is_not(localname, "Attribute"))
861  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
862 
863  parser->pop_state();
864  parser->pop_attributes();
865  break;
866 
867  case inside_attribute:
868  if (is_not(localname, "Attribute"))
869  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
870 
871  parser->pop_state();
872  break;
873 
874  case inside_attribute_value: {
875  if (is_not(localname, "Value"))
876  D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
877 
878  parser->pop_state();
879 
880  // The old code added more values using the name and type as
881  // indexes to find the correct attribute. Use get() for that
882  // now. Or fix this code to keep a pointer to the to attribute...
883  D4Attributes *attrs = parser->top_attributes();
884  D4Attribute *attr = attrs->get(parser->dods_attr_name);
885  if (!attr) {
886  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
887  attrs->add_attribute_nocopy(attr);
888  }
889  attr->add_value(parser->char_data);
890 
891  parser->char_data = ""; // Null this after use.
892  break;
893  }
894 
895  case inside_other_xml_attribute: {
896  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
897  parser->pop_state();
898 
899  // The old code added more values using the name and type as
900  // indexes to find the correct attribute. Use get() for that
901  // now. Or fix this code to keep a pointer to the to attribute...
902  D4Attributes *attrs = parser->top_attributes();
903  D4Attribute *attr = attrs->get(parser->dods_attr_name);
904  if (!attr) {
905  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
906  attrs->add_attribute_nocopy(attr);
907  }
908  attr->add_value(parser->other_xml);
909 
910  parser->other_xml = ""; // Null this after use.
911  }
912  else {
913  if (parser->other_xml_depth == 0) {
914  D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
915  localname);
916  break;
917  }
918  parser->other_xml_depth--;
919 
920  parser->other_xml.append("</");
921  if (prefix) {
922  parser->other_xml.append((const char *) prefix);
923  parser->other_xml.append(":");
924  }
925  parser->other_xml.append(localname);
926  parser->other_xml.append(">");
927  }
928  break;
929  }
930 
931  case inside_enum_def:
932  if (is_not(localname, "Enumeration"))
933  D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
934  if (!parser->top_group())
936  "Expected a Group to be the current item, while finishing up an Enumeration.");
937  else {
938  // copy the pointer; not a deep copy
939  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
940  // Set the enum_def to null; next call to enum_def() will
941  // allocate a new object
942  parser->clear_enum_def();
943  parser->pop_state();
944  }
945  break;
946 
947  case inside_enum_const:
948  if (is_not(localname, "EnumConst"))
949  D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
950 
951  parser->pop_state();
952  break;
953 
954  case inside_dim_def: {
955  if (is_not(localname, "Dimension"))
956  D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
957 
958  if (!parser->top_group())
959  D4ParserSax2::dmr_error(parser,
960  "Expected a Group to be the current item, while finishing up an Dimension.");
961 
962  // FIXME Use the Group on the top of the group stack
963  // copy the pointer; not a deep copy
964  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
965  //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
966  // Set the dim_def to null; next call to dim_def() will
967  // allocate a new object. Calling 'clear' is important because
968  // the cleanup method will free dim_def if it's not null and
969  // we just copied the pointer in the add_dim_nocopy() call
970  // above.
971  parser->clear_dim_def();
972  parser->pop_state();
973  break;
974  }
975 
976  case inside_simple_type:
977  if (is_simple_type(get_type(localname))) {
978  BaseType *btp = parser->top_basetype();
979  parser->pop_basetype();
980  parser->pop_attributes();
981 
982  BaseType *parent = 0;
983  if (!parser->empty_basetype())
984  parent = parser->top_basetype();
985  else if (!parser->empty_group())
986  parent = parser->top_group();
987  else {
988  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
989  localname);
990  delete btp;
991  parser->pop_state();
992  break;
993  }
994 
995  if (parent->type() == dods_array_c)
996  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
997  else
998  parent->add_var_nocopy(btp);
999  }
1000  else
1001  D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1002 
1003  parser->pop_state();
1004  break;
1005 
1006  case inside_dim:
1007  if (is_not(localname, "Dim"))
1008  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1009 
1010  parser->pop_state();
1011  break;
1012 
1013  case inside_map:
1014  if (is_not(localname, "Map"))
1015  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1016 
1017  parser->pop_state();
1018  break;
1019 
1020  case inside_constructor: {
1021  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1022  D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1023  return;
1024  }
1025 
1026  BaseType *btp = parser->top_basetype();
1027  parser->pop_basetype();
1028  parser->pop_attributes();
1029 
1030  BaseType *parent = 0;
1031  if (!parser->empty_basetype())
1032  parent = parser->top_basetype();
1033  else if (!parser->empty_group())
1034  parent = parser->top_group();
1035  else {
1036  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1037  localname);
1038  delete btp;
1039  parser->pop_state();
1040  break;
1041  }
1042 
1043  // TODO Why doesn't this code mirror the simple_var case and test
1044  // for the parent being an array? jhrg 10/13/13
1045  parent->add_var_nocopy(btp);
1046  parser->pop_state();
1047  break;
1048  }
1049 
1050  case parser_unknown:
1051  parser->pop_state();
1052  break;
1053 
1054  case parser_error:
1055  case parser_fatal_error:
1056  break;
1057 
1058  case parser_end:
1059  // FIXME Error?
1060  break;
1061  }
1062 
1063  if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1064 }
1065 
1069 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1070 {
1071  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1072 
1073  switch (parser->get_state()) {
1074  case inside_attribute_value:
1075  parser->char_data.append((const char *) (ch), len);
1076  DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1077  break;
1078 
1079  case inside_other_xml_attribute:
1080  parser->other_xml.append((const char *) (ch), len);
1081  DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1082  break;
1083 
1084  default:
1085  break;
1086  }
1087 }
1088 
1093 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1094 {
1095  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1096 
1097  switch (parser->get_state()) {
1098  case inside_other_xml_attribute:
1099  parser->other_xml.append((const char *) (ch), len);
1100  break;
1101 
1102  default:
1103  break;
1104  }
1105 }
1106 
1112 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1113 {
1114  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1115 
1116  switch (parser->get_state()) {
1117  case inside_other_xml_attribute:
1118  parser->other_xml.append((const char *) (value), len);
1119  break;
1120 
1121  case parser_unknown:
1122  break;
1123 
1124  default:
1125  D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1126 
1127  break;
1128  }
1129 }
1130 
1135 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1136 {
1137  return xmlGetPredefinedEntity(name);
1138 }
1139 
1150 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1151 {
1152  va_list args;
1153  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1154 
1155  parser->push_state(parser_fatal_error);
1156 
1157  va_start(args, msg);
1158  char str[1024];
1159  vsnprintf(str, 1024, msg, args);
1160  va_end(args);
1161 
1162  int line = xmlSAX2GetLineNumber(parser->context);
1163 
1164  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1165  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1166 }
1167 
1168 void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1169 {
1170  va_list args;
1171  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1172 
1173  parser->push_state(parser_error);
1174 
1175  va_start(args, msg);
1176  char str[1024];
1177  vsnprintf(str, 1024, msg, args);
1178  va_end(args);
1179 
1180  int line = xmlSAX2GetLineNumber(parser->context);
1181 
1182  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1183  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1184 }
1186 
1190 void D4ParserSax2::cleanup_parse()
1191 {
1192  bool wellFormed = context->wellFormed;
1193  bool valid = context->valid;
1194 
1195  context->sax = NULL;
1196  xmlFreeParserCtxt(context);
1197 
1198  delete d_enum_def;
1199  d_enum_def = 0;
1200 
1201  delete d_dim_def;
1202  d_dim_def = 0;
1203 
1204  // If there's an error, there may still be items on the stack at the
1205  // end of the parse.
1206  while (!btp_stack.empty()) {
1207  delete top_basetype();
1208  pop_basetype();
1209  }
1210 
1211  if (!wellFormed)
1212  throw Error("The DMR was not well formed. " + error_msg);
1213  else if (!valid)
1214  throw Error("The DMR was not valid." + error_msg);
1215  else if (get_state() == parser_error)
1216  throw Error(error_msg);
1217  else if (get_state() == parser_fatal_error)
1218  throw InternalErr(error_msg);
1219 }
1220 
1235 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1236 {
1237  d_debug = debug;
1238 
1239  // Code example from libxml2 docs re: read from a stream.
1240 
1241  if (!f.good())
1242  throw Error("Input stream not open or read error");
1243  if (!dest_dmr)
1244  throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1245 
1246  d_dmr = dest_dmr; // dump values here
1247 
1248  const int size = 1024;
1249  char chars[size];
1250  int line = 1;
1251 
1252  f.getline(chars, size);
1253  int res = f.gcount();
1254  if (res == 0) throw Error("No input found while parsing the DMR.");
1255 
1256  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1257 
1258  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream");
1259  context->validate = true;
1260  push_state(parser_start);
1261 
1262  f.getline(chars, size);
1263  while ((f.gcount() > 0) && (get_state() != parser_end)) {
1264  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1265  xmlParseChunk(context, chars, f.gcount() - 1, 0);
1266  f.getline(chars, size);
1267  }
1268 
1269  // This call ends the parse.
1270  xmlParseChunk(context, chars, 0, 1/*terminate*/);
1271 
1272  // This checks that the state on the parser stack is parser_end and throws
1273  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1274  cleanup_parse();
1275 }
1276 
1287 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1288 {
1289  intern(document.c_str(), document.length(), dest_dmr, debug);
1290 }
1291 
1302 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1303 {
1304  if (!(size > 0)) return;
1305 
1306  d_debug = debug;
1307 
1308  // Code example from libxml2 docs re: read from a stream.
1309 
1310  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1311  d_dmr = dest_dmr; // dump values in dest_dmr
1312 
1313  push_state(parser_start);
1314  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream");
1315  context->validate = true;
1316  //push_state(parser_start);
1317  //xmlParseChunk(context, buffer, size, 0);
1318 
1319  // This call ends the parse.
1320  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1321 
1322  // This checks that the state on the parser stack is parser_end and throws
1323  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1324  cleanup_parse();
1325 }
1326 
1327 } // namespace libdap
static void dmr_end_document(void *parser)
void set_namespace(const string &ns)
Set the namespace for this DDS/DDX object/response.
Definition: DMR.h:158
bool is_valid_enum_value(long long value)
Definition: D4EnumDefs.cc:43
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition: D4Group.cc:269
D4Group * root()
Definition: DMR.cc:242
static void dmr_start_document(void *parser)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition: util.cc:813
void set_request_xml_base(const string &xb)
Definition: DMR.h:152
Type
Identifies the data type.
Definition: Type.h:94
virtual Type type() const
Returns the type of the class instance.
Definition: BaseType.cc:310
A class for software fault reporting.
Definition: InternalErr.h:64
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
void add_dim_nocopy(D4Dimension *dim)
Definition: D4Dimensions.h:160
virtual D4BaseTypeFactory * factory()
Definition: DMR.h:125
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition: util.cc:690
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:771
virtual D4Attributes * attributes()
Definition: BaseType.cc:544
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
void set_dap_version(const string &version_string)
Definition: DMR.cc:254
D4Attribute * get(const string &fqn)
A class for error processing.
Definition: Error.h:90
virtual std::string FQN() const
Definition: D4Group.cc:177
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition: D4Group.h:95
static void dmr_fatal_error(void *parser, const char *msg,...)
bool is_integer_type(Type t)
Definition: util.cc:902
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition: D4Group.h:80
virtual BaseType * NewVariable(Type t, const string &name) const