Package translate :: Package misc :: Module ourdom
[hide private]
[frames | no frames]

Source Code for Module translate.misc.ourdom

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2007 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """module that provides modified DOM functionality for our needs 
 24   
 25  Note that users of ourdom should ensure that no code might still use classes 
 26  directly from minidom, like minidom.Element, minidom.Document or methods such 
 27  as minidom.parseString, since the functionality provided here will not be in 
 28  those objects. 
 29  """ 
 30   
 31  from xml.dom import minidom 
 32  from xml.dom import expatbuilder 
 33   
 34  # helper functions we use to do xml the way we want, used by modified classes below 
 35   
 36   
def writexml_helper(self, writer, indent="", addindent="", newl=""):
    """A replacement for writexml that formats it like typical XML files.

    Nodes are indented, but an element that contains non-blank text is
    written on a single line together with all of its children, because
    whitespace inside it can be significant.

    :param self: the element node to write
    :param writer: object whose ``write`` method receives the output
    :param indent: current indentation
    :param addindent: indentation to add to higher levels
    :param newl: newline string
    """
    writer.write(indent + "<" + self.tagName)

    attrs = self._get_attributes()
    # sorted() accepts whatever keys() returns (a list or a view object);
    # calling .sort() on the result only works when it is a list.
    for a_name in sorted(attrs.keys()):
        writer.write(" %s=\"" % a_name)
        minidom._write_data(writer, attrs[a_name].value)
        writer.write("\"")

    if not self.childNodes:
        writer.write("/>%s" % (newl))
        return

    # We need to write text nodes without newline and indentation, so
    # we handle them differently. Note that we here assume that "empty"
    # text nodes can be done away with (see the strip()). Note also that
    # nested tags in a text node (like ph tags in xliff) should also not
    # have newlines and indentation or an extra newline, since that will
    # alter the text node.
    haveText = any(childNode.nodeType == self.TEXT_NODE and childNode.data.strip()
                   for childNode in self.childNodes)
    if haveText:
        writer.write(">")
        for node in self.childNodes:
            node.writexml(writer, "", "", "")
        writer.write("</%s>%s" % (self.tagName, newl))
    else:
        # This is the normal case that we do with pretty layout;
        # (whitespace-only) text children are dropped here.
        writer.write(">%s" % (newl))
        for node in self.childNodes:
            if node.nodeType != self.TEXT_NODE:
                node.writexml(writer, indent + addindent, addindent, newl)
        writer.write("%s</%s>%s" % (indent, self.tagName, newl))
79 80
def getElementsByTagName_helper(parent, name, dummy=None):
    """Iterator flavour of getElementsByTagName.

    Walks the direct children of ``parent``: a child element is yielded when
    its tag equals ``name`` (or ``name`` is "*"), and for every child that has
    children of its own, whatever that child's ``getElementsByTagName(name)``
    produces is yielded as well.

    This is not interchangeable with the list-returning getElementsByTagName,
    which is why the classes expose it as yieldElementsByTagName instead.
    ``dummy`` is accepted but never used.
    """
    element_type = minidom.Node.ELEMENT_NODE
    match_any = name == "*"
    for child in parent.childNodes:
        if child.nodeType == element_type:
            if match_any or child.tagName == name:
                yield child
        if child.hasChildNodes():
            for descendant in child.getElementsByTagName(name):
                yield descendant
94 95
def searchElementsByTagName_helper(parent, name, onlysearch):
    """Yield matching elements, descending only into tags listed in onlysearch.

    Each direct child element of ``parent`` whose tag equals ``name`` (or any
    child element when ``name`` is "*") is yielded. The search continues
    below a child element only if its tag occurs in ``onlysearch``, using that
    child's own ``searchElementsByTagName`` method.
    """
    for child in parent.childNodes:
        if child.nodeType != minidom.Node.ELEMENT_NODE:
            continue
        if name == "*" or child.tagName == name:
            yield child
        if child.tagName in onlysearch:
            for found in child.searchElementsByTagName(name, onlysearch):
                yield found
105 106
def getFirstElementByTagName(node, name):
    """Return the first element that ``node.yieldElementsByTagName(name)``
    produces, or None when it produces nothing.

    :param node: object providing a ``yieldElementsByTagName`` method
    :param name: tag name passed on to that method
    """
    results = node.yieldElementsByTagName(name)
    # The next() builtin works on any iterator, whereas the .next() method
    # only exists on some of them; iter() additionally lets a plain list
    # of results through.
    return next(iter(results), None)
119 120
def getnodetext(node):
    """Concatenate the data of the text nodes directly below ``node``.

    Only direct children whose nodeType is TEXT_NODE contribute; text inside
    nested elements is not included. Returns "" when ``node`` is None.
    """
    if node is None:
        return ""
    pieces = []
    for child in node.childNodes:
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return "".join(pieces)
126 127 # various modifications to minidom classes to add functionality we like 128 129
class DOMImplementation(minidom.DOMImplementation):
    """A minidom DOMImplementation that builds this module's Document class."""

    def _create_document(self):
        """Return a freshly constructed Document (the subclass defined in
        this module)."""
        document = Document()
        return document
134 135
class Element(minidom.Element):
    """minidom.Element with iterator-based tag searches and the module's own
    XML layout for writexml."""

    def yieldElementsByTagName(self, name):
        """Return an iterator over elements below this one with the given tag.

        This is the iterator counterpart of getElementsByTagName; it
        delegates to getElementsByTagName_helper.
        """
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return an iterator over matching elements, descending only into
        tags listed in ``onlysearch`` (see searchElementsByTagName_helper)."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element to ``writer`` using writexml_helper.

        The formatting arguments default to empty strings, the same defaults
        writexml_helper declares, so ``writexml(writer)`` is valid.
        """
        return writexml_helper(self, writer, indent, addindent, newl)
146 147
class Document(minidom.Document):
    """minidom.Document that creates this module's Element objects and offers
    the iterator-based tag searches."""

    # instance of this module's DOMImplementation subclass
    implementation = DOMImplementation()

    def yieldElementsByTagName(self, name):
        """Return an iterator over elements in the document with the given
        tag, by delegating to getElementsByTagName_helper."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return an iterator over matching elements, descending only into
        tags listed in ``onlysearch`` (see searchElementsByTagName_helper)."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def createElement(self, tagName):
        """Create an Element (this module's subclass) owned by this document."""
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create a namespaced Element (this module's subclass) owned by this
        document; the prefix is taken from ``qualifiedName``."""
        # _nssplit lives in minidom and is not imported into this module, so
        # it has to be reached through the minidom module object.
        prefix = minidom._nssplit(qualifiedName)[0]
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e
# Module-level DOMImplementation instance; ExpatBuilderNS.reset() creates its
# documents through it.
theDOMImplementation = DOMImplementation()

# an ExpatBuilder that allows us to use the above modifications
class ExpatBuilderNS(expatbuilder.ExpatBuilderNS):
    """ExpatBuilderNS variant that builds the tree out of this module's
    Document and Element classes."""

    def reset(self):
        """Free all data structures used during DOM construction."""
        # The document comes from theDOMImplementation (this module's
        # DOMImplementation instance) rather than from minidom's own.
        self.document = theDOMImplementation.createDocument(
            expatbuilder.EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False
        self._initNamespaces()

    def start_element_handler(self, name, attributes):
        """Create an Element for a start tag, append it to the current node
        and attach its namespace declarations and attributes.

        ``attributes`` is read as a flat sequence: a name at each even index
        followed by its value at the next index.
        """
        # all we want to do is construct our own Element instead of minidom.Element
        # unfortunately the only way to do this is to copy this whole function from expatbuilder.py
        # NOTE(review): this relies on private expatbuilder/minidom helpers and
        # stores attribute values by writing into the nodes' __dict__ directly;
        # confirm it still matches the minidom implementation in use.
        if ' ' in name:
            # a name containing a space is handed to _parse_ns_name to be split
            uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
        else:
            uri = expatbuilder.EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = expatbuilder.EMPTY_PREFIX
        node = Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        expatbuilder._append_child(self.curNode, node)
        self.curNode = node

        if self._ns_ordered_prefixes:
            # one xmlns / xmlns:prefix attribute per pending (prefix, uri) pair
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
                                     expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
                                     "xmlns", expatbuilder.EMPTY_PREFIX)
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = uri
                d = a.__dict__
                d['value'] = d['nodeValue'] = uri
                d['ownerDocument'] = self.document
                expatbuilder._set_attribute_node(node, a)
            # the pending list is emptied in place once it has been applied
            del self._ns_ordered_prefixes[:]

        if attributes:
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # step through the flat list two items at a time: name, then value
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
                                     aname, expatbuilder.EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = value
                d = a.__dict__
                d['ownerDocument'] = self.document
                d['value'] = d['nodeValue'] = value
                d['ownerElement'] = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.

        def end_element_handler(self, name):
            """Check that the closing tag matches the current node, then make
            its parent the current node and finish the element."""
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)
259 260 # parser methods that use our modified xml classes 261 262
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    :param file: a filename, which is opened in binary mode and closed again
        after parsing, or an already open file object, which is left open
    :param parser: not used by this implementation
    :param bufsize: not used by this implementation
    :return: whatever ``ExpatBuilderNS.parseFile`` returns for the input
    """
    try:
        filename_types = (basestring,)
    except NameError:
        # basestring is not a builtin on every Python version; fall back to
        # the concrete string types that open() accepts as a filename
        filename_types = (str, bytes)

    builder = ExpatBuilderNS()
    if isinstance(file, filename_types):
        # the with block closes the file even if parsing raises
        with open(file, 'rb') as fp:
            return builder.parseFile(fp)
    return builder.parseFile(file)
275 276
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    A fresh ExpatBuilderNS does the work; ``parser`` is accepted but not used.
    """
    return ExpatBuilderNS().parseString(string)
281