Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.5" 
  61  __versionTime__ = "13 Jun 2016 19:59 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import traceback 
  74  from datetime import datetime 
  75   
  76  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  77   
  78  __all__ = [ 
  79  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  80  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  81  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  82  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  83  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  84  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  85  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  86  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  87  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  88  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  89  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
  90  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  91  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  92  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  93  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  94  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  95  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  96  'tokenMap', 'pyparsing_common', 
  97  ] 
  98   
  99  system_version = tuple(sys.version_info)[:3] 
 100  PY_3 = system_version[0] == 3 
 101  if PY_3: 
 102      _MAX_INT = sys.maxsize 
 103      basestring = str 
 104      unichr = chr 
 105      _ustr = str 
 106   
 107      # build list of single arg builtins, that can be used as parse actions 
 108      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 109   
 110  else: 
 111      _MAX_INT = sys.maxint 
 112      range = xrange 
113 114 - def _ustr(obj):
115 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 116 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 117 then < returns the unicode object | encodes it with the default encoding | ... >. 118 """ 119 if isinstance(obj,unicode): 120 return obj 121 122 try: 123 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 124 # it won't break any existing code. 125 return str(obj) 126 127 except UnicodeEncodeError: 128 # Else encode it 129 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 130 xmlcharref = Regex('&#\d+;') 131 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 132 return xmlcharref.transformString(ret)
133 134 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 135 singleArgBuiltins = [] 136 import __builtin__ 137 for fname in "sum len sorted reversed list tuple set any all min max".split(): 138 try: 139 singleArgBuiltins.append(getattr(__builtin__,fname)) 140 except AttributeError: 141 continue 142 143 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the characters C{& > < " '} replaced by XML entities."""
    # the ampersand pair must stay first, otherwise the '&' introduced by
    # the later replacements would be escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw, entity in replacements:
        escaped = escaped.replace(raw, entity)
    return escaped
154
class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""
# character-set constants
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
# the backslash character, built from its code point
_bslash = chr(92)
# every character of string.printable that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing."""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        # called with a single string: that string is the message and
        # there is no input text
        if msg is None:
            self.msg, self.pstr = pstr, ""
        else:
            self.msg, self.pstr = msg, pstr
        self.loc = loc
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute the location attributes on demand; supported names are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the stripped input line of the exception, with C{markerString}
           inserted at the exception column (no insertion if it is empty).
        """
        text = self.line
        at = self.column - 1
        if markerString:
            text = text[:at] + markerString + text[at:]
        return text.strip()

    def __dir__(self):
        computed = "lineno col line".split()
        return computed + dir(type(self))
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
220
class ParseFatalException(ParseBaseException):
    """Exception that user code can raise when inconsistent parse content
       is found; it stops all parsing immediately."""
225
class ParseSyntaxException(ParseFatalException):
    """Same as C{L{ParseFatalException}}, but raised internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) signals that parsing must stop
       immediately because an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # take over text, location, message and element of the wrapped exception
        fields = (pe.pstr, pe.loc, pe.msg, pe.parserElement)
        super(ParseSyntaxException, self).__init__(*fields)
233
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the trace is stored as given and exposed as C{parseElementTrace}
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple: a bare tuple on the right of '%' would be
        # unpacked as the format arguments (TypeError for 2+ items, and a
        # 1-tuple would lose its parentheses)
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
254
class _ParseResultsWithOffset(object):
    """Holds two values as the 2-tuple C{tup}; the second one can be replaced."""
    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self, i):
        """Replace the second element of the pair with C{i}."""
        first = self.tup[0]
        self.tup = (first, i)
264
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # an existing instance is handed back as-is; the __doinit flag tells
        # __init__ whether the internal containers still have to be created
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # names registered here return all their matches from __getitem__
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # only store a named entry when there is something to store
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Return a token (int or slice index) or a named result (any other key).

           For a name, the last stored value is returned, unless the name was
           registered as accumulating, in which case a C{ParseResults} of all
           stored values is returned.  Unknown names raise C{KeyError}.
        """
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # nested results keep a weak reference back to their container
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def _iterkeys( self ):
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def _itervalues( self ):
        return (self[k] for k in self._iterkeys())

    def _iteritems( self ):
        return ((k, self[k]) for k in self._iterkeys())

    if PY_3:
        # Python 3.x only: keys/values/items return iterators over the
        # named result keys, values and key-value tuples
        keys = _iterkeys
        values = _itervalues
        items = _iteritems

    else:
        # Python 2.x only: iterator forms of keys/values/items
        iterkeys = _iterkeys
        itervalues = _itervalues
        iteritems = _iteritems

        def keys( self ):
            """Returns all named result keys as a list (Python 2.x definition)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values as a list (Python 2.x definition)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values as a list of tuples (Python 2.x definition)."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop().  The only keyword argument
           accepted is C{default}; any other raises C{TypeError}."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """Attribute access to named results; an unknown name yields C{""}."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # named results of other are re-registered here with their
            # positions shifted past the tokens already held
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           C{ParseResults} are converted recursively, other tokens are returned as-is."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as a nested dictionary."""
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            # nested results become dicts when they have names, lists otherwise
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored, if any
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so take the
               # single entry through an iterator instead
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # the weak parent reference is resolved to the parent object (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        return (dir(type(self)) + list(self.keys()))
# MutableMapping lives in collections.abc since Python 3.3, and the alias in
# the collections namespace was removed in Python 3.10; fall back to the old
# location only where collections.abc does not exist (Python 2)
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns the column of position C{loc} within C{strg}, with newlines as
   line separators.  The first column is number 1; a position that holds a
   newline character is reported as column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when no newline precedes loc, which makes the
    # subtraction below 1-based on the first line as well
    preceding_newline = strg.rfind("\n", 0, loc)
    return loc - preceding_newline
739
def lineno(loc,strg):
    """Returns the line number of position C{loc} within C{strg}, with newlines
   as line separators.  The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
751
def line( loc, strg ):
    """Returns the line of text that contains position C{loc} within C{strg},
       with newlines as line separators (the newline itself is not included).
       """
    # rfind yields -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
761
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression, the location and its (line,column) position."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
764
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression together with the list form of its tokens."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)
767
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the given exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
770
def nullDebugAction(*args):
    """Debug action that accepts any arguments and does nothing, to suppress
       debugging output during parsing."""
    return None
774 775 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 776 #~ 'decorator to trim function calls to match the arity of the target' 777 #~ def _trim_arity(func, maxargs=3): 778 #~ if func in singleArgBuiltins: 779 #~ return lambda s,l,t: func(t) 780 #~ limit = 0 781 #~ foundArity = False 782 #~ def wrapper(*args): 783 #~ nonlocal limit,foundArity 784 #~ while 1: 785 #~ try: 786 #~ ret = func(*args[limit:]) 787 #~ foundArity = True 788 #~ return ret 789 #~ except TypeError: 790 #~ if limit == maxargs or foundArity: 791 #~ raise 792 #~ limit += 1 793 #~ continue 794 #~ return wrapper 795 796 # this version is Python 2.x-3.x cross-compatible 797 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Return a wrapper that calls C{func} with only as many trailing
       arguments as it accepts.

       A member of C{singleArgBuiltins} is wrapped in a lambda that takes
       C{(s, l, t)} and passes only C{t}.  Otherwise the wrapper first calls
       C{func} with all of its arguments; when that call raises C{TypeError}
       and the innermost traceback frame is the call line itself, one more
       leading argument is dropped and the call is retried, as long as the
       number already dropped is C{<= maxargs}.  Once a call has succeeded,
       later C{TypeError}s are re-raised unchanged.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists, so that wrapper() can update them without 'nonlocal'
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack():
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack()[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb):
            frames = traceback.extract_tb(tb)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack()[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        del tb

                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
862 -class ParserElement(object):
863 """Abstract base level parser element class.""" 864 DEFAULT_WHITE_CHARS = " \n\t\r" 865 verbose_stacktrace = False 866 867 @staticmethod
868 - def setDefaultWhitespaceChars( chars ):
869 """Overrides the default whitespace chars 870 """ 871 ParserElement.DEFAULT_WHITE_CHARS = chars
872 873 @staticmethod
874 - def inlineLiteralsUsing(cls):
875 """ 876 Set class to be used for inclusion of string literals into a parser. 877 """ 878 ParserElement._literalStringClass = cls
879
880 - def __init__( self, savelist=False ):
881 self.parseAction = list() 882 self.failAction = None 883 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 884 self.strRepr = None 885 self.resultsName = None 886 self.saveAsList = savelist 887 self.skipWhitespace = True 888 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 889 self.copyDefaultWhiteChars = True 890 self.mayReturnEmpty = False # used when checking for left-recursion 891 self.keepTabs = False 892 self.ignoreExprs = list() 893 self.debug = False 894 self.streamlined = False 895 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 896 self.errmsg = "" 897 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 898 self.debugActions = ( None, None, None ) #custom debug actions 899 self.re = None 900 self.callPreparse = True # used to avoid redundant calls to preParse 901 self.callDuringTry = False
902
def copy( self ):
    """Return a copy of this C{ParserElement}.

    The copy is made with C{copy.copy} and then given its own C{parseAction}
    and C{ignoreExprs} lists, so different parse actions can be attached to
    copies of the same pattern.  When C{copyDefaultWhiteChars} is set, the
    copy's C{whiteChars} is taken from C{ParserElement.DEFAULT_WHITE_CHARS}.
    """
    duplicate = copy.copy( self )
    duplicate.parseAction = list( self.parseAction )
    duplicate.ignoreExprs = list( self.ignoreExprs )
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
912
def setName( self, name ):
    """Give this expression a name, for use in debugging.

    Also sets C{errmsg} to C{"Expected " + name} and, if the element has an
    C{exception} attribute, copies that message onto it.  Returns C{self}.
    """
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if hasattr( self, "exception" ):
        self.exception.msg = self.errmsg
    return self
920
def setResultsName( self, name, listAllMatches=False ):
    """Define the name under which matching tokens are stored in the
       returned parse results.

       NOTE: this returns a *copy* of the original C{ParserElement} object,
       so that a basic element, such as an integer, can be defined once and
       referenced in several places under different names.

       A trailing C{'*'} on C{name} is stripped and has the same effect as
       passing C{listAllMatches=True}.

       Results names can also be set with the abbreviated syntax
       C{expr("name")} in place of C{expr.setResultsName("name")} -
       see L{I{__call__}<__call__>}.
    """
    named = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name, accumulate = name[:-1], True
    named.resultsName = name
    named.modalResults = not accumulate
    return named
939
def setBreak(self,breakFlag = True):
    """Invoke the Python pdb debugger when this element is about to be parsed.

       With C{breakFlag} True, C{self._parse} is replaced by a wrapper that
       calls C{pdb.set_trace()} before delegating to the previous parse method.
       With C{breakFlag} False, a previously installed wrapper is removed
       again.  Returns C{self}.
    """
    if not breakFlag:
        # undo only if the current parse method is one of our wrappers
        if hasattr( self._parse, "_originalParseMethod" ):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can restore it
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
957
def setParseAction( self, *fns, **kwargs ):
    """Define the action(s) to perform when this element matches successfully.

       A parse action C{fn} is a callable taking 0-3 arguments, called as
       C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
       If a function modifies the tokens, it can return them, and the returned
       value replaces the original token list.  Otherwise it need not return
       anything.

       Any parse actions set previously are replaced.

       Optional keyword arguments:
        - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
    """
    self.parseAction = [ _trim_arity( fn ) for fn in fns ]
    self.callDuringTry = kwargs.get( "callDuringTry", False )
    return self
981
def addParseAction( self, *fns, **kwargs ):
    """Append parse action(s) to this expression's list of parse actions.
       See L{I{setParseAction}<setParseAction>}.

       C{callDuringTry} is switched on if the keyword argument of that name is
       true; it is never switched off here.
    """
    wrapped = [ _trim_arity( fn ) for fn in fns ]
    self.parseAction += wrapped
    self.callDuringTry = self.callDuringTry or kwargs.get( "callDuringTry", False )
    return self
987
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
       L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
       functions passed to C{addCondition} need to return boolean success/fail of the condition.

       Optional keyword arguments:
        - message = define a custom message to be used in the raised exception
        - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
        - callDuringTry = if True, C{callDuringTry} is switched on for this expression

       Returns C{self}.
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def makeConditionAction(fn):
        # Build the parse action in its own scope so each one keeps its own
        # predicate.  Defining it directly in the loop below would make every
        # action look up the loop variable at call time, i.e. all of them
        # would test only the last condition passed in.
        # The arity wrapper is also created once here, not on every parse.
        predicate = _trim_arity(fn)
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(makeConditionAction(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Define the action to perform if parsing fails at this expression.

       The fail action C{fn} is a callable taking the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}.
    """
    setattr( self, "failAction", fn )
    return self
1019
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every match of the expressions in C{ignoreExprs}.

    The list of ignorable expressions is re-scanned until a complete pass
    matches nothing; the resulting location is returned.
    """
    while True:
        advanced = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _unused = ignorable._parse( instring, loc )
                    advanced = True
            except ParseException:
                pass
        if not advanced:
            return loc
1032
def preParse( self, instring, loc ):
    """Return the location at which matching should start.

    Ignorable expressions are skipped first (if any are defined), then, if
    C{skipWhitespace} is set, any run of characters found in C{whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    blanks = self.whiteChars
    end = len( instring )
    while loc < end and instring[loc] in blanks:
        loc += 1
    return loc
1044
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: match nothing, returning C{loc} unchanged together
    with an empty token list."""
    tokens = []
    return loc, tokens
1047
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: return C{tokenlist} unchanged."""
    return tokenlist
1050 1051 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Parse this element at C{loc} without consulting the packrat cache.

    Steps: optionally call C{preParse}, call C{parseImpl}, pass the tokens
    through C{postParse}, wrap them in a C{ParseResults}, then run the parse
    actions when C{doActions} or C{callDuringTry} is true.

    Returns a C{(loc, ParseResults)} tuple.  An C{IndexError} raised by
    C{parseImpl} is converted to a C{ParseException} located at the end of
    C{instring}.  On a C{ParseBaseException}, the exception debug action and
    the fail action (if set) are called before the exception is re-raised.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debugging and no fail action to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # The unguarded call below is only safe when parseImpl cannot index past
        # the end of the string.  The test must use preloc, the position actually
        # handed to parseImpl: skipping whitespace can move it to the end of the
        # input even though loc itself is still inside the string.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the results unchanged
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action returning None leaves the results unchanged
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1123
def tryParse( self, instring, loc ):
    """Parse at C{loc} with parse actions disabled and return the end location.

    A C{ParseFatalException} is converted into a plain C{ParseException}
    located at C{loc}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self )
1129
def canParseNext(self, instring, loc):
    """Return True if C{tryParse} succeeds at C{loc}, False if it raises
    C{ParseException} or C{IndexError}."""
    try:
        self.tryParse( instring, loc )
        return True
    except (ParseException, IndexError):
        return False
1137 1138 # this method gets repeatedly called during backtracking with the same arguments - 1139 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end to C{_parseNoCache}.

    Results and C{ParseBaseException}s are stored in
    C{ParserElement._exprArgCache}, keyed on the element and all call
    arguments.  A cached exception is re-raised; a cached result is returned
    with a copy of its tokens.
    """
    key = ( self, instring, loc, callPreParse, doActions )
    if key in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[ key ]
        if isinstance( cached, Exception ):
            raise cached
        return ( cached[0], cached[1].copy() )

    try:
        fresh = self._parseNoCache( instring, loc, doActions, callPreParse )
        # store a copy so the tokens handed to the caller are separate from the cached ones
        ParserElement._exprArgCache[ key ] = ( fresh[0], fresh[1].copy() )
        return fresh
    except ParseBaseException as pe:
        pe.__traceback__ = None
        ParserElement._exprArgCache[ key ] = pe
        raise
1156 1157 _parse = _parseNoCache 1158 1159 # argument cache for optimizing repeated calls when backtracking through recursive expressions 1160 _exprArgCache = {} 1161 @staticmethod
def resetCache():
    """Empty the argument cache C{ParserElement._exprArgCache}."""
    cache = ParserElement._exprArgCache
    cache.clear()
1164 1165 _packratEnabled = False 1166 @staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Both valid results
       and parsing exceptions are memoized.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Calling this method again once packrat parsing is enabled has no effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1186
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set C{parseAll} to True (equivalent to ending
       the grammar with C{L{StringEnd()}}).

       Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the C{loc} argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling C{parseWithTabs} on your grammar before calling C{parseString}
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full C{(s,loc,toks)} signature, and
          reference the input string using the parse action's C{s} argument
        - explicitly expand the tabs in your input string before calling
          C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        endloc, tokens = self._parse( text, 0 )
        if parseAll:
            # require that nothing but skippable text is left after the match
            endloc = self.preParse( text, endloc )
            ( Empty() + StringEnd() )._parse( text, endloc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1231
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
       C{overlap} is specified, then overlapping matches will be reported.

       This is a generator: it yields one C{(tokens, start, end)} tuple per match.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                # preParse has just been applied, so tell _parse not to repeat it
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character after the pre-parsed position
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case) is a different local from
                        # 'nextLoc' (end of the match); when preParse advanced, the scan
                        # resumes at the END of the match, not at 'nextloc' - confirm
                        # this is the intended overlap behavior
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc: step forward so the loop moves on
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1281
def transformString( self, instring ):
    """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
       be returned from a parse action.  To use C{transformString}, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking C{transformString()} on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  C{transformString()} returns the resulting transformed string.

       Note that this sets C{keepTabs} to True on the expression, and leaves it set."""
    pieces = []
    resumeAt = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # untouched text between the previous match and this one
            pieces.append( instring[resumeAt:start] )
            if toks:
                if isinstance( toks, ParseResults ):
                    pieces.extend( toks.asList() )
                elif isinstance( toks, list ):
                    pieces.extend( toks )
                else:
                    pieces.append( toks )
            resumeAt = end
        pieces.append( instring[resumeAt:] )
        kept = [ piece for piece in pieces if piece ]
        return "".join( map( _ustr, _flatten( kept ) ) )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1314
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}, simplifying the access to the tokens found
       to match the given parse expression.  Returns a C{ParseResults} holding the
       tokens of each match.  May be called with optional
       C{maxMatches} argument, to clip searching after 'n' matches are found.
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults( found )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1328
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """Generator method to split a string using the given expression as a separator.
       May be called with optional C{maxsplit} argument, to limit the number of splits;
       and the optional C{includeSeparators} argument (default=C{False}), if the separating
       matching text should be included in the split results.

       Yields the text between successive matches and finally the text after the
       last match.  With C{includeSeparators}, the first token of each match is
       yielded after the text that precedes it.
    """
    # NOTE(review): the pieces are sliced from instring as passed in, while the
    # offsets come from scanString, which expands tabs unless keepTabs is set -
    # confirm behavior for input containing tabs
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1343
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1353
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1363
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1373
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1383
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
       C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
       tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
       may also include C{None} as in:
        - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
        - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
        - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
        - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

       Note that C{expr*(None,n)} does not raise an exception if
       more than n exprs exist in the input stream; that is,
       C{expr*(None,n)} does not enforce a maximum number of expr
       occurrences.  If this behavior is desired, then write
       C{expr*(None,n) + ~expr}

       Raises C{TypeError} if C{other} is neither an int nor a tuple of
       ints/C{None}, and C{ValueError} for a negative count, a maximum smaller
       than the minimum, or a multiplier of 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad to exactly two entries, so that (n,) is read as (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts the optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # interpolate the type names; passing them as extra exception
            # arguments would leave the '%s' placeholders unformatted
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # build nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left operand;
    delegates to C{__mul__}."""
    multiply = self.__mul__
    return multiply( other )
1454
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1464
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1474
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1484
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1494
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1504
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped with C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    result of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1514
def __invert__( self ):
    """Implementation of ~ operator - returns C{L{NotAny}} wrapping this element"""
    negated = NotAny( self )
    return negated
1518
def __call__(self, name=None):
    """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
       passed as C{True}.

       If C{name} is omitted, same as calling C{L{copy}}.
    """
    if name is None:
        return self.copy()
    return self.setResultsName( name )
1534
def suppress( self ):
    """Return this C{ParserElement} wrapped in C{Suppress}, which suppresses its
       output; useful to keep punctuation from cluttering up returned output.
    """
    hidden = Suppress( self )
    return hidden
1540
def leaveWhitespace( self ):
    """Disable the skipping of whitespace before matching the characters in the
       C{ParserElement}'s defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.

       Returns C{self}.
    """
    setattr( self, "skipWhitespace", False )
    return self
1548
def setWhitespaceChars( self, chars ):
    """Override the whitespace characters skipped by this element.

       Whitespace skipping is switched on, C{chars} becomes this element's
       C{whiteChars}, and C{copyDefaultWhiteChars} is cleared so that
       C{copy} keeps the custom value.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1556
def parseWithTabs( self ):
    """Override the default behavior of expanding C{<TAB>}s to spaces before parsing the input string.
       Must be called before C{parseString} when the input grammar contains elements that
       match C{<TAB>} characters.  Returns C{self}."""
    setattr( self, "keepTabs", True )
    return self
1563
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       A string is wrapped in C{Suppress}.  A C{Suppress} expression is added
       only if it is not already in C{ignoreExprs}; any other expression is
       copied and wrapped in C{Suppress}.  Returns C{self}.
    """
    if isinstance( other, basestring ):
        other = Suppress( other )

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
1578
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching.

       Each action that is not given (falsy) is replaced by the module's
       default debug action.  Sets C{debug} to True and returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = ( onStart, onSuccess, onException )
    self.debug = True
    return self
1586
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (using the default debug actions), False to disable.
       Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1595
def __str__( self ):
    """Return this element's C{name}."""
    label = self.name
    return label
1598
def __repr__( self ):
    """Return the same text as the string conversion performed by C{_ustr}."""
    text = _ustr( self )
    return text
1601
def streamline( self ):
    """Mark this element as streamlined and clear its cached C{strRepr}.
    Returns C{self}."""
    self.streamlined, self.strRepr = True, None
    return self
1606
def checkRecursion( self, parseElementList ):
    """Base implementation: performs no check and returns C{None}."""
    return None
1609
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is not used by this base implementation; its default is
    C{None} rather than a list, so no mutable object is shared between calls.
    """
    self.checkRecursion( [] )
1613
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read(): treat the argument as a filename
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString( text, parseAll )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1632
def __eq__(self,other):
    """Compare with another parser element or with a string.

    Two C{ParserElement}s are equal if they are the same object or have equal
    instance attributes (C{vars}).  A string is equal if this expression
    matches it (see C{matches}).  Anything else is compared against the
    C{super} object of this element.
    """
    if isinstance( other, ParserElement ):
        return self is other or vars(self) == vars(other)
    if isinstance( other, basestring ):
        return self.matches( other )
    return super(ParserElement,self)==other
1640
def __ne__(self,other):
    """Logical negation of C{self == other}."""
    same = ( self == other )
    return not same
1643
def __hash__(self):
    """Hash on object identity (C{id(self)})."""
    identity = id( self )
    return hash( identity )
1646
def __req__(self,other):
    """Return the result of C{self == other}."""
    outcome = ( self == other )
    return outcome
1649
def __rne__(self,other):
    """Return the logical negation of C{self == other}."""
    same = ( self == other )
    return not same
1652
def matches(self, testString, parseAll=True):
    """Method for quick testing of a parser against a test string. Good for simple
       inline microtests of sub expressions while building up larger parser, as in::

           expr = Word(nums)
           assert expr.matches("100")

       Parameters:
        - testString - to test against this expression for a match
        - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests

       Returns True if C{parseString} succeeds, False if it raises a
       C{ParseBaseException}.
    """
    try:
        self.parseString( _ustr(testString), parseAll=parseAll )
    except ParseBaseException:
        return False
    else:
        return True
1669
def runTests(self, tests, parseAll=True, comment='#', printResults=True, failureTests=False):
    """Execute the parse expression on a series of test strings, showing each
       test, the parsed results or where the parse failed. Quick and easy way to
       run a parse expression against a list of sample strings.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
        - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests
        - comment - (default='#') - expression for indicating embedded comments in the test
             string; pass None to disable comment filtering
        - printResults - (default=True) prints test output to stdout
        - failureTests - (default=False) indicates if these tests are expected to fail parsing

       Returns: a (success, results) tuple, where success indicates that all tests succeeded
       (or failed if C{failureTests} is True), and results is a list of
       (test string, result) tuples, the result being either the parsed results
       or the exception raised for that test
    """
    if isinstance( tests, basestring ):
        tests = [ str.strip( ln ) for ln in tests.rstrip().splitlines() ]
    if isinstance( comment, basestring ):
        comment = Literal( comment )

    collected = []
    pendingComments = []
    success = True
    for test in tests:
        isComment = comment is not None and comment.matches( test, False )
        # a blank line directly after comment lines is kept with those comments
        if isComment or ( pendingComments and not test ):
            pendingComments.append( test )
            continue
        if not test:
            continue

        report = [ '\n'.join( pendingComments ), test ]
        pendingComments = []
        try:
            result = self.parseString( test, parseAll=parseAll )
            report.append( result.dump() )
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance( pe, ParseFatalException ) else ""
            if '\n' in test:
                # multi-line test: show the offending line and mark the column
                report.append( line( pe.loc, test ) )
                report.append( ' '*(col(pe.loc,test)-1) + '^' + fatal )
            else:
                report.append( ' '*pe.loc + '^' + fatal )
            report.append( "FAIL: " + str(pe) )
            success = success and failureTests
            result = pe

        if printResults:
            report.append( '' )
            print('\n'.join(report))

        collected.append( ( test, result ) )

    return success, collected
1724
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""

    def __init__( self ):
        """Initialise the base element with C{savelist=False}."""
        super(Token,self).__init__( savelist=False )
1730
class Empty(Token):
    """An empty token; it will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        # Flags consulted by the parsing machinery.
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1739
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.name = "NoMatch"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Unconditionally fail at C{loc}.

        Raises:
         - ParseException - always, carrying C{self.errmsg}
        """
        raise ParseException(instring, loc, self.errmsg, self)
1752
class Literal(Token):
    """Token that matches one specific string exactly."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # An empty literal is converted in place into an Empty instance.
            self.__class__ = Empty
        quotedName = '"%s"' % _ustr(self.match)
        self.name = quotedName
        self.errmsg = "Expected " + quotedName
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*.
    # Compare the first character before anything else so that a mismatch
    # is rejected without calling startswith; a one-character literal is
    # fully decided by that single comparison.
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal text at C{loc}.

        Returns: a C{(newloc, matchedString)} tuple on success

        Raises:
         - ParseException - if the text at C{loc} is not the literal
        """
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)


# short alias, and the class used to promote bare strings to expressions
_L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$" (or to whatever was last passed
       to C{L{setDefaultKeywordChars}})
     - C{caseless} allows case-insensitive matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        # Resolve the default at call time (not at class-definition time) so
        # that setDefaultKeywordChars() affects Keywords created afterwards.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): firstMatchChar stays unset for an empty
            # matchString; only a warning is issued here.
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}, requiring that it is neither preceded
        nor followed by a character from C{identChars}.

        Returns: a C{(newloc, matchString)} tuple on success

        Raises:
         - ParseException - if the keyword is absent or is part of a longer identifier
        """
        end = loc + self.matchLen
        identChars = self.identChars
        if self.caseless:
            if (instring[loc:end].upper() == self.caselessmatch and
                    (end >= len(instring) or instring[end].upper() not in identChars) and
                    (loc == 0 or instring[loc-1].upper() not in identChars)):
                return end, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                    (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                    (end >= len(instring) or instring[end] not in identChars) and
                    (loc == 0 or instring[loc-1] not in identChars)):
                return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this Keyword that keeps the same C{identChars}."""
        c = super(Keyword, self).copy()
        # Preserve this instance's identifier characters (custom and/or
        # upper-cased) instead of replacing them with the raw default string.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1838
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.
    Note: the matched results are always returned in the case of the
    defining match string, NOT in the case of the input text.
    """

    def __init__(self, matchString):
        # The parent class stores the upper-cased text as self.match.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice at C{loc} with the stored text.

        Returns: a C{(newloc, returnString)} tuple on success

        Raises:
         - ParseException - if the slice does not match
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
1855
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}: constructs a C{Keyword} with
    C{caseless=True}, so the keyword text is compared ignoring case.
    """

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    # parseImpl is deliberately not overridden here.  The caseless branch of
    # Keyword.parseImpl already performs the comparison and checks BOTH the
    # preceding and the following character against identChars; the former
    # override only checked the following character, so the keyword could
    # match in the middle of a longer identifier.
1865
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction. An optional
    C{excludeChars} parameter can list characters that might be found in
    the input C{bodyChars} string; useful to define a word of all printables
    except for one or two characters, for instance.
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            # no separate body set: body characters are the initial characters
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in the character sets, the
        # word is expressed as a compiled regular expression that parseImpl
        # uses in place of the character-by-character scan.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: if the generated pattern cannot be compiled,
                # fall back to the manual scan (but let KeyboardInterrupt and
                # SystemExit propagate)
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at C{loc}.

        Returns: a C{(newloc, matchedText)} tuple on success

        Raises:
         - ParseException - if no word satisfying the character sets, length
           limits and keyword setting starts at C{loc}
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond maxLen characters or the end of the input
        maxloc = min( start + self.maxLen, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, the word must not continue past the limit
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # as a keyword, no body character may adjoin the word on either side
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        # Use the base-class string if it is available; otherwise fall back to
        # a generated description (presumably the base __str__ raises while no
        # name has been assigned - confirm against ParserElement).
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1988
class Regex(Token):
    """Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, which is then used directly.

        Raises:
         - re.error - if a pattern string cannot be compiled (a C{SyntaxWarning} is issued first)
         - ValueError - if C{pattern} is neither a string nor a compiled RE object
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the actual pattern text; str() of a compiled pattern
            # yields its "re.compile(...)" representation instead
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the regular expression at C{loc}.

        Returns: a C{(newloc, results)} tuple, where C{results} holds the
        matched text and one named entry per named group of the expression

        Raises:
         - ParseException - if the expression does not match at C{loc}
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        # expose named groups as named results
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc,ret

    def __str__(self):
        # prefer the base-class string; otherwise describe the pattern
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2052
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        r"""Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
            - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

           Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) if C{quoteChar} or
           C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # the escape character (if any) is excluded from the "ordinary
        # character" class so that escape sequences are matched as a unit
        if escChar is not None:
            escInClass = _escapeRegexRangeChars(escChar)
        else:
            escInClass = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escInClass )
        else:
            # single-line strings additionally stop at newline / carriage return
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escInClass )
        if len(self.endQuoteChar) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the next character of the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class as sre_constants.error
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at C{loc}.

        Returns: a C{(newloc, text)} tuple; C{text} has the quotes removed and
        escapes resolved when C{unquoteResults} is set

        Raises:
         - ParseException - if no quoted string starts at C{loc}
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a plain string literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        # prefer the base-class string; otherwise describe the delimiters
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2176
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn,self).__init__()
        # whitespace is not skipped before matching
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Match a run of characters that are not in C{notChars}, starting at C{loc}.

        Returns: a C{(newloc, matchedText)} tuple on success

        Raises:
         - ParseException - if the character at C{loc} is disallowed or the
           run is shorter than the minimum length
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        # never scan beyond maxLen characters or the end of the input
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        # prefer the base-class string; otherwise describe the excluded set
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2239
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is
    ignored by pyparsing grammars.  This class is included when some whitespace
    structures are significant.  Define with a string containing the whitespace
    characters to be matched; default is C{" \\t\\r\\n"}.  Also takes optional
    C{min}, C{max}, and C{exact} arguments, as defined for the C{L{Word}} class."""
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # Characters that are to be matched must not be skipped as whitespace.
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(skippable)
        #~ self.leaveWhitespace()
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Match a run of the configured whitespace characters at C{loc}.

        Returns: a C{(newloc, matchedText)} tuple on success

        Raises:
         - ParseException - if the character at C{loc} is not in the set or
           the run is shorter than the minimum length
        """
        white = self.matchWhite
        if instring[loc] not in white:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] in white:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2287
class _PositionToken(Token):
    """Internal base class for the position-testing tokens defined below;
    each instance is named after its own class."""

    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2295
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        (or a non-space character, or the end of the input) is reached."""
        if col(loc, instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Advance to the target column.

        Returns: a C{(newloc, skippedText)} tuple

        Raises:
         - ParseException - if C{loc} is already past the target column
        """
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - thiscol
        return newloc, instring[loc:newloc]
2318
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        super(LineStart, self).__init__()
        # newline is significant here, so it is removed from the skipped set
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Step C{loc} forward by one when the base-class pre-parse stops on a newline."""
        preloc = super(LineStart, self).preParse(instring, loc)
        return loc + 1 if instring[preloc] == "\n" else loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed, consuming nothing, when C{loc} is at the start of a line.

        Raises:
         - ParseException - if C{loc} is not at the start of a line
        """
        if (loc == 0 or
                loc == self.preParse(instring, 0) or
                instring[loc-1] == "\n"):  #col(loc, instring) != 1:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2338
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        # newline is significant here, so it is removed from the skipped set
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed on a newline character or at the end of the input.

        Returns: C{(loc+1, "\\n")} for a newline, C{(loc+1, [])} at end of input

        Raises:
         - ParseException - in every other case
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc + 1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
2356
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed, consuming nothing, at the start of the text.

        Raises:
         - ParseException - if something other than whitespace and
           ignorables precedes C{loc}
        """
        # a non-zero loc is still acceptable if pre-parsing from 0 lands on it
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2369
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed when C{loc} is at or beyond the end of the input.

        Returns: C{(loc+1, [])} exactly at the end, C{(loc, [])} beyond it

        Raises:
         - ParseException - if input remains at C{loc}
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc + 1, []
        return loc, []
2385
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """

    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.errmsg = "Not at the start of a word"
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed, consuming nothing, at the start of a word.

        Raises:
         - ParseException - if the previous character is a word character
           or the current character is not
        """
        wordChars = self.wordChars
        if loc != 0 and (instring[loc-1] in wordChars or
                         instring[loc] not in wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2404
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """

    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.errmsg = "Not at the end of a word"
        self.skipWhitespace = False
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed, consuming nothing, at the end of a word.

        Raises:
         - ParseException - if the current character is a word character
           or the previous character is not
        """
        instrlen = len(instring)
        wordChars = self.wordChars
        # at or past the end of the input there is nothing left to check
        if instrlen > 0 and loc < instrlen and (
                instring[loc] in wordChars or
                instring[loc-1] not in wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2425
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist = False):
        """Store the contained expressions in C{self.exprs}.

        C{exprs} may be a single string, a sequence or generator of
        expressions (a sequence consisting only of strings is converted to
        literal expressions), any other iterable, or a single expression.
        """
        super(ParseExpression,self).__init__(savelist)
        # collections.Sequence was removed in Python 3.10; the ABC lives in
        # collections.abc (fall back to collections on Python 2).
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Sequence ):
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat it as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append( other )
        # the cached string representation is now stale
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the change does not affect the original sub-expressions
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this expression and on all
        contained expressions; a C{Suppress} that is already registered is
        not added again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        # propagate the expression that was just registered
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        # prefer the base-class string; otherwise describe the contents
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions and flatten directly nested
        expressions of the same class; returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegates to the base-class implementation and returns its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then check this expression for recursion."""
        # None stands for an empty trace (avoids a shared mutable default)
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy whose contained expressions are themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2535
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.
    """

    class _ErrorStop(Empty):
        """Marker element: once it is encountered in the expression list,
        failures of later expressions are raised as C{ParseSyntaxException}."""

        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows that of the first expression
        leader = self.exprs[0]
        self.setWhitespaceChars(leader.whiteChars)
        self.skipWhitespace = leader.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating the tokens.

        Returns: a C{(newloc, tokens)} tuple

        Raises:
         - ParseSyntaxException - if an expression located after an
           C{_ErrorStop} marker fails to parse
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, exprtokens = expr._parse(instring, loc, doActions)
            else:
                try:
                    loc, exprtokens = expr._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append C{other} (a bare string is converted to a literal expression first)."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """Run the recursion check on the contained expressions, stopping
        after the first one that cannot return an empty match."""
        subRecCheckList = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(subRecCheckList)
            if not expr.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " ".join(_ustr(expr) for expr in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr
2601
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.
    """

    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative, then parse with the one that matched furthest.

        Returns: the C{(newloc, tokens)} result of the chosen alternative

        Raises:
         - ParseException - the failure that got furthest into the input
           (with this expression's message), or a "no defined alternatives"
           error when no failure was recorded
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # longest match first; equal lengths keep their original order
            candidates.sort(key=lambda pair: pair[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other):
        """Append C{other} (a bare string is converted to a literal expression first)."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  #Or( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " ^ ".join(_ustr(alt) for alt in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(subRecCheckList)
2671
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # a MatchFirst with no alternatives is flagged as possibly empty
        self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        Raises the C{ParseException} that got furthest into the input (with
        its message replaced by this expression's C{errmsg}) when every
        alternative fails.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr, furthestLoc = err, err.loc
            except IndexError:
                inputLen = len(instring)
                if inputLen > furthestLoc:
                    furthestErr = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestLoc = inputLen

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other):
        """Implement C{|=} by appending another alternative in place."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
        return cached

    def checkRecursion(self, parseElementList):
        """Pass the recursion check on to every alternative."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
2727
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the required/optional groupings are built on the first parseImpl call
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order the input has them.

        A probing phase using C{tryParse} discovers the order; the matched
        expressions are then parsed for real in that order and their results
        merged.  Raises C{ParseException} if a required expression never
        matched.
        """
        if self.initExprGroups:
            wrapped = [e for e in self.exprs if isinstance(e, Optional)]
            # map the inner expression of each Optional back to its wrapper
            self.opt1map = dict((id(opt.expr), opt) for opt in wrapped)
            unwrapped = [opt.expr for opt in wrapped]
            emptyable = [e for e in self.exprs
                         if e.mayReturnEmpty and not isinstance(e, Optional)]
            self.optionals = unwrapped + emptyable
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))] + self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        while True:
            attempt = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in attempt:
                try:
                    probeLoc = e.tryParse(instring, probeLoc)
                except ParseException:
                    failures += 1
                    continue
                matchOrder.append(self.opt1map.get(id(e), e))
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
            if failures == len(attempt):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pendingOpt]

        # real parse, in the discovered order
        collected = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            collected.append(results)

        finalResults = ParseResults()
        for res in collected:
            # names already present are combined rather than overwritten
            merged = {}
            for key in res.keys():
                if key in finalResults:
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(res[key])
                    merged[key] = combined
            finalResults += ParseResults(res)
            for key, value in merged.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
        return cached

    def checkRecursion(self, parseElementList):
        """Pass the recursion check on to every contained expression."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
2812
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__(self, expr, savelist=False):
        """Wrap C{expr} (a string is converted to the default literal class).

        When C{expr} is not None, its whitespace, index-error, empty-match,
        save-as-list and pre-parse settings and its ignore expressions are
        copied onto this wrapper.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = ParserElement._literalStringClass(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a private copy of the wrapped expression."""
        self.skipWhitespace = False
        # copy only when there is something to copy: the previous code called
        # self.expr.copy() before the None check and raised AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register an ignorable expression and forward it to the wrapped expression.

        A C{Suppress} that is already registered is not added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # forward exactly what the base class just stored
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise C{RecursiveGrammarException} if this element is already on the trail."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(trail)

    def validate(self, validateTrace=[]):
        # the default list is only ever copied below, never mutated
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # base __str__ failed; fall through to the generated representation.
            # (narrowed from a bare except so KeyboardInterrupt/SystemExit propagate)
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2884
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the lookahead fails; its end position is discarded
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
2898
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Fail if the wrapped expression can parse at C{loc}; otherwise match nothing."""
        unwantedAhead = self.expr.canParseNext(instring, loc)
        if unwantedAhead:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "~{" + _ustr(self.expr) + "}"
        return cached
2926
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__(self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        if sentinel is None:
            self.not_ender = None
        else:
            # negative lookahead that fails wherever the sentinel matches
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the expression once, then as many more times as it will match.

        The first repetition must succeed (its exception propagates); a
        failure in any later repetition simply ends the loop.
        """
        parseOnce = self.expr._parse
        skipIgnorables = self._skipIgnorables
        if self.not_ender is not None:
            rejectSentinel = self.not_ender.tryParse
        else:
            rejectSentinel = None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if rejectSentinel is not None:
            rejectSentinel(instring, loc)
        loc, tokens = parseOnce(instring, loc, doActions, callPreParse=False)
        try:
            haveIgnorables = bool(self.ignoreExprs)
            while True:
                if rejectSentinel is not None:
                    rejectSentinel(instring, loc)
                preloc = skipIgnorables(instring, loc) if haveIgnorables else loc
                loc, moreTokens = parseOnce(instring, preloc, doActions)
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + _ustr(self.expr) + "}..."
        return cached

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always saves as a list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2986
class ZeroOrMore(OneOrMore):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Run the one-or-more parse; if it fails, match nothing at C{loc}."""
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, []
        return outcome

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]..."
        return cached
3014
class _NullToken(object):
    """Placeholder object that is falsy and renders as the empty string."""
    def __nonzero__(self):
        return False
    # Python 3 spelling of the truth-value hook
    __bool__ = __nonzero__

    def __str__(self):
        return ""


# sentinel used as the default for Optional's "default" argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match once, or not at all
     - default (optional) - value to be returned if the optional expression
       is not found.
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, falling back to the default on failure.

        On failure C{loc} is returned unchanged, together with an empty list
        or with the default value (also stored under the wrapped expression's
        results name when it has one).
        """
        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            pass

        if self.defaultValue is _optionalNotMatched:
            return loc, []

        innerName = self.expr.resultsName
        if innerName:
            tokens = ParseResults([self.defaultValue])
            tokens[innerName] = self.defaultValue
        else:
            tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]"
        return cached
3058
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; scanning stops where one is found
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            failOn = ParserElement._literalStringClass(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from C{loc} for the target and return the skipped text.

        Raises C{ParseException} if the end of the input is passed without
        the target matching.
        """
        startloc = loc
        inputLen = len(instring)
        parseTarget = self.expr._parse
        failOnMatches = None if self.failOn is None else self.failOn.canParseNext
        skipIgnored = None if self.ignoreExpr is None else self.ignoreExpr.tryParse

        scan = loc
        while scan <= inputLen:
            # a failOn match leaves the loop through break, so the else clause
            # below is bypassed and the text scanned so far is returned
            if failOnMatches is not None and failOnMatches(instring, scan):
                break

            if skipIgnored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        scan = skipIgnored(instring, scan)
                    except ParseBaseException:
                        break

            try:
                parseTarget(instring, scan, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scan += 1
            else:
                # matched skipto expr, done
                break
        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scan
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            loc, targetTokens = parseTarget(instring, loc, doActions, callPreParse=False)
            skipresult += targetTokens

        return loc, skipresult
3131
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install C{other} as the wrapped expression and adopt its settings.

        A string is converted to the default literal class first.  Returns
        C{self}.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        return self << other

    def leaveWhitespace(self):
        # only this element is changed; the wrapped expression is left alone
        self.skipWhitespace = False
        return self

    def streamline(self):
        # the flag is set before descending so a recursive grammar cannot loop
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        # the default list is only ever copied below, never mutated
        if self not in validateTrace:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name, or the class name followed by C{": ..."}.

        The wrapped expression is deliberately not rendered: the earlier
        expanding implementation was stubbed out for memory and performance
        reasons, and its unreachable remains have been removed.
        """
        if hasattr(self, "name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Copy this element; an undefined C{Forward} yields a new one that wraps C{self}."""
        if self.expr is not None:
            return super(Forward, self).copy()
        ret = Forward()
        ret <<= self
        return ret
3208
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always a literal ellipsis."""
    def __str__(self):
        placeholder = "..."
        return placeholder
3212
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
3218
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable; when C{adjacent} is set it is kept on this element only."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one concatenated string token."""
        combined = tokenlist.copy()
        del combined[:]
        text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
3250
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the matched tokens one level deeper
        grouped = [tokenlist]
        return grouped
3259
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to C{tokenlist} for every non-empty token in it.

        The key is the token's first element (integers are converted to a
        stripped string); the value is whatever follows the key.
        """
        for index, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue

            key = tok[0]
            if isinstance(key, int):
                key = _ustr(tok[0]).strip()

            if size == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                value = tok[1]
            else:
                rest = tok.copy()
                del rest[0]
                keepWhole = len(rest) != 1 or (isinstance(rest, ParseResults) and rest.haskeys())
                value = rest if keepWhole else rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3292
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        # whatever matched is dropped from the results
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressed, so this returns the element itself."""
        return self
3301
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action the first time; raise C{ParseException} afterwards.

        The "called" flag is set only after the wrapped action returns.
        """
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
3316
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Writes a line to C{sys.stderr} on entry and another on exit (showing the
    return value or the exception) of every call to the wrapped action.
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        instr, pos, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # more than (s, l, t): prefix with the class of the first argument
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (label, line(pos, instr), pos, toks))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (label, ret))
        return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
3355
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the array expression according to the count just parsed
        count = t[0]
        if count:
            arrayExpr << Group(And([expr] * count))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')

def _flatten(L):
    """Return a new flat list with every nested C{list} in C{L} expanded in place.

    Only C{list} instances are expanded; other containers are kept as items.
    """
    flat = []
    for item in L:
        flat += _flatten(item) if isinstance(item, list) else [item]
    return flat
3384
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flatTokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flatTokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # the repeat is rejected unless it yields the same flattened tokens
            if _flatten(t.asList()) != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regular-expression character class.

    Backslash, C{^}, C{-} and C{]} get a leading backslash; newline and tab
    are replaced by their two-character escapes.
    """
    # the backslash comes first so the backslashes added for the
    # other characters are not themselves escaped again
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    escaped = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(escaped)
3446
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)
    """
    # the Sequence ABC lives in collections.abc; the collections.Sequence alias
    # was removed in Python 3.10.  Fall back for interpreters without collections.abc.
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    symbols = []
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol ahead of an earlier symbol that is
    # a prefix of it, so the longer alternative is tried first
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            # nothing conflicted with symbols[i]; move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # all single characters: a character class is enough
                return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols))
            else:
                return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols))
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3510
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
3520
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers whose parse action records the current location
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    def sliceAsString(s, l, t):
        return s[t._original_start:t._original_end]

    def sliceInPlace(s, l, t):
        # remove the two markers and replace the tokens with the raw text
        begin = t.pop('_original_start')
        end = t.pop('_original_end')
        t[:] = [s[begin:end]]

    extractText = sliceAsString if asString else sliceInPlace
    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr

def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions: wraps
       C{expr} in a C{TokenConverter} whose parse action returns only the first
       parsed token."""
    converter = TokenConverter(expr)
    converter.setParseAction(lambda toks: toks[0])
    return converter
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       The returned group carries the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    # zero-width expression whose parse action returns the current location
    startLocator = Empty().setParseAction(lambda s,l,t: l)
    # the end locator must not skip whitespace, or it would report the start
    # of the next token instead of the end of this one
    endLocator = startLocator.copy().leaveWhitespace()
    return Group(startLocator("locn_start") + expr("value") + endLocator("locn_end"))
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar used by srange() to parse regexp-style '[...]' character set definitions
# backslash followed by one punctuation character -> the punctuation character itself
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## -> the character with that hex code.  The digits are taken as
# everything after the 'x'/'X' marker; str.lstrip(r'\0x') is not used because it
# strips a *set* of characters, which also ate leading '0' digits (so '\x00' failed)
# and did not remove an upper-case 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# \0## -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned.
    """
    # a ParseResults part is a grouped (first, last) range; anything else is a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best-effort helper: a malformed range yields "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by the former bare except
        return ""
3601
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises C{ParseException}
       when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
       The returned parse action ignores its arguments and returns C{[replStr]}.
    """
    def _replace(s,l,t):
        return [replStr]
    return _replace
3616
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings:
       returns the first token without its first and last characters.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3623
def tokenMap(func, *args):
    """Helper to define a parse action by mapping a function to all elements of a ParseResults list.
       If any additional args are passed, they are forwarded to the given function as additional
       arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
       which will convert the parsed data to an integer using base 16.

       The returned parse action replaces the tokens in place and is named after C{func}.
    """
    def pa(s,l,t):
        converted = [func(tokn, *args) for tokn in t]
        t[:] = converted

    # name the parse action after the mapped function, falling back to the
    # name of its class and finally to its string form
    try:
        fallbackName = func.__class__.__name__
        pa.__name__ = getattr(func, '__name__', fallbackName)
    except Exception:
        pa.__name__ = str(func)

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""Helper parse action to convert tokens to upper case."""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""Helper parse action to convert tokens to lower case."""
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.
       C{tagStr} may be a string or an expression with a C{name}; C{xml} selects strict XML
       matching (case-sensitive tag, double-quoted attribute values) over the looser HTML form.
       Returns the tuple C{(openTag, closeTag)}."""
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: names are lower-cased, values are optional and may be unquoted
        unquotedChars = "".join(c for c in printables if c != ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(unquotedChars)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True for a self-closing tag (trailing '/'), else False
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the tag name in title case with ':' removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3676
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} in non-XML mode."""
    return _makeTags( tagStr, xml=False )
3680
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} in XML mode."""
    return _makeTags( tagStr, xml=True )
3684
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       If positional name-value tuples are given, any keyword arguments are ignored.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
    """
    pairs = args[:] if args else attrDict.items()
    required = [(name, value) for name, value in pairs]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            # ANY_VALUE only requires the attribute to be present
            mustMatch = attrValue != withAttribute.ANY_VALUE
            if mustMatch and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} when matching on a div class - made
       difficult because C{class} is a reserved word in Python.  When C{namespace} is
       given, the attribute tested is C{"<namespace>:class"} instead of C{"class"}.
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# associativity markers for infixNotation; each is a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the nested
          expression
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if numTerms is not 1, 2, or 3, if a ternary opExpr is not
       a pair of expressions, or if rightLeftAssoc is neither C{opAssoc.LEFT} nor
       C{opAssoc.RIGHT}.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # the parse action is optional, so pad the tuple with None before unpacking
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before building the name, so that a bad opExpr is reported
            # with this ValueError rather than a string-formatting TypeError
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            # tuple() so that a two-element list formats like a tuple
            termName = "%s%s term" % tuple(opExpr)
        else:
            # an invalid arity is rejected by the ValueError branches below
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each precedence level matches either its own operator form or the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# quoted strings: the body may contain doubled quote characters and backslash escapes
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
    ).setName("string enclosed in single quotes")
quotedString = Combine(
    ( Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' ) |
    ( Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" )
    ).setName("quotedString using single or double quotes")
unicodeString = Combine(
    _L('u') + quotedString.copy()
    ).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if opener and closer are equal, or if no content expression
       is given and opener and closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    skipIgnored = ignoreExpr is not None

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and skipIgnored:
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whitespace,exact=1))
                        ).setParseAction(stripToken)
        elif singleCharDelims:
            content = empty.copy() + CharsNotIn(opener+closer+whitespace).setParseAction(stripToken)
        elif skipIgnored:
            # multi-character delimiters cannot be excluded by character class,
            # so use negative lookaheads for them instead
            content = Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whitespace,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whitespace,exact=1))
                        ).setParseAction(stripToken)

    ret = Forward()
    if skipIgnored:
        items = ignoreExpr | ret | content
    else:
        items = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3866
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # nothing to check at end of input
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol != expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if not curCol > indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: this column becomes the new level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        isUnindent = indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]
        if not isUnindent:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT)
    else:
        smExpr = Group( Optional(NL) + statements )
    # a backslash at end of line continues the statement on the next line
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character, paired up positionally
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.
       Looks up the C{entity} results name in the entity map; returns C{None} when the
       entity is not in the map."""
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
    ).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    ( Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' ) | dblSlashComment
    ).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: printable words other than ',', optionally joined by blanks
# that are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',') +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
        )
    ).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # addParseAction modifies the expression it is called on, so the float conversion
    # is attached to copies; attaching it to signedInteger itself made signedInteger
    # (and therefore numeric) return floats instead of ints
    fraction = (signedInteger.copy().addParseAction(convertToFloat) + '/' + signedInteger.copy().addParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    numeric = (sciReal | real | signedInteger).streamline()
    """any numeric expression, returns the corresponding Python type"""

    number = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("number").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # the short form elides at least one group, so it must have fewer than 8 hex groups
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
        """
        return lambda s,l,t: datetime.strptime(t[0], fmt).date()

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
        """
        return lambda s,l,t: datetime.strptime(t[0], fmt)

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source"""
        return pyparsing_common._html_stripper.transformString(tokens[0])
if __name__ == "__main__":

    # small SQL-like grammar:  SELECT <columns> FROM <tables>
    selectKeyword = CaselessLiteral("select")
    fromKeyword = CaselessLiteral("from")

    sqlIdent = Word(alphas, alphanums + "_$")

    columnName = delimitedList(sqlIdent, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(sqlIdent, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectKeyword("command") + columnSpec("columns") + fromKeyword + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same samples are run through both numeric expressions
    numericSamples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """

    pyparsing_common.numeric.runTests(numericSamples)

    # any int or real number, returned as float
    pyparsing_common.number.runTests(numericSamples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)