Package pyparsing :: Module pyparsing
[frames | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2011  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.6" 
  62  __versionTime__ = "1 May 2011 23:41" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 'originalTextFor', 
  92  ] 
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104      alphas = string.ascii_lowercase + string.ascii_uppercase 
 105  else: 
 106      _MAX_INT = sys.maxint 
 107      range = xrange 
 108      set = lambda s : dict( [(c,0) for c in s] ) 
 109      alphas = string.lowercase + string.uppercase 
 110   
111 - def _ustr(obj):
112 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 113 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 114 then < returns the unicode object | encodes it with the default encoding | ... >. 115 """ 116 if isinstance(obj,unicode): 117 return obj 118 119 try: 120 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 121 # it won't break any existing code. 122 return str(obj) 123 124 except UnicodeEncodeError: 125 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 126 # state that "The return value must be a string object". However, does a 127 # unicode object (being a subclass of basestring) count as a "string 128 # object"? 129 # If so, then return a unicode object: 130 return unicode(obj)
131 # Else encode it... but how? There are many choices... :) 132 # Replace unprintables with escape codes? 133 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 134 # Replace unprintables with question marks? 135 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 136 # ... 137 138 alphas = string.lowercase + string.uppercase 139 140 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 141 singleArgBuiltins = [] 142 import __builtin__ 143 for fname in "sum len enumerate sorted reversed list tuple set any all".split(): 144 try: 145 singleArgBuiltins.append(getattr(__builtin__,fname)) 146 except AttributeError: 147 continue 148
149 -def _xml_escape(data):
150 """Escape &, <, >, ", ', etc. in a string of data.""" 151 152 # ampersand must be replaced first 153 from_symbols = '&><"\'' 154 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 155 for from_,to_ in zip(from_symbols, to_symbols): 156 data = data.replace(from_, to_) 157 return data
158
159 -class _Constants(object):
160 pass

# module-level character-set constants
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)   # the backslash character
_non_white = []
for c in string.printable:
    if c not in string.whitespace:
        _non_white.append(c)
printables = "".join( _non_white )
del _non_white
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the parsed string, location, message and originating element.

        When C{msg} is omitted, the single string argument is taken to be
        the message and the parsed string is recorded as empty.
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (C{column} is accepted as an alias)
            - line - returns the line containing the exception text
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        # the names listed here must match what the class really provides:
        # "markInputline" is the method's actual spelling, and "column" is
        # the alias that __getattr__ accepts alongside "col"
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
215
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
224
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
229
class ParseSyntaxException(ParseFatalException):
    """just like C{ParseFatalException}, but thrown internally when an
       C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # re-create the exception from the fields of the one passed in
        parent = super(ParseSyntaxException, self)
        parent.__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement)
237 238 #~ class ReparseException(ParseBaseException): 239 #~ """Experimental class - parse actions can raise this exception to cause 240 #~ pyparsing to reparse the input string: 241 #~ - with a modified input string, and/or 242 #~ - with a modified start location 243 #~ Set the values of the ReparseException in the constructor, and raise the 244 #~ exception in a parse action to cause pyparsing to use the new string/location. 245 #~ Setting the values as None causes no change to be made. 246 #~ """ 247 #~ def __init_( self, newstring, restartLoc ): 248 #~ self.newParseText = newstring 249 #~ self.reparseLoc = restartLoc 250
class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple: a bare tuple on the right of % would
        # be spread over the format specifiers instead of shown as one value
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
258
259 -class _ParseResultsWithOffset(object):
260 - def __init__(self,p1,p2):
261 self.tup = (p1,p2)
262 - def __getitem__(self,i):
263 return self.tup[i]
264 - def __repr__(self):
265 return repr(self.tup)
266 - def setOffset(self,i):
267 self.tup = (self.tup[0],i)
268
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # an existing instance is handed back as-is; __init__ then only
        # applies the name, because __doinit is already False on it
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # only reached for attributes not found normally: a results name
        # yields its value(s), any other name yields the empty string
        if name in self.__tokdict:
            if name not in self.__accumNames:
                return self.__tokdict[name][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[name] ])
        else:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # negative offsets map to the end of the current tokens; an
                # explicit branch is needed because offset may be 0, which
                # the "cond and x or y" idiom would treat as false
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        # 0 + results returns a copy, which is what lets sum() start from
        # its default 0
        if isinstance(other,int) and other == 0:
            return self.copy()
        # any other left operand is unsupported: let Python raise TypeError
        # rather than silently evaluating the expression to None
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append(_ustr(i))
            else:
                parts.append(repr(i))
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested C{ParseResults} are converted recursively, other tokens
           are included unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map each recorded offset to the results name stored at it
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                # nested results: tag with the name at this position, if any
                out += [ res.asXML(namedItems.get(i),
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object C{sub} is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list(...) because dict views cannot be indexed on Python 3
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # list(...) because a dict view cannot be added to a list on Python 3
        return dir(super(ParseResults,self)) + list(self.keys())
620
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location sitting on a newline character reports column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise: distance from the last newline before loc (rfind gives -1
    # when there is none, which makes the first character column 1)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
632
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
644
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # rfind yields -1 when no newline precedes loc, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
654
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with its location, line and column."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
657
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the matched expression followed by its tokens as a list."""
    expr_text = _ustr(expr)
    token_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + token_text)
660
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
663
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
667 668 'decorator to trim function calls to match the arity of the target' 669 if not _PY3K:
670 - def _trim_arity(func, maxargs=2):
671 limit = [0] 672 def wrapper(*args): 673 while 1: 674 try: 675 return func(*args[limit[0]:]) 676 except TypeError: 677 if limit[0] <= maxargs: 678 limit[0] += 1 679 continue 680 raise
681 return wrapper 682 else:
683 - def _trim_arity(func, maxargs=2):
684 limit = maxargs 685 def wrapper(*args): 686 #~ nonlocal limit 687 while 1: 688 try: 689 return func(*args[limit:]) 690 except TypeError: 691 if limit: 692 limit -= 1 693 continue 694 raise
695 return wrapper 696
697 -class ParserElement(object):
698 """Abstract base level parser element class.""" 699 DEFAULT_WHITE_CHARS = " \n\t\r" 700 verbose_stacktrace = False 701
def setDefaultWhitespaceChars( chars ):
    """Overrides the default whitespace chars stored in
       C{ParserElement.DEFAULT_WHITE_CHARS}.
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)

def __init__( self, savelist=False ):
    """Set every per-element attribute to its initial value."""
    self.parseAction = []
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs = []
    self.debug = False
    self.streamlined = False
    # used to optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # used to mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions
    self.debugActions = ( None, None, None )
    self.re = None
    # used to avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
730
def copy( self ):
    """Make a copy of this C{ParserElement}.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element."""
    duplicate = copy.copy( self )
    # give the copy its own lists so later changes do not affect the original
    duplicate.parseAction = self.parseAction[0:]
    duplicate.ignoreExprs = self.ignoreExprs[0:]
    if not self.copyDefaultWhiteChars:
        return duplicate
    duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
740
def setName( self, name ):
    """Define name for this expression, for use in debugging."""
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self,"exception"):
        return self
    # keep an already-created exception object in step with the new message
    self.exception.msg = self.errmsg
    return self
748
def setResultsName( self, name, listAllMatches=False ):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: this returns a *copy* of the original C{ParserElement} object;
       this is so that the client can define a basic element, such as an
       integer, and reference it in multiple places with different names.

       You can also set results names using the abbreviated syntax,
       C{expr("name")} in place of C{expr.setResultsName("name")} -
       see L{I{__call__}<__call__>}.
    """
    clone = self.copy()
    accumulate = listAllMatches
    # a trailing "*" on the name is shorthand for listAllMatches=True
    if name.endswith("*"):
        accumulate = True
        name = name[:-1]
    clone.resultsName = name
    clone.modalResults = not accumulate
    return clone
767
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable.
    """
    if not breakFlag:
        # disabling: restore the parse method saved when the break was set
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that the break can be removed later
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
785
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
       C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    # replaces any previously stored actions; each one is wrapped so that
    # it can be called with the full (s, loc, toks) argument list
    self.parseAction = [ _trim_arity(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
806
def addParseAction( self, *fns, **kwargs ):
    """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
    wrapped = [ _trim_arity(fn) for fn in fns ]
    self.parseAction += wrapped
    # once enabled, callDuringTry stays enabled
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
812
def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{ParseFatalException}
       if it is desired to stop parsing immediately."""
    setattr(self, "failAction", fn)
    return self
825
826 - def _skipIgnorables( self, instring, loc ):
827 exprsFound = True 828 while exprsFound: 829 exprsFound = False 830 for e in self.ignoreExprs: 831 try: 832 while 1: 833 loc,dummy = e._parse( instring, loc ) 834 exprsFound = True 835 except ParseException: 836 pass 837 return loc
838
def preParse( self, instring, loc ):
    """Return C{loc} advanced past ignorable expressions and, when
       C{self.skipWhitespace} is set, past characters in C{self.whiteChars}."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
850
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: returns C{loc} unchanged with an empty token list."""
    tokens = []
    return loc, tokens
853
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: returns C{tokenlist} unchanged."""
    result = tokenlist
    return result

#~ @profile
856 857 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Parse C{instring} at C{loc} with this element and return C{(loc, retTokens)}.

       Steps, in order: optional C{preParse} (only when both C{callPreParse}
       and C{self.callPreparse} are true), C{parseImpl}, C{postParse},
       wrapping of the tokens in a C{ParseResults}, then the parse actions
       (run when C{doActions} or C{self.callDuringTry} is true).

       An C{IndexError} raised by C{parseImpl} is converted to a
       C{ParseException} located at the end of the input.  When C{self.debug}
       or C{self.failAction} is set, the debug actions and the fail action are
       invoked around the parse; the exception is always re-raised.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # instrumented path: report the attempt, and on failure notify the
        # exception debug action and/or the fail action before re-raising
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            #~ print ("Exception raised:", err)
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # plain path: same pre-parse and parse, without any callbacks
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # the IndexError guard is only set up when this element may raise
        # one or the location is already at or past the end of the input
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a non-None return value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a non-None return value replaces the current results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
934
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with C{doActions=False} and return only the
    location following the match.  A C{ParseFatalException} is downgraded to
    an ordinary C{ParseException} raised at C{loc}."""
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
940 941 # this method gets repeatedly called during backtracking with the same arguments - 942 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Results and C{ParseBaseException}s are stored in
    C{ParserElement._exprArgCache}, keyed on the element and the call
    arguments.  A cached exception is re-raised; a cached result is returned
    with a copy of its tokens.
    """
    lookup = (self, instring, loc, callPreParse, doActions)
    if lookup in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[lookup]
        if isinstance(cached, Exception):
            raise cached
        return (cached[0], cached[1].copy())

    try:
        value = self._parseNoCache(instring, loc, doActions, callPreParse)
        # store a copy so the caller's tokens are not shared with the cache
        ParserElement._exprArgCache[lookup] = (value[0], value[1].copy())
        return value
    except ParseBaseException:
        ParserElement._exprArgCache[lookup] = sys.exc_info()[1]
        raise
# active parse entry point; enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}

def resetCache():
    """Empty the shared C{_exprArgCache} dictionary in place."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)

_packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the static method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.
    """
    if ParserElement._packratEnabled:
        # already switched over; nothing to do
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString(self, instring, parseAll=False):
    """Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{StringEnd()}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing ignorables, then insist on end of input
            loc = self.preParse(instring, loc)
            endOfInput = Empty() + StringEnd()
            endOfInput._parse(instring, loc)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return tokens
1035
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """Scan the input string for expression matches.  Each match is yielded as
    a C{(tokens, start, end)} tuple.  The optional C{maxMatches} argument
    stops scanning after 'n' matches are found.  If C{overlap} is specified,
    then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind once; both are called on every scan position
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                loc = preloc + 1
                continue

            if not nextLoc > loc:
                # match made no progress past loc: step beyond the attempted start
                loc = preloc + 1
                continue

            matches += 1
            yield tokens, preloc, nextLoc
            if not overlap:
                loc = nextLoc
            else:
                skipped = preparseFn(instring, loc)
                # NOTE(review): when preParse skips anything, scanning resumes at the
                # END of the match (nextLoc), so no overlap is reported - confirm intended
                if skipped > loc:
                    loc = nextLoc
                else:
                    loc += 1
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1086
def transformString(self, instring):
    """Extension to C{scanString}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string."""
    pieces = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[cursor:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks, list):
                    pieces += toks
                else:
                    pieces.append(toks)
            cursor = end
        pieces.append(instring[cursor:])
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(pieces)))
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1120
def searchString(self, instring, maxMatches=_MAX_INT):
    """Another extension to C{scanString}, simplifying the access to the tokens found
    to match the given parse expression.  The match locations are dropped and
    the tokens of each match are collected into one C{ParseResults}.  May be
    called with optional C{maxMatches} argument, to clip searching after 'n'
    matches are found.
    """
    try:
        found = [toks for toks, start, end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1135
def __add__(self, other):
    """Implementation of + operator - returns C{And}.  A string operand is
    wrapped in a C{Literal}; any other non-C{ParserElement} operand produces a
    C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1145
def __radd__(self, other):
    """Implementation of + operator when left operand is not a C{ParserElement}.
    A string operand is wrapped in a C{Literal}; anything else that is not a
    C{ParserElement} produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1155
def __sub__(self, other):
    """Implementation of - operator, returns C{And} with error stop.  A string
    operand is wrapped in a C{Literal}; any other non-C{ParserElement} operand
    produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1165
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a C{ParserElement}.
    A string operand is wrapped in a C{Literal}; anything else that is not a
    C{ParserElement} produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1175
def __mul__(self, other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + ZeroOrMore(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
     - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} when C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum below
    the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts the optional repetitions only
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(expr + Optional(expr + ...)), n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left operand;
    delegates to C{__mul__} with the same argument."""
    product = self.__mul__(other)
    return product
1246
def __or__(self, other):
    """Implementation of | operator - returns C{MatchFirst}.  A string operand
    is wrapped in a C{Literal}; any other non-C{ParserElement} operand produces
    a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1256
def __ror__(self, other):
    """Implementation of | operator when left operand is not a C{ParserElement}.
    A string operand is wrapped in a C{Literal}; anything else that is not a
    C{ParserElement} produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1266
def __xor__(self, other):
    """Implementation of ^ operator - returns C{Or}.  A string operand is
    wrapped in a C{Literal}; any other non-C{ParserElement} operand produces a
    C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1276
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a C{ParserElement}.
    A string operand is wrapped in a C{Literal}; anything else that is not a
    C{ParserElement} produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1286
def __and__(self, other):
    """Implementation of & operator - returns C{Each}.  A string operand is
    wrapped in a C{Literal}; any other non-C{ParserElement} operand produces a
    C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1296
def __rand__(self, other):
    """Implementation of & operator when left operand is not a C{ParserElement}.
    A string operand is wrapped in a C{Literal}; anything else that is not a
    C{ParserElement} produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1306
def __invert__(self):
    """Implementation of ~ operator - wraps this expression in a C{NotAny}"""
    negated = NotAny(self)
    return negated
1310
def __call__(self, name):
    """Shortcut for C{setResultsName}, with C{listAllMatches=default}::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
       passed as C{True}.
       """
    named = self.setResultsName(name)
    return named
1321
def suppress(self):
    """Wraps this C{ParserElement} in a C{Suppress}, so that its output is
       suppressed; useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress(self)
    return wrapped
1327
def leaveWhitespace(self):
    """Disables the skipping of whitespace before matching the characters in the
       C{ParserElement}'s defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.
       Returns C{self} so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
1335
def setWhitespaceChars(self, chars):
    """Overrides the default whitespace chars for this element: turns
       whitespace skipping on, stores C{chars} and clears the
       C{copyDefaultWhiteChars} flag.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1343
def parseWithTabs(self):
    """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
       Must be called before C{parseString} when the input grammar contains elements that
       match C{<TAB>} characters.  Sets C{keepTabs} and returns C{self}."""
    setattr(self, "keepTabs", True)
    return self
1350
def ignore(self, other):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.  A copy of C{other} is stored, wrapped in C{Suppress}
       unless it already is one.  Returns C{self}.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        # only Suppress expressions are checked for duplicates
        self.ignoreExprs.append(other.copy())
    return self
1362
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks.  Any callback passed as a false value is
       replaced by the corresponding module default.  Returns C{self}."""
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1370
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
1379
def __str__(self):
    """Return this element's C{name} attribute."""
    label = self.name
    return label
1382
def __repr__(self):
    """Return the same text as the string conversion, via C{_ustr}."""
    text = _ustr(self)
    return text
1385
def streamline(self):
    """Mark this element as streamlined and discard its cached C{strRepr}.
       Returns C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1390
def checkRecursion(self, parseElementList):
    """Recursion check hook; this implementation does nothing."""
    return None
1393
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure, check for infinite recursive definitions."""
    # validateTrace is not read here; the check always starts from an empty list
    freshTrace = []
    self.checkRecursion(freshTrace)
1397
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       C{parseAll} is passed through to C{parseString}, whose result is
       returned.  A C{ParseBaseException} from parsing is re-raised; as in
       C{parseString}, the internal stack trace is kept only when
       C{ParserElement.verbose_stacktrace} is set.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a file name
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close the handle even if read() fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1415
def getException(self):
    """Build a C{ParseException} carrying this element's C{errmsg}, with an
       empty string and location 0 as placeholders."""
    template = ParseException("", 0, self.errmsg, self)
    return template
1418
def __getattr__(self, aname):
    """Lazily create the C{myException} attribute on first access by calling
       C{getException}; every other missing attribute raises C{AttributeError}."""
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    # storing it on the instance means later lookups bypass __getattr__
    self.myException = ret = self.getException()
    return ret
1425
def __eq__(self, other):
    """Equality test.  Against another C{ParserElement}: identity or equal
       instance dictionaries.  Against a string: True when this expression
       parses the whole string.  Anything else is compared through C{super}."""
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
1437
def __ne__(self, other):
    """Inequality test: the negation of C{self == other}."""
    same = (self == other)
    return not same
1440
def __hash__(self):
    """Hash on object identity."""
    identity = id(self)
    return hash(identity)
1443
def __req__(self, other):
    """Reflected equality: returns C{self == other}."""
    outcome = (self == other)
    return outcome
1446
def __rne__(self, other):
    """Reflected inequality: the negation of C{self == other}."""
    same = (self == other)
    return not same
1449 1450
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name through the base class, then rebuild C{errmsg} as
           C{"Expected " + name}.  Returns whatever the base class returned."""
        result = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return result
1460 1461
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # flags: may return an empty match, and parseImpl cannot raise IndexError
        self.mayIndexError = False
        self.mayReturnEmpty = True
1469 1470
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fails: re-aims the shared C{myException} at C{loc} and raises it."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1485 1486
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty element in place
            self.__class__ = Empty
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal text at C{loc}; returns C{(loc + matchLen, match)}
           or raises the shared C{myException} re-aimed at C{loc}."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{Literal}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}, requiring that the characters just
           before and just after it are not in C{identChars}.  Returns
           C{(loc + matchLen, match)} or raises the shared C{myException}."""
        end = loc + self.matchLen
        if self.caseless:
            matched = ((instring[loc:end].upper() == self.caselessmatch) and
                       (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars) and
                       (loc == 0 or instring[loc-1].upper() not in self.identChars))
        else:
            matched = (instring[loc] == self.firstMatchChar and
                       (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                       (loc >= len(instring)-self.matchLen or instring[end] not in self.identChars) and
                       (loc == 0 or instring[loc-1] not in self.identChars))
        if matched:
            return end, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this keyword that keeps the same C{identChars}."""
        c = super(Keyword, self).copy()
        # carry this instance's identifier chars over instead of resetting
        # them to the class default (which dropped custom identChars on copy)
        c.identChars = self.identChars
        return c

    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1578
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base class stores the upper-cased text as the comparison target
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice with the stored match text;
           on success return C{(loc + matchLen, returnString)}."""
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1599
class CaselessKeyword(Keyword):
    """C{Keyword} that is always constructed with C{caseless=True}."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc} ignoring case, requiring that the
           following character is not in C{identChars}."""
        # NOTE(review): unlike the caseless branch of Keyword.parseImpl, the
        # character before loc is not checked here - confirm this is intended
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.caselessmatch:
            if loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars:
                return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1613
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.  An optional
       C{excludeChars} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join([c for c in initChars if c not in excludeChars])
            if bodyChars:
                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # fast path: with default length limits and no space in either
        # character set, the word is matched with a compiled regex instead
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be emitted as an escaped literal;
                # several initial characters must form a character class (else
                # branch), otherwise they would be matched as a fixed sequence
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character matcher in parseImpl
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at C{loc}, through the compiled regex when one is
           available, otherwise character by character.  Returns
           C{(newLoc, matchedText)} or raises the shared C{myException}."""
        if self.re:
            result = self.re.match(instring, loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not(instring[loc] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that continues past maxLen is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start > 0 and instring[start-1] in bodychars) or (loc < instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            # base __str__ failed; fall back to the generated representation
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets for display
                if len(s) > 4:
                    return s[:4]+"..."
                else:
                    return s

            if (self.initCharsOrig != self.bodyCharsOrig):
                self.strRepr = "W:(%s,%s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1746 1747
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

           C{pattern} may also be an already compiled RE object, which is then
           used directly.  Any other type raises C{ValueError}; an invalid
           pattern string re-raises the C{re} compile error after a warning."""
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the regex source text, not the repr of the compiled object,
            # so the element's name and error messages show the real pattern
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the regex at C{loc}.  Returns C{(matchEnd, ParseResults)}
           where the results hold the matched text plus one named entry per
           named group; raises the shared C{myException} on failure."""
        result = self.re.match(instring, loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc, ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            # base __str__ failed; fall back to the generated representation
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1813 1814
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) if C{quoteChar} or
           C{endQuoteChar} is empty once surrounding whitespace has been stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Build the matching regex piece by piece: opening quote, then a
        # repeated group of alternatives for the body, then the closing quote.
        # First alternative: any single character that is not the first
        # end-quote character, not the escape character and (unless
        # multiline) not a line break.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        if len(self.endQuoteChar) > 1:
            # multi-character end quote: also accept any proper prefix of the
            # end quote that is followed by a character breaking the sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # pattern used when unquoting: escape char followed by the first
            # character of either quote sequence
            charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
            self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}.

           Returns C{(newloc, text)}.  When C{unquoteResults} is set, the
           delimiters are stripped, escaped quote characters are unescaped and
           each C{escQuote} sequence is replaced by the end quote string.
           Raises this element's cached exception if there is no match.
        """
        # cheap first-character test before running the regex
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid string escape sequence
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base class string if available, else a cached description of the quote delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # Best effort only: fall through to the generated representation.
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1929 1930
class CharsNotIn(Token):
    """Token matching a run of characters that are all *outside* a given set.
       Construct with a string of the disallowed characters plus optional
       C{min}, C{max} and/or C{exact} lengths.  C{min} defaults to 1 and may
       not be smaller; C{max} and C{exact} default to 0, which means "no
       maximum" and "no exact length" respectively.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # work out the length bounds: exact (when given) overrides min and max
        lower = min
        upper = _MAX_INT
        if max > 0:
            upper = max
        if exact > 0:
            lower = upper = exact
        self.minLen = lower
        self.maxLen = upper

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until a disallowed one (or the
           length limit) is reached; return C{(newloc, matched_text)}."""
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        limit = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            # too few characters were available
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class string if available, else a cached
           C{!W:(chars)} description (character set abbreviated to four)."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        shown = self.notChars
        if len(shown) > 4:
            text = "!W:(%s...)" % shown[:4]
        else:
            text = "!W:(%s)" % shown
        self.strRepr = text
        return text
2000
class White(Token):
    """Token that explicitly matches whitespace, for grammars in which some
       whitespace is significant (pyparsing otherwise skips it).  Construct
       with a string of the whitespace characters to match; the default is
       C{" \\t\\r\\n"}.  The optional C{min}, C{max} and C{exact} arguments
       bound the length of the match (0 for C{max}/C{exact} means unset)."""
    # printable labels used to build this element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters being matched are removed from the skippable whitespace set
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        # length bounds: exact (when given) overrides min and max
        lower = min
        upper = _MAX_INT
        if max > 0:
            upper = max
        if exact > 0:
            lower = upper = exact
        self.minLen = lower
        self.maxLen = upper

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at
           C{loc}; return C{(newloc, matched_text)}."""
        if instring[ loc ] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            # run was shorter than the required minimum
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
2056 2057
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below; names the
       element after its class and marks it as able to return no tokens."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2064
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; handy when
       scraping tabular reports."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column
           (or a non-space character, or end of input) is reached."""
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column; fail if the
           current position is already past that column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2087
class LineStart(_PositionToken):
    """Succeeds when the current position is at the beginning of a line of
       the parse string."""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return C{loc}, advanced by one when the inherited pre-parse lands
           on a newline character."""
        skipped = super(LineStart,self).preParse(instring,loc)
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at the start of the string, right after the
           leading skippable text, or right after a newline; fail otherwise."""
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2111
class LineEnd(_PositionToken):
    """Succeeds when the current position is at the end of a line of the
       parse string."""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc+1, "\\n")} on a newline, C{(loc+1, [])} exactly at
           the end of the input, and fail anywhere else."""
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == end:
            return loc+1, []

        # some other character, or a position beyond the end of the input
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2136
class StringStart(_PositionToken):
    """Succeeds when the current position is at the beginning of the parse
       string."""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at position 0, or when everything before
           C{loc} is consumed by pre-parsing from position 0; fail otherwise."""
        if loc == 0 or loc == self.preParse( instring, 0 ):
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2153
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only at or beyond the end of C{instring}.

           Returns C{(loc+1, [])} exactly at the end of the input and
           C{(loc, [])} when C{loc} is already past it; raises this element's
           cached exception when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        elif loc == instrlen:
            return loc+1, []
        else:
            # loc > instrlen: the three comparisons are exhaustive, so the
            # former trailing "else: raise" branch could never run
            return loc, []
2176
class WordStart(_PositionToken):
    """Succeeds when the current position is at the beginning of a word,
       i.e. the previous character is not in C{wordChars}
       (default=C{printables}) and the current character is.  Use
       C{WordStart(alphanums)} to emulate the C{\\b} behavior of regular
       expressions.  C{WordStart} also matches at position 0 of the string
       being parsed.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        if loc == 0:
            return loc, []

        inWord = self.wordChars
        if instring[loc-1] in inWord or instring[loc] not in inWord:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2198
class WordEnd(_PositionToken):
    """Succeeds when the current position is at the end of a word, i.e. the
       previous character is in C{wordChars} (default=C{printables}) and the
       current character is not.  Use C{WordEnd(alphanums)} to emulate the
       C{\\b} behavior of regular expressions.  C{WordEnd} also matches at
       the end of the string being parsed.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        end = len(instring)
        if end == 0 or loc >= end:
            # empty input, or already at/after the end of the input
            return loc, []

        inWord = self.wordChars
        if instring[loc] in inWord or instring[loc-1] not in inWord:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2223 2224
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions in C{self.exprs}.

           C{exprs} may be a list (stored as-is, not copied), a string
           (wrapped in a single C{Literal}), any other iterable (converted to
           a list), or a single non-iterable object (wrapped in a one-element
           list).
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the C{i}-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, drop the cached string
           representation, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression here and on every
           contained expression; a C{Suppress} that is already registered is
           not added again.  Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        # propagate the entry just registered by the base class
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base class string if available, else a cached
           C{ClassName:(exprs)} description."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # Best effort only: fall through to the generated representation.
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions and flatten directly nested
           expressions of the same class.  Returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class implementation and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then run the recursion check
           on this expression.

           C{validateTrace} is the list of expressions visited so far; it is
           never modified (a new list is passed down).
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are
           themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2325
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once C{And.parseImpl} has passed one of these, a
           failure in any later expression is raised as a
           C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup *after*
            # Empty, so Empty's own __init__ is not run here -- confirm
            # whether that is intentional.
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # an And can match empty only if every contained expression can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Take whitespace handling from the first expression.  Use the
        # normalized self.exprs rather than the raw argument, which may be a
        # string or a one-shot iterable that cannot be indexed meaningfully.
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in turn, accumulating tokens.

           Returns C{(newloc, tokens)}.  After an C{_ErrorStop} marker has
           been seen, C{ParseBaseException} and C{IndexError} failures are
           converted to C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that have tokens or named results
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement C{+=}: append C{other} (strings become C{Literal}s)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on contained expressions, stopping after
           the first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a b c}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2393 2394
class Or(ParseExpression):
    """Requires at least one of the given C{ParseExpression}s to match.
       When several alternatives match, the one that consumes the most input
       is used.  May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # an Or can match empty as soon as any alternative can
        canBeEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                canBeEmpty = True
                break
        self.mayReturnEmpty = canBeEmpty

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse for real with the one that
           reached furthest; if none matched, raise the failure that got
           furthest into the input."""
        furthestFailLoc = -1
        furthestFailure = None
        bestEndLoc = -1
        bestExpr = None
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestFailLoc = len(instring)
            else:
                if endLoc > bestEndLoc:
                    bestEndLoc = endLoc
                    bestExpr = alt

        if bestEndLoc >= 0:
            return bestExpr._parse( instring, loc, doActions )

        if furthestFailure is not None:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other ):
        """Implement C{^=}: append C{other} (strings become C{Literal}s)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a ^ b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(alt) for alt in self.exprs ]
            self.strRepr = "{" + " ^ ".join( parts ) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2455 2456
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            # can match empty as soon as any alternative can
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.

           If none matches, raise the failure that got furthest into the
           input, or a generic C{ParseException} when there are no
           alternatives at all.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                ret = e._parse( instring, loc, doActions )
                return ret
            except ParseException:
                # fetch the exception via sys.exc_info(), as Or and And do,
                # instead of the Python-2-only "except X, err" syntax
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Implement C{|=}: append C{other} (strings become C{Literal}s)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a | b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2514 2515
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # an Each can match empty only if every contained expression can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # expression grouping is deferred until the first call to parseImpl
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order they occur.

           A first phase uses C{tryParse} to discover the order in which the
           expressions match; a second phase re-parses in that order with
           C{_parse} to collect the results.  Returns C{(newloc, results)};
           raises C{ParseException} if a required expression never matched.
        """
        if self.initExprGroups:
            # first call only: classify the contained expressions and cache
            # the groups on the instance
            # opt1: the inner expressions of Optional members
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            # opt2: members that may match empty and are not themselves in opt1
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # per-call working copies; entries are removed as they match
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # phase 1: keep making passes over the candidates until a whole pass
        # matches nothing
        keepMatching = True
        while keepMatching:
            # the multi* groups are re-added on every pass, so they can match repeatedly
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # phase 2: parse for real, from the original loc, in the discovered order
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # merge the individual results; when a results name is already
        # present, combine the old and new values and re-assign them after
        # the merge
        finalResults = ParseResults([])
        for r in resultlist:
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a & b}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2602 2603
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap a single expression C{expr} (a string becomes a C{Literal};
           C{None} is allowed) and inherit its parsing attributes."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression; raise
           C{ParseException} if no expression has been set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped
           expression.  Returns C{self}."""
        self.skipWhitespace = False
        # Check for a missing expression *before* dereferencing it; the
        # previous order called .copy() on None when no expression was set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression here and on the
           wrapped expression; a C{Suppress} that is already registered is
           not added again.  Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # propagate the entry just registered by the base class
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears
           in C{parseElementList}; otherwise continue the check in the wrapped
           expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression, then run the recursion check on
           this element.

           C{validateTrace} is the list of expressions visited so far; it is
           never modified (a new list is passed down).
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base class string if available, else a cached
           C{ClassName:(expr)} description."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # Best effort only: fall through to the generated representation.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2674 2675
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.  C{FollowedBy}
       only checks that the expression matches at the current position; it
       does *not* advance the parsing position and always returns a null
       token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # a failed lookahead propagates out of tryParse as an exception
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
2688 2689
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.  C{NotAny} only
       checks that the expression does *not* match at the current position;
       it does *not* advance the parsing position, does *not* skip leading
       whitespace, and always returns a null token list.  May be constructed
       using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly rather than calling self.leaveWhitespace(),
        # which would also propagate to the wrapped expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: lookahead succeeds
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return the assigned name if any, else a cached C{~{expr}} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{%s}" % _ustr(self.expr)
        return self.strRepr
2724 2725
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: matches the given expression zero or more times."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails; return
           C{(newloc, tokens)}, with an empty list if it never matched."""
        collected = []
        try:
            loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                # keep results that have tokens or named results
                if more or more.keys():
                    collected += more
        except (ParseException,IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        """Return the assigned name if any, else a cached C{[expr]...} description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]..." % _ustr(self.expr)
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned
           element as saving its results as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2763 2764
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once (mandatory), then repeatedly until it fails.

        A failure of the first match propagates to the caller; failures of later
        repetitions only terminate the loop.  Returns C{(loc, tokens)}.
        """
        # the first match is outside the try block: there must be at least one
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            # decided once, before entering the repetition loop
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                else:
                    nextLoc = loc
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry either tokens or named results
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else the cached form C{{expr}...}."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr

        return self.name

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned element C{saveAsList}."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2798
2799 -class _NullToken(object):
2800 - def __bool__(self):
2801 return False
2802 __nonzero__ = __bool__
2803 - def __str__(self):
2804 return ""
2805 2806 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return value can also be specified, to be used when the
       optional expression is not found.
    """

    def __init__( self, exprs, default=_optionalNotMatched ):
        """Wrap C{exprs}; C{default} is the value returned when C{exprs} does not match."""
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression; on failure return the default (if any) without advancing.

        Returns C{(loc, tokens)}.  When the expression fails and no default was
        given, C{tokens} is an empty list.  When a default was given and the
        wrapped expression has a results name, the default is also stored under
        that name.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else the cached form C{[expr]}."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr

        return self.name
2839 2840
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If C{failOn} is given and matches at a scanned
       position before the target is found, the skip fails.
    """

    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string is promoted to a Literal; anything else (including
        # None) is stored exactly as given
        if isinstance(failOn, basestring):
            failOn = Literal(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from C{loc} until the target expression matches.

        Returns C{(loc, [skippedText])}, or, when C{includeMatch} is set and the
        target produced tokens, C{(loc, [results])} where results hold the
        skipped text followed by the target's tokens.  Raises this element's
        prepared exception if the end of the input is passed without a match,
        and a C{ParseException} if the C{failOn} expression matches first.
        """
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        hitFailOn = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # flag the exception so the outer handler re-raises it
                        # instead of treating it as "no match here"
                        hitFailOn = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    hitFailOn = False
                if self.ignoreExpr is not None:
                    # jump over every consecutive ignorable match at this position
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                target._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [ skipText ]
                loc,mat = target._parse(instring,loc,doActions,callPreParse=False)
                if not mat:
                    return loc, [ skipText ]
                skipRes = ParseResults( skipText )
                skipRes += mat
                return loc, [ skipRes ]
            except (ParseException,IndexError):
                if hitFailOn:
                    raise
                loc += 1
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2905
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
    """

    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Define the forwarded expression; a plain string is converted to a C{Literal}.

        Copies the parsing attributes of the assigned expression onto this
        element and extends this element's ignore expressions with the assigned
        expression's.  Returns C{None}.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None     # discard any cached string representation
        target = self.expr
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        # whitespace chars are set before skipWhitespace, as in the original order
        self.setWhitespaceChars( target.whiteChars )
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only; returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; returns C{self}."""
        if not self.streamlined:
            # mark first, so a recursive reference back to this element stops here
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, then check this element for recursion."""
        if self not in validateTrace:
            # a fresh list is built, so the shared default argument is never mutated
            trace = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or C{"<ClassName>: <contained expression>"}.

        While the contained expression is rendered, this instance's class is
        temporarily switched to C{_ForwardNoRecurse}, so a recursive reference
        back to this element renders as C{"..."}.
        """
        if hasattr(self,"name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Return a copy; an undefined C{Forward} is copied as a new C{Forward} wrapping this one."""
        if self.expr is None:
            ret = Forward()
            ret << self
            return ret
        return super(Forward,self).copy()
2977
class _ForwardNoRecurse(Forward):
    """Stand-in class assigned to a C{Forward} while its C{__str__} is running."""

    def __str__( self ):
        # fixed placeholder text instead of descending into the expression
        return "..."
2981
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""

    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}.  The C{savelist} argument is accepted but not used;
        C{saveAsList} is always initialized to C{False}."""
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2987
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: constructing an instance emits a C{DeprecationWarning}
    recommending the C{upcaseTokens} parse action instead.
    """

    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list with every token of C{tokenlist} converted to upper case."""
        # Call the token's own upper() method: the function string.upper()
        # does not exist on Python 3, and on Python 2 it simply delegated
        # to this same method.
        return [ tok.upper() for tok in tokenlist ]
2997 2998
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """

    def __init__( self, expr, joinString="", adjacent=True ):
        """
        Parameters:
         - expr - expression whose tokens are to be joined
         - joinString - (default="") string passed on when the tokens are joined
         - adjacent - (default=True) require the matched tokens to be contiguous
        """
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        """Register an ignorable expression; returns C{self}.

        When C{adjacent} is set, C{ParserElement.ignore} is called directly
        rather than the immediate superclass implementation.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one string built from C{tokenlist}.

        The result starts as a copy of C{tokenlist} with its list items removed,
        so it keeps the named results of the original tokens.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        # wrap in a list only when this element is named and named results exist
        if self.resultsName and len(combined.keys())>0:
            return [ combined ]
        return combined
3030
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning
       tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""

    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens one level deeper
        grouped = [ tokenlist ]
        return grouped
3039
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as an item key.
    """

    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to C{tokenlist} for every non-empty element.

        The key is the element's first token (integer keys are converted to
        stripped strings).  The value is C{""} for a 1-item element, the second
        token for a 2-item element whose second token is not a C{ParseResults},
        and otherwise the remainder of the element (unwrapped when it is a
        single item carrying no named results).
        """
        for i,tok in enumerate(tokenlist):
            tokLen = len(tok)
            if tokLen == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                ikey = _ustr(tok[0]).strip()

            if tokLen == 1:
                value = ""
            elif tokLen == 2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                value = tok.copy()
                del value[0]
                keepAsGroup = len(value) != 1 or (isinstance(value,ParseResults) and value.keys())
                if not keepAsGroup:
                    value = value[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3072 3073
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse( self, instring, loc, tokenlist ):
        # the expression must match, but contributes no tokens
        return []

    def suppress( self ):
        # already a suppressing element, so no further wrapping
        return self
3081 3082
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action on the first successful call; afterwards raise C{ParseException}."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # set only after the action returned, so an action that raised may run again
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
3096
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes a C{">>entering"} line to C{sys.stderr} before
    calling the parse action, and a C{"<<leaving"} line afterwards showing
    either the returned value or the exception raised (which is re-raised).
    """
    f = _trim_arity(f)
    # func_name exists only on Python 2 function objects; __name__ is the
    # portable spelling.  Fall back to the class name for callables without one.
    funcName = getattr(f, "__name__", f.__class__.__name__)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # extra leading argument: prefix the label with its class name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    elemText = _ustr(expr)
    dlName = elemText+" ["+_ustr(delim)+" "+elemText+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3136
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the count;
       otherwise the count is parsed with C{Word(nums)} and converted to an int.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # redefine the array body to match exactly the count just parsed
        n = t[0]
        if n:
            arrayBody = Group(And([expr]*n))
        else:
            arrayBody = Group(empty)
        arrayExpr << arrayBody
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr )
3157 -def _flatten(L):
3158 ret = [] 3159 for i in L: 3160 if isinstance(i,list): 3161 ret.extend(_flatten(i)) 3162 else: 3163 ret.append(i) 3164 return ret
3165
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever the original expression just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several tokens: flatten them and require each one in sequence
            flatTokens = _flatten(t.asList())
            rep << And( [ Literal(tok) for tok in flatTokens ] )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # the repeat must yield the same flattened tokens as the first match
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Return C{s} with the characters C{\\ ^ - ]} prefixed by C{_bslash}, and
    newline/tab characters replaced by the two-character sequences C{\\n} / C{\\t}."""
    # the backslash comes first in the sequence, so prefixes added for the
    # later characters are not themselves escaped again
    for special in r"\^-]":
        s = s.replace(special,_bslash+special)
    for raw, visible in (("\n",r"\n"), ("\t",r"\t")):
        s = s.replace(raw,visible)
    return _ustr(s)
3225
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       C{SyntaxWarning} and is treated as an empty set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # previously left unbound, which turned the warning into a NameError below
        symbols = []

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tested first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position - move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3284
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    # one Group per key/value entry, any number of entries, wrapped in a Dict
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
3294
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{ParseResults} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # empty markers whose parse action records the location where they matched
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens by the original text and drop the helper
            # names, leaving any other results names in place
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    matchExpr.setParseAction(extractText)
    return matchExpr
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.  The returned expression yields only the
       first token matched by C{expr}."""
    return TokenConverter(expr).setParseAction(lambda t:t[0])

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar used by srange() to parse a regex-style "[...]" character set
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# Take the digits that follow the 'x'/'X' marker.  The previous t[0][1:] only
# worked for the legacy "\0x21" form: for the documented "\x21" form it
# passed "x21" to int(), which raises ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][t[0].upper().index("X")+1:],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a parsed character range (a 2-item ParseResults) into the string of
# all characters from its first to its last item; single characters pass through
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best-effort helper: any parsing/expansion error yields "" - but
        # KeyboardInterrupt and SystemExit are no longer swallowed
        return ""
3367
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises C{ParseException}
       unless the match location is at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{transformString()}.  The returned parse action accepts any
       arguments and always returns C{[replStr]}.
    """
    def _replFunc(*ignored):
        replacement = [replStr]
        return replacement

    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
       Returns the first token without its first and last characters.
    """
    quoted = t[0]
    return quoted[1:-1]
3391
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    # each token is first converted to a string with _ustr
    return [ _ustr(tok).upper() for tok in t ]
3395
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    # each token is first converted to a string with _ustr
    return [ _ustr(tok).lower() for tok in t ]
3399
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{originalTextFor}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    # getTokensEndLoc() inspects the call stack, so it is called directly
    # from this function
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3411
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Returns the value of the local variable C{loc} in the nearest enclosing
       stack frame whose function is named C{_parseNoCache}; raises
       C{ParseFatalException} if there is no such frame."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for
        # the correct calling routine, skipping this function and its caller
        for frameRecord in fstack[2:]:
            if frameRecord[3] == "_parseNoCache":
                return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references held by the stack snapshot
        del fstack
3427
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (converted to a C{Keyword}, caseless unless C{xml}
    is true) or an expression with a C{name} attribute.  Returns the tuple
    C{(openTag, closeTag)}; both expressions get a C{tag} attribute holding the
    tag name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute needs a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: attribute names are lower-cased, values are optional and may
        # be quoted (either quote style) or unquoted
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )
    # "empty" result: True when the tag ends with "/", otherwise False
    emptyMarker = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(tagAttr)) + emptyMarker + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name, title-cased with ':' removed
    camelName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+camelName).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+camelName).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3456
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} with C{xml=False}."""
    return _makeTags( tagStr, xml=False )
3460
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} with C{xml=True}."""
    return _makeTags( tagStr, xml=True )
3464
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
       """
    # positional name-value tuples take precedence over keyword arguments
    if args:
        source = args
    else:
        source = attrDict.items()
    attrs = [(name,value) for name,value in source]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence of the attribute is enough
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))

    return pa

# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()

# associativity markers for operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element of the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises C{ValueError} for an invalid number of terms, a malformed
       ternary operator definition, or an unknown associativity.
    """
    ret = Forward()
    # the operand of the first level: a base element or a parenthesized expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad the tuple so an omitted parse action comes out as None
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        # For each case build the lookahead pattern and the grouped pattern;
        # they are combined into the match expression below.
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                lookahead = lastExpr + opExpr
                grouped = lastExpr + OneOrMore( opExpr )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = lastExpr + opExpr + lastExpr
                    grouped = lastExpr + OneOrMore( opExpr + lastExpr )
                else:
                    # no operator expression: operands simply follow each other
                    lookahead = lastExpr+lastExpr
                    grouped = lastExpr + OneOrMore(lastExpr)
            elif arity == 3:
                lookahead = lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                grouped = lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                grouped = opExpr + thisExpr
            elif arity == 2:
                if opExpr is not None:
                    lookahead = lastExpr + opExpr + thisExpr
                    grouped = lastExpr + OneOrMore( opExpr + thisExpr )
                else:
                    lookahead = lastExpr + thisExpr
                    grouped = lastExpr + OneOrMore( thisExpr )
            elif arity == 3:
                lookahead = lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                grouped = lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")

        matchExpr = FollowedBy(lookahead) + Group( grouped )
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator pattern, or falls through to
        # the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret

# Quoted-string expressions.  Between the enclosing quotes each pattern accepts
# any run of: a character other than the quote, newline, carriage return or
# backslash; a doubled quote character; a \x hex escape; or a backslash
# followed by one more character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
).setName("string enclosed in double quotes")

sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
).setName("string enclosed in single quotes")

# Either quoting style: the two patterns above joined as alternatives.
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
).setName("quotedString using single or double quotes")

# A quoted string carrying a leading 'u' prefix, wrapped in Combine.
unicodeString = Combine(_L('u') + quotedString.copy())

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for building an expression that matches nested lists
       wrapped in opening and closing delimiters (C{"("} and C{")"} by default).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may also be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may also be a pyparsing expression
        - content - expression for the items found inside the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain opening or closing
          delimiters that must not count as nesting (default=quotedString)

       When C{content} is omitted, a default content expression is built that
       captures the whitespace-delimited text between the delimiters as a list
       of separate values.  Building that default requires C{opener} and
       C{closer} to both be strings.

       C{ignoreExpr} is typically something like quotedString or a comment
       expression.  Combine several such expressions with an C{L{Or}} or
       C{L{MatchFirst}}.  Pass C{None} if nothing should be ignored.

       Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
       omitted while C{opener} or C{closer} is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be derived from plain strings.
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()

        if len(opener) == 1 and len(closer) == 1:
            # Single-character delimiters can simply be excluded by character.
            excluded = opener + closer + whitespace
            if ignoreExpr is None:
                # The parse action is attached to the CharsNotIn element only.
                content = empty.copy() + CharsNotIn(excluded).setParseAction(stripToken)
            else:
                chars = OneOrMore(~ignoreExpr + CharsNotIn(excluded, exact=1))
                content = Combine(chars).setParseAction(stripToken)
        elif ignoreExpr is None:
            # Multi-character delimiters need negative lookahead before each
            # single non-whitespace character that is consumed.
            chars = OneOrMore(~Literal(opener) + ~Literal(closer) +
                              CharsNotIn(whitespace, exact=1))
            content = Combine(chars).setParseAction(stripToken)
        else:
            chars = OneOrMore(~ignoreExpr +
                              ~Literal(opener) + ~Literal(closer) +
                              CharsNotIn(whitespace, exact=1))
            content = Combine(chars).setParseAction(stripToken)

    # The result refers to itself so that lists can nest to any depth.
    nested = Forward()
    if ignoreExpr is None:
        body = Suppress(opener) + ZeroOrMore(nested | content) + Suppress(closer)
    else:
        body = Suppress(opener) + ZeroOrMore(ignoreExpr | nested | content) + Suppress(closer)
    nested << Group(body)
    return nested
3629
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       the block statements found in Python source code.

       Parameters:
        - blockStatementExpr - expression defining the syntax of the statement
          that is repeated within the indented block
        - indentStack - list created by the caller and used to track the
          indentation columns (multiple statementWithIndentedBlock expressions
          within a single grammar should share a common indentStack)
        - indent - boolean indicating whether the block must be indented
          beyond the current level; set to False for a block of left-most
          statements (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified in place: an ignore
       expression of C{_bslash + LineEnd()} is added to it.
    """
    def checkPeerIndent(s, loc, toks):
        # Nothing to check once the end of the input has been reached.
        if loc < len(s):
            column = col(loc, s)
            expected = indentStack[-1]
            if column > expected:
                raise ParseFatalException(s, loc, "illegal nesting")
            if column != expected:
                raise ParseException(s, loc, "not a peer entry")

    def checkSubIndent(s, loc, toks):
        column = col(loc, s)
        if not (column > indentStack[-1]):
            raise ParseException(s, loc, "not a subentry")
        # Deeper than the current level: record the new indentation column.
        indentStack.append(column)

    def checkUnindent(s, loc, toks):
        if loc >= len(s):
            return
        column = col(loc, s)
        isUnindent = (indentStack and
                      column < indentStack[-1] and
                      column <= indentStack[-2])
        if not isUnindent:
            raise ParseException(s, loc, "not an unindent")
        indentStack.pop()

    newlines = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indentMarker = Empty() + Empty().setParseAction(checkSubIndent)
    peerMarker = Empty().setParseAction(checkPeerIndent)
    unindentMarker = Empty().setParseAction(checkUnindent)

    # One or more statements, each checked against the current indent column.
    statements = OneOrMore(peerMarker + Group(blockStatementExpr) + Optional(newlines))
    if indent:
        block = Optional(newlines) + indentMarker + statements + unindentMarker
    else:
        block = Optional(newlines) + statements
    smExpr = Group(block)

    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# 8-bit character sets, given as srange specifications.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Generic HTML tag expressions and a handful of common character entities.
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:"))
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
).streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), '><& "'))
# Parse action: returns the mapped character for a known entity name, else None.
replaceHTMLEntity = lambda t: _htmlEntityMap[t.entity] if t.entity in _htmlEntityMap else None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# Comma-separated values: every printable character except the comma may
# appear inside an unquoted item.
_noncomma = "".join(c for c in printables if c != ",")
_commasepitem = Combine(
    OneOrMore(Word(_noncomma) +
              Optional(Word(" \t") +
                       ~Literal(",") + ~LineEnd()))
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and print the results, or the
           location and text of the parse error."""
        try:
            result = simpleSQL.parseString( teststring )
            print (teststring + "->" + str(result.asList()))
            print ("tokens = " + str(result))
            print ("tokens.columns = " + str(result.columns))
            print ("tokens.tables = " + str(result.tables))
            print (result.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() instead of "except ... as err" keeps this
            # runnable on both Python 2 and Python 3.
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # A minimal "select <columns> from <tables>" grammar used as a smoke test.
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    for sample in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    ):
        test( sample )
