Package parsedatetime :: Module parsedatetime_consts
[hide private]
[frames | no frames]

Source Code for Module parsedatetime.parsedatetime_consts

  1  #!/usr/bin/env python 
  2   
  3  """ 
  4  parsedatetime constants and helper functions to determine 
  5  regex values from Locale information if present. 
  6   
  7  Also contains the internal Locale classes to give some sane 
  8  defaults if PyICU is not found. 
  9  """ 
 10   
 11  __license__ = """ 
 12  Copyright (c) 2004-2007 Mike Taylor 
 13  Copyright (c) 2006-2007 Darshana Chhajed 
 14  All rights reserved. 
 15   
 16  Licensed under the Apache License, Version 2.0 (the "License"); 
 17  you may not use this file except in compliance with the License. 
 18  You may obtain a copy of the License at 
 19   
 20     http://www.apache.org/licenses/LICENSE-2.0 
 21   
 22  Unless required by applicable law or agreed to in writing, software 
 23  distributed under the License is distributed on an "AS IS" BASIS, 
 24  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 25  See the License for the specific language governing permissions and 
 26  limitations under the License. 
 27  """ 
 28   
 29  try: 
 30      import PyICU as pyicu 
 31  except: 
 32      pyicu = None 
 33   
 34   
 35  import datetime 
 36  import time 
 37  import re 
 38   
 39   
class pdtLocale_en:
    """
    Built-in en_US locale data.

    L{Constants} is initialized from this class when PyICU is not located.

    Everything is exposed as class attributes: the lists, strings and
    lookup tables parsedatetime needs to evaluate strings for the USA.
    """

    localeID = 'en_US'   # don't use a unicode string

    dateSep = [u'/', u'.']
    timeSep = [u':']
    meridian = [u'AM', u'PM']
    usesMeridian = True
    uses24 = False

    # Day and month names are lowercase; weekdays run Monday..Sunday
    Weekdays = [
        u'monday', u'tuesday', u'wednesday', u'thursday',
        u'friday', u'saturday', u'sunday',
    ]
    shortWeekdays = [
        u'mon', u'tues', u'wed', u'thu',
        u'fri', u'sat', u'sun',
    ]
    Months = [
        u'january', u'february', u'march', u'april',
        u'may', u'june', u'july', u'august',
        u'september', u'october', u'november', u'december',
    ]
    shortMonths = [
        u'jan', u'feb', u'mar', u'apr',
        u'may', u'jun', u'jul', u'aug',
        u'sep', u'oct', u'nov', u'dec',
    ]

    dateFormats = dict(
        full='EEEE, MMMM d, yyyy',
        long='MMMM d, yyyy',
        medium='MMM d, yyyy',
        short='M/d/yy',
    )
    timeFormats = dict(
        full='h:mm:ss a z',
        long='h:mm:ss a z',
        medium='h:mm:ss a',
        short='h:mm a',
    )

    # order of the month/day/year parts in a numeric date
    dp_order = [u'm', u'd', u'y']

    # unit names; these get merged into the regex constants later
    units = dict(
        seconds=['second', 'sec'],
        minutes=['minute', 'min'],
        hours=['hour', 'hr'],
        days=['day', 'dy'],
        weeks=['week', 'wk'],
        months=['month', 'mth'],
        years=['year', 'yr'],
    )

    # text fragments that are substituted into the regexes later
    re_consts = {
        'specials': 'in|on|of|at',
        'timeseperator': ':',
        'rangeseperator': '-',
        'daysuffix': 'rd|st|nd|th',
        'meridian': 'am|pm|a.m.|p.m.|a|p',
        'qunits': 'h|m|s|d|w|m|y',
        'now': ['now'],
    }

    # adjustment applied to the returned date: before/after the source
    modifiers = {
        'from': 1,
        'before': -1,
        'after': 1,
        'ago': -1,
        'prior': -1,
        'prev': -1,
        'last': -1,
        'next': 1,
        'this': 0,
        'previous': -1,
        'in a': 2,
        'end of': 0,
        'eod': 0,
        'eo': 0,
    }

    dayoffsets = dict(tomorrow=1, today=0, yesterday=-1)

    # Special days and/or times, i.e. lunch, noon, evening.
    # Each entry holds the fields to replace; the current date/time
    # has already been filled in by the method buildSources.
    re_sources = {
        'noon': dict(hr=12, mn=0, sec=0),
        'lunch': dict(hr=12, mn=0, sec=0),
        'morning': dict(hr=6, mn=0, sec=0),
        'breakfast': dict(hr=8, mn=0, sec=0),
        'dinner': dict(hr=19, mn=0, sec=0),
        'evening': dict(hr=18, mn=0, sec=0),
        'midnight': dict(hr=0, mn=0, sec=0),
        'night': dict(hr=21, mn=0, sec=0),
        'tonight': dict(hr=21, mn=0, sec=0),
        'eod': dict(hr=17, mn=0, sec=0),
    }
143 144
class pdtLocale_au:
    """
    en_AU Locale constants

    This class will be used to initialize L{Constants} if PyICU is not
    located.

    Defined as class variables are the lists and strings needed by
    parsedatetime to evaluate strings for Australia.  It differs from the
    en_US data in the date separators, the date/time format patterns and
    the day-month-year ordering of numeric dates.
    """

    localeID = 'en_AU'   # don't use a unicode string
    dateSep = [u'-', u'/']
    timeSep = [u':']
    meridian = [u'AM', u'PM']
    usesMeridian = True
    uses24 = False

    # Day and month names are lowercase; weekdays run Monday..Sunday
    Weekdays = [u'monday', u'tuesday', u'wednesday',
                u'thursday', u'friday', u'saturday', u'sunday',
                ]
    shortWeekdays = [u'mon', u'tues', u'wed',
                     u'thu', u'fri', u'sat', u'sun',
                     ]
    Months = [u'january', u'february', u'march',
              u'april', u'may', u'june',
              u'july', u'august', u'september',
              u'october', u'november', u'december',
              ]
    shortMonths = [u'jan', u'feb', u'mar',
                   u'apr', u'may', u'jun',
                   u'jul', u'aug', u'sep',
                   u'oct', u'nov', u'dec',
                   ]
    dateFormats = {'full': 'EEEE, d MMMM yyyy',
                   'long': 'd MMMM yyyy',
                   'medium': 'dd/MM/yyyy',
                   'short': 'd/MM/yy',
                   }
    timeFormats = {'full': 'h:mm:ss a z',
                   'long': 'h:mm:ss a',
                   'medium': 'h:mm:ss a',
                   'short': 'h:mm a',
                   }

    # order of the day/month/year parts in a numeric date
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['second', 'sec'],
             'minutes': ['minute', 'min'],
             'hours': ['hour', 'hr'],
             'days': ['day', 'dy'],
             'weeks': ['week', 'wk'],
             'months': ['month', 'mth'],
             'years': ['year', 'yr'],
             }

    # text constants to be used by regex's later
    re_consts = {'specials': 'in|on|of|at',
                 'timeseperator': ':',
                 'rangeseperator': '-',
                 'daysuffix': 'rd|st|nd|th',
                 'meridian': 'am|pm|a.m.|p.m.|a|p',
                 'qunits': 'h|m|s|d|w|m|y',
                 'now': ['now'],
                 }

    # Used to adjust the returned date before/after the source.
    # 'ago' points into the past, so it is negative exactly as in the
    # en_US table.  'eod' is listed because the modifier regex built in
    # this module matches it for every locale.
    modifiers = {'from': 1,
                 'before': -1,
                 'after': 1,
                 'ago': -1,
                 'prior': -1,
                 'prev': -1,
                 'last': -1,
                 'next': 1,
                 'this': 0,
                 'previous': -1,
                 'in a': 2,
                 'end of': 0,
                 'eod': 0,
                 'eo': 0,
                 }

    dayoffsets = {'tomorrow': 1,
                  'today': 0,
                  'yesterday': -1,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replaced - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {'noon': {'hr': 12, 'mn': 0, 'sec': 0},
                  'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
                  'morning': {'hr': 6, 'mn': 0, 'sec': 0},
                  'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
                  'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
                  'evening': {'hr': 18, 'mn': 0, 'sec': 0},
                  'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
                  'night': {'hr': 21, 'mn': 0, 'sec': 0},
                  'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
                  'eod': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
247 248
class pdtLocale_es:
    """
    es Locale constants

    This class will be used to initialize L{Constants} if PyICU is not
    located.

    Defined as class variables are the lists and strings needed by
    parsedatetime to evaluate strings in Spanish.

    Note that only the weekday names, month names and format patterns are
    Spanish; the units, modifiers, day offsets and special times below are
    still in English.
    """

    localeID = 'es'   # don't use a unicode string
    dateSep = [u'/']
    timeSep = [u':']
    meridian = []
    usesMeridian = False
    uses24 = True

    # Day and month names are lowercase; weekdays run Monday..Sunday
    Weekdays = [u'lunes', u'martes', u'mi\xe9rcoles',
                u'jueves', u'viernes', u's\xe1bado', u'domingo',
                ]
    shortWeekdays = [u'lun', u'mar', u'mi\xe9',
                     u'jue', u'vie', u's\xe1b', u'dom',
                     ]
    Months = [u'enero', u'febrero', u'marzo',
              u'abril', u'mayo', u'junio',
              u'julio', u'agosto', u'septiembre',
              u'octubre', u'noviembre', u'diciembre',
              ]
    shortMonths = [u'ene', u'feb', u'mar',
                   u'abr', u'may', u'jun',
                   u'jul', u'ago', u'sep',
                   u'oct', u'nov', u'dic',
                   ]
    dateFormats = {'full': "EEEE d' de 'MMMM' de 'yyyy",
                   'long': "d' de 'MMMM' de 'yyyy",
                   'medium': "dd-MMM-yy",
                   'short': "d/MM/yy",
                   }
    timeFormats = {'full': "HH'H'mm' 'ss z",
                   'long': "HH:mm:ss z",
                   'medium': "HH:mm:ss",
                   'short': "HH:mm",
                   }

    # order of the day/month/year parts in a numeric date
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['second', 'sec'],
             'minutes': ['minute', 'min'],
             'hours': ['hour', 'hr'],
             'days': ['day', 'dy'],
             'weeks': ['week', 'wk'],
             'months': ['month', 'mth'],
             'years': ['year', 'yr'],
             }

    # text constants to be used by regex's later
    #
    # 'timeseperator' is substituted into the time regexes with %s, so it
    # has to be a plain string: passing the timeSep list would insert the
    # list's repr into the pattern.
    # NOTE(review): 'dateseperator' is not substituted into any pattern in
    # this module, so it is left as the dateSep list - confirm whether any
    # external user expects a string here.
    re_consts = {'specials': 'in|on|of|at',
                 'timeseperator': ':',
                 'dateseperator': dateSep,
                 'rangeseperator': '-',
                 'daysuffix': 'rd|st|nd|th',
                 'qunits': 'h|m|s|d|w|m|y',
                 'now': ['now'],
                 }

    # Used to adjust the returned date before/after the source.
    # 'ago' points into the past, so it is negative exactly as in the
    # en_US table.  'eod' is listed because the modifier regex built in
    # this module matches it for every locale.
    modifiers = {'from': 1,
                 'before': -1,
                 'after': 1,
                 'ago': -1,
                 'prior': -1,
                 'prev': -1,
                 'last': -1,
                 'next': 1,
                 'this': 0,
                 'previous': -1,
                 'in a': 2,
                 'end of': 0,
                 'eod': 0,
                 'eo': 0,
                 }

    dayoffsets = {'tomorrow': 1,
                  'today': 0,
                  'yesterday': -1,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replaced - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {'noon': {'hr': 12, 'mn': 0, 'sec': 0},
                  'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
                  'morning': {'hr': 6, 'mn': 0, 'sec': 0},
                  'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
                  'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
                  'evening': {'hr': 18, 'mn': 0, 'sec': 0},
                  'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
                  'night': {'hr': 21, 'mn': 0, 'sec': 0},
                  'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
                  'eod': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
# Registry of the built-in locale classes, keyed by locale ID.
pdtLocales = dict(
    en_US=pdtLocale_en,
    en_AU=pdtLocale_au,
    es=pdtLocale_es,
)
def _initLocale(ptc):
    """
    Helper function to initialize the different lists and strings
    from either PyICU or one of the internal pdt Locales and store
    them into ptc.

    When PyICU is importable and C{ptc.usePyICU} is set, the weekday and
    month names and the date/time format patterns are read from ICU.
    Otherwise C{ptc.localeID} (or the first entry of
    C{ptc.fallbackLocales} found in C{pdtLocales}) selects one of the
    built-in locale classes and C{ptc.usePyICU} is set to False.

    In both cases C{ptc.re_sources}, C{ptc.re_values}, C{ptc.Modifiers},
    C{ptc.dayOffsets} and C{ptc.Units} are then filled in from the
    built-in locale matching C{ptc.localeID}, or from en_US when there is
    none.  C{ptc.re_values} is a private copy, so the locale class's own
    C{re_consts} table is never modified.

    @type  ptc: L{Constants}
    @param ptc: the constants instance to populate in place
    """
    if pyicu and ptc.usePyICU:
        ptc.icuLocale = None

        if ptc.localeID is not None:
            ptc.icuLocale = pyicu.Locale(ptc.localeID)

        if ptc.icuLocale is None:
            for localeID in ptc.fallbackLocales:
                ptc.localeID = localeID
                ptc.icuLocale = pyicu.Locale(localeID)

                if ptc.icuLocale is not None:
                    break

        ptc.icuSymbols = pyicu.DateFormatSymbols(ptc.icuLocale)

        # grab ICU list of weekdays, skipping first entry which
        # is always blank; real lists are built (not map objects)
        # because they are sliced below
        wd = [name.lower() for name in ptc.icuSymbols.getWeekdays()[1:]]
        swd = [name.lower()
               for name in ptc.icuSymbols.getShortWeekdays()[1:]]

        # store them in our list with Monday first (ICU puts Sunday first)
        ptc.Weekdays = wd[1:] + wd[0:1]
        ptc.shortWeekdays = swd[1:] + swd[0:1]
        ptc.Months = [name.lower() for name in ptc.icuSymbols.getMonths()]
        ptc.shortMonths = [name.lower()
                           for name in ptc.icuSymbols.getShortMonths()]

        # not quite sure how to init this so for now it is set to None;
        # ptc.re_values is filled in from the built-in tables below
        ptc.re_consts = None

        styles = (('full', pyicu.DateFormat.kFull),
                  ('long', pyicu.DateFormat.kLong),
                  ('medium', pyicu.DateFormat.kMedium),
                  ('short', pyicu.DateFormat.kShort),
                  )

        ptc.icu_df = {}
        ptc.icu_tf = {}
        for key, style in styles:
            ptc.icu_df[key] = pyicu.DateFormat.createDateInstance(
                style, ptc.icuLocale)
            ptc.icu_tf[key] = pyicu.DateFormat.createTimeInstance(
                style, ptc.icuLocale)

        ptc.dateFormats = {}
        ptc.timeFormats = {}
        for key, style in styles:
            ptc.dateFormats[key] = ptc.icu_df[key].toPattern()
            ptc.timeFormats[key] = ptc.icu_tf[key].toPattern()
    else:
        if ptc.localeID not in pdtLocales:
            for localeID in ptc.fallbackLocales:
                ptc.localeID = localeID

                if localeID in pdtLocales:
                    break

        ptc.locale = pdtLocales[ptc.localeID]
        ptc.usePyICU = False

        ptc.Weekdays = ptc.locale.Weekdays
        ptc.shortWeekdays = ptc.locale.shortWeekdays
        ptc.Months = ptc.locale.Months
        ptc.shortMonths = ptc.locale.shortMonths
        ptc.dateFormats = ptc.locale.dateFormats
        ptc.timeFormats = ptc.locale.timeFormats

    # these values are used to setup the various bits
    # of the regex values used to parse
    #
    # check if a local set of constants has been
    # provided, if not use en_US as the default
    defaults = pdtLocales['en_US']

    if ptc.localeID in pdtLocales:
        source = pdtLocales[ptc.localeID]
    else:
        source = defaults

    ptc.re_sources = source.re_sources
    ptc.Modifiers = source.modifiers
    ptc.dayOffsets = source.dayoffsets
    units = source.units

    # work on a copy: keys are added below and must not leak back
    # into the class-level table shared by every instance
    ptc.re_values = dict(source.re_consts)

    # for now, pull over any missing keys from the US set
    for key in defaults.re_consts:
        if key not in ptc.re_values:
            ptc.re_values[key] = defaults.re_consts[key]

    ptc.re_values['months'] = '|'.join(ptc.Months)
    ptc.re_values['shortmonths'] = '|'.join(ptc.shortMonths)
    ptc.re_values['days'] = '|'.join(ptc.Weekdays)
    ptc.re_values['shortdays'] = '|'.join(ptc.shortWeekdays)

    alternatives = ['|'.join(units[unit]) for unit in units]

    ptc.re_values['units'] = '|'.join(alternatives)
    ptc.Units = ptc.re_values['units'].split('|')
def _meridianForms(meridian, index):
    """
    Return the list of spellings for the meridian text at C{index}.

    The list holds the original text, its first character, the lowercase
    text and the first lowercase character.  An empty text yields a
    one-element list; a missing entry yields two empty strings.
    """
    if len(meridian) <= index:
        return ['', '']

    text = meridian[index]
    forms = [text]

    if len(text) > 0:
        lowered = text.lower()
        forms.extend([text[0], lowered, lowered[0]])

    return forms


def _initSymbols(ptc):
    """
    Helper function to initialize the single character constants
    and other symbols needed.

    Sets C{timeSep}, C{dateSep}, C{meridian}, C{usesMeridian},
    C{uses24} and C{dp_order} on ptc - derived from the ICU short
    date/time formatters when PyICU is in use, otherwise copied from
    C{ptc.locale} - and then builds the C{ptc.am} / C{ptc.pm} lists.

    @type  ptc: L{Constants}
    @param ptc: the constants instance to populate in place
    """
    ptc.timeSep = [u':']
    ptc.dateSep = [u'/']
    ptc.meridian = [u'AM', u'PM']

    ptc.usesMeridian = True
    ptc.uses24 = False

    if pyicu and ptc.usePyICU:
        am = u''
        pm = u''
        ts = ''

        # ICU doesn't seem to provide directly the
        # date or time seperator - so we have to
        # figure it out
        formatter = ptc.icu_tf['short']
        pattern = ptc.timeFormats['short']

        ptc.usesMeridian = u'a' in pattern
        ptc.uses24 = u'H' in pattern

        # '11:45 AM' or '11:45'
        text = formatter.format(datetime.datetime(2003, 10, 30, 11, 45))

        # ': AM' or ':'
        text = text.replace('11', '').replace('45', '')

        if len(text) > 0:
            ts = text[0]

        if ptc.usesMeridian:
            # whatever follows the separator is the AM text
            am = text[1:].strip()

            # '11:45 PM' or '23:45 PM'
            text = formatter.format(datetime.datetime(2003, 10, 30, 23, 45))

            if ptc.uses24:
                text = text.replace('23', '')
            else:
                text = text.replace('11', '')

            # 'PM' or ''
            pm = text.replace('45', '').replace(ts, '').strip()

        ptc.timeSep = [ts]
        ptc.meridian = [am, pm]

        formatter = ptc.icu_df['short']
        text = formatter.format(datetime.datetime(2003, 10, 30, 11, 45))

        # strip the four digit year before the two digit one: removing
        # '03' first would turn '2003' into '20', and a year-first format
        # would then report '2' as the date separator
        text = text.replace('2003', '').replace('10', '')
        text = text.replace('30', '').replace('03', '')

        if len(text) > 0:
            ds = text[0]
        else:
            ds = '/'

        ptc.dateSep = [ds]

        # the first letter of each part of the short pattern gives
        # the order of the day, month and year fields
        parts = ptc.dateFormats['short'].lower().split(ds)
        ptc.dp_order = [part[:1] for part in parts if len(part) > 0]
    else:
        ptc.timeSep = ptc.locale.timeSep
        ptc.dateSep = ptc.locale.dateSep
        ptc.meridian = ptc.locale.meridian
        ptc.usesMeridian = ptc.locale.usesMeridian
        ptc.uses24 = ptc.locale.uses24
        ptc.dp_order = ptc.locale.dp_order

    # build am and pm lists to contain
    # original case, lowercase and first-char
    # versions of the meridian text
    ptc.am = _meridianForms(ptc.meridian, 0)
    ptc.pm = _meridianForms(ptc.meridian, 1)
584 585
586 -def _initPatterns(ptc):
587 """ 588 Helper function to take the different localized bits from ptc and 589 create the regex strings. 590 """ 591 # TODO add code to parse the date formats and build the regexes up from sub-parts 592 # TODO find all hard-coded uses of date/time seperators 593 594 ptc.RE_DATE3 = r'''(?P<date>((?P<mthname>(%(months)s|%(shortmonths)s))\s? 595 ((?P<day>\d\d?)(\s?|%(daysuffix)s|$)+)? 596 (,\s?(?P<year>\d\d(\d\d)?))?)) 597 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values 598 ptc.RE_MONTH = r'''(\s?|^) 599 (?P<month>( 600 (?P<mthname>(%(months)s|%(shortmonths)s)) 601 (\s?(?P<year>(\d\d\d\d)))? 602 )) 603 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values 604 ptc.RE_WEEKDAY = r'''(\s?|^) 605 (?P<weekday>(%(days)s|%(shortdays)s)) 606 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values 607 608 ptc.RE_SPECIAL = r'(?P<special>^[%(specials)s]+)\s+' % ptc.re_values 609 ptc.RE_UNITS = r'''(?P<qty>(-?\d+\s* 610 (?P<units>((%(units)s)s?)) 611 ))''' % ptc.re_values 612 ptc.RE_QUNITS = r'''(?P<qty>(-?\d+\s? 613 (?P<qunits>%(qunits)s) 614 (\s?|,|$) 615 ))''' % ptc.re_values 616 ptc.RE_MODIFIER = r'''(\s?|^) 617 (?P<modifier> 618 (previous|prev|last|next|this|eod|eo|(end\sof)|(in\sa)))''' % ptc.re_values 619 ptc.RE_MODIFIER2 = r'''(\s?|^) 620 (?P<modifier> 621 (from|before|after|ago|prior)) 622 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values 623 ptc.RE_TIMEHMS = r'''(\s?|^) 624 (?P<hours>\d\d?) 625 (?P<tsep>%(timeseperator)s|) 626 (?P<minutes>\d\d) 627 (?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?''' % ptc.re_values 628 ptc.RE_TIMEHMS2 = r'''(?P<hours>(\d\d?)) 629 ((?P<tsep>%(timeseperator)s|) 630 (?P<minutes>(\d\d?)) 631 (?:(?P=tsep) 632 (?P<seconds>\d\d? 633 (?:[.,]\d+)?))?)?''' % ptc.re_values 634 635 if 'meridian' in ptc.re_values: 636 ptc.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % ptc.re_values 637 638 dateSeps = ''.join(ptc.dateSep) + '.' 
639 640 ptc.RE_DATE = r'''(\s?|^) 641 (?P<date>(\d\d?[%s]\d\d?([%s]\d\d(\d\d)?)?)) 642 (\s?|$|[^0-9a-zA-Z])''' % (dateSeps, dateSeps) 643 ptc.RE_DATE2 = r'[%s]' % dateSeps 644 ptc.RE_DAY = r'''(\s?|^) 645 (?P<day>(today|tomorrow|yesterday)) 646 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values 647 ptc.RE_TIME = r'''(\s?|^) 648 (?P<time>(morning|breakfast|noon|lunch|evening|midnight|tonight|dinner|night|now)) 649 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values 650 ptc.RE_REMAINING = r'\s+' 651 652 # Regex for date/time ranges 653 ptc.RE_RTIMEHMS = r'''(\s?|^) 654 (\d\d?)%(timeseperator)s 655 (\d\d) 656 (%(timeseperator)s(\d\d))? 657 (\s?|$)''' % ptc.re_values 658 ptc.RE_RTIMEHMS2 = r'''(\s?|^) 659 (\d\d?) 660 (%(timeseperator)s(\d\d?))? 661 (%(timeseperator)s(\d\d?))?''' % ptc.re_values 662 663 if 'meridian' in ptc.re_values: 664 ptc.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % ptc.re_values 665 666 ptc.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps 667 ptc.RE_RDATE3 = r'''((((%(months)s))\s? 668 ((\d\d?) 669 (\s?|%(daysuffix)s|$)+)? 670 (,\s?\d\d\d\d)?))''' % ptc.re_values 671 672 # "06/07/06 - 08/09/06" 673 ptc.DATERNG1 = ptc.RE_RDATE + r'\s?%(rangeseperator)s\s?' + ptc.RE_RDATE 674 ptc.DATERNG1 = ptc.DATERNG1 % ptc.re_values 675 676 # "march 31 - june 1st, 2006" 677 ptc.DATERNG2 = ptc.RE_RDATE3 + r'\s?%(rangeseperator)s\s?' + ptc.RE_RDATE3 678 ptc.DATERNG2 = ptc.DATERNG2 % ptc.re_values 679 680 # "march 1rd -13th" 681 ptc.DATERNG3 = ptc.RE_RDATE3 + r'\s?%(rangeseperator)s\s?(\d\d?)\s?(rd|st|nd|th)?' 682 ptc.DATERNG3 = ptc.DATERNG3 % ptc.re_values 683 684 # "4:00:55 pm - 5:90:44 am", '4p-5p' 685 ptc.TIMERNG1 = ptc.RE_RTIMEHMS2 + r'\s?%(rangeseperator)s\s?' + ptc.RE_RTIMEHMS2 686 ptc.TIMERNG1 = ptc.TIMERNG1 % ptc.re_values 687 688 # "4:00 - 5:90 ", "4:55:55-3:44:55" 689 ptc.TIMERNG2 = ptc.RE_RTIMEHMS + r'\s?%(rangeseperator)s\s?' + ptc.RE_RTIMEHMS 690 ptc.TIMERNG2 = ptc.TIMERNG2 % ptc.re_values 691 692 # "4-5pm " 693 ptc.TIMERNG3 = r'\d\d?\s?%(rangeseperator)s\s?' 
+ ptc.RE_RTIMEHMS2 694 ptc.TIMERNG3 = ptc.TIMERNG3 % ptc.re_values
695 696
697 -def _initConstants(ptc):
698 """ 699 Create localized versions of the units, week and month names 700 """ 701 # build weekday offsets - yes, it assumes the Weekday and shortWeekday 702 # lists are in the same order and Mon..Sun (Python style) 703 ptc.WeekdayOffsets = {} 704 705 o = 0 706 for key in ptc.Weekdays: 707 ptc.WeekdayOffsets[key] = o 708 o += 1 709 o = 0 710 for key in ptc.shortWeekdays: 711 ptc.WeekdayOffsets[key] = o 712 o += 1 713 714 # build month offsets - yes, it assumes the Months and shortMonths 715 # lists are in the same order and Jan..Dec 716 ptc.MonthOffsets = {} 717 ptc.DaysInMonth = {} 718 719 o = 1 720 for key in ptc.Months: 721 ptc.MonthOffsets[key] = o 722 ptc.DaysInMonth[key] = ptc.DaysInMonthList[o - 1] 723 o += 1 724 o = 1 725 for key in ptc.shortMonths: 726 ptc.MonthOffsets[key] = o 727 ptc.DaysInMonth[key] = ptc.DaysInMonthList[o - 1] 728 o += 1
729 730
class Constants:
    """
    Default set of constants for parsedatetime.

    If PyICU is present, then the class will first try to get PyICU
    to return a locale specified by C{localeID}.  If either C{localeID} is
    None or if the locale does not exist within PyICU, then each of the
    locales defined in C{fallbackLocales} is tried in order.

    If PyICU is not present or none of the specified locales can be used,
    then the class will initialize itself to the en_US locale.

    If PyICU is not present or not requested, only the locales defined by
    C{pdtLocales} will be searched.

    The compiled regexes are available as C{CRE_*} attributes; each one is
    compiled from the matching entry of C{cre_source} the first time it is
    read and then stored on the instance.
    """
    def __init__(self, localeID=None, usePyICU=True, fallbackLocales=['en_US']):
        """
        @type  localeID:        string or None
        @param localeID:        ID of the locale to use
        @type  usePyICU:        boolean
        @param usePyICU:        use PyICU for the locale data when it can
                                be imported
        @type  fallbackLocales: list of strings
        @param fallbackLocales: locale IDs to try, in order, when
                                C{localeID} cannot be used; 'en_US' is
                                appended if it is not listed
        """
        self.localeID = localeID

        # Work on a private copy so that appending 'en_US' never alters
        # the caller's list (or the shared default list).
        self.fallbackLocales = list(fallbackLocales or [])

        if 'en_US' not in self.fallbackLocales:
            self.fallbackLocales.append('en_US')

        # define non-locale specific constants

        self.locale = None
        self.usePyICU = usePyICU

        # lengths of the time units, in seconds
        self.Second = 1
        self.Minute = 60 * self.Second
        self.Hour = 60 * self.Minute
        self.Day = 24 * self.Hour
        self.Week = 7 * self.Day
        self.Month = 30 * self.Day
        self.Year = 365 * self.Day

        self.rangeSep = u'-'

        self.DaysInMonthList = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

        self.BirthdayEpoch = 50

        # DOWParseStyle controls how we parse "Tuesday"
        # If the current day was Thursday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        # Current day marked as ***
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current         -1,0     ***
        # week +1          +1
        #
        # If the current day was Monday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1           -1
        # current      *** 0,+1
        # week +1

        self.DOWParseStyle = 1

        # CurrentDOWParseStyle controls how we parse "Friday"
        # If the current day was Friday and the text to parse is "Friday"
        # then the following table shows how each style would be returned
        # True/False. This also depends on DOWParseStyle.
        #
        # Current day marked as ***
        #
        # DOWParseStyle = 0
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                      T,F
        # week +1
        #
        # DOWParseStyle = -1
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1                       F
        # current                       T
        # week +1
        #
        # DOWParseStyle = +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                       T
        # week +1                       F

        self.CurrentDOWParseStyle = False

        # initialize attributes to empty values to ensure
        # they are defined
        self.re_sources = None
        self.re_values = None
        self.Modifiers = None
        self.dayOffsets = None
        self.WeekdayOffsets = None
        self.MonthOffsets = None
        self.DaysInMonth = None
        self.dateSep = None
        self.timeSep = None
        self.am = None
        self.pm = None
        self.meridian = None
        self.usesMeridian = None
        self.uses24 = None
        self.dp_order = None

        self.RE_DATE3 = r''
        self.RE_MONTH = r''
        self.RE_WEEKDAY = r''
        self.RE_SPECIAL = r''
        self.RE_UNITS = r''
        self.RE_QUNITS = r''
        self.RE_MODIFIER = r''
        self.RE_MODIFIER2 = r''
        self.RE_TIMEHMS = r''
        self.RE_TIMEHMS2 = r''
        self.RE_DATE = r''
        self.RE_DATE2 = r''
        self.RE_DAY = r''
        self.RE_TIME = r''
        self.RE_REMAINING = r''
        self.RE_RTIMEHMS = r''
        self.RE_RTIMEHMS2 = r''
        self.RE_RDATE = r''
        self.RE_RDATE3 = r''
        self.DATERNG1 = r''
        self.DATERNG2 = r''
        self.DATERNG3 = r''
        self.TIMERNG1 = r''
        self.TIMERNG2 = r''
        self.TIMERNG3 = r''

        _initLocale(self)
        _initConstants(self)
        _initSymbols(self)
        _initPatterns(self)

        self.re_option = re.IGNORECASE | re.VERBOSE
        self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL,
                           'CRE_UNITS': self.RE_UNITS,
                           'CRE_QUNITS': self.RE_QUNITS,
                           'CRE_MODIFIER': self.RE_MODIFIER,
                           'CRE_MODIFIER2': self.RE_MODIFIER2,
                           'CRE_TIMEHMS': self.RE_TIMEHMS,
                           'CRE_TIMEHMS2': self.RE_TIMEHMS2,
                           'CRE_DATE': self.RE_DATE,
                           'CRE_DATE2': self.RE_DATE2,
                           'CRE_DATE3': self.RE_DATE3,
                           'CRE_MONTH': self.RE_MONTH,
                           'CRE_WEEKDAY': self.RE_WEEKDAY,
                           'CRE_DAY': self.RE_DAY,
                           'CRE_TIME': self.RE_TIME,
                           'CRE_REMAINING': self.RE_REMAINING,
                           'CRE_RTIMEHMS': self.RE_RTIMEHMS,
                           'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
                           'CRE_RDATE': self.RE_RDATE,
                           'CRE_RDATE3': self.RE_RDATE3,
                           'CRE_TIMERNG1': self.TIMERNG1,
                           'CRE_TIMERNG2': self.TIMERNG2,
                           'CRE_TIMERNG3': self.TIMERNG3,
                           'CRE_DATERNG1': self.DATERNG1,
                           'CRE_DATERNG2': self.DATERNG2,
                           'CRE_DATERNG3': self.DATERNG3,
                           }
        self.cre_keys = list(self.cre_source.keys())

    def __getattr__(self, name):
        """
        Compile the C{CRE_*} regexes on first use.

        Only called when normal attribute lookup fails.  If C{name} is
        one of C{cre_keys}, the pattern is compiled with C{re_option},
        stored on the instance (so this hook is not reached again for
        that name) and returned.

        @raise AttributeError: for any other name
        """
        # Go through the instance dict directly: on an instance whose
        # __init__ has not finished, reading self.cre_keys would land
        # back in this method and recurse without end.
        if name in self.__dict__.get('cre_keys', ()):
            value = re.compile(self.cre_source[name], self.re_option)
            setattr(self, name, value)
            return value

        raise AttributeError(name)

    def buildSources(self, sourceTime=None):
        """
        Return a dictionary of date/time tuples based on the keys
        found in self.re_sources.

        The current time is used as the default and any specified
        item found in self.re_sources is inserted into the value
        and the generated dictionary is returned.

        @type  sourceTime: 9-item time tuple or None
        @param sourceTime: base date/time; C{time.localtime()} is used
                           when it is None

        @rtype:  dictionary
        @return: maps each key of self.re_sources to a tuple
                 (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        """
        if sourceTime is None:
            sourceTime = time.localtime()

        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        defaults = {'yr': yr, 'mth': mth, 'dy': dy,
                    'hr': hr, 'mn': mn, 'sec': sec}
        sources = {}

        for item in self.re_sources:
            overrides = self.re_sources[item]

            # start from the base time, then apply only the known fields
            values = dict(defaults)
            for key in defaults:
                if key in overrides:
                    values[key] = overrides[key]

            # weekday, day-of-year and dst flag are passed through as-is
            sources[item] = (values['yr'], values['mth'], values['dy'],
                             values['hr'], values['mn'], values['sec'],
                             wd, yd, isdst)

        return sources
943