Package parsedatetime :: Module parsedatetime_consts
[hide private]
[frames | no frames]

Source Code for Module parsedatetime.parsedatetime_consts

   1  #!/usr/bin/env python 
   2   
   3  """ 
   4  parsedatetime constants and helper functions to determine 
   5  regex values from Locale information if present. 
   6   
   7  Also contains the internal Locale classes to give some sane 
   8  defaults if PyICU is not found. 
   9  """ 
  10   
  11  __license__ = """ 
  12  Copyright (c) 2004-2008 Mike Taylor 
  13  Copyright (c) 2006-2008 Darshana Chhajed 
  14  Copyright (c)      2007 Bernd Zeimetz <bzed@debian.org> 
  15  All rights reserved. 
  16   
  17  Licensed under the Apache License, Version 2.0 (the "License"); 
  18  you may not use this file except in compliance with the License. 
  19  You may obtain a copy of the License at 
  20   
  21     http://www.apache.org/licenses/LICENSE-2.0 
  22   
  23  Unless required by applicable law or agreed to in writing, software 
  24  distributed under the License is distributed on an "AS IS" BASIS, 
  25  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
  26  See the License for the specific language governing permissions and 
  27  limitations under the License. 
  28  """ 
  29   
  30  try: 
  31      import PyICU as pyicu 
  32  except: 
  33      pyicu = None 
  34   
  35   
  36  import datetime 
  37  import calendar 
  38  import time 
  39  import re 
  40   
  41   
class pdtLocale_en:
    """
    Built-in constants for the en_US locale.

    L{Constants} is initialised from this class whenever PyICU cannot be
    used.  Everything here is a plain class attribute holding the lists
    and strings parsedatetime needs to evaluate US English text.
    """

    localeID = 'en_US'   # don't use a unicode string

    # separators and am/pm handling
    dateSep = [u'/', u'.']
    timeSep = [u':']
    meridian = [u'AM', u'PM']
    usesMeridian = True
    uses24 = False

    # day and month names, lowercase; weekdays run Monday..Sunday
    Weekdays = [
        u'monday', u'tuesday', u'wednesday', u'thursday',
        u'friday', u'saturday', u'sunday',
    ]
    shortWeekdays = [
        u'mon', u'tues', u'wed', u'thu',
        u'fri', u'sat', u'sun',
    ]
    Months = [
        u'january', u'february', u'march', u'april',
        u'may', u'june', u'july', u'august',
        u'september', u'october', u'november', u'december',
    ]
    shortMonths = [
        u'jan', u'feb', u'mar', u'apr',
        u'may', u'jun', u'jul', u'aug',
        u'sep', u'oct', u'nov', u'dec',
    ]

    # ICU-style format patterns
    dateFormats = {
        'full': 'EEEE, MMMM d, yyyy',
        'long': 'MMMM d, yyyy',
        'medium': 'MMM d, yyyy',
        'short': 'M/d/yy',
    }
    timeFormats = {
        'full': 'h:mm:ss a z',
        'long': 'h:mm:ss a z',
        'medium': 'h:mm:ss a',
        'short': 'h:mm a',
    }

    # order of the date parts in a numeric date: month, day, year
    dp_order = [u'm', u'd', u'y']

    # unit names; these are merged into the regex constants later
    units = {
        'seconds': ['second', 'sec'],
        'minutes': ['minute', 'min'],
        'hours': ['hour', 'hr'],
        'days': ['day', 'dy'],
        'weeks': ['week', 'wk'],
        'months': ['month', 'mth'],
        'years': ['year', 'yr'],
    }

    # text fragments that are substituted into the regexes later
    re_consts = {
        'specials': 'in|on|of|at',
        'timeseperator': ':',
        'rangeseperator': '-',
        'daysuffix': 'rd|st|nd|th',
        'meridian': 'am|pm|a.m.|p.m.|a|p',
        'qunits': 'h|m|s|d|w|m|y',
        'now': ['now'],
    }

    # direction/weight applied to the source date for each modifier word
    modifiers = {
        'from': 1,
        'before': -1,
        'after': 1,
        'ago': -1,
        'prior': -1,
        'prev': -1,
        'last': -1,
        'next': 1,
        'previous': -1,
        'in a': 2,
        'end of': 0,
        'eod': 0,
        'eo': 0,
    }

    # day offsets relative to today
    dayoffsets = {
        'tomorrow': 1,
        'today': 0,
        'yesterday': -1,
    }

    # Named times of day (lunch, noon, evening, ...).  Each entry lists
    # the fields to override; the remaining fields are filled in from the
    # current date/time by Constants.buildSources.
    re_sources = {
        'noon': {'hr': 12, 'mn': 0, 'sec': 0},
        'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
        'morning': {'hr': 6, 'mn': 0, 'sec': 0},
        'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
        'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
        'evening': {'hr': 18, 'mn': 0, 'sec': 0},
        'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
        'night': {'hr': 21, 'mn': 0, 'sec': 0},
        'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
        'eod': {'hr': 17, 'mn': 0, 'sec': 0},
    }
144 145
class pdtLocale_au:
    """
    en_AU Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    Defined as class variables are the lists and strings needed by parsedatetime
    to evaluate strings for Australia
    """

    localeID = 'en_AU'   # don't use a unicode string
    dateSep = [u'-', u'/']
    timeSep = [u':']
    meridian = [u'AM', u'PM']
    usesMeridian = True
    uses24 = False

    # lowercase names; weekdays run Monday..Sunday
    Weekdays = [u'monday', u'tuesday', u'wednesday',
                u'thursday', u'friday', u'saturday', u'sunday',
                ]
    shortWeekdays = [u'mon', u'tues', u'wed',
                     u'thu', u'fri', u'sat', u'sun',
                     ]
    Months = [u'january', u'february', u'march',
              u'april', u'may', u'june',
              u'july', u'august', u'september',
              u'october', u'november', u'december',
              ]
    shortMonths = [u'jan', u'feb', u'mar',
                   u'apr', u'may', u'jun',
                   u'jul', u'aug', u'sep',
                   u'oct', u'nov', u'dec',
                   ]
    dateFormats = {'full': 'EEEE, d MMMM yyyy',
                   'long': 'd MMMM yyyy',
                   'medium': 'dd/MM/yyyy',
                   'short': 'd/MM/yy',
                   }
    timeFormats = {'full': 'h:mm:ss a z',
                   'long': 'h:mm:ss a',
                   'medium': 'h:mm:ss a',
                   'short': 'h:mm a',
                   }

    # numeric dates are day, month, year
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['second', 'sec'],
             'minutes': ['minute', 'min'],
             'hours': ['hour', 'hr'],
             'days': ['day', 'dy'],
             'weeks': ['week', 'wk'],
             'months': ['month', 'mth'],
             'years': ['year', 'yr'],
             }

    # text constants to be used by regex's later
    re_consts = {'specials': 'in|on|of|at',
                 'timeseperator': ':',
                 'rangeseperator': '-',
                 'daysuffix': 'rd|st|nd|th',
                 'meridian': 'am|pm|a.m.|p.m.|a|p',
                 'qunits': 'h|m|s|d|w|m|y',
                 'now': ['now'],
                 }

    # Used to adjust the returned date before/after the source.
    # 'ago' points into the past, so it is negative like 'before' and
    # 'prior' (and like the en_US table); it was +1 here.
    # 'eod' is included because the modifier regex built by
    # _initPatterns can match it, so it needs an entry to look up.
    modifiers = {'from': 1,
                 'before': -1,
                 'after': 1,
                 'ago': -1,
                 'prior': -1,
                 'prev': -1,
                 'last': -1,
                 'next': 1,
                 'previous': -1,
                 'in a': 2,
                 'end of': 0,
                 'eod': 0,
                 'eo': 0,
                 }

    dayoffsets = {'tomorrow': 1,
                  'today': 0,
                  'yesterday': -1,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replaced - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {'noon': {'hr': 12, 'mn': 0, 'sec': 0},
                  'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
                  'morning': {'hr': 6, 'mn': 0, 'sec': 0},
                  'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
                  'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
                  'evening': {'hr': 18, 'mn': 0, 'sec': 0},
                  'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
                  'night': {'hr': 21, 'mn': 0, 'sec': 0},
                  'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
                  'eod': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
247 248
class pdtLocale_es:
    """
    es Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    Defined as class variables are the lists and strings needed by parsedatetime
    to evaluate strings in Spanish

    Note that many of the items below (units, modifiers, day offsets and
    the named times of day) are still in English; only the day and month
    names and the formats have been translated.
    """

    localeID = 'es'   # don't use a unicode string
    dateSep = [u'/']
    timeSep = [u':']
    meridian = []
    usesMeridian = False
    uses24 = True

    # lowercase names; weekdays run Monday..Sunday
    Weekdays = [u'lunes', u'martes', u'mi\xe9rcoles',
                u'jueves', u'viernes', u's\xe1bado', u'domingo',
                ]
    shortWeekdays = [u'lun', u'mar', u'mi\xe9',
                     u'jue', u'vie', u's\xe1b', u'dom',
                     ]
    Months = [u'enero', u'febrero', u'marzo',
              u'abril', u'mayo', u'junio',
              u'julio', u'agosto', u'septiembre',
              u'octubre', u'noviembre', u'diciembre'
              ]
    shortMonths = [u'ene', u'feb', u'mar',
                   u'abr', u'may', u'jun',
                   u'jul', u'ago', u'sep',
                   u'oct', u'nov', u'dic'
                   ]
    dateFormats = {'full': "EEEE d' de 'MMMM' de 'yyyy",
                   'long': "d' de 'MMMM' de 'yyyy",
                   'medium': "dd-MMM-yy",
                   'short': "d/MM/yy",
                   }
    timeFormats = {'full': "HH'H'mm' 'ss z",
                   'long': "HH:mm:ss z",
                   'medium': "HH:mm:ss",
                   'short': "HH:mm",
                   }

    # numeric dates are day, month, year
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['second', 'sec'],
             'minutes': ['minute', 'min'],
             'hours': ['hour', 'hr'],
             'days': ['day', 'dy'],
             'weeks': ['week', 'wk'],
             'months': ['month', 'mth'],
             'years': ['year', 'yr'],
             }

    # text constants to be used by regex's later
    #
    # The separators must be plain strings: these values are substituted
    # into regex templates with %(timeseperator)s, and a list would be
    # rendered as its repr (e.g. "[u':']"), which the regex engine reads
    # as a character class that also matches "u" and "'".
    re_consts = {'specials': 'in|on|of|at',
                 'timeseperator': ':',
                 'dateseperator': '/',
                 'rangeseperator': '-',
                 'daysuffix': 'rd|st|nd|th',
                 'qunits': 'h|m|s|d|w|m|y',
                 'now': ['now'],
                 }

    # Used to adjust the returned date before/after the source.
    # 'ago' points into the past, so it is negative like 'before' and
    # 'prior' (and like the en_US table); it was +1 here.
    # 'eod' is included because the modifier regex built by
    # _initPatterns can match it, so it needs an entry to look up.
    modifiers = {'from': 1,
                 'before': -1,
                 'after': 1,
                 'ago': -1,
                 'prior': -1,
                 'prev': -1,
                 'last': -1,
                 'next': 1,
                 'previous': -1,
                 'in a': 2,
                 'end of': 0,
                 'eod': 0,
                 'eo': 0,
                 }

    dayoffsets = {'tomorrow': 1,
                  'today': 0,
                  'yesterday': -1,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replaced - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {'noon': {'hr': 12, 'mn': 0, 'sec': 0},
                  'lunch': {'hr': 12, 'mn': 0, 'sec': 0},
                  'morning': {'hr': 6, 'mn': 0, 'sec': 0},
                  'breakfast': {'hr': 8, 'mn': 0, 'sec': 0},
                  'dinner': {'hr': 19, 'mn': 0, 'sec': 0},
                  'evening': {'hr': 18, 'mn': 0, 'sec': 0},
                  'midnight': {'hr': 0, 'mn': 0, 'sec': 0},
                  'night': {'hr': 21, 'mn': 0, 'sec': 0},
                  'tonight': {'hr': 21, 'mn': 0, 'sec': 0},
                  'eod': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
352 353
class pdtLocale_de:
    """
    de_DE Locale constants

    This class will be used to initialize L{Constants} if PyICU is not located.

    Contributed by Debian parsedatetime package maintainer Bernd Zeimetz <bzed@debian.org>

    Defined as class variables are the lists and strings needed by parsedatetime
    to evaluate strings for German
    """

    localeID = 'de_DE'   # don't use a unicode string
    dateSep = [u'.']
    timeSep = [u':']
    meridian = []
    usesMeridian = False
    uses24 = True

    # lowercase names; weekdays run Monday..Sunday
    Weekdays = [u'montag', u'dienstag', u'mittwoch',
                u'donnerstag', u'freitag', u'samstag', u'sonntag',
                ]
    shortWeekdays = [u'mo', u'di', u'mi',
                     u'do', u'fr', u'sa', u'so',
                     ]
    Months = [u'januar', u'februar', u'm\xe4rz',
              u'april', u'mai', u'juni',
              u'juli', u'august', u'september',
              u'oktober', u'november', u'dezember',
              ]
    shortMonths = [u'jan', u'feb', u'mrz',
                   u'apr', u'mai', u'jun',
                   u'jul', u'aug', u'sep',
                   u'okt', u'nov', u'dez',
                   ]
    dateFormats = {'full': u'EEEE, d. MMMM yyyy',
                   'long': u'd. MMMM yyyy',
                   'medium': u'dd.MM.yyyy',
                   'short': u'dd.MM.yy'
                   }

    timeFormats = {'full': u'HH:mm:ss v',
                   'long': u'HH:mm:ss z',
                   'medium': u'HH:mm:ss',
                   'short': u'HH:mm'
                   }

    # numeric dates are day, month, year
    dp_order = [u'd', u'm', u'y']

    # this will be added to re_consts later
    units = {'seconds': ['sekunden', 'sek', 's'],
             'minutes': ['minuten', 'min', 'm'],
             'hours': ['stunden', 'std', 'h'],
             'days': ['tage', 't'],
             'weeks': ['wochen', 'w'],
             # the short version of months would be a capital M; as I
             # understand it we can't distinguish between m for minutes
             # and M for months.
             'months': ['monate'],
             'years': ['jahre', 'j'],
             }

    # text constants to be used by regex's later
    re_consts = {'specials': 'am|dem|der|im|in|den|zum',
                 'timeseperator': ':',
                 'rangeseperator': '-',
                 'daysuffix': '',
                 'qunits': 'h|m|s|t|w|m|j',
                 'now': ['jetzt'],
                 }

    # Used to adjust the returned date before/after the source
    # still looking for insight on how to translate all of them to german.
    # NOTE(review): the modifier regexes built by _initPatterns are
    # hard-coded English ('last', 'next', 'ago', 'prior', ...) and several
    # of those words have no entry here - confirm how the parser handles
    # a match on one of them for this locale.
    modifiers = {u'from': 1,
                 u'before': -1,
                 u'after': 1,
                 u'vergangener': -1,
                 u'vorheriger': -1,
                 u'prev': -1,
                 u'letzter': -1,
                 u'n\xe4chster': 1,
                 u'dieser': 0,
                 u'previous': -1,
                 u'in a': 2,
                 u'end of': 0,
                 u'eod': 0,
                 u'eo': 0,
                 }

    # morgen/abermorgen does not work, see http://code.google.com/p/parsedatetime/issues/detail?id=19
    dayoffsets = {u'morgen': 1,
                  u'heute': 0,
                  u'gestern': -1,
                  u'vorgestern': -2,
                  u'\xfcbermorgen': 2,
                  }

    # special day and/or times, i.e. lunch, noon, evening
    # each element in the dictionary is a dictionary that is used
    # to fill in any value to be replaced - the current date/time will
    # already have been populated by the method buildSources
    re_sources = {u'mittag': {'hr': 12, 'mn': 0, 'sec': 0},
                  u'mittags': {'hr': 12, 'mn': 0, 'sec': 0},
                  u'mittagessen': {'hr': 12, 'mn': 0, 'sec': 0},
                  u'morgen': {'hr': 6, 'mn': 0, 'sec': 0},
                  u'morgens': {'hr': 6, 'mn': 0, 'sec': 0},
                  # "Fruehstueck" (breakfast): both vowels are u-umlaut,
                  # \xfc.  The key used to read u'fr\e4hst\xe4ck', which
                  # contains an invalid escape and the wrong vowel.
                  u'fr\xfchst\xfcck': {'hr': 8, 'mn': 0, 'sec': 0},
                  u'abendessen': {'hr': 19, 'mn': 0, 'sec': 0},
                  u'abend': {'hr': 18, 'mn': 0, 'sec': 0},
                  u'abends': {'hr': 18, 'mn': 0, 'sec': 0},
                  u'mitternacht': {'hr': 0, 'mn': 0, 'sec': 0},
                  u'nacht': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'nachts': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'heute abend': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'heute nacht': {'hr': 21, 'mn': 0, 'sec': 0},
                  u'feierabend': {'hr': 17, 'mn': 0, 'sec': 0},
                  }
# Registry of the built-in locale classes, keyed by locale identifier.
# _initLocale looks a locale up here when PyICU is not used.
pdtLocales = dict([
    ('en_US', pdtLocale_en),
    ('en_AU', pdtLocale_au),
    ('es_ES', pdtLocale_es),
    ('de_DE', pdtLocale_de),
])
def _initLocale(ptc):
    """
    Helper function to initialize the different lists and strings
    from either PyICU or one of the internal pdt Locales and store
    them into ptc.

    @type  ptc: L{Constants}
    @param ptc: object to populate.  C{localeID}, C{usePyICU} and
                C{fallbackLocales} are read; the name lists
                (C{Weekdays}, C{shortWeekdays}, C{Months},
                C{shortMonths}), C{dateFormats}, C{timeFormats},
                C{re_sources}, C{re_values}, C{Modifiers},
                C{dayOffsets} and C{Units} are written.  On the PyICU
                path C{icuLocale}, C{icuSymbols}, C{icu_df} and
                C{icu_tf} are set as well; otherwise C{locale} is set
                and C{usePyICU} is forced to False.
    """
    if pyicu and ptc.usePyICU:
        ptc.icuLocale = None

        if ptc.localeID is not None:
            ptc.icuLocale = pyicu.Locale(ptc.localeID)

        if ptc.icuLocale is None:
            for candidate in ptc.fallbackLocales:
                ptc.localeID = candidate
                ptc.icuLocale = pyicu.Locale(candidate)

                if ptc.icuLocale is not None:
                    break

        ptc.icuSymbols = pyicu.DateFormatSymbols(ptc.icuLocale)

        # grab ICU list of weekdays, skipping first entry which
        # is always blank.  Real lists are built (rather than map())
        # because they are sliced below.
        wd = [name.lower() for name in ptc.icuSymbols.getWeekdays()[1:]]
        swd = [name.lower() for name in ptc.icuSymbols.getShortWeekdays()[1:]]

        # store them in our list with Monday first (ICU puts Sunday first)
        ptc.Weekdays = wd[1:] + wd[0:1]
        ptc.shortWeekdays = swd[1:] + swd[0:1]
        ptc.Months = [name.lower() for name in ptc.icuSymbols.getMonths()]
        ptc.shortMonths = [name.lower()
                           for name in ptc.icuSymbols.getShortMonths()]

        # not quite sure how to init this so for now
        # set it to none so it will be set to the en_US defaults for now
        ptc.re_consts = None

        icu_format = pyicu.DateFormat
        styles = {'full': icu_format.kFull,
                  'long': icu_format.kLong,
                  'medium': icu_format.kMedium,
                  'short': icu_format.kShort,
                  }

        ptc.icu_df = {}
        ptc.icu_tf = {}
        ptc.dateFormats = {}
        ptc.timeFormats = {}

        for style in ('full', 'long', 'medium', 'short'):
            ptc.icu_df[style] = icu_format.createDateInstance(styles[style],
                                                              ptc.icuLocale)
            ptc.icu_tf[style] = icu_format.createTimeInstance(styles[style],
                                                              ptc.icuLocale)
            ptc.dateFormats[style] = ptc.icu_df[style].toPattern()
            ptc.timeFormats[style] = ptc.icu_tf[style].toPattern()
    else:
        if ptc.localeID not in pdtLocales:
            for candidate in ptc.fallbackLocales:
                ptc.localeID = candidate

                if candidate in pdtLocales:
                    break

            # last resort: none of the fallbacks is a built-in locale
            # either, so use en_US instead of failing with a KeyError
            if ptc.localeID not in pdtLocales:
                ptc.localeID = 'en_US'

        ptc.locale = pdtLocales[ptc.localeID]
        ptc.usePyICU = False

        ptc.Weekdays = ptc.locale.Weekdays
        ptc.shortWeekdays = ptc.locale.shortWeekdays
        ptc.Months = ptc.locale.Months
        ptc.shortMonths = ptc.locale.shortMonths
        ptc.dateFormats = ptc.locale.dateFormats
        ptc.timeFormats = ptc.locale.timeFormats

    # these values are used to setup the various bits
    # of the regex values used to parse
    #
    # check if a local set of constants has been
    # provided, if not use en_US as the default
    defaults = pdtLocales['en_US']

    if ptc.localeID in pdtLocales:
        source = pdtLocales[ptc.localeID]
    else:
        source = defaults

    ptc.re_sources = source.re_sources
    ptc.Modifiers = source.modifiers
    ptc.dayOffsets = source.dayoffsets
    units = source.units

    # Copy the locale's regex constants before adding to them: the dict
    # on the locale class is shared by every Constants instance, and the
    # keys written below (months, days, units, ...) are per instance.
    ptc.re_values = dict(source.re_consts)

    # for now, pull over any missing keys from the US set
    for key in defaults.re_consts:
        if key not in ptc.re_values:
            ptc.re_values[key] = defaults.re_consts[key]

    # escape any regex special characters that may be found and build
    # the alternations; join() copes with any number of names
    ptc.re_values['months'] = '|'.join(map(re.escape, ptc.Months))
    ptc.re_values['shortmonths'] = '|'.join(map(re.escape, ptc.shortMonths))
    ptc.re_values['days'] = '|'.join(map(re.escape, ptc.Weekdays))
    ptc.re_values['shortdays'] = '|'.join(map(re.escape, ptc.shortWeekdays))

    unit_names = []
    for unit in units:
        unit_names.extend(units[unit])

    ptc.re_values['units'] = '|'.join(unit_names)
    ptc.Units = ptc.re_values['units'].split('|')
def _initSymbols(ptc):
    """
    Helper function to initialize the single character constants
    and other symbols needed.

    @type  ptc: L{Constants}
    @param ptc: object to populate.  Sets C{timeSep}, C{dateSep},
                C{meridian}, C{usesMeridian}, C{uses24}, C{dp_order},
                C{am} and C{pm}.  With PyICU the values are derived from
                C{icu_tf}, C{icu_df}, C{timeFormats} and C{dateFormats};
                otherwise they are copied from C{ptc.locale}.
    """
    ptc.timeSep = [u':']
    ptc.dateSep = [u'/']
    ptc.meridian = [u'AM', u'PM']

    ptc.usesMeridian = True
    ptc.uses24 = False

    if pyicu and ptc.usePyICU:
        am = u''
        pm = u''
        ts = ''

        # ICU doesn't seem to provide directly the
        # date or time separator - so we have to
        # figure it out
        formatter = ptc.icu_tf['short']
        pattern = ptc.timeFormats['short']

        ptc.usesMeridian = u'a' in pattern
        ptc.uses24 = u'H' in pattern

        # '11:45 AM' or '11:45'
        text = formatter.format(datetime.datetime(2003, 10, 30, 11, 45))

        # ': AM' or ':'
        text = text.replace('11', '').replace('45', '')

        if len(text) > 0:
            ts = text[0]

        if ptc.usesMeridian:
            # whatever follows the separator is the AM text
            am = text[1:].strip()

            # '23:45' or '11:45 PM'
            text = formatter.format(datetime.datetime(2003, 10, 30, 23, 45))

            if ptc.uses24:
                text = text.replace('23', '')
            else:
                text = text.replace('11', '')

            # 'PM' or ''
            pm = text.replace('45', '').replace(ts, '').strip()

        ptc.timeSep = [ts]
        ptc.meridian = [am, pm]

        formatter = ptc.icu_df['short']
        text = formatter.format(datetime.datetime(2003, 10, 30, 11, 45))

        # Strip the digits so only separators remain.  The four digit
        # year has to go before the two digit one: removing '03' first
        # turns '2003' into '20', and a year-first format would then
        # report '2' as the date separator.
        for digits in ('2003', '10', '30', '03'):
            text = text.replace(digits, '')

        if len(text) > 0:
            ds = text[0]
        else:
            ds = '/'

        ptc.dateSep = [ds]

        # the first letter of each pattern field gives the part order,
        # e.g. 'M/d/yy' -> ['m', 'd', 'y']
        dp_order = []

        for field in ptc.dateFormats['short'].lower().split(ds):
            if len(field) > 0:
                dp_order.append(field[:1])

        ptc.dp_order = dp_order
    else:
        ptc.timeSep = ptc.locale.timeSep
        ptc.dateSep = ptc.locale.dateSep
        ptc.meridian = ptc.locale.meridian
        ptc.usesMeridian = ptc.locale.usesMeridian
        ptc.uses24 = ptc.locale.uses24
        ptc.dp_order = ptc.locale.dp_order

    # build am and pm lists to contain
    # original case, lowercase and first-char
    # versions of the meridian text

    if len(ptc.meridian) > 0:
        am = ptc.meridian[0]
        ptc.am = [am]

        if len(am) > 0:
            ptc.am.append(am[0])
            am = am.lower()
            ptc.am.append(am)
            ptc.am.append(am[0])
    else:
        am = ''
        ptc.am = ['', '']

    if len(ptc.meridian) > 1:
        pm = ptc.meridian[1]
        ptc.pm = [pm]

        if len(pm) > 0:
            ptc.pm.append(pm[0])
            pm = pm.lower()
            ptc.pm.append(pm)
            ptc.pm.append(pm[0])
    else:
        pm = ''
        ptc.pm = ['', '']
708 709
def _initPatterns(ptc):
    """
    Helper function to take the different localized bits from ptc and
    create the regex strings.

    @type  ptc: L{Constants}
    @param ptc: object to populate.  C{ptc.re_values} (a dictionary of
                regex fragments) and C{ptc.dateSep} (a list of separator
                strings) are read; every C{RE_*}, C{DATERNG*} and
                C{TIMERNG*} attribute is written as an uncompiled
                pattern string.  The multi-line patterns rely on being
                compiled with C{re.VERBOSE}.
    """
    # TODO add code to parse the date formats and build the regexes up from sub-parts
    # TODO find all hard-coded uses of date/time separators
    # NOTE(review): RE_MODIFIER, RE_MODIFIER2, RE_DAY and RE_TIME contain
    # hard-coded English words rather than the locale's own tables, and
    # RE_SPECIAL puts the 'specials' alternation inside [...] so it acts
    # as a character set - both look unintended but are left unchanged.
    values = ptc.re_values

    # DATE3 was refactored to fix Issue 16
    # http://code.google.com/p/parsedatetime/issues/detail?id=16
    # The previous version ended with a clause that was probably meant
    # for a trailing time - testing showed it is not needed.
    ptc.RE_DATE3 = r'''(?P<date>((?P<mthname>(%(months)s|%(shortmonths)s))\s?
                                  ((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?)?
                                  ((,)?(\s)?(?P<year>\d\d(\d\d)?))?
                                 )
                        )''' % values
    ptc.RE_MONTH = r'''(\s?|^)
                       (?P<month>(
                                  (?P<mthname>(%(months)s|%(shortmonths)s))
                                  (\s?(?P<year>(\d\d\d\d)))?
                                 ))
                       (\s?|$|[^0-9a-zA-Z])''' % values
    ptc.RE_WEEKDAY = r'''(\s?|^)
                         (?P<weekday>(%(days)s|%(shortdays)s))
                         (\s?|$|[^0-9a-zA-Z])''' % values

    ptc.RE_SPECIAL = r'(?P<special>^[%(specials)s]+)\s+' % values
    ptc.RE_UNITS = r'''(?P<qty>(-?\d+\s*
                                (?P<units>((%(units)s)s?))
                               ))''' % values
    ptc.RE_QUNITS = r'''(?P<qty>(-?\d+\s?
                                 (?P<qunits>%(qunits)s)
                                 (\s?|,|$)
                                ))''' % values
    ptc.RE_MODIFIER = r'''(\s?|^)
                          (?P<modifier>
                           (previous|prev|last|next|eod|eo|(end\sof)|(in\sa)))''' % values
    ptc.RE_MODIFIER2 = r'''(\s?|^)
                           (?P<modifier>
                            (from|before|after|ago|prior))
                           (\s?|$|[^0-9a-zA-Z])''' % values
    ptc.RE_TIMEHMS = r'''(\s?|^)
                         (?P<hours>\d\d?)
                         (?P<tsep>%(timeseperator)s|)
                         (?P<minutes>\d\d)
                         (?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?''' % values
    ptc.RE_TIMEHMS2 = r'''(?P<hours>(\d\d?))
                          ((?P<tsep>%(timeseperator)s|)
                           (?P<minutes>(\d\d?))
                           (?:(?P=tsep)
                              (?P<seconds>\d\d?
                               (?:[.,]\d+)?))?)?''' % values

    if 'meridian' in values:
        ptc.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % values

    # The separators end up inside a [...] character class, so each one
    # is escaped: a bare '-' between two others would be read as a range
    # (['/', '-'] used to produce the invalid class '[/-.]'), and ']',
    # '^' or '\' would break the class.  '.' is always accepted.
    dateSeps = ''.join([re.escape(sep) for sep in ptc.dateSep]) + r'\.'

    ptc.RE_DATE = r'''(\s?|^)
                      (?P<date>(\d\d?[%s]\d\d?([%s]\d\d(\d\d)?)?))
                      (\s?|$|[^0-9a-zA-Z])''' % (dateSeps, dateSeps)
    ptc.RE_DATE2 = r'[%s]' % dateSeps
    ptc.RE_DAY = r'''(\s?|^)
                     (?P<day>(today|tomorrow|yesterday))
                     (\s?|$|[^0-9a-zA-Z])''' % values
    ptc.RE_TIME = r'''(\s?|^)
                      (?P<time>(morning|breakfast|noon|lunch|evening|midnight|tonight|dinner|night|now))
                      (\s?|$|[^0-9a-zA-Z])''' % values
    ptc.RE_REMAINING = r'\s+'

    # Regex for date/time ranges
    ptc.RE_RTIMEHMS = r'''(\s?|^)
                          (\d\d?)%(timeseperator)s
                          (\d\d)
                          (%(timeseperator)s(\d\d))?
                          (\s?|$)''' % values
    ptc.RE_RTIMEHMS2 = r'''(\s?|^)
                           (\d\d?)
                           (%(timeseperator)s(\d\d?))?
                           (%(timeseperator)s(\d\d?))?''' % values

    if 'meridian' in values:
        ptc.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % values

    ptc.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
    ptc.RE_RDATE3 = r'''((((%(months)s))\s?
                         ((\d\d?)
                          (\s?|%(daysuffix)s|$)+)?
                         (,\s?\d\d\d\d)?))''' % values

    # The range separator is formatted once, on its own, and then joined
    # to the finished sub-patterns; this avoids running '%' formatting a
    # second time over text that has already been substituted.
    rangeSep = r'\s?%(rangeseperator)s\s?' % values

    # "06/07/06 - 08/09/06"
    ptc.DATERNG1 = ptc.RE_RDATE + rangeSep + ptc.RE_RDATE

    # "march 31 - june 1st, 2006"
    ptc.DATERNG2 = ptc.RE_RDATE3 + rangeSep + ptc.RE_RDATE3

    # "march 1rd -13th"
    ptc.DATERNG3 = ptc.RE_RDATE3 + rangeSep + r'(\d\d?)\s?(rd|st|nd|th)?'

    # "4:00:55 pm - 5:90:44 am", '4p-5p'
    ptc.TIMERNG1 = ptc.RE_RTIMEHMS2 + rangeSep + ptc.RE_RTIMEHMS2

    # "4:00 - 5:90 ", "4:55:55-3:44:55"
    ptc.TIMERNG2 = ptc.RE_RTIMEHMS + rangeSep + ptc.RE_RTIMEHMS

    # "4-5pm "
    ptc.TIMERNG3 = r'\d\d?' + rangeSep + ptc.RE_RTIMEHMS2

    # "4:30-5pm "
    ptc.TIMERNG4 = ptc.RE_RTIMEHMS + rangeSep + ptc.RE_RTIMEHMS2
830 831
def _initConstants(ptc):
    """
    Build the name -> position lookup tables for weekdays and months.

    C{ptc.WeekdayOffsets} maps every long and short weekday name to its
    zero-based index and C{ptc.MonthOffsets} maps every long and short
    month name to its one-based index.
    """
    # Weekday and shortWeekday are assumed to be in the same order,
    # Mon..Sun (Python style), so both share the same numbering.
    ptc.WeekdayOffsets = weekday_table = {}

    for names in (ptc.Weekdays, ptc.shortWeekdays):
        for position, name in enumerate(names):
            weekday_table[name] = position

    # Months and shortMonths are likewise assumed to be in the same
    # order, Jan..Dec; months count from 1.
    ptc.MonthOffsets = month_table = {}

    for names in (ptc.Months, ptc.shortMonths):
        for position, name in enumerate(names):
            month_table[name] = position + 1
861 862
class Constants:
    """
    Default set of constants for parsedatetime.

    If PyICU is present, then the class will first try to get PyICU
    to return a locale specified by C{localeID}.  If either C{localeID} is
    None or if the locale does not exist within PyICU, then each of the
    locales defined in C{fallbackLocales} is tried in order.

    If PyICU is not present or none of the specified locales can be used,
    then the class will initialize itself to the en_US locale.

    if PyICU is not present or not requested, only the locales defined by
    C{pdtLocales} will be searched.

    The compiled regexes are exposed as C{CRE_*} attributes; each one is
    compiled from the matching C{RE_*}/C{*RNG*} pattern string the first
    time it is accessed and then stored on the instance.
    """

    def __init__(self, localeID=None, usePyICU=True, fallbackLocales=['en_US']):
        """
        @type  localeID:        string
        @param localeID:        preferred locale identifier, or None
        @type  usePyICU:        boolean
        @param usePyICU:        try PyICU first when it is importable
        @type  fallbackLocales: list of strings
        @param fallbackLocales: locale identifiers to try, in order, when
                                C{localeID} cannot be used; 'en_US' is
                                always added as the last resort
        """
        self.localeID = localeID

        # Keep a private copy: 'en_US' may be appended below and the
        # locale setup works from this list, neither of which should
        # alter the caller's list or the shared default argument.
        if fallbackLocales is None:
            fallbackLocales = []

        self.fallbackLocales = list(fallbackLocales)

        if 'en_US' not in self.fallbackLocales:
            self.fallbackLocales.append('en_US')

        # define non-locale specific constants

        self.locale = None
        self.usePyICU = usePyICU

        # starting cache of leap years
        # daysInMonth will add to this if during
        # runtime it gets a request for a year not found
        self._leapYears = [1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944,
                           1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988,
                           1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020, 2024, 2028, 2032,
                           2036, 2040, 2044, 2048, 2052, 2056, 2060, 2064, 2068, 2072, 2076,
                           2080, 2084, 2088, 2092, 2096]

        # durations expressed in seconds
        self.Second = 1
        self.Minute = 60 * self.Second
        self.Hour = 60 * self.Minute
        self.Day = 24 * self.Hour
        self.Week = 7 * self.Day
        self.Month = 30 * self.Day
        self.Year = 365 * self.Day

        self.rangeSep = u'-'

        # days per month for a non-leap year, January first
        self._DaysInMonthList = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

        self.BirthdayEpoch = 50

        # DOWParseStyle controls how we parse "Tuesday"
        # If the current day was Thursday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        # Current day marked as ***
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current          -1,0    ***
        # week +1           +1
        #
        # If the current day was Monday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1           -1
        # current      *** 0,+1
        # week +1

        self.DOWParseStyle = 1

        # CurrentDOWParseStyle controls how we parse "Friday"
        # If the current day was Friday and the text to parse is "Friday"
        # then the following table shows how each style would be returned
        # True/False. This also depends on DOWParseStyle.
        #
        # Current day marked as ***
        #
        # DOWParseStyle = 0
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                      T,F
        # week +1
        #
        # DOWParseStyle = -1
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1                       F
        # current                       T
        # week +1
        #
        # DOWParseStyle = +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                       T
        # week +1                       F

        self.CurrentDOWParseStyle = False

        # initialize attributes to empty values to ensure
        # they are defined
        self.re_sources = None
        self.re_values = None
        self.Modifiers = None
        self.dayOffsets = None
        self.WeekdayOffsets = None
        self.MonthOffsets = None
        self.dateSep = None
        self.timeSep = None
        self.am = None
        self.pm = None
        self.meridian = None
        self.usesMeridian = None
        self.uses24 = None
        self.dp_order = None

        self.RE_DATE3 = r''
        self.RE_MONTH = r''
        self.RE_WEEKDAY = r''
        self.RE_SPECIAL = r''
        self.RE_UNITS = r''
        self.RE_QUNITS = r''
        self.RE_MODIFIER = r''
        self.RE_MODIFIER2 = r''
        self.RE_TIMEHMS = r''
        self.RE_TIMEHMS2 = r''
        self.RE_DATE = r''
        self.RE_DATE2 = r''
        self.RE_DAY = r''
        self.RE_TIME = r''
        self.RE_REMAINING = r''
        self.RE_RTIMEHMS = r''
        self.RE_RTIMEHMS2 = r''
        self.RE_RDATE = r''
        self.RE_RDATE3 = r''
        self.DATERNG1 = r''
        self.DATERNG2 = r''
        self.DATERNG3 = r''
        self.TIMERNG1 = r''
        self.TIMERNG2 = r''
        self.TIMERNG3 = r''
        self.TIMERNG4 = r''

        # the order matters: the locale data is loaded first and the
        # pattern strings are built last
        _initLocale(self)
        _initConstants(self)
        _initSymbols(self)
        _initPatterns(self)

        self.re_option = re.IGNORECASE | re.VERBOSE

        # pattern strings behind the lazily compiled CRE_* attributes
        self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL,
                           'CRE_UNITS': self.RE_UNITS,
                           'CRE_QUNITS': self.RE_QUNITS,
                           'CRE_MODIFIER': self.RE_MODIFIER,
                           'CRE_MODIFIER2': self.RE_MODIFIER2,
                           'CRE_TIMEHMS': self.RE_TIMEHMS,
                           'CRE_TIMEHMS2': self.RE_TIMEHMS2,
                           'CRE_DATE': self.RE_DATE,
                           'CRE_DATE2': self.RE_DATE2,
                           'CRE_DATE3': self.RE_DATE3,
                           'CRE_MONTH': self.RE_MONTH,
                           'CRE_WEEKDAY': self.RE_WEEKDAY,
                           'CRE_DAY': self.RE_DAY,
                           'CRE_TIME': self.RE_TIME,
                           'CRE_REMAINING': self.RE_REMAINING,
                           'CRE_RTIMEHMS': self.RE_RTIMEHMS,
                           'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
                           'CRE_RDATE': self.RE_RDATE,
                           'CRE_RDATE3': self.RE_RDATE3,
                           'CRE_TIMERNG1': self.TIMERNG1,
                           'CRE_TIMERNG2': self.TIMERNG2,
                           'CRE_TIMERNG3': self.TIMERNG3,
                           'CRE_TIMERNG4': self.TIMERNG4,
                           'CRE_DATERNG1': self.DATERNG1,
                           'CRE_DATERNG2': self.DATERNG2,
                           'CRE_DATERNG3': self.DATERNG3,
                           }
        self.cre_keys = list(self.cre_source.keys())

    def __getattr__(self, name):
        """
        Compile a C{CRE_*} regex on first access and store it on the
        instance, so this method is not called again for that name.

        @raise AttributeError: for any name that is not a known regex key
        """
        # Read cre_keys straight from the instance dictionary: going
        # through self.cre_keys would re-enter __getattr__ and recurse
        # without end whenever the attribute has not been set yet.
        known = self.__dict__.get('cre_keys', ())

        if name in known:
            value = re.compile(self.cre_source[name], self.re_option)
            setattr(self, name, value)
            return value

        raise AttributeError(name)

    def daysInMonth(self, month, year):
        """
        Take the given month (1-12) and a given year (4 digit) return
        the number of days in the month adjusting for leap year as needed

        @rtype:  integer
        @return: number of days, or None when C{month} is outside 1-12
        """
        result = None

        if 0 < month <= 12:
            result = self._DaysInMonthList[month - 1]

            if month == 2:
                if year in self._leapYears:
                    result += 1
                elif calendar.isleap(year):
                    # remember the year so the next lookup hits the cache
                    self._leapYears.append(year)
                    result += 1

        return result

    def buildSources(self, sourceTime=None):
        """
        Return a dictionary of date/time tuples based on the keys
        found in self.re_sources.

        The current time is used as the default and any specified
        item found in self.re_sources is inserted into the value
        and the generated dictionary is returned.

        @type  sourceTime: 9-item time tuple
        @param sourceTime: base date/time; C{time.localtime()} is used
                           when it is None
        """
        if sourceTime is None:
            sourceTime = time.localtime()

        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        defaults = {'yr': yr, 'mth': mth, 'dy': dy,
                    'hr': hr, 'mn': mn, 'sec': sec, }
        sources = {}

        for item in self.re_sources:
            overrides = self.re_sources[item]
            values = {}

            # an entry only lists the fields it wants to replace
            for key in defaults:
                values[key] = overrides.get(key, defaults[key])

            sources[item] = (values['yr'], values['mth'], values['dy'],
                             values['hr'], values['mn'], values['sec'],
                             wd, yd, isdst)

        return sources
1104