Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  # 
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  # 
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation 
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check 
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the 
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  import re 
  34   
  35  from translate.filters import helpers 
  36  from translate.filters import decoration 
  37  from translate.filters import prefilters 
  38  from translate.filters import spelling 
  39  from translate.lang import factory 
  40  from translate.lang import data 
  41  # The import of xliff could fail if the user doesn't have lxml installed. For 
  42  # now we try to continue gracefully to help users who aren't interested in 
  43  # support for XLIFF or other XML formats. 
  44  try: 
  45      from translate.storage import xliff 
  46  except ImportError, e: 
  47      xliff = None 
   48  # The import of xliff can fail silently in the absence of lxml if another 
   49  # module already tried to import it unsuccessfully, so let's make 100% sure: 
  50  if not hasattr(xliff, "xliffunit"): 
  51      xliff = None 
  52   
  53  # These are some regular expressions that are compiled for use in some tests 
  54   
  55  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't 
  56  # cover everything we leave \w instead of specifying the exact letters as 
  57  # this should capture printf types defined in other platforms. 
  58  # extended to support Python named format specifiers 
  59  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  60   
  61  # The name of the XML tag 
  62  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  63   
  64  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  65  #TODO: remove escaped strings once usage is audited 
  66  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  67   
  68  # The whole tag 
  69  tag_re = re.compile("<[^>]+>") 
  70   
  71  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  72   
  73   
def tagname(string):
    """Returns the name of the XML/HTML tag in string

    @param string: text that starts with the opening "<" of a tag
    @return: whatever tagname_re captures right after the "<" (this may be
        an empty string)
    """
    # tagname_re has exactly one capturing group and it always takes part in
    # a successful match, so asking for group 1 directly is sufficient.
    found = tagname_re.match(string)
    return found.group(1)
77 78
def intuplelist(pair, list):
    """Looks up the triple pair == (a, b, c) in list, treating None entries
    of list as wildcards (only honoured in positions "a" and "c").

    @param pair: a (tagname, propertyname, propertyvalue) triple
    @param list: the triples to search; None in the first or last position
        of an entry matches anything
    @return: the first entry of list that matches pair, or pair itself when
        nothing matches or when pair is a bare tag name (b and c both None)
    """
    tag, attribute, value = pair
    if attribute is None and value is None:
        # This is a tagname
        return pair
    for candidate in list:
        wanted_tag, wanted_attribute, wanted_value = candidate
        # The middle position has to match exactly...
        if wanted_attribute != attribute:
            continue
        # ...while the outer positions accept None as a wildcard. The value
        # is only considered once tag and attribute have matched.
        if wanted_tag not in (tag, None):
            continue
        if wanted_value in (None, value):
            return candidate
    return pair
93 94
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag string as
    (tagname, propertyname, propertyvalue), but ignore those combinations
    specified in ignore.

    @param strings: the tags to analyse, one complete tag per item
    @param ignore: (tagname, propertyname, propertyvalue) triples to leave
        out; None entries act as wildcards as described in intuplelist()
    @return: a list that holds (tagname, None, None) for every tag followed
        by the triples of its attributes that were kept
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs. The last two groups are the
        # double-quoted and single-quoted alternatives and are not needed.
        for attribute, quoted, _double, _single in property_re.findall(string):
            # Strip the quotes:
            candidate = (tag, attribute, quoted[1:-1])
            if candidate in ignore or \
                    intuplelist(candidate, ignore) != candidate:
                # NOTE(review): this stops looking at the tag altogether, so
                # attributes that follow an ignorable one are dropped as
                # well. Kept as found; confirm that "continue" was not meant.
                break
            properties.append(candidate)
    return properties
117 118
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment"""

    def __init__(self, messages):
        """Stores the message(s), joined with ", ", as the only exception
        argument.

        @param messages: a single unicode message or a list of them
        """
        if not isinstance(messages, list):
            messages = [messages]
        # Assumption: all of same type. An empty list has nothing to check
        # and simply results in an empty message.
        assert not messages or isinstance(messages[0], unicode)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            # Keep the usual tuple shape: the message is read back as
            # e.args[0], which would be a single character of a bare string.
            self.args = (joined,)
132 133
class SeriousFilterFailure(FilterFailure):
    """A FilterFailure for problems where the bad translation might break an
    application (so the string will be marked fuzzy).

    It adds no behaviour of its own; only its type sets it apart from a
    plain FilterFailure.
    """
# Each entry is a (tag, attribute, value) triple naming an attribute that may
# be changed/ignored when it occurs inside tag. A non-None value restricts
# the entry to that particular attribute value (like defaults, for example).
# None stands for "any": None as tag applies the entry to every tag, and
# None as value applies it to every value of the attribute.
common_ignoretags = [
    (None, "xml-lang", None),
]
# Actually the title attribute is allowed on many tags in HTML (but probably
# not all)
common_canchangetags = [
    ("img", "alt", None),
    (None, "title", None),
    (None, "dir", None),
    (None, "lang", None),
]
class CheckerConfig(object):
    """object representing the configuration of a checker"""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None,
                 validchars=None, punctuation=None, endpunctuation=None,
                 ignoretags=None, canchangetags=None, criticaltests=None,
                 credit_sources=None):
        """Builds a configuration; every parameter that is left as None
        falls back to an empty or language specific default.

        @param targetlanguage: language code handed to factory.getlanguage()
        @param accelmarkers: list of accelerator markers
        @param varmatches: list of (startmarker, endmarker) pairs
        @param notranslatewords: words kept as keys of self.notranslatewords
        @param musttranslatewords: words kept as keys of
            self.musttranslatewords
        @param validchars: characters added to self.validcharsmap
        @param punctuation: defaults to the target language's punctuation
        @param endpunctuation: defaults to the target language's sentenceend
        @param ignoretags: defaults to common_ignoretags
        @param canchangetags: defaults to common_canchangetags
        @param criticaltests: list of test names
        @param credit_sources: list, stored as given (copied)
        """
        # Init lists
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)
        # Lang data
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Inits with default values
        self.punctuation = self._init_default(data.normalized_unicode(punctuation),
                                              self.lang.punctuation)
        self.endpunctuation = self._init_default(data.normalized_unicode(endpunctuation),
                                                 self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)
        # Other data
        # TODO: allow user configuration of untranslatable words
        self.notranslatewords = dict.fromkeys(
            [data.normalized_unicode(key) for key in self._init_list(notranslatewords)])
        self.musttranslatewords = dict.fromkeys(
            [data.normalized_unicode(key) for key in self._init_list(musttranslatewords)])
        validchars = data.normalized_unicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

    def _init_list(self, list):
        """initialise configuration parameters that are lists

        @type list: List
        @param list: None (we'll initialise a blank list) or a list parameter
        @rtype: List
        @return: a new list holding the items of the parameter
        """
        if list is None:
            return []
        # Hand back a copy: update() extends these lists in place, which
        # must not leak into the list object owned by the caller. (The
        # parameter shadows the builtin, hence the comprehension.)
        return [item for item in list]

    def _init_default(self, param, default):
        """initialise parameters that can have default options

        @param param: the user supplied parameter value
        @param default: default values when param is not specified
        @return: the parameter as specified by the user, or the default
            settings when it is None
        """
        if param is None:
            return default
        return param

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object

        @param otherconfig: the CheckerConfig whose settings are merged in
        """
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        self.accelmarkers.extend([c for c in otherconfig.accelmarkers
                                  if c not in self.accelmarkers])
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these are replaced by, and afterwards shared with, otherconfig)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters

        @param validchars: the characters to add, or None to add nothing
        @return: True when validchars is None, otherwise None
        """
        if validchars is None:
            return True
        # Every valid character maps its ordinal to None.
        validcharsmap = dict([(ord(validchar), None)
                              for validchar in data.normalized_unicode(validchars)])
        self.validcharsmap.update(validcharsmap)

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target
        language

        @param langcode: language code handed to factory.getlanguage()
        """
        self.lang = factory.getlanguage(langcode)
237 238
def cache_results(f):
    """Wraps the one-argument method f so that its results are remembered in
    self.results_cache.

    @param f: a method taking (self, param1); param1 must be hashable
    @return: a method with the same name and docstring as f that looks the
        result up under the key (f.__name__, param1) before calling f
    """

    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        value = f(self, param1)
        res_cache[key] = value
        return value

    # Keep the identity of the wrapped method; without this every cached
    # method would present itself as "cached_f" without a docstring.
    cached_f.__name__ = f.__name__
    cached_f.__doc__ = f.__doc__
    return cached_f
253 -class UnitChecker(object):
254 """Parent Checker class which does the checking based on functions available 255 in derived classes.""" 256 preconditions = {} 257
258 - def __init__(self, checkerconfig=None, excludefilters=None, 259 limitfilters=None, errorhandler=None):
260 self.errorhandler = errorhandler 261 if checkerconfig is None: 262 self.setconfig(CheckerConfig()) 263 else: 264 self.setconfig(checkerconfig) 265 # exclude functions defined in UnitChecker from being treated as tests. 266 self.helperfunctions = {} 267 for functionname in dir(UnitChecker): 268 function = getattr(self, functionname) 269 if callable(function): 270 self.helperfunctions[functionname] = function 271 self.defaultfilters = self.getfilters(excludefilters, limitfilters) 272 self.results_cache = {}
273
274 - def getfilters(self, excludefilters=None, limitfilters=None):
275 """returns dictionary of available filters, including/excluding those in 276 the given lists""" 277 filters = {} 278 if limitfilters is None: 279 # use everything available unless instructed 280 limitfilters = dir(self) 281 if excludefilters is None: 282 excludefilters = {} 283 for functionname in limitfilters: 284 if functionname in excludefilters: 285 continue 286 if functionname in self.helperfunctions: 287 continue 288 if functionname == "errorhandler": 289 continue 290 filterfunction = getattr(self, functionname, None) 291 if not callable(filterfunction): 292 continue 293 filters[functionname] = filterfunction 294 return filters
295
296 - def setconfig(self, config):
297 """sets the accelerator list""" 298 self.config = config 299 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers] 300 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname) 301 for startmatch, endmatch in self.config.varmatches] 302 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, 303 prefilters.varnone) 304 for startmatch, endmatch in self.config.varmatches]
305
306 - def setsuggestionstore(self, store):
307 """Sets the filename that a checker should use for evaluating 308 suggestions.""" 309 self.suggestion_store = store 310 if self.suggestion_store: 311 self.suggestion_store.require_index()
312
313 - def filtervariables(self, str1):
314 """filter out variables from str1""" 315 return helpers.multifilter(str1, self.varfilters)
316 filtervariables = cache_results(filtervariables) 317
318 - def removevariables(self, str1):
319 """remove variables from str1""" 320 return helpers.multifilter(str1, self.removevarfilter)
321 removevariables = cache_results(removevariables) 322
323 - def filteraccelerators(self, str1):
324 """filter out accelerators from str1""" 325 return helpers.multifilter(str1, self.accfilters, None)
326 filteraccelerators = cache_results(filteraccelerators) 327
328 - def filteraccelerators_by_list(self, str1, acceptlist=None):
329 """filter out accelerators from str1""" 330 return helpers.multifilter(str1, self.accfilters, acceptlist)
331
332 - def filterwordswithpunctuation(self, str1):
333 """replaces words with punctuation with their unpunctuated 334 equivalents""" 335 return prefilters.filterwordswithpunctuation(str1)
336 filterwordswithpunctuation = cache_results(filterwordswithpunctuation) 337
338 - def filterxml(self, str1):
339 """filter out XML from the string so only text remains""" 340 return tag_re.sub("", str1)
341 filterxml = cache_results(filterxml) 342
343 - def run_test(self, test, unit):
344 """Runs the given test on the given unit. 345 346 Note that this can raise a FilterFailure as part of normal operation""" 347 return test(unit)
348
349 - def run_filters(self, unit):
350 """run all the tests in this suite, return failures as testname, 351 message_or_exception""" 352 self.results_cache = {} 353 failures = {} 354 ignores = self.config.lang.ignoretests[:] 355 functionnames = self.defaultfilters.keys() 356 priorityfunctionnames = self.preconditions.keys() 357 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames) 358 for functionname in priorityfunctionnames + otherfunctionnames: 359 if functionname in ignores: 360 continue 361 filterfunction = getattr(self, functionname, None) 362 # this filterfunction may only be defined on another checker if 363 # using TeeChecker 364 if filterfunction is None: 365 continue 366 filtermessage = filterfunction.__doc__ 367 try: 368 filterresult = self.run_test(filterfunction, unit) 369 except FilterFailure, e: 370 filterresult = False 371 filtermessage = e.args[0] 372 except Exception, e: 373 if self.errorhandler is None: 374 raise ValueError("error in filter %s: %r, %r, %s" % \ 375 (functionname, unit.source, unit.target, e)) 376 else: 377 filterresult = self.errorhandler(functionname, unit.source, 378 unit.target, e) 379 if not filterresult: 380 # we test some preconditions that aren't actually a cause for 381 # failure 382 if functionname in self.defaultfilters: 383 failures[functionname] = filtermessage 384 if functionname in self.preconditions: 385 for ignoredfunctionname in self.preconditions[functionname]: 386 ignores.append(ignoredfunctionname) 387 self.results_cache = {} 388 return failures
389 390
391 -class TranslationChecker(UnitChecker):
392 """A checker that passes source and target strings to the checks, not the 393 whole unit. 394 395 This provides some speedup and simplifies testing.""" 396
397 - def __init__(self, checkerconfig=None, excludefilters=None, 398 limitfilters=None, errorhandler=None):
399 super(TranslationChecker, self).__init__(checkerconfig, excludefilters, 400 limitfilters, errorhandler)
401
402 - def run_test(self, test, unit):
403 """Runs the given test on the given unit. 404 405 Note that this can raise a FilterFailure as part of normal operation.""" 406 if self.hasplural: 407 filtermessages = [] 408 filterresult = True 409 for pluralform in unit.target.strings: 410 try: 411 if not test(self.str1, unicode(pluralform)): 412 filterresult = False 413 except FilterFailure, e: 414 filterresult = False 415 filtermessages.append(unicode(e.args)) 416 if not filterresult and filtermessages: 417 raise FilterFailure(filtermessages) 418 else: 419 return filterresult 420 else: 421 return test(self.str1, self.str2)
422
423 - def run_filters(self, unit):
424 """Do some optimisation by caching some data of the unit for the benefit 425 of run_test().""" 426 self.str1 = data.normalized_unicode(unit.source) or u"" 427 self.str2 = data.normalized_unicode(unit.target) or u"" 428 self.hasplural = unit.hasplural() 429 self.locations = unit.getlocations() 430 return super(TranslationChecker, self).run_filters(unit)
431 432
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, checkerclasses=None, errorhandler=None,
                 languagecode=None):
        """construct a TeeChecker from the given checkers

        @param checkerconfig: configuration handed to every checker
        @param excludefilters: names of tests that must not be run
        @param limitfilters: when given, only tests with these names are run
        @param checkerclasses: classes to instantiate; [StandardChecker]
            when None
        @param errorhandler: handed to every checker
        @param languagecode: when given, the target language of every
            checker is updated to it
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        checkers = []
        for checkerclass in checkerclasses:
            checkers.append(checkerclass(checkerconfig=checkerconfig,
                                         excludefilters=excludefilters,
                                         limitfilters=limitfilters,
                                         errorhandler=errorhandler))
        self.checkers = checkers
        # NOTE(review): the nesting below was reconstructed from a flattened
        # listing; confirm that the language checker hook belongs inside
        # this condition.
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists

        The result is also stored as self.combinedfilters. A warning is
        printed to stderr for every name in limitfilters that none of the
        checkers provides.
        """
        if excludefilters is None:
            excludefilters = {}
        combined = {}
        for checker in self.checkers:
            combined.update(checker.getfilters(excludefilters, limitfilters))
        self.combinedfilters = combined
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            for filtername in limitfilters:
                if filtername in combined:
                    continue
                import sys
                print >> sys.stderr, "warning: could not find filter %s" % filtername
        return self.combinedfilters

    def run_filters(self, unit):
        """run all the tests in the checker's suites

        @return: the failures of all checkers merged into one dictionary
        """
        failures = {}
        for checker in self.checkers:
            checker_failures = checker.run_filters(unit)
            failures.update(checker_failures)
        return failures

    def setsuggestionstore(self, store):
        """Hands the store that should be used for evaluating suggestions to
        every checker."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
487 488
489 -class StandardChecker(TranslationChecker):
490 """The basic test suite for source -> target translations.""" 491
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Fails (returns False) when the source has text but the target, after
    # prefilters.removekdecomments(), is empty.
    str2 = prefilters.removekdecomments(str2)
    source_has_text = len(str1.strip()) > 0
    if source_has_text and len(str2) == 0:
        return False
    return True
496
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original
    string"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Raises FilterFailure when source and target are the same apart from
    # case, after variables and accelerators have been taken out.
    str1 = self.filteraccelerators(self.removevariables(str1)).strip()
    str2 = self.filteraccelerators(self.removevariables(str2)).strip()
    if len(str1) < 2:
        return True
    # If the whole string is uppercase, or nothing in the string can go
    # towards uppercase, let's assume there is nothing translatable
    # TODO: reconsider
    if str1 == str2 and (str1.isupper() or str1.upper() == str1):
        return True
    notranslatewords = self.config.notranslatewords
    if notranslatewords:
        words1 = str1.split()
        #why do we only test for one notranslate word?
        if len(words1) == 1 and words1[0] in notranslatewords:
            return True
    # we could also check for things like str1.isnumeric(), but the test
    # above (str1.upper() == str1) makes this unnecessary
    if str1.lower() == str2.lower():
        raise FilterFailure(u"please translate")
    return True
521
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Fails only when the source has text and the target is non-empty but
    # consists of nothing but whitespace.
    source_has_text = len(str1.strip()) > 0
    only_whitespace = len(str2) != 0 and len(str2.strip()) == 0
    return not (source_has_text and only_whitespace)
527
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original
    string"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Lengths are measured without surrounding whitespace.
    len1 = len(str1.strip())
    len2 = len(str2.strip())
    # Less than a tenth of the source length...
    much_shorter = len1 > 0 and 0 < len2 < len1 * 0.1
    # ...or a single character for a longer source.
    single_character = len1 > 1 and len2 == 1
    return not (much_shorter or single_character)
534
def long(self, str1, str2):
    """checks whether a translation is much longer than the original
    string"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Lengths are measured without surrounding whitespace.
    len1 = len(str1.strip())
    len2 = len(str2.strip())
    # The source is less than a tenth of the target length...
    much_longer = len1 > 0 and 0 < len1 < len2 * 0.1
    # ...or a single character was turned into something longer.
    from_single_character = len1 == 1 and len2 > 1
    return not (much_longer or from_single_character)
541
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Compares via helpers.countsmatch() for a single and a doubled
    # backslash; on a mismatch the words that contain a backslash are
    # quoted in the SeriousFilterFailure message.
    if helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
        return True

    def escaped_words(text):
        quoted = []
        for word in text.split():
            if u"\\" in word:
                quoted.append(u"'%s'" % word)
        return u", ".join(quoted)

    escapes1 = escaped_words(str1)
    escapes2 = escaped_words(str2)
    raise SeriousFilterFailure(u"escapes in original (%s) don't match "
                               "escapes in translation (%s)" %
                               (escapes1, escapes2))
552
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Compares via helpers.countsmatch() for "\n" and "\r".
    line_endings = (u"\n", u"\r")
    if helpers.countsmatch(str1, str2, line_endings):
        return True
    raise FilterFailure(u"line endings in original don't match "
                        "line endings in translation")
560
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Compares via helpers.countmatch() for the tab character.
    if helpers.countmatch(str1, str2, "\t"):
        return True
    raise SeriousFilterFailure(u"tabs in original don't match "
                               "tabs in translation")
568
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Variables, accelerators and words with punctuation are filtered out
    # of both strings; only the source goes through punctranslate().

    def prepare(text):
        text = self.filtervariables(text)
        text = self.filteraccelerators(text)
        return self.filterwordswithpunctuation(text)

    str1 = self.config.lang.punctranslate(prepare(str1))
    str2 = prepare(str2)
    quotes = (u"'", u"''", u"\\'")
    return helpers.countsmatch(str1, str2, quotes)
575
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Variables, accelerators and XML tags are filtered out of both
    # strings; only the source goes through punctranslate().

    def prepare(text):
        text = self.filtervariables(text)
        text = self.filteraccelerators(text)
        return self.filterxml(text)

    str1 = self.config.lang.punctranslate(prepare(str1))
    str2 = prepare(str2)
    quotes = (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”")
    return helpers.countsmatch(str1, str2, quotes)
585
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Accelerators are filtered out first. The string compared for has to
    # be two spaces: comparing for a single space would be a check on the
    # number of word gaps rather than on double-spaces.
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    return helpers.countmatch(str1, str2, u"  ")
591
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Convert all nbsp to space, and just check spaces. Useful intermediate
    # step to stricter nbsp checking?
    nbsp = u"\u00a0"
    str1 = self.filteraccelerators(self.filtervariables(str1))
    str1 = self.config.lang.punctranslate(str1).replace(nbsp, u" ")
    if u" " not in str1:
        return True
    str2 = self.filteraccelerators(self.filtervariables(str2))
    str2 = str2.replace(nbsp, u" ")
    for puncchar in self.config.punctuation:
        plaincount1 = str1.count(puncchar)
        # Only punctuation that occurs in the source, and equally often in
        # the target, is looked at.
        if not plaincount1 or plaincount1 != str2.count(puncchar):
            continue
        followed_by_space = puncchar + u" "
        spacecount1 = str1.count(followed_by_space)
        spacecount2 = str2.count(followed_by_space)
        if spacecount1 == spacecount2:
            continue
        # handle extra spaces that are because of transposed punctuation
        transposed = (abs(spacecount1 - spacecount2) == 1 and
                      str1.endswith(puncchar) != str2.endswith(puncchar))
        if not transposed:
            return False
    return True
618
def printf(self, str1, str2):
    """checks whether printf format strings match"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Returns 1 when the specifiers that printf_pat finds in str2 agree
    # with those in str1, and 0 otherwise.
    # NOTE(review): the nesting of the statements below was reconstructed
    # from a flattened listing - confirm against the original file.
    #
    # count1 and count2 end up as the number of specifiers seen in str1 and
    # str2; they stay None when the corresponding loop never ran.
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
        count2 = var_num2 + 1
        str2key = match2.group('key')
        if match2.group('ord'):
            # Positional specifier ("%1$s"): compare it with the specifier
            # at that position in str1.
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if int(match2.group('ord')) == var_num1 + 1:
                    if match2.group('fullvar') != match1.group('fullvar'):
                        return 0
        elif str2key:
            # Named specifier ("%(name)s"): the same key has to occur in
            # str1, with the same format.
            str1key = None
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if match1.group('key') and str2key == match1.group('key'):
                    str1key = match1.group('key')
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if match1.group('fullvar') != match2.group('fullvar'):
                        return 0
            if str1key == None:
                return 0
        else:
            # Plain specifier: compare it with the specifier that has the
            # same index in str1.
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                # '%.0s' "placeholder" in plural will match anything
                if plural and match2.group('fullvar') == '.0s':
                    continue
                if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
                    return 0

    # No specifier at all in str2: str1 must not have any either.
    if count2 is None:
        if list(printf_pat.finditer(str1)):
            return 0

    # Both strings have to contain the same number of specifiers.
    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
663
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # For every configured accelerator marker the number of accelerators in
    # source and target (variables filtered out) is compared. The messages
    # are raised as SeriousFilterFailure when "accelerators" is listed in
    # config.criticaltests and as FilterFailure otherwise.
    str1 = self.filtervariables(str1)
    str2 = self.filtervariables(str2)
    config = self.config
    messages = []
    for accelmarker in config.accelmarkers:
        count_in_source = decoration.countaccelerators(accelmarker, config.sourcelang.validaccel)
        count_in_target = decoration.countaccelerators(accelmarker, config.lang.validaccel)
        count1, _countbad1 = count_in_source(str1)
        count2, countbad2 = count_in_target(str2)
        getaccel = decoration.getaccelerators(accelmarker, config.lang.validaccel)
        _accel2, bad2 = getaccel(str2)
        if count1 == count2:
            continue
        if count1 == 1 and count2 == 0:
            if countbad2 == 1:
                message = (u"accelerator %s appears before an invalid "
                           "accelerator character '%s' (eg. space)" %
                           (accelmarker, bad2[0]))
            else:
                message = (u"accelerator %s is missing from translation" %
                           accelmarker)
        elif count1 == 0:
            message = (u"accelerator %s does not occur in original "
                       "and should not be in translation" % accelmarker)
        elif count1 == 1 and count2 > count1:
            message = (u"accelerator %s is repeated in translation" %
                       accelmarker)
        else:
            message = (u"accelerator %s occurs %d time(s) in original "
                       "and %d time(s) in translation" %
                       (accelmarker, count1, count2))
        messages.append(message)
    if not messages:
        return True
    if "accelerators" in config.criticaltests:
        raise SeriousFilterFailure(messages)
    raise FilterFailure(messages)
702 703 # def acceleratedvariables(self, str1, str2): 704 # """checks that no variables are accelerated""" 705 # messages = [] 706 # for accelerator in self.config.accelmarkers: 707 # for variablestart, variableend in self.config.varmatches: 708 # error = accelerator + variablestart 709 # if str1.find(error) >= 0: 710 # messages.append(u"original has an accelerated variable") 711 # if str2.find(error) >= 0: 712 # messages.append(u"translation has an accelerated variable") 713 # if messages: 714 # raise FilterFailure(messages) 715 # return True 716
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the
    two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Raises SeriousFilterFailure when variables of the source are missing
    # from the target, and FilterFailure when the target only has variables
    # that the source does not have.
    # NOTE(review): the nesting of the statements below was reconstructed
    # from a flattened listing - confirm against the original file.
    messages = []
    # Decorated variables that are surplus in the source / in the target.
    mismatch1, mismatch2 = [], []
    # Bare names already reported, across all marker pairs.
    varnames1, varnames2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)
        # redecorate puts the markers back around a bare variable name for
        # use in the message; an integer endmarker is not appended.
        if startmarker and endmarker:
            if isinstance(endmarker, int):
                redecorate = lambda var: startmarker + var
            else:
                redecorate = lambda var: startmarker + var + endmarker
        elif startmarker:
            redecorate = lambda var: startmarker + var
        else:
            redecorate = lambda var: var
        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 != vars2:
            # we use counts to compare so we can handle multiple variables
            vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], \
                           [var for var in vars2 if vars1.count(var) < vars2.count(var)]
            # filter variable names we've already seen, so they aren't
            # matched by more than one filter...
            vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
            varnames1.extend(vars1)
            varnames2.extend(vars2)
            vars1 = map(redecorate, vars1)
            vars2 = map(redecorate, vars2)
            mismatch1.extend(vars1)
            mismatch2.extend(vars2)
    # Only one message is produced: missing source variables take priority
    # over additional variables in the target.
    if mismatch1:
        messages.append(u"do not translate: %s" % u", ".join(mismatch1))
    elif mismatch2:
        messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
    if messages and mismatch1:
        raise SeriousFilterFailure(messages)
    elif messages:
        raise FilterFailure(messages)
    return True
758
def functions(self, str1, str2):
    """checks that function names are not translated"""
    # The docstring doubles as the failure message, so it stays as it is.
    # We can't just use helpers.funcmatch() since it doesn't ignore order:
    # the results of decoration.getfunctions() are compared as sets.
    functions1 = set(decoration.getfunctions(str1))
    functions2 = set(decoration.getfunctions(str2))
    return functions1 == functions2
763
def emails(self, str1, str2):
    """checks that emails are not translated"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Delegates to helpers.funcmatch() with decoration.getemails as the
    # function to compare the two strings with.
    extractor = decoration.getemails
    return helpers.funcmatch(str1, str2, extractor)
767
def urls(self, str1, str2):
    """checks that URLs are not translated"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Delegates to helpers.funcmatch() with decoration.geturls as the
    # function to compare the two strings with.
    extractor = decoration.geturls
    return helpers.funcmatch(str1, str2, extractor)
771
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the
    two strings"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Only what decoration.getnumbers() finds in the source is compared via
    # helpers.countsmatch().
    source_numbers = decoration.getnumbers(str1)
    return helpers.countsmatch(str1, str2, source_numbers)
776
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Delegates to helpers.funcmatch() with decoration.spacestart as the
    # function to compare the two strings with.
    extractor = decoration.spacestart
    return helpers.funcmatch(str1, str2, extractor)
780
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Only the source goes through punctranslate() before the two strings
    # are compared with decoration.spaceend.
    translated_source = self.config.lang.punctranslate(str1)
    return helpers.funcmatch(translated_source, str2, decoration.spaceend)
785
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Variables, accelerators, words with punctuation and XML tags are
    # filtered out of both strings; only the source goes through
    # punctranslate().

    def prepare(text):
        text = self.filtervariables(text)
        text = self.filteraccelerators(text)
        text = self.filterwordswithpunctuation(text)
        return self.filterxml(text)

    str1 = self.config.lang.punctranslate(prepare(str1))
    str2 = prepare(str2)
    return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
792
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match"""
    # The docstring doubles as the failure message, so it stays as it is.
    # Variables are filtered out and trailing whitespace is stripped; only
    # the source goes through punctranslate(). A colon is considered in
    # addition to the configured end punctuation.
    source = self.config.lang.punctranslate(self.filtervariables(str1))
    target = self.filtervariables(str2)
    endings = self.config.endpunctuation + u":"
    return helpers.funcmatch(source.rstrip(), target.rstrip(), decoration.puncend, endings)
801
def purepunc(self, str1, str2):
    """checks that a string consisting purely of punctuation is unchanged

    If the source is not pure punctuation, the translation must not be pure
    punctuation either.
    """
    # this test is a subset of startandend
    if not decoration.ispurepunctuation(str1):
        return not decoration.ispurepunctuation(str2)
    return str1 == str2
809
def brackets(self, str1, str2):
    """checks that each kind of bracket occurs equally often in both strings

    Variables are filtered out first. Raises FilterFailure listing the
    brackets that are missing from, or extra in, the translation; returns
    True when all counts agree.
    """
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    missing, extra = [], []
    for bracket in u"[]{}()":
        source_count = source.count(bracket)
        target_count = target.count(bracket)
        quoted = u"'%s'" % bracket
        if target_count < source_count:
            missing.append(quoted)
        elif target_count > source_count:
            extra.append(quoted)
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
831
def sentencecount(self, str1, str2):
    """checks that both strings contain the same number of sentences

    Accelerators are filtered out; the source is split with the source
    language's sentences() and the translation with the target language's.
    Raises FilterFailure when the counts differ.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_total = len(self.config.sourcelang.sentences(source))
    target_total = len(self.config.lang.sentences(target))
    if source_total != target_total:
        raise FilterFailure(u"The number of sentences differ: "
                            "%d versus %d" % (source_total, target_total))
    return True
842
def options(self, str1, str2):
    """checks that command line options are not translated

    A source word counts as an option when it starts with "--", is not just
    "--", and ends in an alphanumeric character. The option name (the part
    before any "=") must occur in the translation, while a parameter after
    "=" must not occur verbatim. Raises FilterFailure otherwise.
    """
    source = self.filtervariables(str1)
    for token in source.split():
        if token == u"--" or not token.startswith(u"--") or not token[-1].isalnum():
            continue
        parts = token.split(u"=")
        option = parts[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is "
                                "translated in the translation." % option)
        if len(parts) > 1 and parts[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s "
                                "is not translated." % {"param": parts[1],
                                                        "option": option})
    return True
857
def startcaps(self, str1, str2):
    """checks that the translation starts with the same capitalisation

    Accelerators are filtered out first. When both strings are longer than
    one character the languages' capsstart() results are compared;
    otherwise the check only fails if exactly one of the strings is empty.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    if len(source) > 1 and len(target) > 1:
        source_caps = self.config.sourcelang.capsstart(source)
        return source_caps == self.config.lang.capsstart(target)
    # Short strings: both empty or both non-empty passes.
    source_empty = len(source) == 0
    target_empty = len(target) == 0
    return source_empty == target_empty
869
def simplecaps(self, str1, str2):
    """checks that the capitalisation of the two strings isn't wildly
    different

    Variables are removed from both strings, upper-case and alphabetic
    characters are counted, and a series of heuristics (tried in order)
    decides whether the capitalisation styles are roughly alike.
    """
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably
    # serves no purpose to get sourcelang.sentenceend
    pronoun_pattern = u"[^%s]( I )" % self.config.sourcelang.sentenceend
    source = re.sub(pronoun_pattern, u" i ", source)
    source_caps = helpers.filtercount(source, unicode.isupper)
    target_caps = helpers.filtercount(target, unicode.isupper)
    source_alpha = helpers.filtercount(source, unicode.isalpha)
    target_alpha = helpers.filtercount(target, unicode.isalpha)
    # Capture the all caps case
    if source_caps == source_alpha:
        return target_caps == target_alpha
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same
    if source_caps in (0, 1):
        return target_caps == source_caps
    if source_caps < len(source) / 10:
        return target_caps <= len(target) / 8
    if len(source) < 10:
        return abs(source_caps - target_caps) < 3
    if source_caps > len(source) * 6 / 10:
        return target_caps > len(target) * 6 / 10
    return abs(source_caps - target_caps) < (len(source) + len(target)) / 6
896
def acronyms(self, str1, str2):
    """checks that acronyms in the source appear unchanged in the translation

    A word is treated as an acronym when it is all upper case and longer
    than one character. Variables found in the source and the configured
    musttranslatewords are exempt. Raises FilterFailure listing the acronyms
    that are absent from the translation.
    """
    exempt = []
    for startmatch, endmatch in self.config.varmatches:
        exempt += decoration.getvariables(startmatch, endmatch)(str1)
    exempt += self.config.musttranslatewords.keys()
    str1 = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(str1)
    str2 = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messsages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in source_words
               if word.isupper() and len(word) > 1
               and word not in exempt and str2.find(word) == -1]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: %s" %
                            u", ".join(changed))
    return True
919
def doublewords(self, str1, str2):
    """checks for a word repeated twice in a row in the translation

    Only str2 is inspected. XML, variables and accelerators are stripped,
    full stops removed and the text lower-cased before comparing
    neighbouring words. Words listed in the target language's
    validdoublewords may repeat. Raises FilterFailure on the first
    repetition.
    """
    # Splitting on newlines and re-joining with newlines rebuilds the same
    # text; all whitespace is discarded by split() below anyway.
    rebuilt = "\n".join(str2.split("\n"))
    cleaned = self.filterxml(rebuilt)
    cleaned = self.removevariables(cleaned)
    cleaned = self.filteraccelerators(cleaned)
    previous = ""
    for current in cleaned.replace(u".", u"").lower().split():
        if current == previous and current not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % current)
        previous = current
    return True
930
def notranslatewords(self, str1, str2):
    """checks that words configured as untranslatable also appear in the
    translation

    Passes immediately when no such words are configured. Variables are
    filtered, configured punctuation is replaced by spaces and accelerators
    are filtered before splitting into words. Raises FilterFailure listing
    the protected source words missing from the translation.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    #The above is full of strange quotes and things in utf-8 encoding.
    #single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    translated = [word for word in source_words
                  if word in protected and word not in target_words]
    if translated:
        raise FilterFailure(u"do not translate: %s" %
                            (u", ".join(translated)))
    return True
950
def musttranslatewords(self, str1, str2):
    """checks that words configured as definitely translatable don't appear
    in the translation

    Passes immediately when no such words are configured. Variables are
    removed, configured punctuation is replaced by spaces and accelerators
    are filtered before splitting into words. Raises FilterFailure listing
    the source words that were carried over unchanged.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The above is full of strange quotes and things in utf-8 encoding.
    # single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    untranslated = [word for word in source_words
                    if word in required and word in target_words]
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
969
def validchars(self, str1, str2):
    """checks that only characters specified as valid appear in the
    translation

    Passes when no validcharsmap is configured. Both strings are run through
    translate() with that map; a character left over in the translation is
    reported unless it is also left over in the source. Raises
    FilterFailure listing the offending characters.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    offending = []
    for char in target_leftover:
        if char not in source_leftover:
            offending.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offending:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offending)))
    return True
981
def filepaths(self, str1, str2):
    """checks that file paths have not been translated

    Every whitespace-separated source word (after accelerator filtering)
    that starts with "/" must pass helpers.countsmatch for both strings.
    """
    for token in self.filteraccelerators(str1).split():
        if token.startswith(u"/") and not helpers.countsmatch(str1, str2, (token,)):
            return False
    return True
989
def xmltags(self, str1, str2):
    """checks that XML/HTML tags have not been translated

    When the source has no tags, the translation must have none either.
    When the whole source is a single tag without "=", the check passes.
    Otherwise the tag properties of both strings (minus configured
    ignoretags, processed through intuplelist with canchangetags) must be
    equal.
    """
    source_tags = tag_re.findall(str1)
    if not source_tags:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied.
        return not tag_re.findall(str2)

    first_tag = source_tags[0]
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True

    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    changeable = self.config.canchangetags
    source_filtered = [intuplelist(prop, changeable) for prop in source_properties]
    target_filtered = [intuplelist(prop, changeable) for prop in target_properties]

    # TODO: consider the consequences of different ordering of
    # attributes/tags
    return source_filtered == target_filtered
1017
def kdecomments(self, str1, str2):
    """checks that no KDE style comment appears in the translation

    Fails when the translation starts with "_:" or contains "_:" directly
    after a newline. The source string is not inspected.
    """
    if str2.startswith(u"_:"):
        return False
    return u"\n_:" not in str2
1022
def compendiumconflicts(self, str1, str2):
    """checks for Gettext compendium conflicts (#-#-#-#-#)

    Passes when the conflict marker does not occur in the translation. The
    source string is not inspected.
    """
    marker = u"#-#-#-#-#"
    return marker not in str2
1026
def simpleplurals(self, str1, str2):
    """checks for English style plural(s) for you to review

    Counts literal "(s)" markers in the source and in the translation. When
    the target language is configured with a single plural form
    (self.config.lang.nplurals == 1) the translation must contain no marker
    at all; otherwise both strings must contain the same number of markers.
    """

    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the non-raw form
    # only worked by accident and triggers warnings on newer interpreters.
    # The pattern values are unchanged.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
1043
def spellcheck(self, str1, str2):
    """checks for words in the translation that don't pass a spell check

    Passes without checking when no target language is configured or the
    spelling module reports itself unavailable. Words that are also flagged
    in the source (checked with lang="en"), configured notranslatewords,
    and words that occur among their own suggestions are skipped. Raises
    FilterFailure with one message per remaining word.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1),
                                             self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2),
                                             self.config.lang.validaccel)
    source_flagged = [word for word, index, suggestions
                      in spelling.check(source, lang="en")]
    problems = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        skip = (word in self.config.notranslatewords
                or word in source_flagged
                # hack to ignore hyphenisation rules
                or word in suggestions)
        if skip:
            continue
        problems.append(u"check spelling of %s (could be %s)" %
                        (word, u" / ".join(suggestions[:5])))
    if problems:
        raise FilterFailure(problems)
    return True
1072
def credits(self, str1, str2):
    """checks for messages containing translation credits instead of normal
    translations.

    Fails when the source string is one of the configured credit_sources.
    """
    credit_sources = self.config.credit_sources
    return str1 not in credit_sources
# NOTE(review): `preconditions` appears to be a class attribute of the checker
# class whose header lies above this chunk, while `openofficeconfig` is a
# module-level name - confirm the indentation when merging.

# If the precondition filter is run and fails then the other tests listed are
# ignored.
# Tests shared by the "untranslated" and "blank" preconditions.
_translation_tests = (
    "simplecaps", "variables", "startcaps",
    "accelerators", "brackets", "endpunc",
    "acronyms", "xmltags", "startpunc",
    "endwhitespace", "startwhitespace",
    "escapes", "doublequoting", "singlequoting",
    "filepaths", "purepunc", "doublespacing",
    "sentencecount", "numbers", "isfuzzy",
    "isreview", "notranslatewords", "musttranslatewords",
    "emails", "simpleplurals", "urls", "printf",
    "tabs", "newlines", "functions", "options",
)
preconditions = {
    "untranslated": _translation_tests + ("blank", "nplurals", "gconf"),
    "blank": _translation_tests + ("gconf",),
    "credits": ("simplecaps", "variables", "startcaps",
                "accelerators", "brackets", "endpunc",
                "acronyms", "xmltags", "startpunc",
                "escapes", "doublequoting", "singlequoting",
                "filepaths", "doublespacing",
                "sentencecount", "numbers",
                "emails", "simpleplurals", "urls", "printf",
                "tabs", "newlines", "functions", "options"),
    "purepunc": ("startcaps", "options"),
    # This is causing some problems since Python 2.6, as
    # startcaps is now seen as an important one to always execute
    # and could now be done before it is blocked by a failing
    # "untranslated" or "blank" test. This is probably happening
    # due to slightly different implementation of the internal
    # dict handling since Python 2.6. We should never have relied
    # on this ordering anyway.
    #"startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": ("accelerators", "brackets", "escapes",
                            "numbers", "startpunc", "long", "variables",
                            "startcaps", "sentencecount", "simplecaps",
                            "doublespacing", "endpunc", "xmltags",
                            "startwhitespace", "endwhitespace",
                            "singlequoting", "doublequoting",
                            "filepaths", "purepunc", "doublewords", "printf"),
}
# The helper tuple is only needed while building the mapping.
del _translation_tests

# code to actually run the tests (use unittest?)

# Configuration applied on top of the caller's config by OpenOfficeChecker.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
        ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
        ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)
class OpenOfficeChecker(StandardChecker):
    """StandardChecker preconfigured with openofficeconfig."""

    def __init__(self, **kwargs):
        """Merge openofficeconfig into the caller's "checkerconfig" keyword
        argument (creating a fresh CheckerConfig when it is missing or None)
        and pass all keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration applied on top of the caller's config by MozillaChecker.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
        ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)


class MozillaChecker(StandardChecker):
    """StandardChecker preconfigured with mozillaconfig."""

    def __init__(self, **kwargs):
        """Merge mozillaconfig into the caller's "checkerconfig" keyword
        argument (creating a fresh CheckerConfig when it is missing or None)
        and pass all keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """checks for messages containing translation credits instead of normal
        translations.

        Fails when any entry of self.locations is one of the known credit
        identifiers; the strings themselves are not inspected.
        """
        credit_locations = ['MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation']
        return not any(location in credit_locations
                       for location in self.locations)
# Configuration applied on top of the caller's config by DrupalChecker.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
)


class DrupalChecker(StandardChecker):
    """StandardChecker preconfigured with drupalconfig."""

    def __init__(self, **kwargs):
        """Merge drupalconfig into the caller's "checkerconfig" keyword
        argument (creating a fresh CheckerConfig when it is missing or None)
        and pass all keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration applied on top of the caller's config by GnomeChecker.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)


class GnomeChecker(StandardChecker):
    """StandardChecker preconfigured with gnomeconfig, plus a gconf check."""

    def __init__(self, **kwargs):
        """Merge gnomeconfig into the caller's "checkerconfig" keyword
        argument (creating a fresh CheckerConfig when it is missing or None)
        and pass all keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when one of self.locations contains 'schemas.in'. Each
        match of gconf_attribute_re in the source, with its first and last
        character dropped (presumably the surrounding quotes - confirm
        against the regex), must occur in the translation; otherwise
        FilterFailure is raised.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            translated = [attribute for attribute in attributes
                          if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" %
                                    (u", ".join(translated)))
        return True
# Configuration applied on top of the caller's config by KdeChecker.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
)


class KdeChecker(StandardChecker):
    """StandardChecker preconfigured with kdeconfig."""

    def __init__(self, **kwargs):
        """Merge kdeconfig into the caller's "checkerconfig" keyword
        argument (creating a fresh CheckerConfig when it is missing or None)
        and pass all keyword arguments on to StandardChecker."""
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false postives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration applied on top of the caller's config by CCLicenseChecker.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])


class CCLicenseChecker(StandardChecker):
    """StandardChecker preconfigured with cclicenseconfig."""

    def __init__(self, **kwargs):
        """Merge cclicenseconfig into the caller's "checkerconfig" keyword
        argument (creating a fresh CheckerConfig when it is missing or None)
        and pass all keyword arguments on to StandardChecker."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project style name to the checker class that implements it.
# Note that "wx" reuses KdeChecker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy (passes when it is not)."""
        marked = unit.isfuzzy()
        return not marked

    def isreview(self, unit):
        """Check if the unit has been marked review (passes when it is not)."""
        marked = unit.isreview()
        return not marked

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural
        translations.

        Units without plurals always pass, as do units whose target language
        has no positive nplurals value configured.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if not expected > 0:
            return True
        return len(unit.target.strings) == expected

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this
        unit.

        Suggestions come from self.suggestion_store when one is set (the
        attribute is created as None if absent); otherwise, for xliff units,
        from the unit's alternative translations. Passes when none are found.
        """
        store = getattr(self, 'suggestion_store', None)
        self.suggestion_store = store
        if store:
            found = store.findunits(unit.source)
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            found = unit.getalttrans()
        else:
            found = []
        return not found
1299 1300
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Builds a translation unit from the normalised source and target, runs a
    StandardChecker (excluding the filters named in ignorelist) over it,
    prints one line per failure and returns the failures collection.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for name in failures:
        print("failure: %s: %s\n %r\n %r" %
              (name, failures[name], source, target))
    return failures
1314 1315
def batchruntests(pairs):
    """runs test on a batch of string pairs

    Calls runtests() on every (source, target) pair, prints a blank line
    after each pair and finally a summary of how many pairs passed.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests() returns the failures it found, so a pair has passed
        # only when that result is empty.
        if not runtests(str1, str2):
            passed += 1
        # Single parenthesised argument: prints the same on Python 2 and 3.
        print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
# Ad-hoc run when the module is executed as a script: feeds a fixed list of
# (source, translation) pairs through batchruntests().
if __name__ == '__main__':
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.",
                r"start and end! they must match."),
               (r"check for matching %variables marked like %this",
                r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this",
                r"%that %variable is marked"),
               (r"check for mismatching %variables% too",
                r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" "),
              ]
    batchruntests(testset)